/*
   File: display.cc

   By: Alex de Jong (original by MSSG)
   Created: March 1996
   
   Description:
   Multi-threaded display class. The display class is able to
   synchronize frames with a synchronization object, which
   in turn is updated by a decoder timer.
*/

#ifdef __GNUG__
#pragma implementation
#endif

#include "athread.hh"

#include <stdio.h>
#include <fstream.h>
#include <sys/time.h>

#include "error.hh"
#include "debug.hh"
#include "util.hh"

#include "videoconst.hh"
#include "sync.hh"
#include "display.hh"

extern int quiet;
extern int coded_picture_width;
extern int coded_picture_height;
extern int prog_seq;
extern int chroma_format;
extern int chrom_width;
extern int pict_struct, topfirst;
extern int convmat[8][4];
extern int matrix_coefficients;
extern int playedlastframe;

// #define USE_DGA 1               /* enable this to use the DGA extension */


#ifdef SH_MEM
// Dummies to get rid of warnings
extern "C" {
int XShmQueryExtension(Display*);
int XShmGetEventBase(Display*);
}
#ifdef USE_DGA
#include <X11/extensions/xf86dga.h>
#include <X11/extensions/xf86vmode.h>
#endif
#endif


#ifdef SOLARIS_SDK_XIL

// XIL error callback (installed from DisplayX11::init): forwards the error to
// the next handler in the XIL chain, reports it and terminates the process.
// The parameter must not be called `error`; that would shadow the error()
// reporting function used below.
Xil_boolean error_handler(XilError xil_error){
  xil_call_next_error_handler(xil_error);
  error("XIL received an error: exiting!");
  exit(1);
  return True;  // not reached; keeps the callback signature satisfied
}
#endif // SOLARIS_SDK_XIL


// Opens the X display, selects a visual, creates and maps a 100x100 window
// titled `title`, allocates the dither palette when the visual is
// PseudoColor, and builds the clipping and ordered-dither lookup tables.
//   title - window and icon name
//   s     - synchronization object waited on before each frame is shown;
//           may be 0 (display_image checks it before use)
DisplayX11::DisplayX11(const char* title, Synchronization* s){
  TRACER("DisplayX11::DisplayX11(const char* title, Synchronization* s)");
  int crv, cbu, cgu, cgv;
  int y, u, v, r, g, b;
  int i, j;
  Colormap cmap;
  XColor xcolor;
  unsigned int fg, bg;
  XSizeHints hint;
  unsigned long tmp_pixel;
  XWindowAttributes xwa;
  int screen;
  
  // init to avoid invalid destroy in destructor
  ximage=0;
  ximage2=0;
  // No display thread has been started yet. The destructor joins thread_id
  // whenever `terminated` is 0, so it must not be left uninitialized.
  terminated=1;
  // Synchronization with decoder clock
  sync=s;

  // Init display lock/condition to prevent threads to pass eachother
  source=0;

#ifdef SOLARIS_SDK_XIL
  resized=0;  // window has not been resized
  horizontal_size=100;
  vertical_size=100;
  horizontal_factor=1.0;
  vertical_factor=1.0;
#else
#ifdef SH_MEM
  CompletionType = -1;
  shmem_flag = 0;  // exit_display() tests this even if init() never ran
#endif
#endif

  // create clipping table: clp[i] is valid for -384 <= i < 640 and clamps i
  // to 0..255. clp points 384 entries into the allocation.
  clp=new unsigned char[1024];  // clip table
  clp += 384;
  for (i=-384; i<640; i++) 
    clp[i] = (i<0) ? 0 : ((i>255) ? 255 : i);

  if (!(display=XOpenDisplay(0))){
    error("Can not open display\n");
    athr_exit(0);
  }

#ifdef TRACE
  XSynchronize(display, 1);
#endif
    
  screen = DefaultScreen(display);

  // find best display
#ifdef TRUECOLORS      // this slows down performence because of 24 bits images (3x) 
#if (defined( SOLARIS_SDK_XIL) )
  if (XMatchVisualInfo(display, screen, 24, TrueColor, &vinfo)){
  } else
#endif
#endif
#if (defined(LINUX))
  if (XMatchVisualInfo(display, screen, 16, TrueColor, &vinfo)){
  } else
#endif
  if (XMatchVisualInfo(display, screen, 8, PseudoColor, &vinfo)){
  }
  else if (XMatchVisualInfo(display, screen, 8, GrayScale, &vinfo)){
  }
  else if (XMatchVisualInfo(display, screen, 8, StaticGray, &vinfo)){
  }
  else if (XMatchVisualInfo(display, screen, 1, StaticGray, &vinfo)) {
  }
#ifdef LINUX
  else error("requires 16 bit display\n");
#else
  else error("requires 8 bit display\n");
#endif

  // Make the window
  hint.x = 200;
  hint.y = 200;
  hint.width = 100;
  hint.height= 100;
  hint.flags = PPosition | PSize;
  bpp = vinfo.depth;
  if (vinfo.red_mask == 0x7c00)
    rgb_mode = 1;   // RGB555    for more modes see yuv12-rgb.s
  else
    rgb_mode = 0;   // RGB565
  if (vinfo.c_class==TrueColor && bpp == 24 ){
#ifdef SOLARIS_SDK_XIL
    bands=3;
#endif
    cmap=XCreateColormap(display, DefaultRootWindow(display), vinfo.visual, AllocNone);
    
    XSetWindowAttributes xswa;
    xswa.colormap = cmap;
    xswa.event_mask = StructureNotifyMask;
    xswa.border_pixel = BlackPixel(display, screen);
    
    window=XCreateWindow(display, DefaultRootWindow(display),
                           hint.x, hint.y, hint.width, hint.height,
                           0,
                           vinfo.depth, InputOutput, vinfo.visual,
                           CWBorderPixel | CWColormap | CWEventMask, &xswa);
  }
  else {
#ifdef SOLARIS_SDK_XIL
     bands=1;
#endif
    // Get some colors
    bg = WhitePixel(display, screen);
    fg = BlackPixel(display, screen);

    window=XCreateSimpleWindow(display, DefaultRootWindow (display),
                               hint.x, hint.y, hint.width, hint.height, 4, fg, bg);
  }

  // Tell other applications about this window
  XSetStandardProperties(display, window, title, title, None, NULL, 0, &hint);

  XSelectInput(display, window, StructureNotifyMask);

  // Map window
  XMapWindow(display, window);

  // Wait for map.
  do {
    XNextEvent(display, &event);
  }
  while (event.type != MapNotify || event.xmap.event != window);

  XSelectInput(display, window, NoEventMask);

  if (vinfo.c_class==PseudoColor){     // Do dithering before display
    int privte=0;
    
    // allocate colors
    gc = DefaultGC(display, screen);
    cmap = DefaultColormap(display, screen);

    // matrix coefficients
    crv = convmat[matrix_coefficients][0];
    cbu = convmat[matrix_coefficients][1];
    cgu = convmat[matrix_coefficients][2];
    cgv = convmat[matrix_coefficients][3];
    
    /* color allocation:
     * i is the (internal) 8 bit color number, it consists of separate
     * bit fields for Y, U and V: i = (yyyyuuvv), we don't use yyyy=0000
     * yyyy=0001 and yyyy=1111, this leaves 48 colors for other applications
     *
     * the allocated colors correspond to the following Y, U and V values:
     * Y:   40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232
     * U,V: -48, -16, 16, 48
     *
     * U and V values span only about half the color space; this gives
     * usually much better quality, although highly saturated colors can
     * not be displayed properly
     *
     * translation to R,G,B is implicitly done by the color look-up table
     */
    // THIS IS REALLY SLOW on a 24 Bit display. Please drop me a line if
    // you know how to do this faster (alex.dejong@nist.gov)

    // DON'T ALLOCATE 240 COLORS!! Just 215 will do and stops Solaris
    // from colors blinking when moving the cursor
    // Exactly one definition on every platform: 215 on Solaris, 240 elsewhere.
#if (defined(SOLARIS))
    int number_of_colors=215;
#else
    int number_of_colors=240;
#endif

    for (i=32; i<number_of_colors; i++){
      /* color space conversion */
      y = 16*((i>>4)&15) + 8;
      u = 32*((i>>2)&3)  - 48;
      v = 32*(i&3)       - 48;
      
      y = 76309 * (y - 16); /* (255/219)*65536 */
      
      /* 32768 = 0.5 in 16.16 fixed point: round before dropping the fraction */
      r = clp[(y + crv*v + 32768)>>16];
      g = clp[(y - cgu*u -cgv*v + 32768)>>16];
      b = clp[(y + cbu*u + 32768)>>16];
      
      /* X11 colors are 16 bit */
      xcolor.red   = r << 8;
      xcolor.green = g << 8;
      xcolor.blue  = b << 8;
      
      if (XAllocColor(display, cmap, &xcolor) != 0) pixel[i] = xcolor.pixel;
      else {
        /* allocation failed, have to use a private colormap */
        
        if (privte) error("Couldn't allocate private colormap");
        
        privte = 1;
        
        if (!quiet)
          fprintf(stderr, "Using private colormap (%d colors were available)\n",
                  i-32);
        
        /* Free colors. */
        while (--i >= 32){
          tmp_pixel = pixel[i]; /* because XFreeColors expects unsigned long */
          XFreeColors(display, cmap, &tmp_pixel, 1, 0);
        }
         /* i is now 31, this restarts the outer loop */
        
        /* create private colormap */
        
        XGetWindowAttributes(display, window, &xwa);
        cmap = XCreateColormap(display, window, xwa.visual, AllocNone);
        XSetWindowColormap(display, window, cmap);
      }
    }
  }
  else
    gc = DefaultGC(display, screen);

/*
   Init dither
   4x4 ordered dither
   threshold pattern:
    0  8  2 10
   12  4 14  6
    3 11  1  9
   15  7 13  5
*/

  unsigned char ctab[256+32];
  
  // ytab: luminance (plus dither offset) -> pixel value, 16 entries per level.
  // NOTE(review): pixel[] is only filled in the PseudoColor branch above.
  for (i=0; i<256+16; i++){
    v = (i-8)>>4;
    if (v<2) v=2;
    else if (v>14) v=14;
    for (j=0; j<16; j++) 
      ytab[16*i+j] = pixel[(v<<4)+j];
  }

  // ctab: chroma value (plus dither offset) -> 2 bit quantized chroma level
  for (i=0; i<256+32; i++){
    v = (i+48-128)>>5;
    if (v<0) v=0;
    else if (v>3) v=3;
    ctab[i]=v;
  }

  // uvtab: packs four 2 bit chroma levels for each (i, j) index pair
  for (i=0; i<255+15; i++)
    for (j=0; j<255+15; j++)
      uvtab[256*i+j]=(ctab[i+16]<<6)|(ctab[j+16]<<4)|(ctab[i]<<2)|ctab[j];
}


// Waits for a still-running display thread, releases the image resources via
// exit_display() and frees the clipping table.
// NOTE(review): athr_join() is called while display_lock is held, and the
// display threads take the same lock on entry - confirm this cannot deadlock.
DisplayX11::~DisplayX11(){ 
  TRACER("DisplayX11::~DisplayX11()");
  display_lock.lock();
  if (!terminated) athr_join(thread_id);
  exit_display();
  // The constructor allocated the table with new[] and then advanced clp by
  // 384 entries; step back to the start of the allocation and use the array
  // form of delete.
  delete[] (clp - 384);
  display_lock.unlock();
}


// Thread body: dithers the pending source frame of `d` and shows the result,
// all while holding d's display lock.
void* DisplayX11::dither_thread(DisplayX11* d){
  DisplayX11& self = *d;

  self.display_lock.lock();
  self.terminated = 0;                  // cleared for as long as the work runs

  self.dither_image(self.source);
  self.display_image(self.ximage, self.dithered_image);

  self.source = 0;                      // clear the source pointer ...
  self.display_cond.signal();           // ... and signal the display condition
  self.terminated = 1;
  self.display_lock.unlock();

#if !defined(IRIX) && !defined(SOLARIS_SDK_XIL) && !defined(LINUX)
  athr_exit(0);
#endif
  return 0;
}


// Thread body: shows the second image (ximage2 / dithered_image2) of `d`
// while holding d's display lock. Compiled to a no-op when LINUX is defined.
void* DisplayX11::display_thread(DisplayX11* d){
#ifndef LINUX
  DisplayX11& self = *d;

  self.display_lock.lock();
  self.terminated = 0;                  // cleared for as long as the work runs
  self.display_image(self.ximage2, self.dithered_image2);
  self.terminated = 1;
  self.display_lock.unlock();

#if !defined(IRIX) && !defined(SOLARIS_SDK_XIL) && !defined(LINUX)
  athr_exit(0);
#endif
#endif
  return 0;
}


// Resizes the window to h_size x v_size and creates the image objects used
// for display, holding display_lock for the duration.
//   XIL build    : opens XIL and creates the XIL images.
//   SH_MEM build : tries DGA (if compiled in), then MIT-SHM images, and falls
//                  back to plain XImages if shared memory cannot be set up.
//   otherwise    : plain XImages backed by heap buffers.
// Always returns 1.
int DisplayX11::init(int h_size, int v_size){
  TRACER("void DisplayX11::init(int h_size, int v_size)");

  display_lock.lock();  // lock the display from others access for now

  // resize window
  horizontal_size=h_size;
  vertical_size=v_size;
  XResizeWindow(display, window, horizontal_size, vertical_size);

#ifdef SOLARIS_SDK_XIL
  if ((State = xil_open()) == NULL)
    exit(1);   // XIL sends an error message to stderr if xil_open fails 

  // Install error handler
  if (xil_install_error_handler(State, error_handler) == XIL_FAILURE)
    error("unable to install error handler for XIL");

  if (!(displayimage=xil_create_from_window(State, display, window)))
    exit(1); // XIL sends error message to stderr if xil_create_from_window fails

  ximage=xil_create(State, coded_picture_width, coded_picture_height, bands, XIL_BYTE);
  ximage2=xil_create(State, coded_picture_width, coded_picture_height, bands, XIL_BYTE);
  resized_image=xil_create(State, coded_picture_width, coded_picture_height, bands, XIL_BYTE);

/*
  // Only when using XIL colorspace conversion; this seems rather slow
  if (bands==3){
    XilColorspace cspace=xil_colorspace_get_by_name(State, "ycc601");
    xil_set_colorspace(ximage, cspace);
    cspace=xil_colorspace_get_by_name(State, "ycc601");
    xil_set_colorspace(resized_image, cspace);
    cspace=xil_colorspace_get_by_name(State, "rgb709");
    xil_set_colorspace(displayimage, cspace);
  }
*/

  XSelectInput(display, window, StructureNotifyMask);
#else

#ifdef SH_MEM
  shmem_flag = 0;
#ifdef USE_DGA             /* enable this if you want to have DGA support */
  int EventBase, ErrorBase, flags,vp_width,vp_height,bank,ram;
  if (XF86DGAQueryExtension (display, &EventBase, &ErrorBase)) {
      XF86DGAQueryDirectVideo (display, XDefaultScreen (display), &flags);
      if ((flags & XF86DGADirectPresent) == 0) {
          if (!quiet) message ("dga: no direct present");
      }
      else {
          shmem_flag = 2;
          if (!quiet) message ("using DGA ");
      }

      XF86DGAGetVideo (display, XDefaultScreen (display), (char **) &dithered_image,
                        &xwidth, &bank, &ram);
      dithered_image2 = dithered_image;
      XF86DGAGetViewPortSize (display, XDefaultScreen (display),
                        &vp_width, &vp_height);
      if (vp_height < v_size || vp_width < h_size) {
        if (!quiet) message("View Port too small for DGA");
        shmem_flag = 0;
      }
      else {
        XF86DGASetViewPort (display, XDefaultScreen (display), 0, 0);
        XF86DGADirectVideo (display, XDefaultScreen (display),XF86DGADirectGraphics );
      }
  }
#endif
  // Braces make explicit that the else belongs to the extension query.
  if (!shmem_flag){
    if (XShmQueryExtension(display)) shmem_flag = 1;
    else {
      shmem_flag = 0;
      if (!quiet){
        message("Shared memory not supported");
        message("Reverting to normal Xlib");
      }
    }
  }

  if (shmem_flag==1) CompletionType = XShmGetEventBase(display) + ShmCompletion;

  InstallXErrorHandler();

  if (shmem_flag==1){
    ximage = XShmCreateImage(display, None, bpp, ZPixmap, NULL,
                             &shminfo1,
                             coded_picture_width, coded_picture_height);

    if (!prog_seq)
      ximage2 = XShmCreateImage(display, None, bpp, ZPixmap, NULL,
                                &shminfo2,
                                coded_picture_width, coded_picture_height);

    /* If no go, then revert to normal Xlib calls. */

    if (ximage==NULL || (!prog_seq && ximage2==NULL)){
      if (ximage!=NULL) XDestroyImage(ximage);
      if (!prog_seq && ximage2!=NULL) XDestroyImage(ximage2);
      if (!quiet)
        fprintf(stderr, "Shared memory error, disabling (Ximage error)\n");
      goto shmemerror;
    }

    /* Success here, continue. */

    shminfo1.shmid = shmget(IPC_PRIVATE, 
                            ximage->bytes_per_line * ximage->height,
                            IPC_CREAT | 0777);
    if (!prog_seq)
      shminfo2.shmid = shmget(IPC_PRIVATE, 
                              ximage2->bytes_per_line * ximage2->height,
                              IPC_CREAT | 0777);

    if (shminfo1.shmid<0 || (!prog_seq && shminfo2.shmid<0)){
      /* Remove whichever segment was obtained so it does not outlive us. */
      if (shminfo1.shmid>=0) shmctl(shminfo1.shmid, IPC_RMID, 0);
      if (!prog_seq && shminfo2.shmid>=0) shmctl(shminfo2.shmid, IPC_RMID, 0);
      XDestroyImage(ximage);
      if (!prog_seq) XDestroyImage(ximage2);
      if (!quiet)
        fprintf(stderr, "Shared memory error, disabling (seg id error)\n");
      goto shmemerror;
    }

    shminfo1.shmaddr = (char *) shmat(shminfo1.shmid, 0, 0);
    /* shminfo2.shmid is only valid when a second image was created. */
    if (!prog_seq)
      shminfo2.shmaddr = (char *) shmat(shminfo2.shmid, 0, 0);

    if (shminfo1.shmaddr==((char *) -1) ||
        (!prog_seq && shminfo2.shmaddr==((char *) -1))){
      XDestroyImage(ximage);
      if (shminfo1.shmaddr!=((char *) -1)) shmdt(shminfo1.shmaddr);
      shmctl(shminfo1.shmid, IPC_RMID, 0);
      if (!prog_seq){
        XDestroyImage(ximage2);
        if (shminfo2.shmaddr!=((char *) -1))
          shmdt(shminfo2.shmaddr);
        shmctl(shminfo2.shmid, IPC_RMID, 0);
      }
      if (!quiet)
      {
        fprintf(stderr, "Shared memory error, disabling (address error)\n");
      }
      goto shmemerror;
    }

    ximage->data = shminfo1.shmaddr;
    dithered_image = (unsigned char *)ximage->data;
    shminfo1.readOnly = False;
    XShmAttach(display, &shminfo1);
    if (!prog_seq){
      ximage2->data = shminfo2.shmaddr;
      dithered_image2 = (unsigned char *)ximage2->data;
      shminfo2.readOnly = False;
      XShmAttach(display, &shminfo2);
    }

    XSync(display, False);

    if (gXErrorFlag){
      /* Ultimate failure here. */
      XDestroyImage(ximage);
      shmdt(shminfo1.shmaddr);
      shmctl(shminfo1.shmid, IPC_RMID, 0);
      if (!prog_seq){
        XDestroyImage(ximage2);
        shmdt(shminfo2.shmaddr);
        shmctl(shminfo2.shmid, IPC_RMID, 0);
      }
      if (!quiet)
        fprintf(stderr, "Shared memory error, disabling.\n");
      gXErrorFlag = 0;
      goto shmemerror;
    }
    else {
      /* Mark the segments for removal once the last attachment goes away. */
      shmctl(shminfo1.shmid, IPC_RMID, 0);
      if (!prog_seq)
        shmctl(shminfo2.shmid, IPC_RMID, 0);
    }

    if (!quiet){
      fprintf(stderr, "Sharing memory.\n");
    }
  }
  else if (shmem_flag != 2) {

shmemerror:
    shmem_flag = 0;
#endif

    // Plain Xlib images backed by heap buffers. The buffer is sized by whole
    // bytes per pixel so the 16 bpp converter (two bytes per pixel) fits; for
    // depths up to 8 this is still one byte per pixel. The buffer is allocated
    // first and handed to XCreateImage so the image never points at a
    // temporary.
    if (!(dithered_image = new unsigned char[coded_picture_width*
                                             coded_picture_height*
                                             ((bpp+7)/8)]))
      error("new failed");

    ximage = XCreateImage(display,None,bpp,ZPixmap,0,(char*) dithered_image,
                          coded_picture_width,coded_picture_height,8,0);

    if (!prog_seq){
      if (!(dithered_image2 = new unsigned char[coded_picture_width*
                                                coded_picture_height*
                                                ((bpp+7)/8)]))
        error("new failed");

      ximage2 = XCreateImage(display,None,bpp,ZPixmap,0,(char*) dithered_image2,
                             coded_picture_width,coded_picture_height,8,0);
    }

#ifdef SH_MEM
  }

  DeInstallXErrorHandler();
#endif

#endif // SOLARIS_SDK_XIL

  display_lock.unlock();
  return 1;
}



void DisplayX11::exit_display(){
#ifdef SOLARIS_SDK_XIL
  xil_destroy(displayimage);
  xil_destroy(resized_image);
  if (ximage) xil_destroy(ximage);
  if (ximage2) xil_destroy(ximage2);
  xil_close(State);
  XCloseDisplay(display);
#else
#ifdef SH_MEM
#ifdef USE_DGA
  if (shmem_flag==2)
    XF86DGADirectVideo (display, XDefaultScreen (display), 0);
#endif
  if (shmem_flag==1){
    XShmDetach(display, &shminfo1);
    shmdt(shminfo1.shmaddr);
    if (!prog_seq){
      XShmDetach(display, &shminfo2);
      XDestroyImage(ximage2);
      shmdt(shminfo2.shmaddr);
    }
  }
#else
  XDestroyImage(ximage);
#endif
#endif
}


// Shows one dithered image in the window.
//   ximg         - image object to display (XIL image or XImage)
//   dithered_img - pixel data for ximg; used by the plain Xlib path only
// Marks the frame as played, then waits on the synchronization object (if
// any) before drawing. In DGA mode (shmem_flag==2) nothing is drawn here.
#ifdef SOLARIS_SDK_XIL
void DisplayX11::display_image(XilImage ximg, unsigned char *dithered_img){
#else
void DisplayX11::display_image(XImage *ximg, unsigned char *dithered_img){
#endif
  DEBUGGER("void DisplayX11::display_image(ximage, unsigned char *dithered_image)");
  playedlastframe=1;  // indicate that frame is displayed
  if (sync)  {
    sync->wait(1);    // wait for presentation time stamp (PTS), id=1
  }
#ifdef SH_MEM                            // display dithered image
  if (shmem_flag==2)
    return;
#endif


#ifdef SOLARIS_SDK_XIL
  xil_import(ximg, TRUE);
  if (resized){
    // , general and linear are Ok (These is no good for this: bilinear, bicubic)
    // Scale the image that was passed in, not always the first image.
    xil_scale(ximg, resized_image, "general", horizontal_factor, vertical_factor);
    // if (bands==1)   // Use XIL for color conversion
    xil_copy(resized_image, displayimage);
    // else 
    //   xil_color_convert(resized_image, displayimage);
  }
  else {
    // if (bands==1)   // Use XIL for color conversion
    xil_copy(ximg, displayimage);
    // else 
    //   xil_color_convert(ximg, displayimage);
  }

  // check for geometry changes
  if (XCheckWindowEvent(display, window, StructureNotifyMask, &event))  resize();
#else
  // Nothing to draw if the image was never created.
  if (!ximg) return;
#ifdef SH_MEM                            // display dithered image
  if (shmem_flag==1){
    XShmPutImage(display, window, gc, ximg, 0,0,0,0,ximg->width,ximg->height,True);
    XFlush(display);
  }
  else 
#endif // SH_MEM
  {
    ximg->data=(char*) dithered_img;
    XPutImage(display, window, gc, ximg, 0, 0, 0, 0, ximg->width, ximg->height);
  }
#endif // SOLARIS_SDK_XIL
}


#ifdef SOLARIS_SDK_XIL

int DisplayX11::dither_image_rgb24(unsigned char* src[], unsigned char* dithered_img){
  register int h=0, w=-1;
  register unsigned char *Y, *Cb, *Cr;  
  Y=src[0];
  Cb=src[1];
  Cr=src[2];
  register Xil_unsigned8* data;
  data=dithered_img;
  
  //  map data in XIL image  
  for (h=0; h<coded_picture_height; h++){
    for (w=0; w<coded_picture_width; w++){
      *data++ = clp[(int)(*Y + 1.371*(*Cr-128))];  
      *data++ = clp[(int)(*Y - 0.698*(*Cr-128) - 0.336*(*Cr-128))]; 
      *data++ = clp[(int)(*Y++ + 1.732*(*Cb-128))];
      if ((w % 4)==0){
        Cr++;
        Cb++;
      }
    }
  }
  return 0;
};

#endif

#ifdef LINUX
// External YUV -> RGB converter; called from dither_image_rgb16 when
// rgb_mode is non-zero.
extern "C" void yuv_2_rgb(void *,void *,void *,int,int,int,int,int,void *,int,int,int,int);

// Constant operands for the MMX code in Color16DitherImageMod, which refers to
// them by symbol name. Each table is meant to be read as one 64 bit operand.
// NOTE(review): the `unsigned long` tables are 8 bytes only where long is
// 32 bits - confirm the target.
static long long MMX_0 = 0L;                                                            // zero; not referenced by the asm below
static unsigned long  MMX_10w[]         = {0x00100010, 0x00100010};                     // 4 words of 0x0010; only used by commented-out psubw lines
static unsigned long  MMX_80w[]         = {0x00800080, 0x00800080};                     // 4 words of 0x0080; subtracted from Cb/Cr

static unsigned long  MMX_00FFw[]       = {0x00ff00ff, 0x00ff00ff};                     // 4 words of 0x00FF; keeps the even luma bytes

// Conversion coefficients; the asm shifts the sums right by 6 afterwards.
static unsigned short MMX_Ublucoeff[]   = {0x81, 0x81, 0x81, 0x81};                     // Cb -> blue, 0x0081
static unsigned short MMX_Vredcoeff[]   = {0x66, 0x66, 0x66, 0x66};                     // Cr -> red, 0x0066

static unsigned short MMX_Ugrncoeff[]   = {0xffe8, 0xffe8, 0xffe8, 0xffe8};             // Cb -> green, 0xFFE8 (-24 as a signed word)
static unsigned short MMX_Vgrncoeff[]   = {0xffcd, 0xffcd, 0xffcd, 0xffcd};             // Cr -> green, 0xFFCD (-51 as a signed word)

static unsigned short MMX_Ycoeff[]      = {0x4a, 0x4a, 0x4a, 0x4a};                     // luma scale, 0x004A
static unsigned short MMX_redmask[]     = {0xf800, 0xf800, 0xf800, 0xf800};             // top 5 bits of each word (0xF800)

static unsigned short MMX_grnmask[]     = {0x7e0, 0x7e0, 0x7e0, 0x7e0};                 // middle 6 bits of each word (0x07E0)
// static unsigned short MMX_blumask[]  = {0x1f, 0x1f, 0x1f, 0x1f};                     // low 5 bits (0x001F); unused


/* Need to have access to the memory after
*/
/*
   Converts planar Y/Cr/Cb data to 16 bit pixels with MMX code.

   lum   - luma plane, cols bytes per row, rows rows
   cr,cb - chroma planes; 4 samples of each are consumed per 8 luma columns
           and the pointers are not rewound between row pairs
   out   - destination, one unsigned short per pixel
   rows, cols - luma height and width
   mod   - extra pixels per output row beyond cols

   Each pass of the inner loop loads 8 luma bytes from the current row and
   8 from the row below it, (%2) and (%2,%3), and stores 8 pixels to each of
   two output rows, (%4) and (%4,%5,2). `x` counts columns; when it reaches
   cols the luma pointer skips the row already handled, the output pointer is
   advanced by `mod` bytes and x is reset. The outer loop ends when the luma
   pointer reaches `y` (lum + cols*rows).

   Pixels are assembled with MMX_redmask (0xf800), MMX_grnmask (0x07e0) and a
   right shift by 11 for blue.

   NOTE(review): the asm statement modifies its input operands, declares no
   outputs or clobbers, and applies addl/cmpl to pointer operands - presumably
   written for 32 bit x86 only; confirm before building elsewhere.
   NOTE(review): the original remark above presumably means the loads may read
   beyond the nominal end of the buffers (e.g. cols not a multiple of 8) -
   TODO confirm.
*/
void
Color16DitherImageMod(  unsigned char *lum, unsigned char *cr, unsigned char *cb,
                                                                unsigned char *out, int rows, int cols, int mod)

{
   unsigned short *row1;
   int x;
        unsigned char *y;
        int col1;

   row1 = (unsigned short *)out;
        // col1: output row pitch in pixels; used (scaled by 2) to address the second output row
        col1 = cols +mod;
        // mod becomes the byte distance added to row1 after each pair of rows: 2*(cols + 2*mod)
        mod += cols +mod;
        mod *=2;
   // y: end of the luma plane, the outer loop bound
   y = lum +cols*rows;
        x = 0;
        __asm__ __volatile__(
                ".align 8\n"
                "1:\n"
                        "movd           (%1),                   %%mm0\n"        // 4 Cb         0  0  0  0 u3 u2 u1 u0
                        "pxor           %%mm7,                  %%mm7\n"
                        "movd           (%0),                   %%mm1\n" // 4 Cr                0  0  0  0 v3 v2 v1 v0
                        "punpcklbw      %%mm7,                  %%mm0\n" // 4 W cb   0 u3  0 u2  0 u1  0 u0
                        "punpcklbw      %%mm7,                  %%mm1\n" // 4 W cr   0 v3  0 v2  0 v1  0 v0
                        "psubw          MMX_80w,                %%mm0\n"
                        "psubw          MMX_80w,                %%mm1\n"
                        "movq           %%mm0,                  %%mm2\n"        // Cb                   0 u3  0 u2  0 u1  0 u0
                        "movq           %%mm1,                  %%mm3\n" // Cr
                        "pmullw         MMX_Ugrncoeff,          %%mm2\n" // Cb2green 0 R3  0 R2  0 R1  0 R0
                        "movq           (%2),                   %%mm6\n"        // L1      l7 L6 L5 L4 L3 L2 L1 L0
                        "pmullw         MMX_Ublucoeff,          %%mm0\n" // Cb2blue
                        "pand           MMX_00FFw,              %%mm6\n" // L1      00 L6 00 L4 00 L2 00 L0
                        "pmullw         MMX_Vgrncoeff,          %%mm3\n" // Cr2green
                        "movq           (%2),                   %%mm7\n" // L2
                        "pmullw         MMX_Vredcoeff,          %%mm1\n" // Cr2red
//                      "psubw          MMX_10w,                %%mm6\n"
                        "psrlw          $8,                     %%mm7\n"        // L2           00 L7 00 L5 00 L3 00 L1
                        "pmullw         MMX_Ycoeff,             %%mm6\n" // lum1
//                      "psubw          MMX_10w,                %%mm7\n" // L2
                        "paddw          %%mm3,                  %%mm2\n" // Cb2green + Cr2green == green
                        "pmullw         MMX_Ycoeff,             %%mm7\n"  // lum2

                        "movq           %%mm6,                  %%mm4\n"  // lum1
                        "paddw          %%mm0,                  %%mm6\n"  // lum1 +blue 00 B6 00 B4 00 B2 00 B0
                        "movq           %%mm4,                  %%mm5\n"  // lum1
                        "paddw          %%mm1,                  %%mm4\n"  // lum1 +red  00 R6 00 R4 00 R2 00 R0
                        "paddw          %%mm2,                  %%mm5\n"  // lum1 +green 00 G6 00 G4 00 G2 00 G0
                        "psraw          $6,                     %%mm4\n"  // R1 0 .. 64
                        "movq           %%mm7,                  %%mm3\n"  // lum2                       00 L7 00 L5 00 L3 00 L1
                        "psraw          $6,                     %%mm5\n"  // G1  - .. +
                        "paddw          %%mm0,                  %%mm7\n"  // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
                        "psraw          $6,                     %%mm6\n"  // B1         0 .. 64
                        "packuswb       %%mm4,                  %%mm4\n"  // R1 R1
                        "packuswb       %%mm5,                  %%mm5\n"  // G1 G1
                        "packuswb       %%mm6,                  %%mm6\n"  // B1 B1
                        "punpcklbw      %%mm4,                  %%mm4\n"
                        "punpcklbw      %%mm5,                  %%mm5\n"

                        "pand           MMX_redmask,            %%mm4\n"
                        "psllw          $3,                     %%mm5\n"  // GREEN       1
                        "punpcklbw      %%mm6,                  %%mm6\n"
                        "pand           MMX_grnmask,            %%mm5\n"
                        "pand           MMX_redmask,            %%mm6\n"
                        "por            %%mm5,                  %%mm4\n" //
                        "psrlw          $11,                    %%mm6\n"                // BLUE        1
                        "movq           %%mm3,                  %%mm5\n" // lum2
                        "paddw          %%mm1,                  %%mm3\n"        // lum2 +red      00 R7 00 R5 00 R3 00 R1
                        "paddw          %%mm2,                  %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1
                        "psraw          $6,                     %%mm3\n" // R2
                        "por            %%mm6,                  %%mm4\n" // MM4
                        "psraw          $6,                     %%mm5\n" // G2
                        "movq           (%2, %3),               %%mm6\n"  // L3
                        "psraw          $6,                     %%mm7\n"
                        "packuswb       %%mm3,                  %%mm3\n"
                        "packuswb       %%mm5,                  %%mm5\n"
                        "packuswb       %%mm7,                  %%mm7\n"
                        "pand                   MMX_00FFw,              %%mm6\n"  // L3
                        "punpcklbw      %%mm3,                  %%mm3\n"
//                              "psubw          MMX_10w,                        %%mm6\n"  // L3
                        "punpcklbw      %%mm5,                  %%mm5\n"
                        "pmullw         MMX_Ycoeff,             %%mm6\n"  // lum3
                        "punpcklbw      %%mm7,                  %%mm7\n"
                        "psllw          $3,                             %%mm5\n"  // GREEN 2
                        "pand                   MMX_redmask,    %%mm7\n"
                        "pand                   MMX_redmask,    %%mm3\n"
                        "psrlw          $11,                            %%mm7\n"  // BLUE  2
                        "pand                   MMX_grnmask,    %%mm5\n"
                        "por                    %%mm7,                  %%mm3\n"
                        "movq                   (%2,%3),                        %%mm7\n"  // L4
                        "por                    %%mm5,                  %%mm3\n"     //
                        "psrlw          $8,                             %%mm7\n"    // L4
                        "movq                   %%mm4,                  %%mm5\n"
//                              "psubw          MMX_10w,                        %%mm7\n"                // L4
                        "punpcklwd      %%mm3,                  %%mm4\n"
                        "pmullw         MMX_Ycoeff,             %%mm7\n"    // lum4
                        "punpckhwd      %%mm3,                  %%mm5\n"

                        "movq                   %%mm4,                  (%4)\n"
                        "movq                   %%mm5,                  8(%4)\n"

                        "movq                   %%mm6,                  %%mm4\n"        // Lum3
                        "paddw          %%mm0,                  %%mm6\n"                // Lum3 +blue

                        "movq                   %%mm4,                  %%mm5\n"                        // Lum3
                        "paddw          %%mm1,                  %%mm4\n"       // Lum3 +red
                        "paddw          %%mm2,                  %%mm5\n"                        // Lum3 +green
                        "psraw          $6,                             %%mm4\n"
                        "movq                   %%mm7,                  %%mm3\n"                        // Lum4
                        "psraw          $6,                             %%mm5\n"
                        "paddw          %%mm0,                  %%mm7\n"                   // Lum4 +blue
                        "psraw          $6,                             %%mm6\n"                        // Lum3 +blue
                        "movq                   %%mm3,                  %%mm0\n"  // Lum4
                        "packuswb       %%mm4,                  %%mm4\n"
                        "paddw          %%mm1,                  %%mm3\n"  // Lum4 +red
                        "packuswb       %%mm5,                  %%mm5\n"
                        "paddw          %%mm2,                  %%mm0\n"         // Lum4 +green
                        "packuswb       %%mm6,                  %%mm6\n"
                        "punpcklbw      %%mm4,                  %%mm4\n"
                        "punpcklbw      %%mm5,                  %%mm5\n"
                        "punpcklbw      %%mm6,                  %%mm6\n"
                        "psllw          $3,                             %%mm5\n" // GREEN 3
                        "pand                   MMX_redmask,    %%mm4\n"
                        "psraw          $6,                             %%mm3\n" // psr 6
                        "psraw          $6,                             %%mm0\n"
                        "pand                   MMX_redmask,    %%mm6\n" // BLUE
                        "pand                   MMX_grnmask,    %%mm5\n"
                        "psrlw          $11,                            %%mm6\n"  // BLUE  3
                        "por                    %%mm5,                  %%mm4\n"
                        "psraw          $6,                             %%mm7\n"
                        "por                    %%mm6,                  %%mm4\n"
                        "packuswb       %%mm3,                  %%mm3\n"
                        "packuswb       %%mm0,                  %%mm0\n"
                        "packuswb       %%mm7,                  %%mm7\n"
                        "punpcklbw      %%mm3,                  %%mm3\n"
                        "punpcklbw      %%mm0,                  %%mm0\n"
                        "punpcklbw      %%mm7,                  %%mm7\n"
                        "pand                   MMX_redmask,    %%mm3\n"
                        "pand                   MMX_redmask,    %%mm7\n" // BLUE
                        "psllw          $3,                             %%mm0\n" // GREEN 4
                        "psrlw          $11,                            %%mm7\n"
                        "pand                   MMX_grnmask,    %%mm0\n"
                        "por                    %%mm7,                  %%mm3\n"
                        "addl                   $8,                             %6\n"
                        "por                    %%mm0,                  %%mm3\n"

                        "movq                   %%mm4,                  %%mm5\n"

                        "punpcklwd      %%mm3,                  %%mm4\n"
                        "punpckhwd      %%mm3,                  %%mm5\n"

                        "movq                   %%mm4,                  (%4,%5,2)\n"
                        "movq                   %%mm5,                  8(%4,%5,2)\n"

                        "addl                   $8,                             %2\n"
                        "addl                   $4,                             %0\n"
                        "addl                   $4,                             %1\n"
                        "cmpl                   %3,                             %6\n"
                        "leal                   16(%4),                 %4\n"
                "jl             1b\n"
                "addl           %3,     %2\n"                   /* lum += cols */
                "addl           %7,     %4\n"                   /* row1 += mod */
                "movl           $0,     %6\n"
                "cmpl           %8,     %2\n"
                "jl             1b\n"
                :
                : "r" (cr), "r" (cb), "r" (lum), "r" (cols), "r" (row1) ,"r" (col1), "m" (x), "m" (mod)
                        , "m" (y)
                );
        // leave MMX state so later floating point code works
        __asm__ __volatile__(
                "emms\n"
        );

}


int DisplayX11::dither_image_rgb16(unsigned char* src[], unsigned char* dithered_img){
  int height,width,display_width;
  height=coded_picture_height;
  if (shmem_flag == 2)
    display_width = xwidth;
  else
    display_width=coded_picture_width;
  width=coded_picture_width;
  if (rgb_mode == 0)
    Color16DitherImageMod(src[0], src[2], src[1], dithered_img, height, width,display_width -width);
  else
    yuv_2_rgb(src[0],src[1],src[2],width,height,width,width/2,0,dithered_img,0,0,display_width*2,rgb_mode);
  return 0;
}
#endif


/*
  Select and run the conversion routine for one decoded picture.

  Progressive sequences (prog_seq set) are converted as a whole frame into
  dithered_image. Otherwise the picture is converted as two fields, one into
  dithered_image and one into dithered_image2, in the order given by
  pict_struct / topfirst. Platform specific true colour / 16 bpp paths
  short-circuit the 8 bit ditherers when they apply.
*/
void DisplayX11::dither_image(unsigned char *src[]){
#ifdef SOLARIS_SDK_XIL
  dithered_image=getImage1Data();
#endif

  if (prog_seq){
    // ---- progressive: one frame ----
    if (chroma_format==CHROMA444){
      ditherframe444(src);
      return;
    }
#ifdef SOLARIS_SDK_XIL
    if (vinfo.c_class==TrueColor && vinfo.depth==24){
      dither_image_rgb24(src, dithered_image);
      return;
    }
#endif
#ifdef LINUX 
    if (bpp == 16 ) {
      dither_image_rgb16(src, dithered_image);
      return;
    }
#endif
    ditherframe(src);
    return;
  }

  // ---- not progressive: two fields ----
#ifdef SOLARIS_SDK_XIL
  dithered_image2=getImage2Data();
#endif
#ifdef LINUX 
  if (bpp == 16 ){
    dither_image_rgb16(src, dithered_image);
    return;
  }
#endif

  // Non-zero when the top field goes into dithered_image.
  int top_into_first = (pict_struct==FRAME_PICTURE && topfirst) || pict_struct==BOTTOM_FIELD;

  if (chroma_format==CHROMA444){
    if (top_into_first){
      dithertop444(src,dithered_image);
      ditherbot444(src,dithered_image2);
    }
    else {
      ditherbot444(src,dithered_image);
      dithertop444(src,dithered_image2);
    }
  }
  else {
    if (top_into_first){
      dithertop(src,dithered_image);
      ditherbot(src,dithered_image2);
    }
    else {
      ditherbot(src,dithered_image);
      dithertop(src,dithered_image2);
    }
  }
}


/* only for 4:2:0 and 4:2:2! */

/*
  Dither a complete frame (4:2:0 or 4:2:2) into dithered_image.

  The picture is processed four lines at a time, eight pixels per inner
  step. Each chroma pair (u,v) is looked up in uvtab at (u<<8)|v plus a
  per-position offset; the resulting byte carries two 4 bit values (low
  nibble / high nibble) that are combined with (luma + per-position
  offset) << 4 to index ytab. One chroma pair serves two adjacent pixels.

  For CHROMA420 the chroma pointers are stepped back by chrom_width after
  lines j+0 and j+2, so the following luma line reads the same chroma
  samples again (assumes chrom_width equals the chroma samples consumed
  per line).
*/
void DisplayX11::ditherframe(unsigned char *src[]){
  int col;
  unsigned int c;                       // current uvtab lookup result
  unsigned char *lum = src[0];
  unsigned char *up  = src[1];
  unsigned char *vp  = src[2];
  unsigned char *out = dithered_image;

  for (int row=0; row<coded_picture_height; row+=4){

    /* line row + 0 */
    for (col=0; col<coded_picture_width; col+=8){
      c = uvtab[(up[0]<<8)|vp[0]];
      out[0] = ytab[((lum[0])<<4)|(c&15)];
      out[1] = ytab[((lum[1] +8)<<4)|(c>>4)];
      c = uvtab[((up[1]<<8)|vp[1])+1028];
      out[2] = ytab[((lum[2] +2)<<4)|(c&15)];
      out[3] = ytab[((lum[3] +10)<<4)|(c>>4)];
      c = uvtab[(up[2]<<8)|vp[2]];
      out[4] = ytab[((lum[4])<<4)|(c&15)];
      out[5] = ytab[((lum[5] +8)<<4)|(c>>4)];
      c = uvtab[((up[3]<<8)|vp[3])+1028];
      out[6] = ytab[((lum[6] +2)<<4)|(c&15)];
      out[7] = ytab[((lum[7] +10)<<4)|(c>>4)];
      lum += 8;
      up  += 4;
      vp  += 4;
      out += 8;
    }

    if (chroma_format==CHROMA420){
      up -= chrom_width;
      vp -= chrom_width;
    }

    /* line row + 1 */
    for (col=0; col<coded_picture_width; col+=8){
      c = uvtab[((up[0]<<8)|vp[0])+2056];
      out[0] = ytab[((lum[0] +12)<<4)|(c>>4)];
      out[1] = ytab[((lum[1] +4)<<4)|(c&15)];
      c = uvtab[((up[1]<<8)|vp[1])+3084];
      out[2] = ytab[((lum[2] +14)<<4)|(c>>4)];
      out[3] = ytab[((lum[3] +6)<<4)|(c&15)];
      c = uvtab[((up[2]<<8)|vp[2])+2056];
      out[4] = ytab[((lum[4] +12)<<4)|(c>>4)];
      out[5] = ytab[((lum[5] +4)<<4)|(c&15)];
      c = uvtab[((up[3]<<8)|vp[3])+3084];
      out[6] = ytab[((lum[6] +14)<<4)|(c>>4)];
      out[7] = ytab[((lum[7] +6)<<4)|(c&15)];
      lum += 8;
      up  += 4;
      vp  += 4;
      out += 8;
    }

    /* line row + 2 */
    for (col=0; col<coded_picture_width; col+=8){
      c = uvtab[((up[0]<<8)|vp[0])+1542];
      out[0] = ytab[((lum[0] +3)<<4)|(c&15)];
      out[1] = ytab[((lum[1] +11)<<4)|(c>>4)];
      c = uvtab[((up[1]<<8)|vp[1])+514];
      out[2] = ytab[((lum[2] +1)<<4)|(c&15)];
      out[3] = ytab[((lum[3] +9)<<4)|(c>>4)];
      c = uvtab[((up[2]<<8)|vp[2])+1542];
      out[4] = ytab[((lum[4] +3)<<4)|(c&15)];
      out[5] = ytab[((lum[5] +11)<<4)|(c>>4)];
      c = uvtab[((up[3]<<8)|vp[3])+514];
      out[6] = ytab[((lum[6] +1)<<4)|(c&15)];
      out[7] = ytab[((lum[7] +9)<<4)|(c>>4)];
      lum += 8;
      up  += 4;
      vp  += 4;
      out += 8;
    }

    if (chroma_format==CHROMA420){
      up -= chrom_width;
      vp -= chrom_width;
    }

    /* line row + 3 */
    for (col=0; col<coded_picture_width; col+=8){
      c = uvtab[((up[0]<<8)|vp[0])+3598];
      out[0] = ytab[((lum[0] +15)<<4)|(c>>4)];
      out[1] = ytab[((lum[1] +7)<<4)|(c&15)];
      c = uvtab[((up[1]<<8)|vp[1])+2570];
      out[2] = ytab[((lum[2] +13)<<4)|(c>>4)];
      out[3] = ytab[((lum[3] +5)<<4)|(c&15)];
      c = uvtab[((up[2]<<8)|vp[2])+3598];
      out[4] = ytab[((lum[4] +15)<<4)|(c>>4)];
      out[5] = ytab[((lum[5] +7)<<4)|(c&15)];
      c = uvtab[((up[3]<<8)|vp[3])+2570];
      out[6] = ytab[((lum[6] +13)<<4)|(c>>4)];
      out[7] = ytab[((lum[7] +5)<<4)|(c&15)];
      lum += 8;
      up  += 4;
      vp  += 4;
      out += 8;
    }
  }
}


/*
  Dither the top field of a 4:2:0 / 4:2:2 picture into a full-height image.

  src - source planes (src[0] luma, src[1] / src[2] chroma)
  dst - destination buffer; rows are coded_picture_width bytes apart

  Source lines 0, 2, 4, ... are written to the even output rows (dst).
  Each odd output row (dst2) is the average of that source line and the
  next line of the same field (py2, two picture lines further down).
  In the last group py2 is stepped back instead of forward, so the final
  odd row averages the last field line with itself rather than reading
  past the picture.

  Chroma: uvtab is indexed by (u<<8)|v plus a dither offset; all offsets
  are multiples of 257 so that u and v are shifted by the same amount.
  For CHROMA420 the chroma pointers are rewound after the first line pair
  so both field lines of a group of four use the same chroma line.
*/
void DisplayX11::dithertop(unsigned char *src[], unsigned char *dst){
  int i;
  unsigned int y,uv1,uv2;
  unsigned char *py,*py2,*pu,*pv,*dst2;

  py = src[0];
  py2 = src[0] + (coded_picture_width<<1);   // next line of the same field
  pu = src[1];
  pv = src[2];
  dst2 = dst + coded_picture_width;          // output row below dst

  for (int j=0; j<coded_picture_height; j+=4){
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4){
      y = *py++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2];
      uv2 = uvtab[uv2+2056];
      *dst++  = ytab[((y)<<4)|(uv1&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+12)<<4)|(uv2>>4)];

      y = *py++;
      *dst++  = ytab[((y+8)<<4)|(uv1>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+4)<<4)|(uv2&15)];

      y = *py++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+1028];
      // 3084 = 12*257: offsets u and v alike, as ditherframe does for this
      // dither position (3072 would offset only the u part of the index).
      uv2 = uvtab[uv2+3084];
      *dst++  = ytab[((y+2)<<4)|(uv1&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+14)<<4)|(uv2>>4)];

      y = *py++;
      *dst++  = ytab[((y+10)<<4)|(uv1>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+6)<<4)|(uv2&15)];
    }

    // skip the bottom-field line
    py += coded_picture_width;

    // last group: step py2 back so it stays inside the picture
    if (j!=(coded_picture_height-4)) py2 += coded_picture_width;
    else                             py2 -= coded_picture_width;

    // dst/dst2 each advanced one row above; skip the row the other one wrote
    dst += coded_picture_width;
    dst2 += coded_picture_width;

    if (chroma_format==CHROMA420){
      // re-use the same chroma line for the second field line
      pu -= chrom_width;
      pv -= chrom_width;
    }
    else {
      // skip the bottom-field chroma line
      pu += chrom_width;
      pv += chrom_width;
    }

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4){
      y = *py++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+1542];
      uv2 = uvtab[uv2+3598];
      *dst++  = ytab[((y+3)<<4)|(uv1&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+15)<<4)|(uv2>>4)];

      y = *py++;
      *dst++  = ytab[((y+11)<<4)|(uv1>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+7)<<4)|(uv2&15)];

      y = *py++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+514];
      uv2 = uvtab[uv2+2570];
      *dst++  = ytab[((y+1)<<4)|(uv1&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+13)<<4)|(uv2>>4)];

      y = *py++;
      *dst++  = ytab[((y+9)<<4)|(uv1>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+5)<<4)|(uv2&15)];
    }

    py += coded_picture_width;
    py2 += coded_picture_width;
    dst += coded_picture_width;
    dst2 += coded_picture_width;
    pu += chrom_width;
    pv += chrom_width;
  }
}


/*
  Dither the bottom field of a 4:2:0 / 4:2:2 picture into a full-height image.

  src - source planes (src[0] luma, src[1] / src[2] chroma)
  dst - destination buffer; rows are coded_picture_width bytes apart

  Source lines 1, 3, 5, ... are written to the odd output rows (dst2).
  Each even output row (dst) is the average of the field line above (py)
  and the field line below (py2). For the very first row both pointers
  address source line 1, so output row 0 is a copy of that line; py is
  rewound once (j==0) so it then trails py2 by one field line.

  Chroma: uvtab is indexed by (u<<8)|v plus a dither offset; all offsets
  are multiples of 257 so that u and v are shifted by the same amount.
  For CHROMA420 the chroma pointers are rewound after the first line pair
  so both field lines of a group of four use the same chroma line.
*/
void DisplayX11::ditherbot(unsigned char *src[], unsigned char *dst){
  int i;
  unsigned int y2,uv1,uv2;
  unsigned char *py,*py2,*pu,*pv,*dst2;

  py = src[0] + coded_picture_width;   // first bottom-field line
  py2 = py;
  pu = src[1] + chrom_width;
  pv = src[2] + chrom_width;
  dst2 = dst + coded_picture_width;    // output row below dst

  for (int j=0; j<coded_picture_height; j+=4){
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4){
      y2 = *py2++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2];
      uv2 = uvtab[uv2+2056];
      *dst++  = ytab[((((*py++ + y2)>>1))<<4)|(uv1&15)];
      *dst2++ = ytab[((y2+12)<<4)|(uv2>>4)];

      y2 = *py2++;
      *dst++  = ytab[((((*py++ + y2)>>1)+8)<<4)|(uv1>>4)];
      *dst2++ = ytab[((y2+4)<<4)|(uv2&15)];

      y2 = *py2++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+1028];
      // 3084 = 12*257: offsets u and v alike, as ditherframe does for this
      // dither position (3072 would offset only the u part of the index).
      uv2 = uvtab[uv2+3084];
      *dst++  = ytab[((((*py++ + y2)>>1)+2)<<4)|(uv1&15)];
      *dst2++ = ytab[((y2+14)<<4)|(uv2>>4)];

      y2 = *py2++;
      *dst++  = ytab[((((*py++ + y2)>>1)+10)<<4)|(uv1>>4)];
      *dst2++ = ytab[((y2+6)<<4)|(uv2&15)];
    }

    // first group only: keep py on the first field line so that it trails py2
    if (j==0) py -= coded_picture_width;
    else      py += coded_picture_width;

    py2 += coded_picture_width;
    dst += coded_picture_width;
    dst2 += coded_picture_width;

    if (chroma_format==CHROMA420){
      // re-use the same chroma line for the second field line
      pu -= chrom_width;
      pv -= chrom_width;
    }
    else {
      // skip the top-field chroma line
      pu += chrom_width;
      pv += chrom_width;
    }

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4){
      y2 = *py2++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+1542];
      uv2 = uvtab[uv2+3598];
      *dst++  = ytab[((((*py++ + y2)>>1)+3)<<4)|(uv1&15)];
      *dst2++ = ytab[((y2+15)<<4)|(uv2>>4)];

      y2 = *py2++;
      *dst++  = ytab[((((*py++ + y2)>>1)+11)<<4)|(uv1>>4)];
      *dst2++ = ytab[((y2+7)<<4)|(uv2&15)];

      y2 = *py2++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+514];
      uv2 = uvtab[uv2+2570];
      *dst++  = ytab[((((*py++ + y2)>>1)+1)<<4)|(uv1&15)];
      *dst2++ = ytab[((y2+13)<<4)|(uv2>>4)];

      y2 = *py2++;
      *dst++  = ytab[((((*py++ + y2)>>1)+9)<<4)|(uv1>>4)];
      *dst2++ = ytab[((y2+5)<<4)|(uv2&15)];
    }

    py += coded_picture_width;
    py2 += coded_picture_width;
    dst += coded_picture_width;
    dst2 += coded_picture_width;
    pu += chrom_width;
    pv += chrom_width;
  }
}

/* only for 4:4:4 */

/*
  Dither a complete 4:4:4 frame into dithered_image.

  Four lines per outer step, eight pixels per inner step. Every pixel has
  its own chroma pair: uvtab is read at (u<<8)|v plus a per-position
  offset, one nibble of the result (low or high, depending on the
  position) is combined with (luma + per-position offset) << 4 and the
  sum indexes ytab.
*/
void DisplayX11::ditherframe444(unsigned char *src[]){
  int col;
  unsigned char *lum = src[0];
  unsigned char *up  = src[1];
  unsigned char *vp  = src[2];
  unsigned char *out = dithered_image;

  for (int row=0; row<coded_picture_height; row+=4){

    /* line row + 0 */
    for (col=0; col<coded_picture_width; col+=8){
      out[0] = ytab[((lum[0])<<4)|(uvtab[(up[0]<<8)|vp[0]]&15)];
      out[1] = ytab[((lum[1] +8)<<4)|(uvtab[(up[1]<<8)|vp[1]]>>4)];
      out[2] = ytab[((lum[2] +2)<<4)|(uvtab[((up[2]<<8)|vp[2])+1028]&15)];
      out[3] = ytab[((lum[3] +10)<<4)|(uvtab[((up[3]<<8)|vp[3])+1028]>>4)];
      out[4] = ytab[((lum[4])<<4)|(uvtab[(up[4]<<8)|vp[4]]&15)];
      out[5] = ytab[((lum[5] +8)<<4)|(uvtab[(up[5]<<8)|vp[5]]>>4)];
      out[6] = ytab[((lum[6] +2)<<4)|(uvtab[((up[6]<<8)|vp[6])+1028]&15)];
      out[7] = ytab[((lum[7] +10)<<4)|(uvtab[((up[7]<<8)|vp[7])+1028]>>4)];
      lum += 8;
      up  += 8;
      vp  += 8;
      out += 8;
    }

    /* line row + 1 */
    for (col=0; col<coded_picture_width; col+=8){
      out[0] = ytab[((lum[0] +12)<<4)|(uvtab[((up[0]<<8)|vp[0])+2056]>>4)];
      out[1] = ytab[((lum[1] +4)<<4)|(uvtab[((up[1]<<8)|vp[1])+2056]&15)];
      out[2] = ytab[((lum[2] +14)<<4)|(uvtab[((up[2]<<8)|vp[2])+3084]>>4)];
      out[3] = ytab[((lum[3] +6)<<4)|(uvtab[((up[3]<<8)|vp[3])+3084]&15)];
      out[4] = ytab[((lum[4] +12)<<4)|(uvtab[((up[4]<<8)|vp[4])+2056]>>4)];
      out[5] = ytab[((lum[5] +4)<<4)|(uvtab[((up[5]<<8)|vp[5])+2056]&15)];
      out[6] = ytab[((lum[6] +14)<<4)|(uvtab[((up[6]<<8)|vp[6])+3084]>>4)];
      out[7] = ytab[((lum[7] +6)<<4)|(uvtab[((up[7]<<8)|vp[7])+3084]&15)];
      lum += 8;
      up  += 8;
      vp  += 8;
      out += 8;
    }

    /* line row + 2 */
    for (col=0; col<coded_picture_width; col+=8){
      out[0] = ytab[((lum[0] +3)<<4)|(uvtab[((up[0]<<8)|vp[0])+1542]&15)];
      out[1] = ytab[((lum[1] +11)<<4)|(uvtab[((up[1]<<8)|vp[1])+1542]>>4)];
      out[2] = ytab[((lum[2] +1)<<4)|(uvtab[((up[2]<<8)|vp[2])+514]&15)];
      out[3] = ytab[((lum[3] +9)<<4)|(uvtab[((up[3]<<8)|vp[3])+514]>>4)];
      out[4] = ytab[((lum[4] +3)<<4)|(uvtab[((up[4]<<8)|vp[4])+1542]&15)];
      out[5] = ytab[((lum[5] +11)<<4)|(uvtab[((up[5]<<8)|vp[5])+1542]>>4)];
      out[6] = ytab[((lum[6] +1)<<4)|(uvtab[((up[6]<<8)|vp[6])+514]&15)];
      out[7] = ytab[((lum[7] +9)<<4)|(uvtab[((up[7]<<8)|vp[7])+514]>>4)];
      lum += 8;
      up  += 8;
      vp  += 8;
      out += 8;
    }

    /* line row + 3 */
    for (col=0; col<coded_picture_width; col+=8){
      out[0] = ytab[((lum[0] +15)<<4)|(uvtab[((up[0]<<8)|vp[0])+3598]>>4)];
      out[1] = ytab[((lum[1] +7)<<4)|(uvtab[((up[1]<<8)|vp[1])+3598]&15)];
      out[2] = ytab[((lum[2] +13)<<4)|(uvtab[((up[2]<<8)|vp[2])+2570]>>4)];
      out[3] = ytab[((lum[3] +5)<<4)|(uvtab[((up[3]<<8)|vp[3])+2570]&15)];
      out[4] = ytab[((lum[4] +15)<<4)|(uvtab[((up[4]<<8)|vp[4])+3598]>>4)];
      out[5] = ytab[((lum[5] +7)<<4)|(uvtab[((up[5]<<8)|vp[5])+3598]&15)];
      out[6] = ytab[((lum[6] +13)<<4)|(uvtab[((up[6]<<8)|vp[6])+2570]>>4)];
      out[7] = ytab[((lum[7] +5)<<4)|(uvtab[((up[7]<<8)|vp[7])+2570]&15)];
      lum += 8;
      up  += 8;
      vp  += 8;
      out += 8;
    }
  }
}


/*
  Dither the top field of a 4:4:4 picture into a full-height image.

  src - source planes (src[0] luma, src[1] / src[2] chroma)
  dst - destination buffer; rows are coded_picture_width bytes apart

  Source lines 0, 2, 4, ... are written to the even output rows (dst).
  Each odd output row (dst2) is the average of that source line and the
  next line of the same field (py2). In the last group py2 is stepped
  back instead of forward, so the final odd row averages the last field
  line with itself rather than reading past the picture.

  Every pixel has its own chroma pair; the same pair is used for the
  field line and the interpolated row beneath it. uvtab offsets are all
  multiples of 257 so that u and v are shifted by the same amount.
*/
void DisplayX11::dithertop444(unsigned char *src[], unsigned char *dst){
  int i;
  unsigned int y,uv;
  unsigned char *py,*py2,*pu,*pv,*dst2;

  py = src[0];
  py2 = src[0] + (coded_picture_width<<1);   // next line of the same field
  pu = src[1];
  pv = src[2];
  dst2 = dst + coded_picture_width;          // output row below dst

  for (int j=0; j<coded_picture_height; j+=4){
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4){
      y = *py++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((y)<<4)|(uvtab[uv]&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+12)<<4)|(uvtab[uv+2056]>>4)];

      y = *py++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((y+8)<<4)|(uvtab[uv]>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+4)<<4)|(uvtab[uv+2056]&15)];

      // 3084 = 12*257: offsets u and v alike, as ditherframe444 does for
      // these dither positions (3072 would offset only the u part).
      y = *py++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((y+2)<<4)|(uvtab[uv+1028]&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+14)<<4)|(uvtab[uv+3084]>>4)];

      y = *py++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((y+10)<<4)|(uvtab[uv+1028]>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+6)<<4)|(uvtab[uv+3084]&15)];
    }

    // skip the bottom-field line
    py += coded_picture_width;

    // last group: step py2 back so it stays inside the picture
    if (j!=(coded_picture_height-4))
      py2 += coded_picture_width;
    else
      py2 -= coded_picture_width;

    dst += coded_picture_width;
    dst2 += coded_picture_width;

    // skip the bottom-field chroma line
    pu += chrom_width;
    pv += chrom_width;

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4){
      y = *py++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((y+3)<<4)|(uvtab[uv+1542]&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+15)<<4)|(uvtab[uv+3598]>>4)];

      y = *py++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((y+11)<<4)|(uvtab[uv+1542]>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+7)<<4)|(uvtab[uv+3598]&15)];

      y = *py++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((y+1)<<4)|(uvtab[uv+514]&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+13)<<4)|(uvtab[uv+2570]>>4)];

      y = *py++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((y+9)<<4)|(uvtab[uv+514]>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+5)<<4)|(uvtab[uv+2570]&15)];
    }

    py += coded_picture_width;
    py2 += coded_picture_width;
    dst += coded_picture_width;
    dst2 += coded_picture_width;
    pu += chrom_width;
    pv += chrom_width;
  }
}


/*
  Dither the bottom field of a 4:4:4 picture into a full-height image.

  src - source planes (src[0] luma, src[1] / src[2] chroma)
  dst - destination buffer; rows are coded_picture_width bytes apart

  Source lines 1, 3, 5, ... are written to the odd output rows (dst2).
  Each even output row (dst) is the average of the field line above (py)
  and the field line below (py2). For the very first row both pointers
  address source line 1, so output row 0 is a copy of that line; py is
  rewound once (j==0) so it then trails py2 by one field line.

  Every pixel has its own chroma pair; the same pair is used for the
  interpolated row and the field line beneath it. uvtab offsets are all
  multiples of 257 so that u and v are shifted by the same amount.
*/
void DisplayX11::ditherbot444(unsigned char *src[], unsigned char *dst){
  int i;
  unsigned int y2,uv;
  unsigned char *py,*py2,*pu,*pv,*dst2;

  py = src[0] + coded_picture_width;   // first bottom-field line
  py2 = py;
  pu = src[1] + chrom_width;
  pv = src[2] + chrom_width;
  dst2 = dst + coded_picture_width;    // output row below dst

  for (int j=0; j<coded_picture_height; j+=4){
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4){
      y2 = *py2++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((((*py++ + y2)>>1))<<4)|(uvtab[uv]&15)];
      *dst2++ = ytab[((y2+12)<<4)|(uvtab[uv+2056]>>4)];

      y2 = *py2++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((((*py++ + y2)>>1)+8)<<4)|(uvtab[uv]>>4)];
      *dst2++ = ytab[((y2+4)<<4)|(uvtab[uv+2056]&15)];

      // 3084 = 12*257: offsets u and v alike, as ditherframe444 does for
      // these dither positions (3072 would offset only the u part).
      y2 = *py2++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((((*py++ + y2)>>1)+2)<<4)|(uvtab[uv+1028]&15)];
      *dst2++ = ytab[((y2+14)<<4)|(uvtab[uv+3084]>>4)];

      y2 = *py2++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((((*py++ + y2)>>1)+10)<<4)|(uvtab[uv+1028]>>4)];
      *dst2++ = ytab[((y2+6)<<4)|(uvtab[uv+3084]&15)];
    }

    // first group only: keep py on the first field line so that it trails py2
    if (j==0)
      py -= coded_picture_width;
    else
      py += coded_picture_width;

    py2 += coded_picture_width;
    dst += coded_picture_width;
    dst2 += coded_picture_width;

    // skip the top-field chroma line
    pu += chrom_width;
    pv += chrom_width;

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4){
      y2 = *py2++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((((*py++ + y2)>>1)+3)<<4)|(uvtab[uv+1542]&15)];
      *dst2++ = ytab[((y2+15)<<4)|(uvtab[uv+3598]>>4)];

      y2 = *py2++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((((*py++ + y2)>>1)+11)<<4)|(uvtab[uv+1542]>>4)];
      *dst2++ = ytab[((y2+7)<<4)|(uvtab[uv+3598]&15)];

      y2 = *py2++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((((*py++ + y2)>>1)+1)<<4)|(uvtab[uv+514]&15)];
      *dst2++ = ytab[((y2+13)<<4)|(uvtab[uv+2570]>>4)];

      y2 = *py2++;
      uv = (*pu++<<8)|*pv++;
      *dst++  = ytab[((((*py++ + y2)>>1)+9)<<4)|(uvtab[uv+514]>>4)];
      *dst2++ = ytab[((y2+5)<<4)|(uvtab[uv+2570]&15)];
    }

    py += coded_picture_width;
    py2 += coded_picture_width;
    dst += coded_picture_width;
    dst2 += coded_picture_width;
    pu += chrom_width;
    pv += chrom_width;
  }
}

#ifdef SOLARIS_SDK_XIL

int DisplayX11::resize(){
  TRACER("int DisplayX11::resize()");
  Window root;
  int x, y;
  unsigned int w, h, b, d;
  XGetGeometry(display, window, &root, &x, &y, &w, &h, &b, &d);

  // indicate resize
  resized=1;
  horizontal_factor=((float) w)/horizontal_size;
  vertical_factor=((float) h)/vertical_size;

  // to avoid new events for the time being
  XSelectInput(display, window, NoEventMask);
  
  // Create new image with new geometry
  xil_destroy(displayimage);
  displayimage=xil_create_from_window(State, display, window);
  xil_destroy(resized_image);
  resized_image=xil_create(State, w, h, bands, XIL_BYTE);
/*
  if (bands==3){
    XilColorspace cspace=xil_colorspace_get_by_name(State, "ycc601");
    xil_set_colorspace(resized_image, cspace);
    cspace=xil_colorspace_get_by_name(State, "rgb709");
    xil_set_colorspace(displayimage, cspace);
  }
*/

  XSelectInput(display, window, StructureNotifyMask);
  return 1;
}

#endif // SOLARIS_SDK_XIL


