// MYDIB.cpp: implementation of the MYDIB class.
//
//////////////////////////////////////////////////////////////////////
#include <windows.h>
#include <stdio.h>
#include <math.h>
#include <excpt.h>
#include "MYDIB.h"

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Abort flag polled by the row loops in To24Bits(), To32Bits() and Rescale().
// It is private to this translation unit; the extern form was disabled.
//extern bool AbortOperation;
static bool AbortOperation;

// Error message texts that are copied into ErrStr by the DIB routines.
char EOF_STR[] = "Premature end of file";
char INV_HDR[] = "Unsupported image header";
char INV_CDEP[] = "Unsupported color depth";
char INV_BPLAN[] = "Unsupported number of bit planes";
char NO_MEM[] = "Insufficient memory to load image";
// Coefficient tables used by the MMX RGB-to-YUV path.
// Each row holds {blue, green, red, unused} weights scaled by 32768.
// NOTE(review): the seven rows presumably correspond to matrix_coefficients
// values 1..7, like the tables inside readframe() - confirm against
// init_rgb_to_yuv_mmx(), which is not visible here.
static short int ycoef[7][4] = {     /* 32768 scaled y table, bgra order */
    {2363, 23442, 6963, 0},
    {3736, 19235, 9798, 0},
    {3736, 19235, 9798, 0},
    {3604, 19333, 9830, 0},
    {3736, 19235, 9798, 0},
    {3736, 19235, 9798, 0},
    {2851, 22970, 6947, 0}};

// U (Cb) weights, same row/column layout as ycoef.
static short int ucoef[7][4] = {
    {16384, -12648, -3768, 0},
    {16384, -10846, -5538, 0},
    {16384, -10846, -5538, 0},
    {16384, -10846, -5538, 0},
    {16384, -10846, -5538, 0},
    {16384, -10846, -5538, 0},
    {16384, -12583, -3801, 0}};

// V (Cr) weights, same row/column layout as ycoef.
static short int vcoef[7][4] = {
    {-1507, -14877, 16384, 0},
    {-2654, -13730, 16384, 0},
    {-2654, -13730, 16384, 0},
    {-2589, -13795, 16384, 0},
    {-2654, -13730, 16384, 0},
    {-2654, -13730, 16384, 0},
    {-1802, -14582, 16384, 0}};
// Pointers with external linkage so that assembly code can reach the tables.
// NOTE(review): presumably set by init_rgb_to_yuv_mmx() to the selected rows - confirm.
short int *ycoefs, *ucoefs, *vcoefs;	//use asm

// Builds an empty DIB wrapper with MPEG-1 style defaults (progressive, 4:2:0),
// creates the 0..255 clip table and allocates frame planes for a 176x144
// picture.
//
// Allocation failures are reported through DisplayError(); the object is still
// constructed, but clp stays NULL if the clip table could not be allocated.
MYDIB::MYDIB()
{
  int i;

  AbortOperation = false;
  MMXMode = 1;
  UseFP = 0;

  Info = 0;
  Info2 = NULL;
  Bits = NULL;
  Bits2 = NULL;
  Colors = NULL;
  u444 = NULL;
  v444 = NULL;
  u422 = NULL;
  v422 = NULL;
  NumClrs = 0;
  Mode = DIB_RGB_COLORS;
  W = 0;
  H = 0;
  BitsPerPixel = 0;
  EndScan = 0;
  strcpy(ErrStr, "");
  strcpy(fName, "");
  Handle = NULL;
  Handle2 = NULL;

  video_type = MPEG_MPEG1;
  prog_frame = 1;   // MPEG1 VCD :1  // MPEG2 DVD :2
  prog_seq = 1;     // MPEG1 VCD :1
  fieldpic = 0;     // MPEG1 VCD :0
  chroma_format = CHROMA420;
  matrix_coefficients = 5;

  // Clip table: clp[i] saturates i to 0..255 for i in [-384, 639].
  // orgclp keeps the address returned by malloc so the destructor can free it.
  clp = (unsigned char *)malloc(1024);
  orgclp = clp;
  if (clp == NULL)
  {
    // Leave clp NULL instead of offsetting and writing through a null pointer.
    DisplayError("Cannot allocate memory for clip table.");
  }
  else
  {
    clp += 384;
    for (i = -384; i < 640; i++)
      clp[i] = (unsigned char) ((i < 0) ? 0 : ((i > 255) ? 255 : i));
  }

  // init() frees any non-NULL plane, so the pointers must be cleared first.
  for (i = 0; i < 3; i++)
    neworgframe[i] = NULL;

  init_rgb_to_yuv_mmx(matrix_coefficients);
  init(176, 144);
}

// Releases both DIB memory blocks, the clip table, the frame planes and the
// chroma work buffers. Every resource is released at most once, even when two
// members refer to the same allocation.
MYDIB::~MYDIB()
{
  int i;

  // SwitchDibs() copies Handle2 into Handle, so both may name the same block.
  if (Handle2 && Handle2 != Handle)
  {
    ::GlobalUnlock(Handle2);
    ::GlobalFree(Handle2);
  }
  Handle2 = NULL;

  if (Handle)
  {
    ::GlobalUnlock(Handle);
    ::GlobalFree(Handle);
    Handle = NULL;
  }

  if (orgclp)
  {
    free(orgclp);
    orgclp = NULL;
    clp = NULL;
  }

  // In 4:4:4 mode readframe() points u444/v444 at the frame planes it was
  // given; those planes are released by the loop below, not here.
  if (u444 && u444 != neworgframe[1]) free(u444);
  if (v444 && v444 != neworgframe[2]) free(v444);
  u444 = NULL;
  v444 = NULL;

  if (u422) free(u422);
  if (v422) free(v422);
  u422 = NULL;
  v422 = NULL;

  for (i = 0; i < 3; i++)
  {
    if (neworgframe[i]) free(neworgframe[i]);
    neworgframe[i] = NULL;
  }
}


// Converts one packed DIB into the YUV planes held in neworgframe.
//
// bitmapInfo  packed DIB (header followed by palette/pixels) to read from
// wcx, wcy    target picture size; the frame planes are re-allocated when it
//             differs from the current horizontal_size/vertical_size
//
// Returns true when loading, rescaling and the YUV conversion all succeeded.
bool MYDIB::convert(BITMAPINFO *bitmapInfo,int wcx,int wcy)
{
  if (bitmapInfo == NULL)
    return false;

  // A failed re-allocation leaves no usable planes, so stop right away.
  if (horizontal_size != wcx || vertical_size != wcy)
  {
    if (!init(wcx, wcy))
      return false;
  }

  if (!LoadFromBMP(bitmapInfo))
    return false;
  if (!Rescale(wcx, wcy, wcx, wcy))
    return false;
  if (!readframe(neworgframe))
    return false;

  return true;
}


// Stores the picture size, derives the macroblock-aligned plane dimensions and
// (re)allocates the three planes of neworgframe.
//
// wcx, wcy  picture width and height in pixels
//
// Returns true on success. On allocation failure every plane is released and
// horizontal_size/vertical_size are reset to 0, so that a later convert() call
// with the same size allocates again instead of using missing planes.
bool MYDIB::init(int wcx,int wcy)
{
  int i, size;

  horizontal_size = wcx;
  vertical_size = wcy;

  // Round up to whole 16x16 macroblocks (pairs of them for interlaced input).
  mb_width = (horizontal_size + 15) / 16;
  mb_height = prog_seq ? (vertical_size + 15) / 16 : 2 * ((vertical_size + 31) / 32);
  width = 16 * mb_width;
  height = 16 * mb_height;

  chrom_width = (chroma_format == CHROMA444) ? width : width >> 1;
  chrom_height = (chroma_format != CHROMA420) ? height : height >> 1;

  for (i = 0; i < 3; i++)
  {
    if (neworgframe[i] != NULL)
    {
      free(neworgframe[i]);
      neworgframe[i] = NULL;
    }

    // Plane 0 is luma; planes 1 and 2 are chroma.
    size = (i == 0) ? width * height : chrom_width * chrom_height;
    neworgframe[i] = (unsigned char *)malloc(size);
    if (neworgframe[i] == NULL)
    {
      int k;

      DisplayError("Cannot allocate memory for new org frame.");
      // Drop the planes that were already allocated and forget the size.
      for (k = 0; k < 3; k++)
      {
        if (neworgframe[k] != NULL)
        {
          free(neworgframe[k]);
          neworgframe[k] = NULL;
        }
      }
      horizontal_size = 0;
      vertical_size = 0;
      return false;
    }
  }

  return true;
}


// Returns the byte length of one DIB scan line: the row's bit count rounded up
// to a whole number of 32-bit words.
inline long ScanBytes(int pixWidth, int bitsPixel) {
  const long rowBits = (long)pixWidth * bitsPixel;
  const long dwordCount = (rowBits + 31) / 32;
  return dwordCount * 4;
}

bool MYDIB::CreateDib(bool isBMP_BI_BITFIELDS)
{
  BITMAPINFOHEADER InfoHeader;
  int  colorAlloc;

  if (Handle)
  {
    if ((Info->bmiHeader.biBitCount != BitsPerPixel) ||
        (Info->bmiHeader.biWidth != W) ||
        (Info->bmiHeader.biHeight != H))
    {
      ::GlobalUnlock(Handle);
      ::GlobalFree(Handle);
    }
    else
      return true;
  }
  switch (BitsPerPixel)
  {
	 case 1:
		NumClrs = 2;
		break;
	 case 4:
		NumClrs = 16;
		break;
	 case 8:
		NumClrs = 256;
		break;
    case 15:
    case 16:
	 case 24:
    case 32:
		NumClrs = 0;
		break;
	 default:
		strcpy(ErrStr, INV_CDEP);
		return false;
  }
  InfoHeader.biSize = sizeof(BITMAPINFOHEADER);
  InfoHeader.biWidth = W;
  InfoHeader.biHeight = H;
  if (BitsPerPixel == 15)
    InfoHeader.biBitCount = 16;
  else
    InfoHeader.biBitCount = (WORD) BitsPerPixel;
  InfoHeader.biPlanes = 1;
  InfoHeader.biXPelsPerMeter = 0;
  InfoHeader.biYPelsPerMeter = 0;
  InfoHeader.biClrUsed = 0;
  InfoHeader.biClrImportant = 0;  // 0 = all colors
  if (isBMP_BI_BITFIELDS)
    InfoHeader.biCompression = BI_BITFIELDS;
  else
    InfoHeader.biCompression = BI_RGB;
  InfoHeader.biSizeImage = ScanBytes(W, InfoHeader.biBitCount) * H;

  if (isBMP_BI_BITFIELDS)
    colorAlloc = 3 * sizeof(DWORD);
  else
    colorAlloc = NumClrs * sizeof(RGBQUAD); // size of color tables
  BitsAllocated = (long) InfoHeader.biSize + (long) colorAlloc + (long) InfoHeader.biSizeImage;

  if (Handle)
    Handle = GlobalFree(Handle);
  Handle = GlobalAlloc(GMEM_MOVEABLE | GMEM_ZEROINIT, BitsAllocated);
  if (Handle)
  {
	 Info = (LPBITMAPINFO)::GlobalLock(Handle);
	 if (Info)
	 {
		Info->bmiHeader = InfoHeader;
		switch (InfoHeader.biBitCount)
		{
		  case 1 :
			 xSizeNoPad = W >> 3;
			 if (W % 8 != 0)
				xSizeNoPad++;
			 break;
		  case 4 :
			 xSizeNoPad = W >> 1;
			 if (W % 2 != 0)
				xSizeNoPad++;
			 break;
		  case 8 :
			 xSizeNoPad = W;
			 break;
        case 15:
        case 16:
          xSizeNoPad = W << 1;
          break;
		  case 24:
			 xSizeNoPad = W * 3;
			 break;
        case 32:
          xSizeNoPad = W << 2;
          break;
		}
		xSize = xSizeNoPad;
		while ((xSize & 3) != 0)
		  xSize++;
		Mode = DIB_RGB_COLORS;
		Bits = (char far *) Info + ((int)InfoHeader.biSize + colorAlloc);
      if (NumClrs)
        //Colors = (TRgbQuad *) Info + (int) InfoHeader.biSize;
		Colors = (RGBQUAD *) ((char far *)Info + (int) InfoHeader.biSize);
	  else
        Colors = NULL;
		return true;
	 }
  }
  strcpy(ErrStr, NO_MEM);
  return false;
}
// Hook called once per processed row by the conversion and scaling loops.
// It currently does nothing: the message-pump code below is disabled.
void MYDIB::YieldTime()
{
//  MSG msg;

//  int i;
//  for (i = 0; i < 100000; i++);

//  if (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
//  {
//    TranslateMessage(&msg);
//    DispatchMessage(&msg);
//  }
}

bool MYDIB::CreateDib2(int w, int h, int bitsPerPixel)
{
  BITMAPINFOHEADER InfoHeader;
  int  colorAlloc;

  W2 = w;
  H2 = h;
  BitsPerPixel2 = bitsPerPixel;
  switch (BitsPerPixel2)
  {
	 case 1:
		NumClrs2 = 2;
		break;
	 case 4:
		NumClrs2 = 16;
		break;
	 case 8:
		NumClrs2 = 256;
		break;
    case 15:
    case 16:
	 case 24:
    case 32:
		NumClrs2 = 0;
		break;
	 default:
		strcpy(ErrStr, INV_CDEP);
		return false;
  }
  InfoHeader.biSize = sizeof(BITMAPINFOHEADER);
  InfoHeader.biWidth = W2;
  InfoHeader.biHeight = H2;
  InfoHeader.biBitCount = (WORD) BitsPerPixel2;
  InfoHeader.biPlanes = 1;
  InfoHeader.biXPelsPerMeter = 0;
  InfoHeader.biYPelsPerMeter = 0;
  InfoHeader.biClrUsed = 0;
  InfoHeader.biClrImportant = 0;  // 0 = all colors
  InfoHeader.biCompression = BI_RGB;
  InfoHeader.biSizeImage = ScanBytes(W2, InfoHeader.biBitCount) * H2;

  colorAlloc = NumClrs2 * sizeof(RGBQUAD); // size of color tables
  BitsAllocated2 = (long) InfoHeader.biSize + (long) colorAlloc + (long) InfoHeader.biSizeImage;

  Handle2 = GlobalAlloc(GMEM_MOVEABLE | GMEM_ZEROINIT, BitsAllocated2);
  if (Handle2)
  {
	 Info2 = (LPBITMAPINFO)::GlobalLock(Handle2);
	 if (Info2)
	 {
		Info2->bmiHeader = InfoHeader;
		switch (InfoHeader.biBitCount)
		{
		  case 1 :
			 xSizeNoPad2 = W2 >> 3;
			 if (W2 % 8 != 0)
				xSizeNoPad2++;
			 break;
		  case 4 :
			 xSizeNoPad2 = W2 >> 1;
			 if (W2 % 2 != 0)
				xSizeNoPad2++;
			 break;
		  case 8 :
			 xSizeNoPad2 = W2;
			 break;
        case 15:
        case 16:
          xSizeNoPad2 = W2 << 1;
          break;
		  case 24:
			 xSizeNoPad2 = W2 * 3;
			 break;
        case 32:
          xSizeNoPad2 = W2 << 2;
          break;
		}
		xSize2 = xSizeNoPad2;
		while ((xSize2 & 3) != 0)
		  xSize2++;
		Bits2 = (char far *) Info2 + ((int)InfoHeader.biSize + colorAlloc);
		return true;
	 }
  }
  strcpy(ErrStr, NO_MEM);
  return false;
}


// Lookup tables for 1-bpp rows, indexed by pixel position within a byte
// (0 = leftmost pixel = most significant bit).
// oneBitMasks[k] isolates the bit; oneBitShift[k] moves it down to bit 0.
static int oneBitMasks[8] = {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01};
static int oneBitShift[8] = {7, 6, 5, 4, 3, 2, 1, 0};

bool MYDIB::ChangeDepth(int bitsPerPixel)
{
  bool retcode;

  if ((bitsPerPixel != 24) && (bitsPerPixel != 32))
    return false;
  if (BitsPerPixel == bitsPerPixel)
    return true;
  if (!CreateDib2(W, H, bitsPerPixel))
    return false;
  if (bitsPerPixel == 24)
    retcode = To24Bits();
  else
    retcode = To32Bits();
  if (!retcode)
  {
    if (Handle2)
    {
      ::GlobalUnlock(Handle2);
      ::GlobalFree(Handle2);
    }
  }
  else
    SwitchDibs();
  return retcode;
}

bool MYDIB::To24Bits()
{
  unsigned char *srcbits, *srcbase;
  unsigned char *destbits, *destbase;
  RGBQUAD *colorMap;
  int i, j, k;

  srcbits = (unsigned char *) GetBits();
  destbits = (unsigned char *) Bits2;
  if (NumClrs)
    colorMap = (RGBQUAD *) (&Info->bmiColors[0]);

  i = 0;
  while ((i < H) && (!AbortOperation))
  {
    srcbase = srcbits + (i * xSize);
    destbase = destbits + (i * xSize2);
    switch (BitsPerPixel)
    {
      case 1:
        for (j = 0; j < W; j++)
        {
          k = j % 8;
          k = (srcbase[0] & oneBitMasks[k]) >> oneBitShift[k];
          if (j % 8 == 7)
            srcbase++;
          destbase[0] = colorMap[k].rgbBlue;
          destbase[1] = colorMap[k].rgbGreen;
          destbase[2] = colorMap[k].rgbRed;
          destbase += 3;
        }
        break;

      case 4:
        for (j = 0; j < W; j++)
        {
          if (j % 2)
          {
            k = srcbase[0] & 0x0F;
            srcbase++;
          }
          else
            k = (srcbase[0] & 0xF0) >> 4;
          destbase[0] = colorMap[k].rgbBlue;
          destbase[1] = colorMap[k].rgbGreen;
          destbase[2] = colorMap[k].rgbRed;
          destbase += 3;
        }
        break;

      case 8:
        for (j = 0; j < W; j++)
        {
          k = srcbase[0];
          destbase[0] = colorMap[k].rgbBlue;
          destbase[1] = colorMap[k].rgbGreen;
          destbase[2] = colorMap[k].rgbRed;
          srcbase++;
          destbase += 3;
        }
        break;

      case 16:
        for (j = 0; j < W; j++)
        {
          k = srcbase[0] | (srcbase[1] << 8);
          destbase[0] = (k & 0x001F) << 3;
          destbase[1] = (k & 0x03E0) >> 2;
          destbase[2] = (k & 0x7C00) >> 7;
          srcbase += 2;
          destbase += 3;
        }
        break;

      case 32:
        for (j = 0; j < W; j++)
        {
          destbase[0] = srcbase[0];
          destbase[1] = srcbase[1];
          destbase[2] = srcbase[2];
          srcbase += 4;
          destbase += 3;
        }
    }
    YieldTime();
    i++;
  }
  return !AbortOperation;
}

bool MYDIB::To32Bits()
{
  unsigned char *srcbits, *srcbase;
  unsigned char *destbits, *destbase;
  RGBQUAD *colorMap;
  int i, j, k;

  srcbits = (unsigned char *) GetBits();
  destbits = (unsigned char *) Bits2;
  if (NumClrs)
    colorMap = (RGBQUAD *) (&Info->bmiColors[0]);

  i = 0;
  while ((i < H) && (!AbortOperation))
  {
    srcbase = srcbits + (i * xSize);
    destbase = destbits + (i * xSize2);
    switch (BitsPerPixel)
    {
      case 1:
        for (j = 0; j < W; j++)
        {
          k = j % 8;
          k = (srcbase[0] & oneBitMasks[k]) >> oneBitShift[k];
          if (j % 8 == 7)
            srcbase++;
          destbase[0] = colorMap[k].rgbBlue;
          destbase[1] = colorMap[k].rgbGreen;
          destbase[2] = colorMap[k].rgbRed;
          destbase += 4;
        }
        break;

      case 4:
        for (j = 0; j < W; j++)
        {
          if (j % 2)
          {
            k = srcbase[0] & 0x0F;
            srcbase++;
          }
          else
            k = (srcbase[0] & 0xF0) >> 4;
          destbase[0] = colorMap[k].rgbBlue;
          destbase[1] = colorMap[k].rgbGreen;
          destbase[2] = colorMap[k].rgbRed;
          destbase += 4;
        }
        break;

      case 8:
        for (j = 0; j < W; j++)
        {
          k = srcbase[0];
          destbase[0] = colorMap[k].rgbBlue;
          destbase[1] = colorMap[k].rgbGreen;
          destbase[2] = colorMap[k].rgbRed;
          srcbase++;
          destbase += 4;
        }
        break;

      case 16:
        for (j = 0; j < W; j++)
        {
          k = srcbase[0] | (srcbase[1] << 8);
          destbase[0] = (k & 0x001F) << 3;
          destbase[1] = (k & 0x03E0) >> 2;
          destbase[2] = (k & 0x7C00) >> 7;
          srcbase += 2;
          destbase += 4;
        }
        break;

      case 24:
        for (j = 0; j < W; j++)
        {
          destbase[0] = srcbase[0];
          destbase[1] = srcbase[1];
          destbase[2] = srcbase[2];
          srcbase += 3;
          destbase += 4;
        }
    }
    YieldTime();
    i++;
  }
  return !AbortOperation;
}

// Resamples one row of 4-byte pixels horizontally.
//
// sp  source row, sw pixels of 4 bytes each
// dp  destination row, room for dw pixels of 4 bytes each
//
// Equal widths are copied. A wider destination is filled by linear
// interpolation between neighbouring source pixels. A narrower destination
// receives weighted averages of the source pixels covering each output pixel.
// The first three bytes of every pixel are processed; the fourth is written
// as 0 (or copied when the widths are equal).
// Nothing is written when either width is not positive.
void MYDIB::ScaleLine(unsigned char *sp, unsigned char *dp, int sw, int dw)
{
  int i, x0, y01, y02, y03, y11, y12, y13, v1, v2, v3;
  long count, total1, total2, total3;

  if (sw <= 0 || dw <= 0)
    return;

  count = total1 = total2 = total3 = 0;
  x0 = 0;
  if (sw == dw)
    memcpy(dp, sp, sw * 4);
  else
    if (dw > sw)
    {
      // y0x = left source pixel, y1x = right source pixel of the current span.
      y01 = (int)(sp[0]);
      y02 = (int)(sp[1]);
      y03 = (int)(sp[2]);
      if (sw > 1)
      {
        y11 = (int)(sp[4]);
        y12 = (int)(sp[5]);
        y13 = (int)(sp[6]);
      }
      else
      {
        // A one-pixel row has no right neighbour; repeat the only pixel
        // instead of reading past the end of the row.
        y11 = y01;
        y12 = y02;
        y13 = y03;
      }
      for (i = 0; i < dw; i++)
      {
        // count / dw is the fractional position between the two pixels.
        *dp++ = (unsigned char)(y01 + (int)((count * (long)(y11 - y01)) / (long)dw));
        *dp++ = (unsigned char)(y02 + (int)((count * (long)(y12 - y02)) / (long)dw));
        *dp++ = (unsigned char)(y03 + (int)((count * (long)(y13 - y03)) / (long)dw));
        *dp++ = 0;
        count += sw;
        if (count >= dw)
        {
          // Advance to the next source pixel pair.
          count -= dw;
          x0 += 4;
          y01 = y11;
          y02 = y12;
          y03 = y13;
          if (x0 < (sw - 1) * 4)
          {
            y11 = sp[x0 + 4];
            y12 = sp[x0 + 5];
            y13 = sp[x0 + 6];
          }
        }
      }
    }
    else
    {
      for (i = 0; i < sw; i++)
      {
        count += dw;
        if (count >= sw)
        {
          // This source pixel straddles an output boundary: part of it
          // completes the current output pixel, the rest starts the next one.
          count -= sw;
          v1 = (int)(*sp++);
          v2 = (int)(*sp++);
          v3 = (int)(*sp++);
          sp++;
          total1 += ((long)v1 * ((long)dw - count)) / (long)dw;
          total2 += ((long)v2 * ((long)dw - count)) / (long)dw;
          total3 += ((long)v3 * ((long)dw - count)) / (long)dw;
          *dp++ = (unsigned char)((total1 * (long)dw) / (long)sw);
          *dp++ = (unsigned char)((total2 * (long)dw) / (long)sw);
          *dp++ = (unsigned char)((total3 * (long)dw) / (long)sw);
          *dp++ = 0;
          total1 = ((long)v1 * count) / (long)dw;
          total2 = ((long)v2 * count) / (long)dw;
          total3 = ((long)v3 * count) / (long)dw;
        }
        else
        {
          // Source pixel lies entirely inside the current output pixel.
          total1 += *sp++;
          total2 += *sp++;
          total3 += *sp++;
          sp++;
        }
      }
    }
}

bool MYDIB::Rescale(int w, int h, int realW, int realH)
{
  unsigned char *srcbits, *destbits, *srcbase, *destbase;
  int i, j;
  long *tots, count;
  unsigned char *vp, *vp2;
  int lines;
  tots=NULL;
  vp = NULL;
  vp2 = NULL;

	srcbits = (unsigned char *) GetBits();
	if(srcbits==NULL)return false;

  if (BitsPerPixel != 32)
    if (!ChangeDepth(32))
      return false;

  if (!CreateDib2(w, h, 32))
    return false;

  
  destbits = (unsigned char *) Bits2;

  count = 0;
  if (H == H2)
  {
    lines = 0;
    while ((lines < H) && (!AbortOperation))
    {
      srcbase = srcbits + (lines * xSize);
      destbase = destbits + (lines * xSize2);
      ScaleLine(srcbase, destbase, W, W2);
      YieldTime();
      lines++;
    }
  }
  else
    if (H < H2)
    {
      vp = (unsigned char *) malloc(xSize2);
      vp2 = (unsigned char *)malloc(xSize2);
      ScaleLine(srcbits, vp, W, W2);
      srcbase = srcbits + xSize;
      ScaleLine(srcbase, vp2, W, W2);

      j = 2;
      lines = 0;
      while ((lines < H2) && (!AbortOperation))
      {
        destbase = destbits + (lines * xSize2);
        for (i = 0; i < W2 * 4; i += 4)
        {
          *destbase++ = (unsigned char)((int)(vp[i]) +
            (int)((count * ((long)(vp2[i]) - (long)(vp[i]))) / (long)H2));
          *destbase++ = (unsigned char)((int)(vp[i + 1]) +
            (int)((count * ((long)(vp2[i + 1]) - (long)(vp[i + 1]))) / (long)H2));
          *destbase++ = (unsigned char)((int)(vp[i + 2]) +
            (int)((count * ((long)(vp2[i + 2]) - (long)(vp[i + 2]))) / (long)H2));
          *destbase++ = 0;
        }
        if ((count += H) >= H2)
        {
          count -= H2;
          destbase = vp;
          vp = vp2;
          vp2 = destbase;
          if (j < H)
          {
            srcbase = srcbits + (j++ * xSize);
            ScaleLine(srcbase, vp2, W, W2);
          }
          else
            memcpy(vp2, vp, xSize2);
        }
        YieldTime();
        lines++;
      }
    }
    else
    {
      tots = (long *)malloc(xSize2 * sizeof(long));
      memset((char *)tots, 0, xSize2 * sizeof(long));
      vp = (unsigned char *)malloc(xSize2);
      j = 0;
      lines = 0;
      while ((lines < H) && (!AbortOperation))
      {
        srcbase = srcbits + (lines * xSize);
        ScaleLine(srcbase, vp, W, W2);
        count += H2;
        if (count >= H)
        {
          count -= H;
          destbase = destbits + (j * xSize2);
          for (i = 0; i < W2 * 4; i += 4)
          {
            tots[i] += ((long)(vp[i]) * ((long)H2 - count)) / (long)H2;
            tots[i + 1] += ((long)(vp[i + 1]) * ((long)H2 - count)) / (long)H2;
            tots[i + 2] += ((long)(vp[i + 2]) * ((long)H2 - count)) / (long)H2;
            *destbase++ = (unsigned char)((tots[i] * (long)H2) / (long)H);
            *destbase++ = (unsigned char)((tots[i + 1] * (long)H2) / (long)H);
            *destbase++ = (unsigned char)((tots[i + 2] * (long)H2) / (long)H);
            *destbase++ = 0;
            tots[i] = ((long)(vp[i]) * count) / (long)H2;
            tots[i + 1] = ((long)(vp[i + 1]) * count) / (long)H2;
            tots[i + 2] = ((long)(vp[i + 2]) * count) / (long)H2;
          }
          j++;
        }
        else
        {
          for (i = 0; i < W2 * 4; i += 4)
          {
            tots[i] += vp[i];
            tots[i + 1] += vp[i + 1];
            tots[i + 2] += vp[i + 2];
          }
        }
        YieldTime();
        lines++;
      }
    }
  if (vp)
    free(vp);
  if (vp2)
    free(vp2);
  if (tots)
    free(tots);


  if (!AbortOperation)
  {
    SwitchDibs();
/*    if (W != realW || H != realH)
      return PadDib(realW, realH);
    else*/
      return true;
  }
  else
    return false;
}

bool MYDIB::CreateBlankDib(int w, int h)
{
  W = w;
  H = h;
  BitsPerPixel = 32;
  return CreateDib(false);
}
// Releases the primary DIB and promotes the secondary DIB (Handle2, Info2,
// Bits2 and its geometry) to primary.
//
// Ownership of the memory block moves to Handle; Handle2 is cleared so the
// block is not released twice by later cleanup code. Info2 and Bits2 are left
// as they were and keep pointing into the (now primary) block.
void MYDIB::SwitchDibs()
{
  // Skip the release when there is no primary DIB or when both handles already
  // refer to the same block.
  if (Handle && Handle != Handle2)
  {
    ::GlobalUnlock(Handle);
    ::GlobalFree(Handle);
  }
  W = W2;
  H = H2;
  Handle = Handle2;
  Handle2 = NULL;
  Info = Info2;
  BitsPerPixel = BitsPerPixel2;
  NumClrs = NumClrs2;
  xSize = xSize2;
  xSizeNoPad = xSizeNoPad2;
  Bits = Bits2;
  BitsAllocated = BitsAllocated2;
  mapentrysize = 4;
  if (NumClrs && Info)
    Colors = (RGBQUAD *) &Info->bmiColors[0];
  else
    Colors = NULL;
}

// Returns the address of the primary DIB's pixel data (NULL when no DIB has
// been created).
void *MYDIB::GetBits(void)
{
  void *pixelData = Bits;
  return pixelData;
}
// Returns the address of the primary DIB's BITMAPINFO structure.
void *MYDIB::GetInfo(void)
{
  void *header = Info;
  return header;
}
/*
void MYDIB::DibCreateFromInfo(BITMAPINFO * pbmi)
{
     
	 Info=pbmi;
	 picWidth=W=PackedDibGetWidth (Info);
	 picHeight=H=PackedDibGetHeight (Info);
	 Bits=PackedDibGetBitsPtr (Info);
	 picBitsPerPixel = BitsPerPixel=PackedDibGetBitCount (Info);
	 BMPPlanes=PackedDibGetPlanes(Info);
     BMPHeaderSize=PackedDibGetInfoHeaderSize (Info);
     if (BMPHeaderSize == 12)
	 mapentrysize = 3;		// OS/2 uses RGBTRIPLE colormap 
	 else
	 mapentrysize = 4;		// Windows uses RGBQUAD colormap 
	 biClrUsed=PackedDibGetColorsUsed (Info);
     biCompression = Info->bmiHeader.biCompression; 
     RLECompressed = ((biCompression == BI_RLE4) | (biCompression == BI_RLE8));
     xSizeNoPad=PackedDibGetBitCount (Info)*W;
     xSize=PackedDibGetRowLength (Info);
	 NumClrs=PackedDibGetNumColors (Info);
	 

}
*/

/*----------------------------------------------
   Functions to get information from Packed Dib
  ----------------------------------------------*/
/*
int MYDIB::PackedDibGetWidth (BITMAPINFO * pPackedDib)
{
     if (pPackedDib->bmiHeader.biSize == sizeof (BITMAPCOREHEADER))
          return ((PBITMAPCOREINFO)pPackedDib)->bmciHeader.bcWidth ;
     else
          return pPackedDib->bmiHeader.biWidth ;
}

int MYDIB::PackedDibGetHeight (BITMAPINFO * pPackedDib)
{
     if (pPackedDib->bmiHeader.biSize == sizeof (BITMAPCOREHEADER))
          return ((PBITMAPCOREINFO)pPackedDib)->bmciHeader.bcHeight ;
     else
          return abs (pPackedDib->bmiHeader.biHeight) ;
}

int MYDIB::PackedDibGetPlanes (BITMAPINFO * pPackedDib)
{
     if (pPackedDib->bmiHeader.biSize == sizeof (BITMAPCOREHEADER))
          return ((PBITMAPCOREINFO)pPackedDib)->bmciHeader.bcPlanes  ;
     else
          return pPackedDib->bmiHeader.biPlanes ;
}

int MYDIB::PackedDibGetBitCount (BITMAPINFO * pPackedDib)
{
     if (pPackedDib->bmiHeader.biSize == sizeof (BITMAPCOREHEADER))
          return ((PBITMAPCOREINFO)pPackedDib)->bmciHeader.bcBitCount ;
     else
          return pPackedDib->bmiHeader.biBitCount ;
}


int MYDIB::PackedDibGetRowLength (BITMAPINFO * pPackedDib)
{
     return ((PackedDibGetWidth (pPackedDib) * 
              PackedDibGetBitCount (pPackedDib) + 31) & ~31) >> 3 ;
}
*/
/*-----------------------------------------------------------
   PackedDibGetInfoHeaderSize includes possible color masks!
  -----------------------------------------------------------*/
/*
int MYDIB::PackedDibGetInfoHeaderSize (BITMAPINFO * pPackedDib)
{
     if (pPackedDib->bmiHeader.biSize == sizeof (BITMAPCOREHEADER))
          return ((PBITMAPCOREINFO)pPackedDib)->bmciHeader.bcSize ;

     else if (pPackedDib->bmiHeader.biSize == sizeof (BITMAPINFOHEADER))
          return pPackedDib->bmiHeader.biSize + 
                    (pPackedDib->bmiHeader.biCompression == 
                                        BI_BITFIELDS ? 12 : 0) ;

     else return pPackedDib->bmiHeader.biSize ;
}
*/
/*-------------------------------------------------------------
   PackedDibGetColorsUsed returns value in information header;
          could be 0 to indicate non-truncated color table!
  -------------------------------------------------------------*/
/*
int MYDIB::PackedDibGetColorsUsed (BITMAPINFO * pPackedDib)
{
     if (pPackedDib->bmiHeader.biSize == sizeof (BITMAPCOREHEADER))
          return 0 ;
     else
          return pPackedDib->bmiHeader.biClrUsed ;
}
*/
/*------------------------------------------------------------------
   PackedDibGetNumColors is actual number of entries in color table
  ------------------------------------------------------------------*/
/*
int MYDIB::PackedDibGetNumColors (BITMAPINFO * pPackedDib)
{
     int iNumColors ;

     iNumColors = PackedDibGetColorsUsed (pPackedDib) ;

     if (iNumColors == 0 && PackedDibGetBitCount (pPackedDib) < 16)
          iNumColors = 1 << PackedDibGetBitCount (pPackedDib) ;

     return iNumColors ;
}

int MYDIB::PackedDibGetColorTableSize (BITMAPINFO * pPackedDib)
{
     if (pPackedDib->bmiHeader.biSize == sizeof (BITMAPCOREHEADER))
          return PackedDibGetNumColors (pPackedDib) * sizeof (RGBTRIPLE) ;
     else
          return PackedDibGetNumColors (pPackedDib) * sizeof (RGBQUAD) ;
}

RGBQUAD * MYDIB::PackedDibGetColorTablePtr (BITMAPINFO * pPackedDib)
{
     if (PackedDibGetNumColors (pPackedDib) == 0)
          return 0 ;

     return (RGBQUAD *) (((BYTE *) pPackedDib) + 
                                   PackedDibGetInfoHeaderSize (pPackedDib)) ;
}

RGBQUAD * MYDIB::PackedDibGetColorTableEntry (BITMAPINFO * pPackedDib, int i)
{
     if (PackedDibGetNumColors (pPackedDib) == 0)
          return 0 ;

     if (pPackedDib->bmiHeader.biSize == sizeof (BITMAPCOREHEADER))
          return (RGBQUAD *) 
               (((RGBTRIPLE *) PackedDibGetColorTablePtr (pPackedDib)) + i) ;
     else
          return PackedDibGetColorTablePtr (pPackedDib) + i ;
}
*/
/*------------------------------
   PackedDibGetBitsPtr finally!
  ------------------------------*/
/*
char * MYDIB::PackedDibGetBitsPtr (BITMAPINFO * pPackedDib)
{
     return ((char *) pPackedDib) + PackedDibGetInfoHeaderSize (pPackedDib) + PackedDibGetColorTableSize (pPackedDib) ;
}
*/
/*----------------------------------------------------------------------- 
   PackedDibGetBitsSize can be calculated from the height and row length
          if it's not explicitly in the biSizeImage field
  -----------------------------------------------------------------------*/
/*
int MYDIB::PackedDibGetBitsSize (BITMAPINFO * pPackedDib)
{
     if ((pPackedDib->bmiHeader.biSize != sizeof (BITMAPCOREHEADER)) &&
         (pPackedDib->bmiHeader.biSizeImage != 0))
         return pPackedDib->bmiHeader.biSizeImage ;

     return PackedDibGetHeight (pPackedDib) * 
            PackedDibGetRowLength (pPackedDib) ;
}
*/
// Copies a packed DIB (header, optional masks/palette, pixel rows) into a
// freshly created 32-bpp primary DIB.
//
// bitmapInfo  packed DIB to read; 8, 16 (5-5-5), 24 and 32 bpp are supported,
//             uncompressed (BI_RGB) or BI_BITFIELDS
//
// A negative biHeight marks a top-down source; its rows are reversed so the
// result is always stored bottom-up.
// Returns false with ErrStr set when the header is missing or unusable, the
// depth is unsupported, or the DIB cannot be allocated. Header problems are
// detected before the current primary DIB is touched.
bool MYDIB::LoadFromBMP(BITMAPINFO *bitmapInfo)
{
  unsigned char *bits, *dest, *prow;
  int i, j, k, rowsize, row, endadder;
  RGBQUAD *colorTable;
  DWORD numColors;
  bool topDown = false;

  if (bitmapInfo == NULL)
  {
    strcpy(ErrStr, INV_HDR);
    return false;
  }

  // Only depths with a decoder below are accepted; previously other depths
  // produced a blank image and still reported success.
  switch (bitmapInfo->bmiHeader.biBitCount)
  {
    case 8:
    case 16:
    case 24:
    case 32:
      break;
    default:
      strcpy(ErrStr, INV_CDEP);
      return false;
  }

  // Run-length and other compressed layouts cannot be read as plain rows.
  if ((bitmapInfo->bmiHeader.biCompression != BI_RGB) &&
      (bitmapInfo->bmiHeader.biCompression != BI_BITFIELDS))
  {
    strcpy(ErrStr, INV_HDR);
    return false;
  }

  if ((bitmapInfo->bmiHeader.biWidth <= 0) || (bitmapInfo->bmiHeader.biHeight == 0))
  {
    strcpy(ErrStr, INV_HDR);
    return false;
  }

  W = bitmapInfo->bmiHeader.biWidth;
  H = bitmapInfo->bmiHeader.biHeight;
  if (H < 0)
  {
    H = -H;
    topDown = true;
  }
  picBitsPerPixel = bitmapInfo->bmiHeader.biBitCount;
  BitsPerPixel = 32;

  if (!CreateDib(false))
  {
    strcpy(ErrStr, NO_MEM);
    return false;
  }

  // Destination rows are written bottom-up, so a top-down source is read
  // starting from its last row.
  if (topDown)
  {
    row = H - 1;
    endadder = -1;
  }
  else
  {
    row = 0;
    endadder = 1;
  }

  bits = (unsigned char *) bitmapInfo + bitmapInfo->bmiHeader.biSize;
  // With a plain BITMAPINFOHEADER, BI_BITFIELDS is followed by three DWORD
  // color masks that precede the pixel data.
  // NOTE: the masks themselves are not evaluated; 16-bpp data is decoded
  // as 5-5-5 regardless.
  if ((bitmapInfo->bmiHeader.biCompression == BI_BITFIELDS) &&
      (bitmapInfo->bmiHeader.biSize == sizeof(BITMAPINFOHEADER)))
    bits += 3 * sizeof(DWORD);

  dest = (unsigned char *) Bits;
  switch (picBitsPerPixel)
  {
    case 8:
      colorTable = (RGBQUAD *) bits;
      // biClrUsed == 0 means the palette has the full 256 entries.
      numColors = bitmapInfo->bmiHeader.biClrUsed;
      if (numColors == 0)
        numColors = 256;
      bits += sizeof(RGBQUAD) * numColors;

      rowsize = W;
      while (rowsize % 4)
        rowsize++;

      for (i = 0; i < H; i++)
      {
        prow = bits + (row * rowsize);
        for (j = 0; j < W; j++)
        {
          k = prow[j];
          *dest++ = colorTable[k].rgbBlue;
          *dest++ = colorTable[k].rgbGreen;
          *dest++ = colorTable[k].rgbRed;
          dest++;
        }
        row += endadder;
      }
      break;

    case 16:
      rowsize = W << 1;
      while (rowsize % 4)
        rowsize++;

      for (i = 0; i < H; i++)
      {
        prow = bits + (row * rowsize);
        for (j = 0; j < W; j++)
        {
          // 5-5-5 little-endian word, each field scaled to 8 bits.
          *dest++ = (prow[0] & 0x1F) << 3;
          *dest++ = ((prow[0] & 0xE0) >> 2) | ((prow[1] & 0x3) << 6);
          *dest++ = (prow[1] & 0x7C) << 1;
          dest++;
          prow += 2;
        }
        row += endadder;
      }
      break;

    case 24:
      rowsize = W * 3;
      while (rowsize % 4)
        rowsize++;

      for (i = 0; i < H; i++)
      {
        prow = bits + (row * rowsize);
        for (j = 0; j < W; j++)
        {
          *dest++ = *prow++;
          *dest++ = *prow++;
          *dest++ = *prow++;
          dest++;
        }
        row += endadder;
      }
      break;

    case 32:
      if (topDown)
      {
        rowsize = W << 2;
        for (i = 0; i < H; i++)
        {
          prow = bits + (row * rowsize);
          memcpy(dest, prow, rowsize);
          dest += rowsize;
          row += endadder;
        }
      }
      else
        memcpy(dest, bits, (size_t) W * H * 4);  // same layout: one bulk copy
      break;
  }
  return true;
}

bool MYDIB::readframe(unsigned char *frame[])
{
  unsigned char *yp, *up, *vp;
  unsigned char *src, *prow;
  int i, j;
  int r, g, b;
  int y, u, v;
  double fy, fu, fv;
  int cyr, cyg, cyb, cur, cug, cub, cvr, cvg, cvb;
  int rowbytes;
  double fcyr, fcyg, fcyb, fcur, fcug, fcub, fcvr, fcvg, fcvb;
  static int acoef[7][9] = {     /* alternate 32768 scaled table */
    {6963, 23442, 2363, -3768, -12648, 16384, 16384, -14877, -1507},
    {9798, 19235, 3736, -5538, -10846, 16384, 16384, -13730, -2654},
    {9798, 19235, 3736, -5538, -10846, 16384, 16384, -13730, -2654},
    {9830, 19333, 3604, -5538, -10846, 16384, 16384, -13795, -2589},
    {9798, 19235, 3736, -5538, -10846, 16384, 16384, -13730, -2654},
    {9798, 19235, 3736, -5538, -10846, 16384, 16384, -13730, -2654},
    {6947, 22970, 2851, -3801, -12583, 16384, 16384, -14582, -1802}};
  static double afycoef[7][3] = {
    {0.2125,0.7154,0.0721}, /* ITU-R Rec. 709 (1990) */
    {0.299, 0.587, 0.114},  /* unspecified */
    {0.299, 0.587, 0.114},  /* reserved */
    {0.30,  0.59,  0.11},   /* FCC */
    {0.299, 0.587, 0.114},  /* ITU-R Rec. 624-4 System B, G */
    {0.299, 0.587, 0.114},  /* SMPTE 170M */
    {0.212, 0.701, 0.087}}; /* SMPTE 240M (1987) */
  static double afucoef[7][3] = {
    {-0.115, -0.386, 0.500}, /* ITU-R Rec. 709 (1990) */
    {-0.169, -0.331, 0.500},  /* unspecified */
    {-0.169, -0.331, 0.500},  /* reserved */
    {-0.169, -0.331, 0.500},   /* FCC */
    {-0.169, -0.331, 0.500},  /* ITU-R Rec. 624-4 System B, G */
    {-0.169, -0.331, 0.500},  /* SMPTE 170M */
    {-0.116, -0.384, 0.500}}; /* SMPTE 240M (1987) */
  static double afvcoef[7][3] = {
    { 0.500, -0.454, -0.046}, /* ITU-R Rec. 709 (1990) */
    { 0.500, -0.419, -0.081},  /* unspecified */
    { 0.500, -0.419, -0.081},  /* reserved */
    { 0.500, -0.421, -0.079},   /* FCC */
    { 0.500, -0.419, -0.081},  /* ITU-R Rec. 624-4 System B, G */
    { 0.500, -0.419, -0.081},  /* SMPTE 170M */
    { 0.500, -0.445, -0.055}}; /* SMPTE 240M (1987) */

	buffer=(unsigned char *)GetBits(); //MYADD
	rowbytes=xSize;

  src = (unsigned char *) buffer + (vertical_size - 1) * rowbytes;

  if (chroma_format==CHROMA444)
  {
    u444 = frame[1];
    v444 = frame[2];
  }
  else
  {
    if (!u444)
    {
      u444 = (unsigned char *)malloc(width*height);
      if (u444 == NULL)
      {
        DisplayError("Could not allocate memory for u444.");
        return FALSE;
      }
      v444 = (unsigned char *)malloc(width*height);
      if (v444 == NULL)
      {
        DisplayError("Could not allocate memory for v444.");
        return FALSE;
      }
      if (chroma_format==CHROMA420)
      {
        u422 = (unsigned char *)malloc((width>>1)*height);
        if (u422 == NULL)
        {
          DisplayError("Could not allocate memory for u422.");
          return FALSE;
        }
        v422 = (unsigned char *)malloc((width>>1)*height);
        if (v422 == NULL)
        {
          DisplayError("Could not allocate memory for v422.");
          return FALSE;
        }
      }
    }
  }

  if (MMXMode && !UseFP)
  {
    switch (MMXMode)
    {
      case 5:   // AMD 3DNOW extensions, use MMX for now
      case 4:   // AMD 3DNOW, use MMX for now
      case 3:   // AMD MMX extensions, use MMX for now
      case 2:   // Intel SIMD or Athlon 3DNOW ext, use MMX for now
      case 1:   // Intel or AMD MMX
        RGBtoYUVmmx(src, frame[0], u444, v444, rowbytes, width,
		               horizontal_size, vertical_size);
		  break;
    }
  }
  else
  {
    i = matrix_coefficients;
    if (i>8)
      i = 3;

    if (UseFP)
    {
      fcyr = afycoef[i-1][0];
      fcyg = afycoef[i-1][1];
      fcyb = afycoef[i-1][2];
      fcur = afucoef[i-1][0];
      fcug = afucoef[i-1][1];
      fcub = afucoef[i-1][2];
      fcvr = afvcoef[i-1][0];
      fcvg = afvcoef[i-1][1];
      fcvb = afvcoef[i-1][2];
//      fcur = 0.5/(1.0-fcyb);
//      fcvr = 0.5/(1.0-fcyr);
      for (i=0; i<vertical_size; i++)
      {
        yp = frame[0] + i*width;
        up = u444 + i*width;
        vp = v444 + i*width;
        prow = src - (i * rowbytes);
        for (j=0; j<horizontal_size; j++)
        {
          b = *(prow++);
          g = *prow++;
          r = *prow++;
          prow++;
          /* convert to YUV */
          /* floating point version */
          fy = fcyr*r + fcyg*g + fcyb*b;
          fu = fcur*r + fcug*g + fcub*b;
          fv = fcvr*r + fcvg*g + fcvb*b;
//          fu = fcur*(b-fy);
//          fv = fcvr*(r-fy);
          yp[j] = (219.0/256.0)*fy + 16.5;  /* nominal range: 16..235 */
          up[j] = (224.0/256.0)*fu + 128.5; /* 16..240 */
          vp[j] = (224.0/256.0)*fv + 128.5; /* 16..240 */
        }
      }
    }
    else
    {
      cyr = acoef[i-1][0];
      cyg = acoef[i-1][1];
      cyb = acoef[i-1][2];
      cur = acoef[i-1][3];
      cug = acoef[i-1][4];
      cub = acoef[i-1][5];
      cvr = acoef[i-1][6];
      cvg = acoef[i-1][7];
      cvb = acoef[i-1][8];
//      cyr = coef[i-1][0];
//      cyg = coef[i-1][1];
//      cyb = coef[i-1][2];
//      cur = coef[i-1][3];
//      cvr = coef[i-1][4];
      for (i=0; i<vertical_size; i++)
      {
        yp = frame[0] + i*width;
        up = u444 + i*width;
        vp = v444 + i*width;
        prow = src - (i * rowbytes);
        for (j=0; j<horizontal_size; j++)
        {
          b = *prow++;
          g = *prow++;
          r = *prow++;
          prow++;
          /* convert to YUV */
          /* 32768 scaled version */
          y = cyr*r + cyg*g + cyb*b;
          u = cur*r + cug*g + cub*b;
          v = cvr*r + cvg*g + cvb*b;
          yp[j] = (((219 * y) >> 8) + 540672) >> 15;  /* nominal range: 16..235 */
          up[j] = (((224 * u) >> 8) + 4210688) >> 15; /* 16..240 */
          vp[j] = (((224 * v) >> 8) + 4210688) >> 15; /* 16..240 */
//          u = cur*(((b << 15) - y)>>15);
//          v = cvr*(((r << 15) - y)>>15);
//          yp[j] = (((219 * y) >> 8) + 540672) >> 15;  /* nominal range: 16..235 */
//          up[j] = ((u >> 8) + 4210688) >> 15; /* 16..240 */
//          vp[j] = ((v >> 8) + 4210688) >> 15; /* 16..240 */
        }
      }
    }
  }

  border_extend(frame[0],horizontal_size,vertical_size,width,height);
  border_extend(u444,horizontal_size,vertical_size,width,height);
  border_extend(v444,horizontal_size,vertical_size,width,height);

  if (chroma_format==CHROMA422)
  {
    conv444to422(u444,frame[1]);
    conv444to422(v444,frame[2]);
  }

  if (chroma_format==CHROMA420)
  {
    conv444to422(u444,u422);
    conv444to422(v444,v422);
    conv422to420(u422,frame[1]);
    conv422to420(v422,frame[2]);
  }
  return TRUE;
}

/*
 * Pad a w1 x h1 picture stored in a w2 x h2 plane (row stride w2) out to
 * the full plane size by replicating its edge samples.
 *
 *   frame  - top-left sample of the plane, modified in place
 *   w1, h1 - size of the area that already holds valid samples
 *   w2, h2 - full plane size; w2 is also the distance between rows
 *
 * Columns w1..w2-1 of the first h1 rows receive the last valid sample of
 * their row; rows h1..h2-1 are then copies of the row directly above, so
 * the right-hand padding is carried down into the bottom border as well.
 */
void MYDIB::border_extend(unsigned char *frame, int w1, int h1, int w2, int h2)
{
  unsigned char *row;
  unsigned char *above;
  int x, y;

  /* right border: each new sample copies its left neighbour, which
     propagates row[w1-1] across the rest of the row */
  row = frame;
  y = 0;
  while (y < h1)
  {
    for (x = w1; x < w2; x++)
      row[x] = row[x-1];
    row += w2;
    y++;
  }

  /* bottom border: duplicate the previous row, one full row at a time */
  y = h1;
  while (y < h2)
  {
    row = frame + y*w2;
    above = row - w2;
    for (x = 0; x < w2; x++)
      row[x] = above[x];
    y++;
  }
}

/*
 * Horizontal filter and 2:1 subsampling (4:4:4 chroma -> 4:2:2 chroma).
 *
 *   src - input plane, `width` samples per row, `height` rows
 *   dst - output plane, width>>1 samples per row, `height` rows
 *
 * Two FIR filters are available, selected by video_type:
 *   - video_type <  MPEG_MPEG2: 12-tap filter (228, 70, -37, -21, 11, 5,
 *     mirrored) with a half-sample phase shift; the taps sum to 512.
 *   - otherwise: 11-tap filter 22 0 -52 0 159 256 159 0 -52 0 22.
 * Every result is rounded (+256), scaled by >>9 and passed through clp[].
 * NOTE(review): clp is defined outside this block - presumably a clipping
 * lookup table that saturates to 0..255; confirm.
 *
 * The first three and last three output samples of each row are written
 * out by hand with the out-of-range taps clamped to src[0] / src[width-1];
 * this is the commented-out im/ip index clamping of the general formula,
 * unrolled so that the interior loop needs no bounds tests.  The unrolled
 * edges read up to src[10] and src[width-11], so the code assumes width is
 * even and at least 12 - TODO confirm against callers.
 */
void MYDIB::conv444to422(unsigned char *src, unsigned char *dst)
{
  int i, j, w; //, im5, im4, im3, im2, im1, ip1, ip2, ip3, ip4, ip5, ip6;

  w = width >> 1;   /* output samples per row */
  if (video_type < MPEG_MPEG2)
  {
    for (j=0; j<height; j++)
    {
      /* left edge, output sample 0 (i = 0): taps left of src[0] clamp to src[0] */
      dst[0] = clp[(int)(228*(src[0]+src[1])
                         +70*(src[0]+src[2])
                         -37*(src[0]+src[3])
                         -21*(src[0]+src[4])
                         +11*(src[0]+src[5])
                         + 5*(src[0]+src[6])+256)>>9];

      /* left edge, output sample 1 (i = 2) */
      dst[1] = clp[(int)(228*(src[2]+src[3])
                         +70*(src[1]+src[4])
                         -37*(src[0]+src[5])
                         -21*(src[0]+src[6])
                         +11*(src[0]+src[7])
                         + 5*(src[0]+src[8])+256)>>9];

      /* left edge, output sample 2 (i = 4) */
      dst[2] = clp[(int)(228*(src[4]+src[5])
                         +70*(src[3]+src[6])
                         -37*(src[2]+src[7])
                         -21*(src[1]+src[8])
                         +11*(src[0]+src[9])
                         + 5*(src[0]+src[10])+256)>>9];

      /* interior: every tap i-5 .. i+6 lies inside the row, no clamping needed */
      for (i=6; i<width-6; i+=2)
      {
//      im5 = (i<5) ? 0 : i-5;
//      im4 = (i<4) ? 0 : i-4;
//      im3 = (i<3) ? 0 : i-3;
//      im2 = (i<2) ? 0 : i-2;
//      im1 = (i<1) ? 0 : i-1;
//      ip1 = (i<width-1) ? i+1 : width-1;
//      ip2 = (i<width-2) ? i+2 : width-1;
//      ip3 = (i<width-3) ? i+3 : width-1;
//      ip4 = (i<width-4) ? i+4 : width-1;
//      ip5 = (i<width-5) ? i+5 : width-1;
//      ip6 = (i<width-6) ? i+6 : width-1;

        /* FIR filter with 0.5 sample interval phase shift */
        dst[i>>1] = clp[(int)(228*(src[i]+src[i+1])
                         +70*(src[i-1]+src[i+2])
                         -37*(src[i-2]+src[i+3])
                         -21*(src[i-3]+src[i+4])
                         +11*(src[i-4]+src[i+5])
                         + 5*(src[i-5]+src[i+6])+256)>>9];
//        dst[i>>1] = clp[(int)(228*(src[i]+src[ip1])
//                         +70*(src[im1]+src[ip2])
//                         -37*(src[im2]+src[ip3])
//                         -21*(src[im3]+src[ip4])
//                         +11*(src[im4]+src[ip5])
//                         + 5*(src[im5]+src[ip6])+256)>>9];
      }
      /* right edge (i = width-6): taps past the row end clamp to src[width-1] */
      dst[w-3] = clp[(int)(228*(src[width-6]+src[width-5])
                           +70*(src[width-7]+src[width-4])
                           -37*(src[width-8]+src[width-3])
                           -21*(src[width-9]+src[width-2])
                           +11*(src[width-10]+src[width-1])
                           + 5*(src[width-11]+src[width-1])+256)>>9];

      /* right edge (i = width-4) */
      dst[w-2] = clp[(int)(228*(src[width-4]+src[width-3])
                           +70*(src[width-5]+src[width-2])
                           -37*(src[width-6]+src[width-1])
                           -21*(src[width-7]+src[width-1])
                           +11*(src[width-8]+src[width-1])
                           + 5*(src[width-9]+src[width-1])+256)>>9];

      /* right edge (i = width-2) */
      dst[w-1] = clp[(int)(228*(src[width-2]+src[width-1])
                           +70*(src[width-3]+src[width-1])
                           -37*(src[width-4]+src[width-1])
                           -21*(src[width-5]+src[width-1])
                           +11*(src[width-6]+src[width-1])
                           + 5*(src[width-7]+src[width-1])+256)>>9];

      /* advance to the next input / output row */
      src+= width;
      dst+= width>>1;
    }
  }
  else
  {
    /* MPEG-2 */
    for (j=0; j<height; j++)
    {
      /* left edge (i = 0, 2, 4): taps left of src[0] clamp to src[0] */
      dst[0] = clp[(int)(  22*(src[0]+src[5])-52*(src[0]+src[3])
                       +159*(src[0]+src[1])+256*src[0]+256)>>9];

      dst[1] = clp[(int)(  22*(src[0]+src[7])-52*(src[0]+src[5])
                       +159*(src[1]+src[3])+256*src[2]+256)>>9];

      dst[2] = clp[(int)(  22*(src[0]+src[9])-52*(src[1]+src[7])
                       +159*(src[3]+src[5])+256*src[4]+256)>>9];

      /* interior: taps i-5 .. i+5 all lie inside the row */
      for (i=6; i<width-6; i+=2)
      {
//        im5 = (i<5) ? 0 : i-5;
//        im3 = (i<3) ? 0 : i-3;
//        im1 = (i<1) ? 0 : i-1;
//        ip1 = (i<width-1) ? i+1 : width-1;
//        ip3 = (i<width-3) ? i+3 : width-1;
//        ip5 = (i<width-5) ? i+5 : width-1;

        /* FIR filter coefficients (*512): 22 0 -52 0 159 256 159 0 -52 0 22 */
        dst[i>>1] = clp[(int)(  22*(src[i-5]+src[i+5])-52*(src[i-3]+src[i+3])
                         +159*(src[i-1]+src[i+1])+256*src[i]+256)>>9];
//        dst[i>>1] = clp[(int)(  22*(src[im5]+src[ip5])-52*(src[im3]+src[ip3])
//                         +159*(src[im1]+src[ip1])+256*src[i]+256)>>9];
      }
      /* right edge (i = width-6, width-4, width-2): taps past the row end
         clamp to src[width-1] */
      dst[w-3] = clp[(int)(  22*(src[width-11]+src[width-1])-52*(src[width-9]+src[width-3])
                      +159*(src[width-7]+src[width-5])+256*src[width-6]+256)>>9];

      dst[w-2] = clp[(int)(  22*(src[width-9]+src[width-1])-52*(src[width-7]+src[width-1])
                      +159*(src[width-5]+src[width-3])+256*src[width-4]+256)>>9];

      dst[w-1] = clp[(int)(  22*(src[width-7]+src[width-1])-52*(src[width-5]+src[width-1])
                      +159*(src[width-3]+src[width-1])+256*src[width-2]+256)>>9];

      /* advance to the next input / output row */
      src+= width;
      dst+= w;
//      dst+= width>>1;
    }
  }
}

/*
 * Vertical filter and 2:1 subsampling (4:2:2 chroma -> 4:2:0 chroma).
 *
 *   src - input plane, width>>1 samples per row, `height` rows
 *   dst - output plane, width>>1 samples per row, height>>1 rows
 *
 * The plane is processed one column at a time (src/dst advance by one
 * sample per outer iteration); moving to the next row means stepping the
 * index by w.  prog_frame selects the filter:
 *   - non-zero ("intra frame"): the 12-tap half-sample-phase filter
 *     (228, 70, -37, -21, 11, 5, mirrored) applied across all rows.
 *   - zero ("intra field"): a 12-tap quarter-sample-phase filter applied
 *     separately to even rows (top field, taps clamped to rows 0 and
 *     height-2) and odd rows (bottom field, same taps mirrored, clamped
 *     to rows 1 and height-1).
 * Both tap sets sum to 512; results are rounded (+256), scaled by >>9 and
 * passed through clp[].
 * NOTE(review): clp is defined outside this block - presumably a clipping
 * lookup table that saturates to 0..255; confirm.
 *
 * As in conv444to422, the first and last output rows are unrolled by hand
 * with out-of-range taps clamped (see the commented-out jm/jp formulas),
 * so the interior loops need no bounds tests.  The unrolled rows read as
 * far as row 21 and row height-22 in the field case, so the code assumes a
 * sufficiently tall picture (height a multiple of 4 for field mode) -
 * TODO confirm against callers.
 */
void MYDIB::conv422to420(unsigned char *src, unsigned char *dst)
{
  int w, h2, i, j; // , jm6, jm5, jm4, jm3, jm2, jm1;
//  int jp1, jp2, jp3, jp4, jp5, jp6;

  w = width>>1;     /* samples per row, also the row stride of src and dst */
  h2 = height>>1;   /* number of output rows */

  if (prog_frame)
  {
    /* intra frame */
    for (i=0; i<w; i++)
    {
      /* top edge, output row 0 (j = 0): rows above row 0 clamp to row 0 */
      dst[0] = clp[(int)(228*(src[0]+src[w])
                         +70*(src[0]+src[w<<1])
                         -37*(src[0]+src[w*3])
                         -21*(src[0]+src[w<<2])
                         +11*(src[0]+src[w*5])
                         + 5*(src[0]+src[w*6])+256)>>9];

      /* top edge, output row 1 (j = 2) */
      dst[w] = clp[(int)(228*(src[w<<1]+src[w*3])
                         +70*(src[w*1]+src[w<<2])
                         -37*(src[0]+src[w*5])
                         -21*(src[0]+src[w*6])
                         +11*(src[0]+src[w*7])
                         + 5*(src[0]+src[w<<3])+256)>>9];

      /* top edge, output row 2 (j = 4).
         NOTE(review): dst[width] is row 2 only when width is even
         (width == 2*w); the other rows are indexed through w. */
      dst[width] = clp[(int)(228*(src[w<<2]+src[w*5])
                             +70*(src[w*3]+src[w*6])
                             -37*(src[w<<1]+src[w*7])
                             -21*(src[w*1]+src[w<<3])
                             +11*(src[0]+src[w*9])
                             + 5*(src[0]+src[w*10])+256)>>9];


      /* interior: rows j-5 .. j+6 all exist, no clamping needed */
      for (j=6; j<height-6; j+=2)
      {
//        jm5 = (j<5) ? 0 : j-5;
//        jm4 = (j<4) ? 0 : j-4;
//        jm3 = (j<3) ? 0 : j-3;
//        jm2 = (j<2) ? 0 : j-2;
//        jm1 = (j<1) ? 0 : j-1;
//        jp1 = (j<height-1) ? j+1 : height-1;
//        jp2 = (j<height-2) ? j+2 : height-1;
//        jp3 = (j<height-3) ? j+3 : height-1;
//        jp4 = (j<height-4) ? j+4 : height-1;
//        jp5 = (j<height-5) ? j+5 : height-1;
//        jp6 = (j<height-6) ? j+6 : height-1;

        /* FIR filter with 0.5 sample interval phase shift */
        dst[w*(j>>1)] = clp[(int)(228*(src[w*j]+src[w*(j+1)])
                                  +70*(src[w*(j-1)]+src[w*(j+2)])
                                  -37*(src[w*(j-2)]+src[w*(j+3)])
                                  -21*(src[w*(j-3)]+src[w*(j+4)])
                                  +11*(src[w*(j-4)]+src[w*(j+5)])
                                  + 5*(src[w*(j-5)]+src[w*(j+6)])+256)>>9];
//        dst[w*(j>>1)] = clp[(int)(228*(src[w*j]+src[w*jp1])
//                             +70*(src[w*jm1]+src[w*jp2])
//                             -37*(src[w*jm2]+src[w*jp3])
//                             -21*(src[w*jm3]+src[w*jp4])
//                             +11*(src[w*jm4]+src[w*jp5])
//                             + 5*(src[w*jm5]+src[w*jp6])+256)>>9];
      }
      /* bottom edge (j = height-6): rows below the last clamp to row height-1 */
      dst[w*(h2-3)] = clp[(int)(228*(src[w*(height-6)]+src[w*(height-5)])
                                +70*(src[w*(height-7)]+src[w*(height-4)])
                                -37*(src[w*(height-8)]+src[w*(height-3)])
                                -21*(src[w*(height-9)]+src[w*(height-2)])
                                +11*(src[w*(height-10)]+src[w*(height-1)])
                                + 5*(src[w*(height-11)]+src[w*(height-1)])+256)>>9];

      /* bottom edge (j = height-4) */
      dst[w*(h2-2)] = clp[(int)(228*(src[w*(height-4)]+src[w*(height-3)])
                                +70*(src[w*(height-5)]+src[w*(height-2)])
                                -37*(src[w*(height-6)]+src[w*(height-1)])
                                -21*(src[w*(height-7)]+src[w*(height-1)])
                                +11*(src[w*(height-8)]+src[w*(height-1)])
                                + 5*(src[w*(height-9)]+src[w*(height-1)])+256)>>9];

      /* bottom edge (j = height-2) */
      dst[w*(h2-1)] = clp[(int)(228*(src[w*(height-2)]+src[w*(height-1)])
                                +70*(src[w*(height-3)]+src[w*(height-1)])
                                -37*(src[w*(height-4)]+src[w*(height-1)])
                                -21*(src[w*(height-5)]+src[w*(height-1)])
                                +11*(src[w*(height-6)]+src[w*(height-1)])
                                + 5*(src[w*(height-7)]+src[w*(height-1)])+256)>>9];

      /* next column */
      src++;
      dst++;
    }
  }
  else
  {
    /* intra field */
    for (i=0; i<w; i++)
    {

      /* top edge, j = 0, top field: even rows only, rows above clamp to row 0 */
      dst[0] = clp[(int)(8*src[0] +5*src[0] -30*src[0]
                          -18*src[0] +113*src[0] +242*src[0]
                         +192*src[w<<1] +35*src[w<<2] -38*src[w*6]
                          -10*src[w<<3] +11*src[w*10] +2*src[w*12]+256)>>9];

      /* top edge, j = 0, bottom field: odd rows only, rows above clamp to row 1 */
      dst[w] = clp[(int)(8*src[w*13] +5*src[w*11] -30*src[w*9]
                              -18*src[w*7] +113*src[w*5] +242*src[w*3]
                             +192*src[w] +35*src[w] -38*src[w]
                              -10*src[w] +11*src[w] +2*src[w]+256)>>9];

      /* top edge, j = 4, top field */
      dst[w<<1] = clp[(int)(8*src[0] +5*src[0] -30*src[0]
                          -18*src[0] +113*src[w<<1] +242*src[w<<2]
                         +192*src[w*6] +35*src[w<<3] -38*src[w*10]
                          -10*src[w*12] +11*src[w*14] +2*src[w<<4]+256)>>9];

      /* top edge, j = 4, bottom field */
      dst[w*3] = clp[(int)(8*src[w*17] +5*src[w*15] -30*src[w*13]
                              -18*src[w*11] +113*src[w*9] +242*src[w*7]
                             +192*src[w*5] +35*src[w*3] -38*src[w]
                              -10*src[w] +11*src[w] +2*src[w]+256)>>9];

      /* top edge, j = 8, top field */
      dst[w<<2] = clp[(int)(8*src[0] +5*src[0] -30*src[w<<1]
                          -18*src[w<<2] +113*src[w*6] +242*src[w<<3]
                         +192*src[w*10] +35*src[w*12] -38*src[w*14]
                          -10*src[w<<4] +11*src[w*18] +2*src[w*20]+256)>>9];

      /* top edge, j = 8, bottom field */
      dst[w*5] = clp[(int)(8*src[w*21] +5*src[w*19] -30*src[w*17]
                              -18*src[w*15] +113*src[w*13] +242*src[w*11]
                             +192*src[w*9] +35*src[w*7] -38*src[w*5]
                              -10*src[w*3] +11*src[w] +2*src[w]+256)>>9];

      /* interior: each step of 4 input rows yields one top-field and one
         bottom-field output row; all taps (j-10 .. j+13) are in range */
      for (j=12; j<height-14; j+=4)
      {
        /* top field */
//        jm5 = (j<10) ? 0 : j-10;
//        jm4 = (j<8) ? 0 : j-8;
//        jm3 = (j<6) ? 0 : j-6;
//        jm2 = (j<4) ? 0 : j-4;
//        jm1 = (j<2) ? 0 : j-2;
//        jp1 = (j<height-2) ? j+2 : height-2;
//        jp2 = (j<height-4) ? j+4 : height-2;
//        jp3 = (j<height-6) ? j+6 : height-2;
//        jp4 = (j<height-8) ? j+8 : height-2;
//        jp5 = (j<height-10) ? j+10 : height-2;
//        jp6 = (j<height-12) ? j+12 : height-2;

        /* FIR filter with 0.25 sample interval phase shift */
        dst[w*(j>>1)] = clp[(int)(8*src[w*(j-10)] +5*src[w*(j-8)] -30*src[w*(j-6)]
                           -18*src[w*(j-4)] +113*src[w*(j-2)] +242*src[w*j]
                          +192*src[w*(j+2)] +35*src[w*(j+4)] -38*src[w*(j+6)]
                           -10*src[w*(j+8)] +11*src[w*(j+10)] +2*src[w*(j+12)]+256)>>9];
//        dst[w*(j>>1)] = clp[(int)(8*src[w*jm5] +5*src[w*jm4] -30*src[w*jm3]
//                           -18*src[w*jm2] +113*src[w*jm1] +242*src[w*j]
//                          +192*src[w*jp1] +35*src[w*jp2] -38*src[w*jp3]
//                           -10*src[w*jp4] +11*src[w*jp5] +2*src[w*jp6]+256)>>9];

        /* bottom field */
//        jm6 = (j<9) ? 1 : j-9;
//        jm5 = (j<7) ? 1 : j-7;
//        jm4 = (j<5) ? 1 : j-5;
//        jm3 = (j<3) ? 1 : j-3;
//        jm2 = (j<1) ? 1 : j-1;
//        jm1 = (j<height-1) ? j+1 : height-1;
//        jp1 = (j<height-3) ? j+3 : height-1;
//        jp2 = (j<height-5) ? j+5 : height-1;
//        jp3 = (j<height-7) ? j+7 : height-1;
//        jp4 = (j<height-9) ? j+9 : height-1;
//        jp5 = (j<height-11) ? j+11 : height-1;
//        jp6 = (j<height-13) ? j+13 : height-1;

        /* FIR filter with 0.25 sample interval phase shift */
        dst[w*((j>>1)+1)] = clp[(int)(8*src[w*(j+13)] +5*src[w*(j+11)]
                               -30*src[w*(j+9)] -18*src[w*(j+7)]
                              +113*src[w*(j+5)] +242*src[w*(j+3)]
                              +192*src[w*(j+1)] +35*src[w*(j-1)]
                               -38*src[w*(j-3)] -10*src[w*(j-5)]
                               +11*src[w*(j-7)] +2*src[w*(j-9)]+256)>>9];
//        dst[w*((j>>1)+1)] = clp[(int)(8*src[w*jp6] +5*src[w*jp5]
//                             -30*src[w*jp4] -18*src[w*jp3]
//                            +113*src[w*jp2] +242*src[w*jp1]
//                            +192*src[w*jm1] +35*src[w*jm2]
//                             -38*src[w*jm3] -10*src[w*jm4]
//                             +11*src[w*jm5] +2*src[w*jm6]+256)>>9];
      }



      /* bottom edge, j = height-12, top field: rows past the end clamp to
         the last even row, height-2 */
      dst[w*(h2-6)] = clp[(int)(8*src[w*(height-22)] +5*src[w*(height-20)] -30*src[w*(height-18)]
                              -18*src[w*(height-16)] +113*src[w*(height-14)] +242*src[w*(height-12)]
                         +192*src[w*(height-10)] +35*src[w*(height-8)] -38*src[w*(height-6)]
                          -10*src[w*(height-4)] +11*src[w*(height-2)] +2*src[w*(height-2)]+256)>>9];

      /* bottom edge, j = height-12, bottom field: rows past the end clamp
         to the last odd row, height-1 */
      dst[w*(h2-5)] = clp[(int)(8*src[w*(height-1)] +5*src[w*(height-1)] -30*src[w*(height-3)]
                              -18*src[w*(height-5)] +113*src[w*(height-7)] +242*src[w*(height-9)]
                             +192*src[w*(height-11)] +35*src[w*(height-13)] -38*src[w*(height-15)]
                              -10*src[w*(height-17)] +11*src[w*(height-19)] +2*src[w*(height-21)]+256)>>9];

      /* bottom edge, j = height-8, top field */
      dst[w*(h2-4)] = clp[(int)(8*src[w*(height-18)] +5*src[w*(height-16)] -30*src[w*(height-14)]
                          -18*src[w*(height-12)] +113*src[w*(height-10)] +242*src[w*(height-8)]
                         +192*src[w*(height-6)] +35*src[w*(height-4)] -38*src[w*(height-2)]
                          -10*src[w*(height-2)] +11*src[w*(height-2)] +2*src[w*(height-2)]+256)>>9];

      /* bottom edge, j = height-8, bottom field */
      dst[w*(h2-3)] = clp[(int)(8*src[w*(height-1)] +5*src[w*(height-1)] -30*src[w*(height-1)]
                              -18*src[w*(height-1)] +113*src[w*(height-3)] +242*src[w*(height-5)]
                             +192*src[w*(height-7)] +35*src[w*(height-9)] -38*src[w*(height-11)]
                              -10*src[w*(height-13)] +11*src[w*(height-15)] +2*src[w*(height-17)]+256)>>9];

      /* bottom edge, j = height-4, top field */
      dst[w*(h2-2)] = clp[(int)(8*src[w*(height-14)] +5*src[w*(height-12)] -30*src[w*(height-10)]
                          -18*src[w*(height-8)] +113*src[w*(height-6)] +242*src[w*(height-4)]
                         +192*src[w*(height-2)] +35*src[w*(height-2)] -38*src[w*(height-2)]
                          -10*src[w*(height-2)] +11*src[w*(height-2)] +2*src[w*(height-2)]+256)>>9];

      /* bottom edge, j = height-4, bottom field */
      dst[w*(h2-1)] = clp[(int)(8*src[w*(height-1)] +5*src[w*(height-1)] -30*src[w*(height-1)]
                              -18*src[w*(height-1)] +113*src[w*(height-1)] +242*src[w*(height-1)]
                             +192*src[w*(height-3)] +35*src[w*(height-5)] -38*src[w*(height-7)]
                              -10*src[w*(height-9)] +11*src[w*(height-11)] +2*src[w*(height-13)]+256)>>9];



      /* next column */
      src++;
      dst++;
    }
  }
}


/*
 * Error reporting hook.
 *
 *   txt - message describing the failure
 *
 * The message-box implementation below is commented out, so this function
 * currently does nothing: txt is ignored and callers only signal failure
 * through their own return values.
 * NOTE(review): the disabled code refers to AppName and MPEGDialog, which
 * are not defined in the visible part of this file - presumably left over
 * from the application this class was taken from; confirm before
 * re-enabling.
 */
void MYDIB::DisplayError(char *txt)
{
/*  char tmpStr[30];

  sprintf(tmpStr, "%s - error", AppName);
  if (MPEGDialog)
    MPEGDialog->MessageBox(txt, tmpStr, MB_OK | MB_ICONERROR);
  else
    ::MessageBox(NULL, txt, tmpStr, MB_OK | MB_ICONERROR);*/
}


/*
 * Select the fixed-point RGB->YUV coefficient rows read by RGBtoYUVmmx().
 *
 *   coeffs - 1-based row selector for the ycoef/ucoef/vcoef tables
 *            (callers pass the stream's matrix_coefficients code)
 *
 * Points the file-level ycoefs/ucoefs/vcoefs pointers at row coeffs-1 of
 * the respective table.  The tables hold 7 rows, so only 1..7 are valid
 * selectors; any other value falls back to row 3.
 *
 * The previous guard was "coeffs > 8", which let coeffs == 8 index one row
 * past the end of each table and let coeffs <= 0 index before its start.
 */
void MYDIB::init_rgb_to_yuv_mmx(int coeffs)
{
  /* number of rows in each coefficient table (all three have the same size) */
  const int numRows = (int)(sizeof(ycoef) / sizeof(ycoef[0]));
  int row;

  row = coeffs;
  if (row < 1 || row > numRows)
    row = 3;   /* same fallback the original used for out-of-range codes */

  ycoefs = &ycoef[row-1][0];
  ucoefs = &ucoef[row-1][0];
  vcoefs = &vcoef[row-1][0];
}

/*
 * Convert 32-bit pixels to three 8-bit Y, U and V planes using MMX.
 *
 *   src         - first source row to convert; each pixel is 4 bytes in
 *                 b, g, r, pad order.  Later rows are read at DECREASING
 *                 addresses (srcrowsize is subtracted after every row).
 *   desty/destu/destv - output planes, one byte per pixel, written at
 *                 increasing addresses
 *   srcrowsize  - value subtracted from the source pointer after each row
 *   destrowsize - value added to each destination pointer after each row
 *   width       - pixels per row to convert; processed two at a time, so
 *                 an odd trailing pixel is not converted
 *   height      - number of rows to convert
 *
 * Note that the width/height parameters hide the class members of the
 * same name inside this function.
 *
 * Coefficients come from the file-level ycoefs/ucoefs/vcoefs pointers
 * (set by init_rgb_to_yuv_mmx) and are scaled by 32768.  The results are
 * mapped to the nominal ranges with
 *     Y   = ((219*y >> 8) + 540672)  >> 15     (540672  = 16.5  * 32768)
 *     U,V = ((224*c >> 8) + 4210688) >> 15     (4210688 = 128.5 * 32768)
 * which is the same arithmetic as the integer C path of the caller.
 *
 * Nothing is done if width>>1 or height is not positive.  The locals
 * yp/up/vp/prow/i/j are memory scratch for the assembly block.
 */
void MYDIB::RGBtoYUVmmx(unsigned char *src, unsigned char *desty, unsigned char *destu,
                 unsigned char *destv, int srcrowsize, int destrowsize,
                 int width, int height)
{
  unsigned char *yp, *up, *vp;
  unsigned char *prow;
  int i, j;

  _asm {
        // j = width / 2 (pixel pairs per row); bail out if there are none
	xor       edx, edx
    	mov       eax, width
	sar       eax,1
        cmp       edx, eax
        jge       yuvexit

        // i = height (rows remaining); bail out if there are none
	mov       j, eax
	mov       eax, height
        mov       i, eax
	cmp       edx, eax
	jge       yuvexit

        // per-row cursors start at the pointers passed in
	mov       eax, desty
	mov       yp, eax
	mov       eax, destu
	mov       up, eax
	mov       eax, destv
	mov       vp, eax
	mov       eax, src
	mov       prow, eax
        pxor      MM7, MM7    // MM7 = 0, used to widen bytes to words
        mov       eax, i

      heighttop:

        // row setup: eax holds the remaining row count on entry
        mov       i, eax
        mov       edi, j      // edi = pixel pairs left in this row
        mov       ebx, prow   // ebx = source cursor
        mov       ecx, yp     // ecx = Y cursor
        mov       edx, up     // edx = U cursor
        mov       esi, vp     // esi = V cursor

      widthtop:
        movq      MM5, [ebx]  // MM5 has 0 r2 g2 b2 0 r1 g1 b1, two pixels
        add       ebx, 8
        movq      MM6, MM5
        punpcklbw MM5, MM7 // MM5 has 0 r1 g1 b1
        punpckhbw MM6, MM7 // MM6 has 0 r2 g2 b2

        // ---- Y for both pixels ----
        movq      MM0, MM5
        movq      MM1, MM6
        mov       eax, ycoefs
        pmaddwd   MM0, [eax] // MM0 has r1*cr and g1*cg+b1*cb
        movq      MM2, MM0
        psrlq     MM2, 32
        paddd     MM0, MM2   // MM0 has y1 in lower 32 bits
        pmaddwd   MM1, [eax] // MM1 has r2*cr and g2*cg+b2*cb
        movq      MM2, MM1
        psrlq     MM2, 32
        paddd     MM1, MM2   // MM1 has y2 in lower 32 bits
        movd      eax, MM0
        imul      eax, 219
        shr       eax, 8
        add       eax, 540672
        shr       eax, 15
        mov       [ecx], al
        inc       ecx
        movd      eax, MM1
        imul      eax, 219
        shr       eax, 8
        add       eax, 540672
        shr       eax, 15
        mov       [ecx], al
        inc       ecx

        // ---- U for both pixels (sar: the weighted sum may be negative) ----
        movq      MM0, MM5
        movq      MM1, MM6
        mov       eax, ucoefs
        pmaddwd   MM0, [eax] // MM0 has r1*cr and g1*cg+b1*cb
        movq      MM2, MM0
        psrlq     MM2, 32
        paddd     MM0, MM2   // MM0 has u1 in lower 32 bits
        pmaddwd   MM1, [eax] // MM1 has r2*cr and g2*cg+b2*cb
        movq      MM2, MM1
        psrlq     MM2, 32
        paddd     MM1, MM2   // MM1 has u2 in lower 32 bits
        movd      eax, MM0
        imul      eax, 224
        sar       eax, 8
        add       eax, 4210688
        shr       eax, 15
        mov       [edx], al
        inc       edx
        movd      eax, MM1
        imul      eax, 224
        sar       eax, 8
        add       eax, 4210688
        shr       eax, 15
        mov       [edx], al
        inc       edx

        // ---- V for both pixels (MM5/MM6 are consumed here, last use) ----
        mov       eax, vcoefs
        pmaddwd   MM5, [eax] // MM5 has r1*cr and g1*cg+b1*cb
        movq      MM2, MM5
        psrlq     MM2, 32
        paddd     MM5, MM2   // MM5 has v1 in lower 32 bits
        pmaddwd   MM6, [eax] // MM6 has r2*cr and g2*cg+b2*cb
        movq      MM2, MM6
        // Unlike the other sums this shifts MM6 rather than the MM2 copy;
        // the low dword after paddd is still high+low, so v2 is correct.
        psrlq     MM6, 32
        paddd     MM6, MM2   // MM6 has v2 in lower 32 bits
        movd      eax, MM5
        imul      eax, 224
        sar       eax, 8
        add       eax, 4210688
        shr       eax, 15
        mov       [esi], al
        inc       esi
        movd      eax, MM6
        imul      eax, 224
        sar       eax, 8
        add       eax, 4210688
        shr       eax, 15
        mov       [esi], al
        inc       esi

        dec       edi
        jnz       widthtop

        // next row: destinations move forward, the source moves backward
        mov       eax, destrowsize
        add       yp, eax
        add       up, eax
        add       vp, eax
        mov       eax, srcrowsize
        sub       prow, eax
        mov       eax, i
        dec       eax
        jnz       heighttop

      yuvexit:
        emms                 // leave MMX state so x87 floating point works again
      }
}

