[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Patch for "mga" driver to enable XVideo support for Matrox Millennium II cards



Hi there Daniel and other Debian developers. I'm not sure if you know about this,
but there is a patch for the mga driver that adds XVideo extension support for Matrox Millennium II cards (both PCI and AGP versions). The homepage for this project is at http://www.penguintown.net/~gorlik/millennium.html
I hope the Debian X server can include this, ideally before sarge. I think it has a DFSG-compatible licence. There is some info about it on Freshmeat.
The version stands at rc1, I think. The patch is meant for the XFree86 4.2.0 source tree.
 
Many regards,
Safir Secerovic
Linux Users Group of Bosnia and Herzegovina
www.linux.org.ba


Do you Yahoo!?
Yahoo! Small Business $15K Web Design Giveaway - Enter today
62a63,66
> static XF86VideoAdaptorPtr MGASetupImageVideoILOAD(ScreenPtr pScreen);
> static int MGAPutImageILOAD(ScrnInfoPtr pScrn, short src_x, short src_y, short drw_x, short drw_y,
>   short src_w, short src_h, short drw_w, short drw_h, int id, unsigned char* buf, short width, short height, 
>   Bool Sync, RegionPtr clipBoxes, pointer data);
78c82,83
<        ((pMga->Chipset == PCI_CHIP_MGAG200) ||
---
>        ((pMga->Chipset == PCI_CHIP_MGA2164) ||
>         (pMga->Chipset == PCI_CHIP_MGAG200) ||     
83c88,98
< 	if((pMga->Overlay8Plus24 || pMga->TexturedVideo) &&
---
>       if( (pMga->Chipset == PCI_CHIP_MGA2164) ) {
> 	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Using MGA 2164W ILOAD video\n");
> 	xf86DrvMsg(pScrn->scrnIndex, X_INFO, 
> 	"This is an experimenteal driver and may not work on your machine.\n");
> 	xf86DrvMsg(pScrn->scrnIndex, X_INFO, 
> 	"YUV to RGB color space conversion and horizontal scaling will be hardware accelerated.\n");
> 	newAdaptor = MGASetupImageVideoILOAD(pScreen);
> 	pMga->TexturedVideo = TRUE; 
> 	/* ^^^ this is not really true but the ILOAD scaler shares 
> 	much more code with the textured video than the overlay */
>       } else if((pMga->Overlay8Plus24 || pMga->TexturedVideo) &&
1279a1295,2006
> 
> /* Matrox MGA 2164W Xv extension support.
> *  The extension is implemented as a HOST->FB image load in YUV format.
> *  I decided not to use the real hardware overlay, since on the Millennium II
> *  it would limit the size of the frame buffer to 4Mb (even on a 16Mb
> *  card) due to a hardware limitation.
> *  Author: Gabriele Gorla (gorlik@acm.org)
> *  Based on the MGA-Gxxx Xv extension by Mark Vojkovich
>    */
> 
> /* This code is still in alpha stage. Only YUV->RGB conversion
>    and horizontal scaling are hardware accelerated.
>    All 4 FOURCC formats supported by X should be supported.
>    It has been tested only on my DEC DPW 500a at 1400x1050x32 under
>    linux 2.4.18 with XFree86 4.2.0
> 
>    Bug reports and success/failure stories are greatly appreciated.
> */
> 
> /* #define DEBUG_MGA2164 */	/* enable to log box/source/destination geometry through xf86DrvMsg */
> #define CUSTOM_MEMCOPY		/* MGACopyILOAD: copy packed YUV with the unrolled word loop instead of memcpy */
> #define MGA2164_SWFILTER	/* MGACopyScaledILOAD: run packed YUV through the software filter instead of memcpy */
> 
> 
> static XF86VideoAdaptorPtr
> MGASetupImageVideoILOAD(ScreenPtr pScreen)
> {
>     /* Describes the Millennium II ILOAD video path to the Xv layer: an
>      * image-only adaptor whose PutImage entry is MGAPutImageILOAD. */
>     ScrnInfoPtr scrn = xf86Screens[pScreen->myNum];
>     MGAPtr mga = MGAPTR(scrn);
>     XF86VideoAdaptorPtr ilo = MGAAllocAdaptor(scrn, FALSE);
> 
>     /* identity */
>     ilo->name = "Matrox Millennium II ILOAD Video Engine";
>     ilo->type = XvWindowMask | XvInputMask | XvImageMask;
>     ilo->flags = 0;
> 
>     /* one encoding, the file's Formats table, no port attributes */
>     ilo->nPorts = MGA_MAX_PORTS;
>     ilo->nEncodings = 1;
>     ilo->pEncodings = &DummyEncoding[1];
>     ilo->nFormats = NUM_FORMATS;
>     ilo->pFormats = Formats;
>     ilo->nAttributes = 0;
>     ilo->pAttributes = NULL;
> 
>     /* number of supported color formats: four entries of the Images table */
>     ilo->nImages = 4;
>     ilo->pImages = Images;
> 
>     /* entry points: image upload only, no live video or still capture */
>     ilo->PutImage = MGAPutImageILOAD;
>     ilo->QueryImageAttributes = MGAQueryImageAttributes;
>     ilo->QueryBestSize = MGAQueryBestSize;
>     ilo->SetPortAttribute = MGASetPortAttributeTexture;
>     ilo->GetPortAttribute = MGAGetPortAttributeTexture;
>     ilo->StopVideo = MGAStopVideo;
>     ilo->PutVideo = NULL; ilo->PutStill = NULL;
>     ilo->GetVideo = NULL; ilo->GetStill = NULL;
> 
>     REGION_INIT(pScreen, &mga->portPrivate->clip, NullBox, 0);
>     return ilo;
> }
> 
> /* Weave one planar scanline into the packed word stream written to fb_ptr.
>  * Every output word carries two tsp (luma) bytes in bits 0-7 and 16-23, one
>  * tpu byte in bits 8-15 and one tpv byte in bits 24-31.  A pass consumes two
>  * luma words plus one word from each chroma plane and emits four words;
>  * src_w/8 passes are made.  Tuned for Alpha (ev56), untested elsewhere. */
> static void CopyMungedScanline_AXP(unsigned int *fb_ptr, short src_w,
> 	unsigned int *tsp, unsigned int *tpu, unsigned int *tpv)
> {
>   unsigned int passes = src_w/8;
> 
>   while(passes) {
>     unsigned int lumaLo = tsp[0], lumaHi = tsp[1];
>     unsigned int uw = *tpu++, vw = *tpv++;
>     tsp += 2;
>     fb_ptr[0] = (lumaLo & 0xff) | ((lumaLo & 0xff00) << 8) |
>       ((uw & 0xff) << 8) | ((vw & 0xff) << 24);
>     fb_ptr[1] = ((lumaLo >> 16) & 0xff) | ((lumaLo >> 8) & 0xff0000) |
>       (uw & 0xff00) | ((vw & 0xff00) << 16);
>     fb_ptr[2] = (lumaHi & 0xff) | ((lumaHi & 0xff00) << 8) |
>       ((uw & 0xff0000) >> 8) | ((vw & 0xff0000) << 8);
>     fb_ptr[3] = ((lumaHi >> 16) & 0xff) | ((lumaHi >> 8) & 0xff0000) |
>       ((uw >> 16) & 0xff00) | (vw & 0xff000000);
>     fb_ptr += 4; passes--;
>   }
> }
> 
> /* Blend two planar luma scanlines and pack the result, optionally decimated
>  * horizontally, into the word stream at fb_ptr.
>  *
>  *   fb_ptr      destination; 4, 2 or 1 words are written per pass for
>  *               xds = 1, 2, 4.  Any other xds (the caller may pass 8)
>  *               consumes the source but writes nothing.
>  *   src_w       sample count; xds*src_w/8 passes are made, each reading
>  *               two luma words per line and one word of U and of V.
>  *   tsp1, tsp2  the two luma lines to blend.
>  *   tpu1, tpv1  chroma words, unpacked without filtering.
>  *   tpu2, tpv2  accepted but never read.
>  *   beta        weight of tsp2; tsp1 is weighted 0xff-beta and the sum is
>  *               shifted right by 8.
>  *
>  * Output word layout: Y in bits 0-7 and 16-23, U in 8-15, V in 24-31.
>  */
> static void CopyMungedScanlineFilter_AXP(unsigned int *fb_ptr, short src_w,
> 	unsigned int *tsp1, unsigned int *tpu1, unsigned int *tpv1,
> 	unsigned int *tsp2, unsigned int *tpu2, unsigned int *tpv2, int beta, int xds )
> {
>   /* Everything is unsigned: with int arrays `vf[n]<<24` shifts into the
>      sign bit for every V sample >= 0x80, which is undefined behaviour. */
>   unsigned int k,p,y0_1,y1_1,y0_2,y1_2,u,v;
>   unsigned int yf[8], uf[4], vf[4];
>   unsigned int oneminbeta = 0xff - beta;
> 
>   for(k=xds*src_w/8;k;k--) {
>     y0_1=*tsp1; y1_1=*(tsp1+1);
>     y0_2=*tsp2; y1_2=*(tsp2+1);
>     u=*tpu1;	v=*tpv1;
>     tsp1+=2; tsp2+=2; tpu1++; tpv1++;
> 
>     /* byte p of each word: weighted blend for luma, plain unpack for
>        chroma.  FIXME: there is still no filtering on u and v */
>     for(p=0;p<4;p++) {
>       yf[p]   = (((y0_1>>(8*p))&0xff)*oneminbeta + ((y0_2>>(8*p))&0xff)*beta)>>8;
>       yf[p+4] = (((y1_1>>(8*p))&0xff)*oneminbeta + ((y1_2>>(8*p))&0xff)*beta)>>8;
>       uf[p] = (u>>(8*p))&0xff;
>       vf[p] = (v>>(8*p))&0xff;
>     }
> 
>     switch(xds) {
>     case 1:	/* no decimation: 8 luma samples -> 4 words */
>       for(p=0;p<4;p++)
> 	fb_ptr[p] = yf[2*p] | (yf[2*p+1]<<16) | (uf[p]<<8) | (vf[p]<<24);
>       fb_ptr+=4;
>       break;
> 
>     case 2:	/* average horizontal pairs: 8 luma samples -> 2 words */
>       for(p=0;p<2;p++)
> 	fb_ptr[p] = ((yf[4*p]+yf[4*p+1])/2) | (((yf[4*p+2]+yf[4*p+3])/2)<<16) |
> 	  (((uf[2*p]+uf[2*p+1])/2)<<8) | (((vf[2*p]+vf[2*p+1])/2)<<24);
>       fb_ptr+=2;
>       break;
> 
>     case 4:	/* average horizontal quads: 8 luma samples -> 1 word */
>       fb_ptr[0] = ((yf[0]+yf[1]+yf[2]+yf[3])/4) |
> 	(((yf[4]+yf[5]+yf[6]+yf[7])/4)<<16) |
> 	(((uf[0]+uf[1]+uf[2]+uf[3])/4)<<8) |
> 	(((vf[0]+vf[1]+vf[2]+vf[3])/4)<<24);
>       fb_ptr+=1;
>       break;
> 
>     default:	/* e.g. xds == 8: nothing is emitted */
>       break;
>     }
>   }
> }
> 
> /* Average two luma scanlines sample by sample and pack them, together with
>  * unfiltered chroma, into the word stream at fb_ptr (Y in bits 0-7 and
>  * 16-23, U in 8-15, V in 24-31).  src_w/8 passes are made; each reads two
>  * luma words per line plus one U and one V word and writes four words.
>  * tpu2, tpv2, beta and xds are accepted but not used.
>  * Fixes: samples 3 and 7 were masked with 0x000000ff before the >>24 and so
>  * were always zero; the two-sample sum was shifted right by 8 rather than 1,
>  * which left at most 1 in every luma byte. */
> static void CopyMungedScanlineFilterDown_AXP(unsigned int *fb_ptr, short src_w,
> 	unsigned int *tsp1, unsigned int *tpu1, unsigned int *tpv1,
> 	unsigned int *tsp2, unsigned int *tpu2, unsigned int *tpv2, int beta , int xds)
> {
>   unsigned int k,p,y0_1,y1_1,y0_2,y1_2,u,v;
>   unsigned int yf[8];	/* averaged luma samples of this pass */
> 
>   for(k=src_w/8;k;k--) {
>     y0_1=*tsp1; y1_1=*(tsp1+1);
>     y0_2=*tsp2; y1_2=*(tsp2+1);
>     u=*tpu1;	v=*tpv1;
>     tsp1+=2; tsp2+=2; tpu1++; tpv1++;
> 
>     for(p=0;p<4;p++) {
>       yf[p]   = (((y0_1>>(8*p))&0xff) + ((y0_2>>(8*p))&0xff))>>1;
>       yf[p+4] = (((y1_1>>(8*p))&0xff) + ((y1_2>>(8*p))&0xff))>>1;
>     }
> 
>     /* pack: two luma samples around the matching chroma bytes */
>     fb_ptr[0] = yf[0] | (yf[1]<<16) | ((u&0x000000ff)<<8) | ((v&0x000000ff)<<24);
>     fb_ptr[1] = yf[2] | (yf[3]<<16) | (u&0x0000ff00) | ((v&0x0000ff00)<<16);
>     fb_ptr[2] = yf[4] | (yf[5]<<16) | ((u&0x00ff0000)>>8) | ((v&0x00ff0000)<<8);
>     fb_ptr[3] = yf[6] | (yf[7]<<16) | ((u&0xff000000)>>16) | (v&0xff000000);
>     fb_ptr+=4;
>   }
> }
> /* Upload one clip box of a YUV image through the ILOAD window, one ILOAD per destination line; the source line is picked per line and (optionally) filtered/decimated in software first. */
> static void MGACopyScaledILOAD(
> 			       ScrnInfoPtr pScrn,
> 			       int id, unsigned char *buf,	/* FOURCC of the image and its first byte */
> 			       BoxPtr pbox,	/* clip rectangle to fill */
> 			       int width, int height, int pitch,	/* image dimensions; pitch is not read */
> 			       short src_x, short src_y,
> 			       short src_w, short src_h,
> 			       short drw_x, short drw_y,
> 			       short drw_w, short drw_h
> 			       )
> {
>   MGAPtr pMga = MGAPTR(pScrn);
>   unsigned int *fb_ptr;	/* set to pMga->ILOADBase for every line */
>   unsigned char *ubuf, *vbuf, *tbuf;
>   unsigned int *pu, *pv;
>   int k,l, pl, dl, xds, yds;	/* l: source line for dl; pl: previous l; dl: line inside the box; xds/yds: shrink class 1, 2, 4 or 8 */
>   short box_h;
>   short scr_pitch = ( pScrn->virtualX + 15) & ~15;	/* virtualX rounded up to a multiple of 16 */
>   
> #ifdef DEBUG_MGA2164
>   char sbuf[255];
>   
>   sprintf(sbuf,"---- PBOX: x1=%d y1=%d w=%d h=%d (x2=%d y2=%d)\n",
> 	  pbox->x1,pbox->y1,pbox->x2-pbox->x1,pbox->y2-pbox->y1,
> 	  pbox->x2,pbox->y2);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
>   
>   sprintf(sbuf,"in src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
> 	  src_x,src_y,src_w,src_h);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
>   sprintf(sbuf,"in drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
> 	  drw_x,drw_y,drw_w,drw_h);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
>   
>   /* scaling yuv->rgb */
>   
>   /* hack to force width and src image to be 8 pixel aligned */
>   src_x&=~0x7;
>   src_w&=~0x7; 
>   
>   box_h=pbox->y2-pbox->y1;
>   /* horizontal shrink class: 1 when not shrinking, otherwise 2, 4 or 8 depending on how much wider the source is */
>   if(src_w>drw_w) {
>     if(src_w/2<drw_w) { 
>       xds=2;
>     } else if(src_w/4<drw_w) {
>       xds=4;
>     } else { xds=8; }
>   } else xds = 1;
> 
>   /* prevent crashing when dragging window outside left boundary of screen */
>   /* FIXME: need to implement per pixel left start to avoid undesired effects when dragging 
>      window outside left screen boundary */
> 
>   if(drw_x<0) {
>     src_x=( -(drw_x*src_w)/drw_w + 0x7)&~0x7;	/* source columns left of x=0, rounded up to a multiple of 8 */
>     src_w-=src_x;
>     drw_w+=drw_x;
>     drw_x=0; 
>   }
> 
>   src_w/=xds;	/* from here on src_w counts decimated samples */
>   /* vertical shrink class; below it only selects between blending (1) and plain copy (anything else) for packed formats */
>   if(src_h>drw_h) {
>     if(src_h/2<drw_h) { 
>       yds=2;
>     } else if(src_h/4<drw_h) {
>       yds=4;
>     } else { yds=8; }
>   } else yds = 1;
> 
>   {
>   char sbuf[255];	/* only touched by the debug print in this block */
>   
> #ifdef DEBUG_MGA2164
>   sprintf(sbuf,"---- xds = %d\n",
> 	  xds);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
> }
> 
> #ifdef DEBUG_MGA2164
>   sprintf(sbuf,"out src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
> 	  src_x,src_y,src_w,src_h);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
>   sprintf(sbuf,"out drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
> 	  drw_x,drw_y,drw_w,drw_h);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
>   
>   CHECK_DMA_QUIESCENT(pMga, pScrn);
>   
>   /* scaling ILOAD */   
>   
>   vbuf=buf+width*height;	/* first byte after the width*height luma plane */
>   ubuf=vbuf+width*height/4;	/* a further width*height/4 bytes on */
>   pu = (unsigned int *)(ubuf+(src_y/2)*(width/2));	/* chroma row for src_y; rows are width/2 bytes */
>   pv = (unsigned int *)(vbuf+(src_y/2)*(width/2));  
>   /* NOTE(review): FOURCC_YV12 and FOURCC_I420 share these plane pointers; the two layouts normally differ in U/V plane order - confirm */
>   for(pl=-1,dl=0;dl<box_h;dl++) {
>     int beta;
>     l=(dl+(pbox->y1-drw_y))*src_h/drw_h;	/* source line that maps onto this destination line */
>     /* beta: fractional part of that mapping scaled by 0xff; used as the blend weight of the following source line. FIXME: check the math */
>     beta = ((dl+(pbox->y1-drw_y))*src_h*0xff/drw_h) - ((dl+(pbox->y1-drw_y))*src_h/drw_h*0xff);
>     
> #ifdef MGA2164_BLIT_DUP
>     if(l!=pl) 
> #else      
>       if(1) 
> #endif
> 	{
> 
> 	  /*
> #ifdef DEBUG_MGA2164
>   sprintf(sbuf,"new line: scr_dst %d   img_src %d   prev %d\n",
> 	  dl,l,pl);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
> 	  */
>     /* NOTE(review): AR2 and AR6 below divide by (drw_w-1); a drw_w of 1 would divide by zero - confirm callers exclude it */
>     OUTREG(MGAREG_DWGCTL, MGADWG_ILOAD_HIQH | MGADWG_BUYUV | MGADWG_SHIFTZERO
> 	   | MGADWG_SGNZERO | 0xc0000);
>     
>     OUTREG(MGAREG_AR0, pbox->x1 + drw_w -1);    /* SRC LINE END   why -1 ? */
>     OUTREG(MGAREG_AR2, ( ( (src_w-1)<<16) / (drw_w-1)) + 1 ); /* ((SRC_X_DIM -1)<<16) / (DST_X_DIM-1) +1 */
>     OUTREG(MGAREG_AR3, pbox->x1 );                            /* SRC LINE START*/
>     OUTREG(MGAREG_AR5, scr_pitch);                                 /* DST_Y_INCR = PITCH? */
>     OUTREG(MGAREG_AR6, ((src_w-drw_w)<<16) / (drw_w-1) );     /* */
>     OUTREG(MGAREG_FXBNDRY, drw_x|((drw_x+drw_w-1)<<16) );     /* why -1 ? */
>     OUTREG(MGAREG_CXBNDRY, pbox->x1 | ((pbox->x2-1)<<16 ) );     
>     OUTREG(MGAREG_YDST , pbox->y1+dl );                             /* Y_START_POS */
>     OUTREG(MGAREG_LEN + MGAREG_EXEC , 1);                 /* # of LINES */
>     
>     /* xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Data finished\n"); */
>     
>     fb_ptr=(unsigned int *)pMga->ILOADBase;
>   
>     switch(id) {
>     case FOURCC_YV12:
>     case FOURCC_I420:
>       tbuf=buf+(l+src_y)*width;	/* luma row, one byte per pixel */
>       {
> 	unsigned int *tpu=pu+src_x/8+l/2*width/8;
> 	unsigned int *tpv=pv+src_x/8+l/2*width/8;
> 	unsigned int *tsp=(unsigned int *)(tbuf+src_x), *tsp2;
> 	
> 	if((l+src_y)<(src_h-1)) tsp2=(unsigned int *)(tbuf+src_x+width);
> 	else tsp2=(unsigned int *)(tbuf+src_x);	
> 	/* tsp2 is the next luma row, or tsp again once l+src_y reaches src_h-1 */
> 	/* it is not clear if waiting is actually good for performance */
> 	/*	 WAITFIFO(pMga->FifoSize);*/
> 	/* should try to get MGACopyMunged data to work here */
> 	/*		CopyMungedScanline_AXP(fb_ptr,src_w,tsp,tpu,tpv); */
> 
> 	/* Filter does not work yet */
> 	CopyMungedScanlineFilter_AXP(fb_ptr,src_w,tsp,tpu,tpv,tsp2,tpu,tpv, beta, xds); 
> 	/*	if(l&1) {
> 	  pu+=width/8;
> 	  pv+=width/8;
> 	  } */
>       }
>       break;
>     case FOURCC_UYVY:
>     case FOURCC_YUY2:
>       tbuf=buf+(l+src_y)*width*2;	/* packed row, two bytes per pixel */
>       /* NOTE(review): both packed ids are unpacked below as Y in bits 0-7/16-23, U in 8-15, V in 24-31 - confirm this suits UYVY */
> #ifndef MGA2164_SWFILTER
> 	WAITFIFO(pMga->FifoSize/2);
> 	memcpy(fb_ptr, tbuf+src_x*2, src_w*2);
> 	fb_ptr+=src_w*2;   /* pointer in the pseudo dma window. NOTE(review): advances an unsigned int pointer by the byte count that memcpy copied - confirm */
> #else
> 	{
> 	  unsigned int *tsp=(unsigned int *)(tbuf+src_x*2), *tsp2;
> 	  
> 	  if((l+src_y)<(src_h-1)) tsp2=(unsigned int *)(tbuf+src_x*2+width*2);
> 	  else tsp2=(unsigned int *)(tbuf+src_x*2);
> 	  /*	  {
> 		  char sbuf [256];
> 		  sprintf(sbuf,"dst line: %d   src_line: %d    beta: %x\n",
> 		  dl, l, beta );
> 		  xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> 		  }  */
> 	  
> 	  WAITFIFO(pMga->FifoSize/4);
> 	  for(k=xds*src_w/8;k;k--) {	/* each pass reads 4 words (8 pixels) from tsp and tsp2 */
> 	    int oneminbeta = 0xff-beta;
> 	    int y[8], u[4], v[4], ya[4], ua[2], va[2], p;
> 
> 	    switch(yds) {
> 	    case 1:
> 	      /* upscale y filter */
> 	    for(p=0;p<4;p++) { 
> 	      y[2*p]=(((*(tsp+p)&0x000000ff))*oneminbeta+((*(tsp2+p)&0x000000ff))*beta)>>8;
> 	      y[2*p+1]=(((*(tsp+p)&0x00ff0000)>>16)*oneminbeta+((*(tsp2+p)&0x00ff0000)>>16)*beta)>>8;
> 	      u[p]=(((*(tsp+p)&0x0000ff00)>>8)*oneminbeta+((*(tsp2+p)&0x0000ff00)>>8)*beta)>>8;
> 	      v[p]=(((*(tsp+p)&0xff000000)>>24)*oneminbeta+((*(tsp2+p)&0xff000000)>>24)*beta)>>8;
> 	    }
> 	    break;
> 	    /* downscale y filter */
> 	    case 2:
> 	    case 3:
> 	    case 4:
> 	    default:
> 	      for(p=0;p<4;p++) {	/* row tsp is unpacked as is; tsp2 is not blended in */
> 		y[2*p]=(((*(tsp+p)&0x000000ff)));
> 		y[2*p+1]=(((*(tsp+p)&0x00ff0000)>>16));
> 		u[p]=(((*(tsp+p)&0x0000ff00)>>8));
> 		v[p]=(((*(tsp+p)&0xff000000)>>24));
> 	      }
> 	      break;
> 	    }
> 	    
> 	    switch (xds) {
> 	    case 1: /* simple copy */
> 	      *(fb_ptr++)=y[0]|y[1]<<16|u[0]<<8|v[0]<<24;
> 	      *(fb_ptr++)=y[2]|y[3]<<16|u[1]<<8|v[1]<<24;
> 	      *(fb_ptr++)=y[4]|y[5]<<16|u[2]<<8|v[2]<<24;
> 	      *(fb_ptr++)=y[6]|y[7]<<16|u[3]<<8|v[3]<<24;
> 	      break;
> 	    case 2: /* downscale by 2 */
> 	      ya[0]=(y[0]+y[1])>>1;
> 	      ya[1]=(y[2]+y[3])>>1;
> 	      ya[2]=(y[4]+y[5])>>1;
> 	      ya[3]=(y[6]+y[7])>>1;
> 	      ua[0]=(u[0]+u[1])>>1;
> 	      ua[1]=(u[2]+u[3])>>1;
> 	      va[0]=(v[0]+v[1])>>1;
> 	      va[1]=(v[2]+v[3])>>1;
> 	      *(fb_ptr++)=ya[0]|ya[1]<<16|ua[0]<<8|va[0]<<24;
> 	      *(fb_ptr++)=ya[2]|ya[3]<<16|ua[1]<<8|va[1]<<24;
> 	      break;
> 	    case 4: /* downscale by 4 */
> 	      ya[0]=(y[0]+y[1]+y[2]+y[3])>>2;
> 	      ya[1]=(y[4]+y[5]+y[6]+y[7])>>2;
> 	      ua[0]=(u[0]+u[1]+u[2]+u[3])>>2;
> 	      va[0]=(v[0]+v[1]+v[2]+v[3])>>2;	      
> 	      *(fb_ptr++)=ya[0]|ya[1]<<16|ua[0]<<8|va[0]<<24;
> 	      break;
> 	    case 8:	/* NOTE(review): nothing is written for xds == 8 although the ILOAD above was started - confirm */
> 	    default:
> 	      break;
> 	    }
> 
> 	    /* fb_ptr+=4; */ 
> 	    tsp+=4; tsp2+=4;
> 	  }
> 	  
> 	}
> #endif /* MGA2164_SWFILTER */
> 	break; 
>     default:
>       
>       break;     
>     }
>     pl=l;
> 	} else {
> 	  /* dup lines: only reachable when MGA2164_BLIT_DUP is defined and l equals pl; copies screen line dl-1 to dl */
> 	  
> #ifdef DEBUG_MGA2164
> 	  sprintf(sbuf,"dup line: scr_src %d   scr_dst %d\n",
> 		  dl-1,dl);
> 	  xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
> 	  
> 	  OUTREG(MGAREG_DWGCTL, 0x040C6008);
> 	  OUTREG(MGAREG_FXBNDRY, pbox->x1|((pbox->x2-1)<<16) );     /* why -1 ? */
> 	  OUTREG(MGAREG_AR3, (pbox->y1+dl-1)*scr_pitch+pbox->x1 );                            /* SRC LINE START*/
> 	  OUTREG(MGAREG_AR0, (pbox->y1+dl-1)*scr_pitch+pbox->x2 -1);    /* SRC LINE END   why -1 ? */
> 	  OUTREG(MGAREG_AR5, scr_pitch);                                 /* DST_Y_INCR = PITCH? */
> 	  OUTREG(MGAREG_YDST , pbox->y1+dl);                             /* Y_START_POS */
> 	  OUTREG(MGAREG_LEN + MGAREG_EXEC , 1);                 /* # of LINES */
> 	}
>   }
>   OUTREG(MGAREG_CXBNDRY, 0xFFFF0000);     /* same value MGACopyILOAD writes to put clipping back to normal */
> }
> /* Unscaled variant: clips the request against pbox in software and uploads it with a single ILOAD. Not called from any enabled code in this part of the file. */
> static void MGACopyILOAD(
> 			 ScrnInfoPtr pScrn,
> 			 int id, unsigned char *buf,	/* FOURCC of the image and its first byte */
> 			 BoxPtr pbox,	/* clip rectangle */
> 			 int width, int height, int pitch,	/* image dimensions; pitch is not read */
> 			 short src_x, short src_y,
> 			 short src_w, short src_h,
> 			 short drw_x, short drw_y,
> 			 short drw_w, short drw_h
> 			 )
> {
>   MGAPtr pMga = MGAPTR(pScrn);
>   unsigned int *fb_ptr;
>   unsigned char *ubuf, *vbuf;
>   unsigned int *pu, *pv;
>   int k,l;
>   short clip_x1, clip_x2, tmp_w;	/* values written to CXBNDRY; tmp_w: unrounded clipped width */
> 
> #ifdef DEBUG_MGA2164
>   char sbuf[255];
> 
>   sprintf(sbuf,"---- PBOX: x1=%d y1=%d w=%d h=%d (x2=%d y2=%d)\n",
>     pbox->x1,pbox->y1,pbox->x2-pbox->x1,pbox->y2-pbox->y1,
>     pbox->x2,pbox->y2);
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> 
>   sprintf(sbuf,"in src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
>     src_x,src_y,src_w,src_h);
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
>   sprintf(sbuf,"in drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
>     drw_x,drw_y,drw_w,drw_h);
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
> 
>   /* non-scaling yuv->rgb */
>   
>   /* hack to force width and src image to be 8 pixel aligned */
>   src_x&=~0x7;
>   src_w&=~0x7; 
>   drw_w&=~0x7;
>   tmp_w=drw_w;
>   clip_x1=drw_x;
>   clip_x2=drw_x+drw_w;
> 
>   /* hack for clipping in non scaling version */
>   /* this works only if no scaling */
>   if(pbox->x1 > drw_x) {              /* left side X clipping*/
>     src_x+=((pbox->x1-drw_x)&~0x7);
>     src_w-=((pbox->x1-drw_x)&~0x7); 
>     clip_x1=pbox->x1;
>     drw_x+=src_x;	/* NOTE(review): adds the whole src_x, which equals the clipped amount only if src_x was 0 on entry - confirm */
>     drw_w=src_w;
>   }
> 
>   if( (pbox->x2) < (drw_x+drw_w) ) {     /* right side X clipping */
>     tmp_w=( (pbox->x2) - drw_x );
>     drw_w= tmp_w & (~0x7);
>     if(drw_w!=tmp_w) drw_w+=8; 	/* round the upload width up to a multiple of 8; CXBNDRY trims the excess */
>     clip_x2=drw_x+tmp_w-1; /* not sure why needs -1 */
>     src_w=drw_w;
>   }
> 
>   if(pbox->y1 > drw_y) {             /* top side Y clipping */
>     src_y+=(pbox->y1-drw_y);
>     src_h-=(pbox->y1-drw_y);
>     drw_y+=src_y;	/* NOTE(review): same pattern as drw_x above - adds the whole src_y; confirm */
>     drw_h=src_h;
>   }
>   if((pbox->y2)<(drw_y+drw_h)) {     /* bottom side Y clipping */
>     drw_h=(pbox->y2)-drw_y;
>     src_h=drw_h;
>   }
> 
>   if(drw_x<0) drw_x=0;
> 
> #ifdef DEBUG_MGA2164
>   sprintf(sbuf,"out src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
>     src_x,src_y,src_w,src_h);
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
>   sprintf(sbuf,"out drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
>     drw_x,drw_y,drw_w,drw_h);
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
>   
>   /* ready to draw */
>   if(drw_w==0||drw_h==0) return;
> 
>   if(drw_w<0||drw_h<0) {
> 	/* actually until scaling is working this might happen 
> 	during normal operation */
> /*  sprintf(sbuf,"drw_w or drw_h are negative (this should never 
> happen)\n");
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); */
> 	return;
> }
> 
>   CHECK_DMA_QUIESCENT(pMga, pScrn);
>   /* NOTE(review): WAITFIFO(6) below is followed by seven OUTREG writes - confirm the FIFO reservation */
>   /* non scaling ILOAD */   
>   WAITFIFO(6);
>   OUTREG(MGAREG_AR5, 0);
>   OUTREG(MGAREG_DWGCTL, MGADWG_ILOAD | MGADWG_BUYUV | MGADWG_SHIFTZERO
> 	 | MGADWG_SGNZERO | 0xc0000);
>   OUTREG(MGAREG_AR0, (drw_w)-1 );
>   OUTREG(MGAREG_AR3, 0);
>   OUTREG(MGAREG_CXBNDRY, clip_x1|(clip_x2<<16)); 
>   OUTREG(MGAREG_FXBNDRY, drw_x|((drw_x+drw_w-1)<<16)); 
>   OUTREG(MGAREG_YDSTLEN + MGAREG_EXEC , (drw_y<<16)|drw_h); 
>   
>   fb_ptr=(unsigned int *)pMga->ILOADBase;
>   vbuf=buf+width*height;	/* first byte after the width*height luma plane */
>   ubuf=vbuf+width*height/4;	/* a further width*height/4 bytes on */
>     
>   switch(id) {
>   case FOURCC_YV12:
>   case FOURCC_I420:
>     pu = (unsigned int *)(ubuf+(src_y/2)*(width/2));
>     pv = (unsigned int *)(vbuf+(src_y/2)*(width/2));
>     buf+=src_y*width;
>     
>     for(l=0;l<drw_h;l++) {
>       unsigned int *tpu=pu+src_x/8;
>       unsigned int *tpv=pv+src_x/8;
>       unsigned int *tsp=(unsigned int *)(buf+src_x);
>       
>      /* it is not clear if waiting is actually good for performance */
>       /*	WAITFIFO(pMga->FifoSize);*/
>       /* should try to get MGACopyMunged data to work here */
>       CopyMungedScanline_AXP(fb_ptr,src_w,tsp,tpu,tpv);	/* fb_ptr is passed by value, so every line starts again at ILOADBase */
>       buf+=width;
>       if(l&1) {	/* chroma rows advance after every second luma row */
> 	pu+=width/8;
> 	pv+=width/8;
>       }
>     }
>     break;
>   case FOURCC_UYVY:
>   case FOURCC_YUY2:
>     buf+=src_y*width*2;	/* packed formats: two bytes per pixel */
>     for(l=0;l<drw_h;l++) {
> 
> #ifndef CUSTOM_MEMCOPY
>       WAITFIFO(pMga->FifoSize/2); /* not sure what's the value for best performance */
>       memcpy(fb_ptr, buf+src_x*2, src_w*2);
>       fb_ptr+=src_w*2; 	/* NOTE(review): advances an unsigned int pointer by the byte count that memcpy copied - confirm */
> #else
>       unsigned int *tsp=(unsigned int *)(buf+src_x*2);
>       WAITFIFO(pMga->FifoSize/4);
>       for(k=src_w/8;k;k--) {	/* 4 words = 8 packed pixels per pass; fb_ptr keeps advancing across lines */
> 	*(fb_ptr)=*(tsp);
> 	*(fb_ptr+1)=*(tsp+1);
> 	*(fb_ptr+2)=*(tsp+2);
> 	*(fb_ptr+3)=*(tsp+3);
> 	fb_ptr+=4; tsp+=4;
> 	      } 
> #endif /* CUSTOM_MEMCOPY */
>       buf+=width*2;
>     }
>     break; 
>   default:
>     break;     
>   }
>   OUTREG(MGAREG_CXBNDRY, 0xFFFF0000);    /* put clipping back to normal */ 
> }
>  
> 
> 
> /*
>  * PutImage entry point of the ILOAD adaptor.
>  *
>  * The request is first clipped with MGAClipVideo; when that reports nothing
>  * to draw the function returns Success right away.  If the accelerator is
>  * flagged NeedToSync and the previous image came from another port,
>  * MGAStormSync is called.  Each rectangle of clipBoxes is then drawn by
>  * MGACopyScaledILOAD, which receives the caller's original (unclipped)
>  * source and destination geometry and a pitch of 0.  On the way out the
>  * port is put into FREE_TIMER state with a deadline FREE_DELAY ms ahead and
>  * MGAVideoTimerCallback is installed.
>  *
>  * Sync is not consulted.  Always returns Success.
>  */
> static int
> MGAPutImageILOAD(ScrnInfoPtr pScrn,
> 		 short src_x, short src_y, short drw_x, short drw_y,
> 		 short src_w, short src_h, short drw_w, short drw_h,
> 		 int id, unsigned char* buf, short width, short height,
> 		 Bool Sync, RegionPtr clipBoxes, pointer data)
> {
>   MGAPtr mga = MGAPTR(pScrn);
>   MGAPortPrivPtr port = mga->portPrivate;
>   INT32 srcLeft = src_x, srcRight = src_x + src_w;
>   INT32 srcTop = src_y, srcBottom = src_y + src_h;
>   BoxRec target;
>   BoxPtr rect;
>   int remaining;
>   int bpp;		/* bytes per pixel; computed but not used */
>   int noPitch = 0;
> 
>   target.x1 = drw_x;
>   target.y1 = drw_y;
>   target.x2 = drw_x + drw_w;
>   target.y2 = drw_y + drw_h;
> 
>   /* Clip */
>   if(!MGAClipVideo(&target, &srcLeft, &srcRight, &srcTop, &srcBottom,
> 		   clipBoxes, width, height))
>     return Success;
> 
>   bpp = pScrn->bitsPerPixel >> 3;
> 
>   /* a different port drew last and a sync is pending: sync first */
>   if(mga->AccelInfoRec->NeedToSync && ((long)data != port->lastPort))
>     MGAStormSync(pScrn);
>   port->lastPort = (long)data;
> 
>   remaining = REGION_NUM_RECTS(clipBoxes);
>   rect = REGION_RECTS(clipBoxes);
> 
>   /*
>    * One upload per clip rectangle.  A fast path for unscaled images
>    * (MGACopyILOAD, when drw and src sizes match and drw_x >= 0) and a
>    * dedicated upscaling case are planned but stay disabled.
>    * FIXME: when the generic is perfect I will enable the optimizations;
>    * until then everything goes through MGACopyScaledILOAD.
>    */
>   for(; remaining--; rect++) {
>     MGACopyScaledILOAD(pScrn, id, buf, rect,
> 		       width, height, noPitch,
> 		       src_x, src_y, src_w, src_h,
> 		       drw_x, drw_y, drw_w, drw_h);
>   }
> 
>   mga->AccelInfoRec->NeedToSync = TRUE;
>   mga->VideoTimerCallback = MGAVideoTimerCallback;
>   port->freeTime = currentTime.milliseconds + FREE_DELAY;
>   port->videoStatus = FREE_TIMER;
> 
>   return Success;
> }
> 

Reply to: