33 #if COMPILE_TEMPLATE_MMXEXT 
   34 #define MOVNTQ "movntq" 
   35 #define SFENCE "sfence" 
   38 #define SFENCE " # nop" 
   46 #define YUV2RGB_LOOP(depth)                                          \ 
   47     h_size = (c->dstW + 7) & ~7;                                     \ 
   48     if (h_size * depth > FFABS(dstStride[0]))                        \ 
   51     vshift = c->srcFormat != AV_PIX_FMT_YUV422P;                        \ 
   53     __asm__ volatile ("pxor %mm4, %mm4\n\t");                        \ 
   54     for (y = 0; y < srcSliceH; y++) {                                \ 
   55         uint8_t *image    = dst[0] + (y + srcSliceY) * dstStride[0]; \ 
   56         const uint8_t *py = src[0] +               y * srcStride[0]; \ 
   57         const uint8_t *pu = src[1] +   (y >> vshift) * srcStride[1]; \ 
   58         const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \ 
   59         x86_reg index = -h_size / 2;                                 \ 
   61 #define YUV2RGB_INITIAL_LOAD          \ 
   63         "movq (%5, %0, 2), %%mm6\n\t" \ 
   64         "movd    (%2, %0), %%mm0\n\t" \ 
   65         "movd    (%3, %0), %%mm1\n\t" \ 
   86     "movq      %%mm6, %%mm7\n\t"                 \ 
   87     "punpcklbw %%mm4, %%mm0\n\t"                 \ 
   88     "punpcklbw %%mm4, %%mm1\n\t"                 \ 
   89     "pand     "MANGLE(mmx_00ffw)", %%mm6\n\t"    \ 
   90     "psrlw     $8,    %%mm7\n\t"                 \ 
   91     "psllw     $3,    %%mm0\n\t"                 \ 
   92     "psllw     $3,    %%mm1\n\t"                 \ 
   93     "psllw     $3,    %%mm6\n\t"                 \ 
   94     "psllw     $3,    %%mm7\n\t"                 \ 
   95     "psubsw   "U_OFFSET"(%4), %%mm0\n\t"         \ 
   96     "psubsw   "V_OFFSET"(%4), %%mm1\n\t"         \ 
   97     "psubw    "Y_OFFSET"(%4), %%mm6\n\t"         \ 
   98     "psubw    "Y_OFFSET"(%4), %%mm7\n\t"         \ 
  101     "movq      %%mm0, %%mm2\n\t"                 \ 
  102     "movq      %%mm1, %%mm3\n\t"                 \ 
  103     "pmulhw   "UG_COEFF"(%4), %%mm2\n\t"         \ 
  104     "pmulhw   "VG_COEFF"(%4), %%mm3\n\t"         \ 
  105     "pmulhw   "Y_COEFF" (%4), %%mm6\n\t"         \ 
  106     "pmulhw   "Y_COEFF" (%4), %%mm7\n\t"         \ 
  107     "pmulhw   "UB_COEFF"(%4), %%mm0\n\t"         \ 
  108     "pmulhw   "VR_COEFF"(%4), %%mm1\n\t"         \ 
  109     "paddsw    %%mm3, %%mm2\n\t"                 \ 
  114     "movq      %%mm7, %%mm3\n\t"                 \ 
  115     "movq      %%mm7, %%mm5\n\t"                 \ 
  116     "paddsw    %%mm0, %%mm3\n\t"                 \ 
  117     "paddsw    %%mm1, %%mm5\n\t"                 \ 
  118     "paddsw    %%mm2, %%mm7\n\t"                 \ 
  119     "paddsw    %%mm6, %%mm0\n\t"                 \ 
  120     "paddsw    %%mm6, %%mm1\n\t"                 \ 
  121     "paddsw    %%mm6, %%mm2\n\t"                 \ 
  123 #define RGB_PACK_INTERLEAVE                  \ 
  125     "packuswb  %%mm1, %%mm0\n\t"                 \ 
  126     "packuswb  %%mm5, %%mm3\n\t"                 \ 
  127     "packuswb  %%mm2, %%mm2\n\t"                 \ 
  128     "movq      %%mm0, %%mm1\n\n"                 \ 
  129     "packuswb  %%mm7, %%mm7\n\t"                 \ 
  130     "punpcklbw %%mm3, %%mm0\n\t"                 \ 
  131     "punpckhbw %%mm3, %%mm1\n\t"                 \ 
  132     "punpcklbw %%mm7, %%mm2\n\t"                 \ 
  134 #define YUV2RGB_ENDLOOP(depth)                   \ 
  135     "movq 8 (%5, %0, 2), %%mm6\n\t"              \ 
  136     "movd 4 (%3, %0),    %%mm1\n\t"              \ 
  137     "movd 4 (%2, %0),    %%mm0\n\t"              \ 
  138     "add $"AV_STRINGIFY(depth * 8)", %1\n\t"     \ 
  142 #if COMPILE_TEMPLATE_MMXEXT 
  143 #undef RGB_PACK24_B_OPERANDS 
  144 #define RGB_PACK24_B_OPERANDS NAMED_CONSTRAINTS_ARRAY_ADD(mask1101,mask0110,mask0100,mask0010,mask1001) 
  146 #undef RGB_PACK24_B_OPERANDS 
  147 #define RGB_PACK24_B_OPERANDS 
  150 #define YUV2RGB_OPERANDS                                          \ 
  151         : "+r" (index), "+r" (image)                              \ 
  152         : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ 
  154           NAMED_CONSTRAINTS_ADD(mmx_00ffw,pb_03,pb_07,mmx_redmask,pb_e0) \ 
  155           RGB_PACK24_B_OPERANDS                                   \ 
  160 #define YUV2RGB_OPERANDS_ALPHA                                    \ 
  161         : "+r" (index), "+r" (image)                              \ 
  162         : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ 
  163           "r" (py - 2*index), "r" (pa - 2*index)                  \ 
  164           NAMED_CONSTRAINTS_ADD(mmx_00ffw)                        \ 
  169 #define YUV2RGB_ENDFUNC                          \ 
  170     __asm__ volatile (SFENCE"\n\t"               \ 
  /*
   * RGB_PACK16(gmask, is15): inline-asm fragment that merges the byte data
   * held in mm0, mm1 and mm2 into 16-bit values and writes 16 bytes to the
   * address in asm operand %1 (at offsets 0 and 8).
   *
   *   gmask - name of the constant (wrapped in MANGLE) that masks the part
   *           of mm2 merged into the high output byte.
   *   is15  - integer used in the shift counts (3-is15 and 5+is15) and
   *           token-pasted onto "IF" to select the macro that wraps the
   *           extra 1-bit shift of mm1.
   *
   * Clobbers mm0, mm1, mm2, mm3.
   * NOTE(review): IF0/IF1 are not visible here; presumably one discards its
   * argument and the other emits it -- confirm against their definitions.
   * NOTE(review): the dither lines elsewhere in this file add BLUE_DITHER to
   * mm0, GREEN_DITHER to mm2 and RED_DITHER to mm1, which suggests
   * mm0 = blue, mm2 = green, mm1 = red -- confirm.
   */
  177 #define RGB_PACK16(gmask, is15)                  \ 
  178     "pand      "MANGLE(mmx_redmask)", %%mm0\n\t" /* mask mm0 with mmx_redmask */ \ 
  179     "pand      "MANGLE(mmx_redmask)", %%mm1\n\t" /* mask mm1 with mmx_redmask */ \ 
  180     "movq      %%mm2,     %%mm3\n\t"             /* mm3 = copy of mm2; mm2 is split across both output bytes */ \ 
  181     "psllw   $"AV_STRINGIFY(3-is15)", %%mm2\n\t" /* mm2: part that is ORed into mm0 below */ \ 
  182     "psrlw   $"AV_STRINGIFY(5+is15)", %%mm3\n\t" /* mm3: part that is ORed into mm1 below */ \ 
  183     "psrlw     $3,        %%mm0\n\t"             /* move mm0 down by 3 bits */ \ 
  184     IF##is15("psrlw  $1,  %%mm1\n\t")            /* extra 1-bit shift of mm1, selected through IF<is15> */ \ 
  185     "pand "MANGLE(pb_e0)", %%mm2\n\t"            /* keep only the pb_e0 bits of the shifted mm2 */ \ 
  186     "pand "MANGLE(gmask)", %%mm3\n\t"            /* keep only the gmask bits of the shifted mm3 */ \ 
  187     "por       %%mm2,     %%mm0\n\t"             /* mm0 |= mm2 */ \ 
  188     "por       %%mm3,     %%mm1\n\t"             /* mm1 |= mm3 */ \ 
  189     "movq      %%mm0,     %%mm2\n\t"             /* keep a copy of mm0 for the high-half interleave */ \ 
  190     "punpcklbw %%mm1,     %%mm0\n\t"             /* mm0 = low 4 bytes of mm0/mm1 interleaved (mm0 byte first) */ \ 
  191     "punpckhbw %%mm1,     %%mm2\n\t"             /* mm2 = high 4 bytes of mm0/mm1 interleaved */ \ 
  192     MOVNTQ "   %%mm0,      (%1)\n\t"             /* store first 8 output bytes */ \ 
  193     MOVNTQ "   %%mm2,     8(%1)\n\t"             /* store next 8 output bytes */ \ 
  196     "paddusb "BLUE_DITHER"(%4),  %%mm0\n\t"      \ 
  197     "paddusb "GREEN_DITHER"(%4), %%mm2\n\t"      \ 
  198     "paddusb "RED_DITHER"(%4),   %%mm1\n\t"      \ 
  200 #if !COMPILE_TEMPLATE_MMXEXT 
  203                                        int srcSliceY, 
int srcSliceH,
 
  204                                        uint8_t *dst[], 
int dstStride[])
 
  206     int y, h_size, vshift;
 
  231                                        int srcSliceY, 
int srcSliceH,
 
  232                                        uint8_t *dst[], 
int dstStride[])
 
  234     int y, h_size, vshift;
 
  258 #define RGB_PACK24(blue, red)\ 
  259     "packuswb  %%mm3,      %%mm0 \n" \ 
  260     "packuswb  %%mm5,      %%mm1 \n" \ 
  261     "packuswb  %%mm7,      %%mm2 \n" \ 
  262     "movq      %%mm"red",  %%mm3 \n"\ 
  263     "movq      %%mm"blue", %%mm6 \n"\ 
  264     "psrlq     $32,        %%mm"red" \n" \ 
  265     "punpcklbw %%mm2,      %%mm3 \n" \ 
  266     "punpcklbw %%mm"red",  %%mm6 \n" \ 
  267     "movq      %%mm3,      %%mm5 \n"\ 
  268     "punpckhbw %%mm"blue", %%mm2 \n" \ 
  269     "punpcklwd %%mm6,      %%mm3 \n" \ 
  270     "punpckhwd %%mm6,      %%mm5 \n" \ 
  273 #if COMPILE_TEMPLATE_MMXEXT 
  /*
   * RGB_PACK24_B (variant under COMPILE_TEMPLATE_MMXEXT): inline-asm fragment
   * that rearranges the contents of mm2, mm3 and mm5 with pshufw, selects
   * words with the mask1101 / mask0110 / mask0100 / mask0010 / mask1001
   * constants and writes three quadwords (24 bytes) to the address in asm
   * operand %1.
   *
   * Written as expressions (shuf = pshufw with the given immediate):
   *   (%1)   = (shuf(mm2,0xc6) & mask0010) | (shuf(mm3,0x84) & mask1101)
   *   8(%1)  = (shuf(mm2,0xc6) & mask0100) | (mm3 >> 48) | (mm5 << 32)
   *   16(%1) = (shuf(mm2,0xc6) & mask1001) | (shuf(mm5,0x38) & mask0110)
   *
   * Clobbers mm0-mm3 and mm5-mm7.
   * NOTE(review): the mask values and the producer of mm2/mm3/mm5 are not
   * visible in this excerpt; presumably the inputs come from RGB_PACK24.
   */
  280 #define RGB_PACK24_B\ 
  281     "pshufw    $0xc6,  %%mm2, %%mm1 \n"/* mm1 = word-shuffled mm2 */\ 
  282     "pshufw    $0x84,  %%mm3, %%mm6 \n"/* mm6 = word-shuffled mm3 */\ 
  283     "pshufw    $0x38,  %%mm5, %%mm7 \n"/* mm7 = word-shuffled mm5 */\ 
  284     "pand "MANGLE(mask1101)", %%mm6 \n" /* contribution of mm3 to the first quadword */ \ 
  285     "movq      %%mm1,         %%mm0 \n"/* copy taken before mm1 is masked below */\ 
  286     "pand "MANGLE(mask0110)", %%mm7 \n" /* contribution of mm5 to the third quadword */ \ 
  287     "movq      %%mm1,         %%mm2 \n"/* second copy of the shuffled mm2 */\ 
  288     "pand "MANGLE(mask0100)", %%mm1 \n" /* contribution of mm2 to the second quadword */ \ 
  289     "psrlq       $48,         %%mm3 \n" /* top word of mm3 moves to the bottom */ \ 
  290     "pand "MANGLE(mask0010)", %%mm0 \n" /* contribution of mm2 to the first quadword */ \ 
  291     "psllq       $32,         %%mm5 \n" /* low dword of mm5 moves to the top */ \ 
  292     "pand "MANGLE(mask1001)", %%mm2 \n" /* contribution of mm2 to the third quadword */ \ 
  293     "por       %%mm3,         %%mm1 \n"/* assemble second quadword ... */\ 
  294     "por       %%mm6,         %%mm0 \n"/* assemble first quadword */\ 
  295     "por       %%mm5,         %%mm1 \n"/* ... second quadword complete */\ 
  296     "por       %%mm7,         %%mm2 \n"/* assemble third quadword */\ 
  297     MOVNTQ"    %%mm0,          (%1) \n"/* output bytes 0-7 */\ 
  298     MOVNTQ"    %%mm1,         8(%1) \n"/* output bytes 8-15 */\ 
  299     MOVNTQ"    %%mm2,        16(%1) \n"/* output bytes 16-23 */\ 
  /*
   * RGB_PACK24_B (second definition in this file, built only from movd and
   * psrlq): writes the contents of mm3, mm2 and mm5 to bytes 0-23 at the
   * address in asm operand %1 using eight 4-byte stores.
   *
   * The stores overlap on purpose and their order matters: a later store
   * overwrites the upper bytes of an earlier one (e.g. the store at offset 6
   * rewrites bytes 6-7 written by the store at offset 4), and the store at
   * offset 18 is issued after the one at offset 20 so that bytes 20-21 end
   * up holding data from mm5.
   *
   * Clobbers mm2, mm3, mm5 (they are shifted in place).
   */
  303 #define RGB_PACK24_B\ 
  304     "movd      %%mm3,       (%1) \n" /* bytes 0-3   <- low dword of mm3 */ \ 
  305     "movd      %%mm2,      4(%1) \n" /* bytes 4-7   <- low dword of mm2 (6-7 overwritten below) */ \ 
  306     "psrlq     $32,        %%mm3 \n"/* expose high dword of mm3 */\ 
  307     "psrlq     $16,        %%mm2 \n"/* advance mm2 by one word */\ 
  308     "movd      %%mm3,      6(%1) \n" /* bytes 6-9   <- high dword of mm3 */ \ 
  309     "movd      %%mm2,     10(%1) \n" /* bytes 10-13 <- mm2 (12-13 overwritten below) */ \ 
  310     "psrlq     $16,        %%mm2 \n"/* advance mm2 by one more word */\ 
  311     "movd      %%mm5,     12(%1) \n" /* bytes 12-15 <- low dword of mm5 */ \ 
  312     "movd      %%mm2,     16(%1) \n" /* bytes 16-19 <- mm2 (18-19 overwritten below) */ \ 
  313     "psrlq     $32,        %%mm5 \n"/* expose high dword of mm5 */\ 
  314     "movd      %%mm2,     20(%1) \n" /* bytes 20-23 <- mm2 (20-21 overwritten by the next store) */ \ 
  315     "movd      %%mm5,     18(%1) \n" /* bytes 18-21 <- high dword of mm5; must come last */ \ 
  321                                        int srcSliceY, 
int srcSliceH,
 
  322                                        uint8_t *dst[], 
int dstStride[])
 
  324     int y, h_size, vshift;
 
  339                                        int srcSliceY, 
int srcSliceH,
 
  340                                        uint8_t *dst[], 
int dstStride[])
 
  342     int y, h_size, vshift;
 
  356 #define SET_EMPTY_ALPHA                                                      \ 
  357     "pcmpeqd   %%mm"REG_ALPHA", %%mm"REG_ALPHA"\n\t"  \ 
  360     "movq      (%6, %0, 2),     %%mm"REG_ALPHA"\n\t" \ 
  /*
   * RGB_PACK32(red, green, blue, alpha): inline-asm fragment that interleaves
   * four MMX registers of 8 bytes each into eight 4-byte pixels and writes
   * the resulting 32 bytes to the address in asm operand %1.
   *
   * Each argument is a string literal holding an MMX register number; it is
   * pasted after "%%mm". Within every output pixel the bytes appear in the
   * order <blue> <green> <red> <alpha> (lowest address first), so the memory
   * layout is decided by which register the caller passes in which position.
   *
   * mm5 and mm6 are used as scratch, and all four argument registers are
   * overwritten; the arguments therefore must not name mm5 or mm6.
   */
  362 #define RGB_PACK32(red, green, blue, alpha)  \ 
  363     "movq      %%mm"blue",  %%mm5\n\t"       /* mm5 = copy of <blue> for the upper four pixels */ \ 
  364     "movq      %%mm"red",   %%mm6\n\t"       /* mm6 = copy of <red> for the upper four pixels */ \ 
  365     "punpckhbw %%mm"green", %%mm5\n\t"       /* mm5    = b4 g4 b5 g5 b6 g6 b7 g7 */ \ 
  366     "punpcklbw %%mm"green", %%mm"blue"\n\t"  /* <blue> = b0 g0 b1 g1 b2 g2 b3 g3 */ \ 
  367     "punpckhbw %%mm"alpha", %%mm6\n\t"       /* mm6    = r4 a4 r5 a5 r6 a6 r7 a7 */ \ 
  368     "punpcklbw %%mm"alpha", %%mm"red"\n\t"   /* <red>  = r0 a0 r1 a1 r2 a2 r3 a3 */ \ 
  369     "movq      %%mm"blue",  %%mm"green"\n\t" /* <green> = copy of the low b/g pairs */ \ 
  370     "movq      %%mm5,       %%mm"alpha"\n\t" /* <alpha> = copy of the high b/g pairs */ \ 
  371     "punpcklwd %%mm"red",   %%mm"blue"\n\t"  /* <blue>  = pixels 0-1 */ \ 
  372     "punpckhwd %%mm"red",   %%mm"green"\n\t" /* <green> = pixels 2-3 */ \ 
  373     "punpcklwd %%mm6,       %%mm5\n\t"       /* mm5     = pixels 4-5 */ \ 
  374     "punpckhwd %%mm6,       %%mm"alpha"\n\t" /* <alpha> = pixels 6-7 */ \ 
  375     MOVNTQ "   %%mm"blue",   0(%1)\n\t"      /* store pixels 0-1 */ \ 
  376     MOVNTQ "   %%mm"green",  8(%1)\n\t"      /* store pixels 2-3 */ \ 
  377     MOVNTQ "   %%mm5,       16(%1)\n\t"      /* store pixels 4-5 */ \ 
  378     MOVNTQ "   %%mm"alpha", 24(%1)\n\t"      /* store pixels 6-7 */ \ 
  380 #if !COMPILE_TEMPLATE_MMXEXT 
  383                                        int srcSliceY, 
int srcSliceH,
 
  384                                        uint8_t *dst[], 
int dstStride[])
 
  386     int y, h_size, vshift;
 
  401 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA 
  404                                         int srcSliceY, 
int srcSliceH,
 
  405                                         uint8_t *dst[], 
int dstStride[])
 
  407     int y, h_size, vshift;
 
  411         const 
uint8_t *pa = 
src[3] + y * srcStride[3];
 
  426                                        int srcSliceY, 
int srcSliceH,
 
  427                                        uint8_t *dst[], 
int dstStride[])
 
  429     int y, h_size, vshift;
 
  444 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA 
  447                                         int srcSliceY, 
int srcSliceH,
 
  448                                         uint8_t *dst[], 
int dstStride[])
 
  450     int y, h_size, vshift;
 
  454         const 
uint8_t *pa = src[3] + y * srcStride[3];
 
const uint64_t ff_dither8[2]
 
#define YUV2RGB_INITIAL_LOAD
 
const uint64_t ff_dither4[2]
 
#define RGB_PACK32(red, green, blue, alpha)
 
#define DECLARE_ASM_CONST(n, t, v)
Declare a static constant aligned variable appropriate for use in inline assembly code...
 
#define YUV2RGB_ENDLOOP(depth)
 
#define YUV2RGB_LOOP(depth)
 
#define RGB_PACK16(gmask, is15)
 
#define YUV2RGB_OPERANDS_ALPHA
 
#define RGB_PACK_INTERLEAVE
 
#define RGB_PACK24(blue, red)