34 #   define RENAME(a) a ## _C 
   36 #   define TEMPLATE_PP_C 0 
   /*
    * Template variant selection.
    *
    * For each instruction-set variant, the group below defines RENAME(a),
    * which token-pastes a variant-specific suffix onto the identifier `a`,
    * and a TEMPLATE_PP_<variant> macro that the later #if tests evaluate.
    *
    * NOTE(review): the #else/#endif lines of these conditionals are not
    * visible in this excerpt; the "define TEMPLATE_PP_<variant> 0" lines
    * presumably form the #else branch (variant not requested) -- confirm
    * against the complete file.
    */
   /* AltiVec variant: identifiers get the _altivec suffix. */
   39 #ifdef TEMPLATE_PP_ALTIVEC 
   40 #   define RENAME(a) a ## _altivec 
   42 #   define TEMPLATE_PP_ALTIVEC 0 
   /* Plain MMX variant: identifiers get the _MMX suffix. */
   45 #ifdef TEMPLATE_PP_MMX 
   46 #   define RENAME(a) a ## _MMX 
   48 #   define TEMPLATE_PP_MMX 0 
   /* MMXEXT variant: also forces TEMPLATE_PP_MMX to 1; suffix is _MMX2. */
   51 #ifdef TEMPLATE_PP_MMXEXT 
   52 #   undef  TEMPLATE_PP_MMX 
   53 #   define TEMPLATE_PP_MMX 1 
   54 #   define RENAME(a) a ## _MMX2 
   56 #   define TEMPLATE_PP_MMXEXT 0 
   /* 3DNow variant: also forces TEMPLATE_PP_MMX to 1; suffix is _3DNow. */
   59 #ifdef TEMPLATE_PP_3DNOW 
   60 #   undef  TEMPLATE_PP_MMX 
   61 #   define TEMPLATE_PP_MMX 1 
   62 #   define RENAME(a) a ## _3DNow 
   64 #   define TEMPLATE_PP_3DNOW 0 
   /*
    * SSE2 variant: forces both TEMPLATE_PP_MMX and TEMPLATE_PP_MMXEXT to 1,
    * so every MMX/MMXEXT code path below is enabled; suffix is _SSE2.
    */
   67 #ifdef TEMPLATE_PP_SSE2 
   68 #   undef  TEMPLATE_PP_MMX 
   69 #   define TEMPLATE_PP_MMX 1 
   70 #   undef  TEMPLATE_PP_MMXEXT 
   71 #   define TEMPLATE_PP_MMXEXT 1 
   72 #   define RENAME(a) a ## _SSE2 
   74 #   define TEMPLATE_PP_SSE2 0 
   /*
    * PAVGB(a,b): emits the inline-asm text for a packed unsigned-byte
    * average of a and b, with the result written to b (AT&T operand order:
    * source first, destination second).
    *
    * REAL_PAVGB picks the mnemonic: "pavgb" when TEMPLATE_PP_MMXEXT is set,
    * "pavgusb" when TEMPLATE_PP_3DNOW is set. No definition is visible here
    * for builds where neither is set.
    *
    * PAVGB forwards to REAL_PAVGB so that its arguments are macro-expanded
    * before the # operator stringizes them.
    */
   82 #if   TEMPLATE_PP_MMXEXT 
   83 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" 
   84 #elif TEMPLATE_PP_3DNOW 
   85 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" 
   87 #define PAVGB(a,b)  REAL_PAVGB(a,b) 
   /*
    * PMINUB(x, y, t): emits inline-asm text that leaves the per-byte unsigned
    * minimum of x and y in y (the second argument).
    *
    * With MMXEXT this is the single "pminub" instruction and t is unused.
    */
   89 #if   TEMPLATE_PP_MMXEXT 
   90 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" 
   /*
    * Emulation without pminub. Note the parameter list is (b,a,t), so `a`
    * names the SECOND argument, keeping the call convention identical to the
    * pminub form above:
    *   t = a                 (movq)
    *   t = saturate(a - b)   (psubusb: 0 where a <= b, else a - b)
    *   a = a - t             (psubb:  a where a <= b, else b)
    * The scratch register t is clobbered.
    * NOTE(review): the #elif/#else line selecting this definition is not
    * visible in this excerpt.
    */
   92 #define PMINUB(b,a,t) \ 
   93     "movq " #a ", " #t " \n\t"\ 
   94     "psubusb " #b ", " #t " \n\t"\ 
   95     "psubb " #t ", " #a " \n\t" 
   /*
    * PMAXUB(a, b): emits inline-asm text that leaves the per-byte unsigned
    * maximum of a and b in b (the second argument).
    *
    * With MMXEXT this is the single "pmaxub" instruction.
    */
   98 #if   TEMPLATE_PP_MMXEXT 
   99 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" 
   /*
    * Plain-MMX emulation:
    *   b = saturate(b - a)   (psubusb: 0 where b <= a, else b - a)
    *   b = b + a             (paddb:  a where b <= a, else b)
    * No scratch register is needed.
    */
  100 #elif TEMPLATE_PP_MMX 
  101 #define PMAXUB(a,b) \ 
  102     "psubusb " #a ", " #b " \n\t"\ 
  103     "paddb " #a ", " #b " \n\t" 
  115         "movq %0, %%mm7                         \n\t" 
  116         "movq %1, %%mm6                         \n\t" 
  117         : : 
"m" (
c->mmxDcOffset[
c->nonBQP]),  
"m" (
c->mmxDcThreshold[
c->nonBQP])
 
  121         "lea (%2, %3), %%"FF_REG_a
"             \n\t" 
  125         "movq (%2), %%mm0                       \n\t" 
  126         "movq (%%"FF_REG_a
"), %%mm1             \n\t" 
  127         "movq %%mm0, %%mm3                      \n\t" 
  128         "movq %%mm0, %%mm4                      \n\t" 
  130         PMINUB(%%mm1, %%mm3, %%mm5)
 
  131         "psubb %%mm1, %%mm0                     \n\t"  
  132         "paddb %%mm7, %%mm0                     \n\t" 
  133         "pcmpgtb %%mm6, %%mm0                   \n\t" 
  135         "movq (%%"FF_REG_a
",%3), %%mm2          \n\t" 
  137         PMINUB(%%mm2, %%mm3, %%mm5)
 
  138         "psubb %%mm2, %%mm1                     \n\t" 
  139         "paddb %%mm7, %%mm1                     \n\t" 
  140         "pcmpgtb %%mm6, %%mm1                   \n\t" 
  141         "paddb %%mm1, %%mm0                     \n\t" 
  143         "movq (%%"FF_REG_a
", %3, 2), %%mm1      \n\t" 
  145         PMINUB(%%mm1, %%mm3, %%mm5)
 
  146         "psubb %%mm1, %%mm2                     \n\t" 
  147         "paddb %%mm7, %%mm2                     \n\t" 
  148         "pcmpgtb %%mm6, %%mm2                   \n\t" 
  149         "paddb %%mm2, %%mm0                     \n\t" 
  151         "lea (%%"FF_REG_a
", %3, 4), %%"FF_REG_a
"\n\t" 
  153         "movq (%2, %3, 4), %%mm2                \n\t" 
  155         PMINUB(%%mm2, %%mm3, %%mm5)
 
  156         "psubb %%mm2, %%mm1                     \n\t" 
  157         "paddb %%mm7, %%mm1                     \n\t" 
  158         "pcmpgtb %%mm6, %%mm1                   \n\t" 
  159         "paddb %%mm1, %%mm0                     \n\t" 
  161         "movq (%%"FF_REG_a
"), %%mm1             \n\t" 
  163         PMINUB(%%mm1, %%mm3, %%mm5)
 
  164         "psubb %%mm1, %%mm2                     \n\t" 
  165         "paddb %%mm7, %%mm2                     \n\t" 
  166         "pcmpgtb %%mm6, %%mm2                   \n\t" 
  167         "paddb %%mm2, %%mm0                     \n\t" 
  169         "movq (%%"FF_REG_a
", %3), %%mm2         \n\t" 
  171         PMINUB(%%mm2, %%mm3, %%mm5)
 
  172         "psubb %%mm2, %%mm1                     \n\t" 
  173         "paddb %%mm7, %%mm1                     \n\t" 
  174         "pcmpgtb %%mm6, %%mm1                   \n\t" 
  175         "paddb %%mm1, %%mm0                     \n\t" 
  177         "movq (%%"FF_REG_a
", %3, 2), %%mm1      \n\t" 
  179         PMINUB(%%mm1, %%mm3, %%mm5)
 
  180         "psubb %%mm1, %%mm2                     \n\t" 
  181         "paddb %%mm7, %%mm2                     \n\t" 
  182         "pcmpgtb %%mm6, %%mm2                   \n\t" 
  183         "paddb %%mm2, %%mm0                     \n\t" 
  184         "psubusb %%mm3, %%mm4                   \n\t" 
  187 #if TEMPLATE_PP_MMXEXT 
  188         "pxor %%mm7, %%mm7                      \n\t" 
  189         "psadbw %%mm7, %%mm0                    \n\t" 
  191         "movq %%mm0, %%mm1                      \n\t" 
  192         "psrlw $8, %%mm0                        \n\t" 
  193         "paddb %%mm1, %%mm0                     \n\t" 
  194         "movq %%mm0, %%mm1                      \n\t" 
  195         "psrlq $16, %%mm0                       \n\t" 
  196         "paddb %%mm1, %%mm0                     \n\t" 
  197         "movq %%mm0, %%mm1                      \n\t" 
  198         "psrlq $32, %%mm0                       \n\t" 
  199         "paddb %%mm1, %%mm0                     \n\t" 
  201         "movq %4, %%mm7                         \n\t"  
  202         "paddusb %%mm7, %%mm7                   \n\t"  
  203         "psubusb %%mm7, %%mm4                   \n\t"  
  204         "packssdw %%mm4, %%mm4                  \n\t" 
  205         "movd %%mm0, %0                         \n\t" 
  206         "movd %%mm4, %1                         \n\t" 
  208         : 
"=r" (numEq), 
"=r" (dcOk)
 
  213     numEq= (-numEq) &0xFF;
 
  214     if(numEq > 
c->ppMode.flatnessThreshold){
 
  221 #endif //TEMPLATE_PP_MMX 
  227 #if !TEMPLATE_PP_ALTIVEC 
  230 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
  233         "movq %2, %%mm0                         \n\t"   
  234         "pxor %%mm4, %%mm4                      \n\t" 
  236         "movq (%0), %%mm6                       \n\t" 
  237         "movq (%0, %1), %%mm5                   \n\t" 
  238         "movq %%mm5, %%mm1                      \n\t" 
  239         "movq %%mm6, %%mm2                      \n\t" 
  240         "psubusb %%mm6, %%mm5                   \n\t" 
  241         "psubusb %%mm1, %%mm2                   \n\t" 
  242         "por %%mm5, %%mm2                       \n\t"  
  243         "psubusb %%mm0, %%mm2                   \n\t"  
  244         "pcmpeqb %%mm4, %%mm2                   \n\t"  
  246         "pand %%mm2, %%mm6                      \n\t" 
  247         "pandn %%mm1, %%mm2                     \n\t" 
  248         "por %%mm2, %%mm6                       \n\t" 
  250         "movq (%0, %1, 8), %%mm5                \n\t" 
  251         "lea (%0, %1, 4), %%"FF_REG_a
"          \n\t" 
  252         "lea (%0, %1, 8), %%"FF_REG_c
"          \n\t" 
  253         "sub %1, %%"FF_REG_c
"                   \n\t" 
  255         "movq (%0, %1, 8), %%mm7                \n\t" 
  256         "movq %%mm5, %%mm1                      \n\t" 
  257         "movq %%mm7, %%mm2                      \n\t" 
  258         "psubusb %%mm7, %%mm5                   \n\t" 
  259         "psubusb %%mm1, %%mm2                   \n\t" 
  260         "por %%mm5, %%mm2                       \n\t"  
  261         "psubusb %%mm0, %%mm2                   \n\t"  
  262         "pcmpeqb %%mm4, %%mm2                   \n\t"  
  264         "pand %%mm2, %%mm7                      \n\t" 
  265         "pandn %%mm1, %%mm2                     \n\t" 
  266         "por %%mm2, %%mm7                       \n\t"  
  275         "movq (%0, %1), %%mm0                   \n\t"  
  276         "movq %%mm0, %%mm1                      \n\t"  
  280         "movq (%0, %1, 4), %%mm2                \n\t"  
  281         "movq %%mm2, %%mm5                      \n\t"  
  282         PAVGB((%%FF_REGa), %%mm2)                     
 
  283         PAVGB((%0, %1, 2), %%mm2)                     
 
  284         "movq %%mm2, %%mm3                      \n\t"  
  285         "movq (%0), %%mm4                       \n\t"  
  288         "movq %%mm3, (%0)                       \n\t"  
  290         "movq %%mm1, %%mm0                      \n\t"  
  292         "movq %%mm4, %%mm3                      \n\t"  
  293         PAVGB((%0,%1,2), %%mm3)                       
 
  294         PAVGB((%%FF_REGa,%1,2), %%mm5)                
 
  295         PAVGB((%%FF_REGa), %%mm5)                     
 
  298         "movq %%mm3, (%0,%1)                    \n\t"  
  301         "movq (%%"FF_REG_c
"), %%mm0             \n\t"  
  302         PAVGB((%%FF_REGa, %1, 2), %%mm0)              
 
  303         "movq %%mm0, %%mm3                      \n\t"  
  307         "movq (%0, %1, 2), %%mm2                \n\t"  
  308         "movq %%mm0, (%0, %1, 2)                \n\t"  
  310         "movq (%%"FF_REG_a
", %1, 4), %%mm0      \n\t"  
  311         PAVGB((%%FF_REGc), %%mm0)                     
 
  317         "movq (%%"FF_REG_a
"), %%mm5             \n\t"  
  318         "movq %%mm6, (%%"FF_REG_a
")             \n\t"  
  320         "movq (%%"FF_REG_a
", %1, 4), %%mm6      \n\t"  
  325         "movq (%0, %1, 4), %%mm4                \n\t"  
  328         "movq %%mm6, (%0, %1, 4)                \n\t"  
  333         "movq (%%"FF_REG_a
", %1, 2), %%mm6      \n\t"  
  336         "movq %%mm1, (%%"FF_REG_a
", %1, 2)      \n\t"  
  338         PAVGB((%%FF_REGc), %%mm2)                     
 
  339         "movq (%%"FF_REG_a
", %1, 4), %%mm0      \n\t"  
  343         "movq %%mm6, (%%"FF_REG_c
")             \n\t"  
  350         "movq %%mm5, (%%"FF_REG_a
", %1, 4)      \n\t"  
  355         : 
"%"FF_REG_a, 
"%"FF_REG_c
 
  357 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
  359     const int l2= 
stride + l1;
 
  360     const int l3= 
stride + l2;
 
  361     const int l4= 
stride + l3;
 
  362     const int l5= 
stride + l4;
 
  363     const int l6= 
stride + l5;
 
  364     const int l7= 
stride + l6;
 
  365     const int l8= 
stride + l7;
 
  366     const int l9= 
stride + l8;
 
  374         sums[0] = 4*first + 
src[l1] + 
src[l2] + 
src[l3] + 4;
 
  375         sums[1] = sums[0] - first  + src[l4];
 
  376         sums[2] = sums[1] - first  + src[l5];
 
  377         sums[3] = sums[2] - first  + src[l6];
 
  378         sums[4] = sums[3] - first  + src[l7];
 
  379         sums[5] = sums[4] - src[l1] + src[l8];
 
  380         sums[6] = sums[5] - src[l2] + last;
 
  381         sums[7] = sums[6] - src[l3] + last;
 
  382         sums[8] = sums[7] - src[l4] + last;
 
  383         sums[9] = sums[8] - src[l5] + last;
 
  385         src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
 
  386         src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
 
  387         src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
 
  388         src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
 
  389         src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
 
  390         src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
 
  391         src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
 
  392         src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
 
  396 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
  398 #endif //TEMPLATE_PP_ALTIVEC 
  409 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
  413         "pxor %%mm7, %%mm7                      \n\t"  
  414         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
  415         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 
  418         "movq (%%"FF_REG_a
", %1, 2), %%mm0      \n\t"  
  419         "movq (%0, %1, 4), %%mm1                \n\t"  
  420         "movq %%mm1, %%mm2                      \n\t"  
  421         "psubusb %%mm0, %%mm1                   \n\t" 
  422         "psubusb %%mm2, %%mm0                   \n\t" 
  423         "por %%mm1, %%mm0                       \n\t"  
  424         "movq (%%"FF_REG_c
"), %%mm3             \n\t"  
  425         "movq (%%"FF_REG_c
", %1), %%mm4         \n\t"  
  426         "movq %%mm3, %%mm5                      \n\t"  
  427         "psubusb %%mm4, %%mm3                   \n\t" 
  428         "psubusb %%mm5, %%mm4                   \n\t" 
  429         "por %%mm4, %%mm3                       \n\t"  
  431         "movq %%mm2, %%mm1                      \n\t"  
  432         "psubusb %%mm5, %%mm2                   \n\t" 
  433         "movq %%mm2, %%mm4                      \n\t" 
  434         "pcmpeqb %%mm7, %%mm2                   \n\t"  
  435         "psubusb %%mm1, %%mm5                   \n\t" 
  436         "por %%mm5, %%mm4                       \n\t"  
  437         "psubusb %%mm0, %%mm4                   \n\t"  
  438         "movq %%mm4, %%mm3                      \n\t"  
  439         "movq %2, %%mm0                         \n\t" 
  440         "paddusb %%mm0, %%mm0                   \n\t" 
  441         "psubusb %%mm0, %%mm4                   \n\t" 
  442         "pcmpeqb %%mm7, %%mm4                   \n\t"  
  443         "psubusb "MANGLE(b01)
", %%mm3           \n\t" 
  444         "pand %%mm4, %%mm3                      \n\t"  
  447         "movq %%mm3, %%mm1                      \n\t"  
  451         "movq (%0, %1, 4), %%mm0                \n\t"  
  452         "pxor %%mm2, %%mm0                      \n\t"  
  453         "psubusb %%mm3, %%mm0                   \n\t" 
  454         "pxor %%mm2, %%mm0                      \n\t" 
  455         "movq %%mm0, (%0, %1, 4)                \n\t"  
  457         "movq (%%"FF_REG_c
"), %%mm0             \n\t"  
  458         "pxor %%mm2, %%mm0                      \n\t"  
  459         "paddusb %%mm3, %%mm0                   \n\t" 
  460         "pxor %%mm2, %%mm0                      \n\t" 
  461         "movq %%mm0, (%%"FF_REG_c
")             \n\t"  
  465         "movq (%%"FF_REG_a
", %1, 2), %%mm0      \n\t"  
  466         "pxor %%mm2, %%mm0                      \n\t"  
  467         "psubusb %%mm1, %%mm0                   \n\t" 
  468         "pxor %%mm2, %%mm0                      \n\t" 
  469         "movq %%mm0, (%%"FF_REG_a
", %1, 2)      \n\t"  
  471         "movq (%%"FF_REG_c
", %1), %%mm0         \n\t"  
  472         "pxor %%mm2, %%mm0                      \n\t"  
  473         "paddusb %%mm1, %%mm0                   \n\t" 
  474         "pxor %%mm2, %%mm0                      \n\t" 
  475         "movq %%mm0, (%%"FF_REG_c
", %1)         \n\t"  
  479         "movq (%%"FF_REG_a
", %1), %%mm0         \n\t"  
  480         "pxor %%mm2, %%mm0                      \n\t"  
  481         "psubusb %%mm1, %%mm0                   \n\t" 
  482         "pxor %%mm2, %%mm0                      \n\t" 
  483         "movq %%mm0, (%%"FF_REG_a
", %1)         \n\t"  
  485         "movq (%%"FF_REG_c
", %1, 2), %%mm0      \n\t"  
  486         "pxor %%mm2, %%mm0                      \n\t"  
  487         "paddusb %%mm1, %%mm0                   \n\t" 
  488         "pxor %%mm2, %%mm0                      \n\t" 
  489         "movq %%mm0, (%%"FF_REG_c
", %1, 2)      \n\t"  
  494         : 
"%"FF_REG_a, 
"%"FF_REG_c
 
  496 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
  499     const int l2= 
stride + l1;
 
  500     const int l3= 
stride + l2;
 
  501     const int l4= 
stride + l3;
 
  502     const int l5= 
stride + l4;
 
  503     const int l6= 
stride + l5;
 
  504     const int l7= 
stride + l6;
 
  512         int b= src[l4] - src[l5];
 
  513         int c= src[l5] - src[l6];
 
  530 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
  533 #if !TEMPLATE_PP_ALTIVEC 
  536 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
  554 #if 0 //slightly more accurate and slightly slower 
  555         "pxor %%mm7, %%mm7                      \n\t"  
  556         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
  557         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 
  563         "movq (%0, %1, 2), %%mm0                \n\t"  
  564         "movq (%0), %%mm1                       \n\t"  
  565         "movq %%mm0, %%mm2                      \n\t"  
  570         "movq (%%"FF_REG_a
"), %%mm1             \n\t"  
  571         "movq (%%"FF_REG_a
", %1, 2), %%mm3      \n\t"  
  572         "movq %%mm1, %%mm4                      \n\t"  
  577         "movq %%mm0, %%mm4                      \n\t"  
  578         "psubusb %%mm1, %%mm0                   \n\t" 
  579         "psubusb %%mm4, %%mm1                   \n\t" 
  580         "por %%mm0, %%mm1                       \n\t"  
  583         "movq (%0, %1, 4), %%mm0                \n\t"  
  584         "movq %%mm0, %%mm4                      \n\t"  
  589         "movq (%%"FF_REG_c
"), %%mm2             \n\t"  
  590         "movq %%mm3, %%mm5                      \n\t"  
  595         "movq %%mm0, %%mm6                      \n\t"  
  596         "psubusb %%mm3, %%mm0                   \n\t" 
  597         "psubusb %%mm6, %%mm3                   \n\t" 
  598         "por %%mm0, %%mm3                       \n\t"  
  599         "pcmpeqb %%mm7, %%mm0                   \n\t"  
  602         "movq (%%"FF_REG_c
", %1), %%mm6         \n\t"  
  603         "movq %%mm6, %%mm5                      \n\t"  
  608         "movq (%%"FF_REG_c
", %1, 2), %%mm5      \n\t"  
  609         "movq %%mm2, %%mm4                      \n\t"  
  614         "movq %%mm6, %%mm4                      \n\t"  
  615         "psubusb %%mm2, %%mm6                   \n\t" 
  616         "psubusb %%mm4, %%mm2                   \n\t" 
  617         "por %%mm6, %%mm2                       \n\t"  
  621         PMINUB(%%mm2, %%mm1, %%mm4)                   
 
  622         "movq %2, %%mm4                         \n\t"  
  623         "paddusb "MANGLE(b01)
", %%mm4           \n\t" 
  624         "pcmpgtb %%mm3, %%mm4                   \n\t"  
  625         "psubusb %%mm1, %%mm3                   \n\t"  
  626         "pand %%mm4, %%mm3                      \n\t" 
  628         "movq %%mm3, %%mm1                      \n\t" 
  632         "paddusb %%mm1, %%mm3                   \n\t" 
  635         "movq (%%"FF_REG_a
", %1, 2), %%mm6      \n\t"  
  636         "movq (%0, %1, 4), %%mm5                \n\t"  
  637         "movq (%0, %1, 4), %%mm4                \n\t"  
  638         "psubusb %%mm6, %%mm5                   \n\t" 
  639         "psubusb %%mm4, %%mm6                   \n\t" 
  640         "por %%mm6, %%mm5                       \n\t"  
  641         "pcmpeqb %%mm7, %%mm6                   \n\t"  
  642         "pxor %%mm6, %%mm0                      \n\t" 
  643         "pand %%mm0, %%mm3                      \n\t" 
  644         PMINUB(%%mm5, %%mm3, %%mm0)
 
  646         "psubusb "MANGLE(b01)
", %%mm3           \n\t" 
  649         "movq (%%"FF_REG_a
", %1, 2), %%mm0      \n\t" 
  650         "movq (%0, %1, 4), %%mm2                \n\t" 
  651         "pxor %%mm6, %%mm0                      \n\t" 
  652         "pxor %%mm6, %%mm2                      \n\t" 
  653         "psubb %%mm3, %%mm0                     \n\t" 
  654         "paddb %%mm3, %%mm2                     \n\t" 
  655         "pxor %%mm6, %%mm0                      \n\t" 
  656         "pxor %%mm6, %%mm2                      \n\t" 
  657         "movq %%mm0, (%%"FF_REG_a
", %1, 2)      \n\t" 
  658         "movq %%mm2, (%0, %1, 4)                \n\t" 
  661         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
  662         "pcmpeqb %%mm6, %%mm6                   \n\t"  
  668         "movq (%%"FF_REG_a
", %1, 2), %%mm1      \n\t"  
  669         "movq (%0, %1, 4), %%mm0                \n\t"  
  670         "pxor %%mm6, %%mm1                      \n\t"  
  674         "movq (%%"FF_REG_a
", %1, 4), %%mm2      \n\t"  
  675         "movq (%%"FF_REG_a
", %1), %%mm3         \n\t"  
  676         "pxor %%mm6, %%mm2                      \n\t"  
  677         "movq %%mm2, %%mm5                      \n\t"  
  678         "movq "MANGLE(b80)
", %%mm4              \n\t"  
  679         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 
  686         "movq (%%"FF_REG_a
"), %%mm2             \n\t"  
  687         "pxor %%mm6, %%mm2                      \n\t"  
  690         "movq "MANGLE(b80)
", %%mm3              \n\t"  
  696         PAVGB((%%FF_REGc, %1), %%mm5)                 
 
  697         "movq (%%"FF_REG_c
", %1, 2), %%mm1      \n\t"  
  698         "pxor %%mm6, %%mm1                      \n\t"  
  699         PAVGB((%0, %1, 4), %%mm1)                     
 
  700         "movq "MANGLE(b80)
", %%mm2              \n\t"  
  706         "movq "MANGLE(b00)
", %%mm1              \n\t"  
  707         "movq "MANGLE(b00)
", %%mm5              \n\t"  
  708         "psubb %%mm2, %%mm1                     \n\t"  
  709         "psubb %%mm3, %%mm5                     \n\t"  
  712         PMINUB(%%mm2, %%mm3, %%mm1)                   
 
  716         "movq "MANGLE(b00)
", %%mm7              \n\t"  
  717         "movq %2, %%mm2                         \n\t"  
  719         "psubb %%mm6, %%mm2                     \n\t" 
  721         "movq %%mm4, %%mm1                      \n\t" 
  722         "pcmpgtb %%mm7, %%mm1                   \n\t"  
  723         "pxor %%mm1, %%mm4                      \n\t" 
  724         "psubb %%mm1, %%mm4                     \n\t"  
  725         "pcmpgtb %%mm4, %%mm2                   \n\t"  
  726         "psubusb %%mm3, %%mm4                   \n\t"  
  729         "movq %%mm4, %%mm3                      \n\t"  
  730         "psubusb "MANGLE(b01)
", %%mm4           \n\t" 
  733         "paddb %%mm3, %%mm4                     \n\t"  
  734         "pand %%mm2, %%mm4                      \n\t" 
  736         "movq "MANGLE(b80)
", %%mm5              \n\t"  
  737         "psubb %%mm0, %%mm5                     \n\t"  
  738         "paddsb %%mm6, %%mm5                    \n\t"  
  739         "pcmpgtb %%mm5, %%mm7                   \n\t"  
  740         "pxor %%mm7, %%mm5                      \n\t" 
  742         PMINUB(%%mm5, %%mm4, %%mm3)                   
 
  743         "pxor %%mm1, %%mm7                      \n\t"  
  745         "pand %%mm7, %%mm4                      \n\t" 
  746         "movq (%%"FF_REG_a
", %1, 2), %%mm0      \n\t" 
  747         "movq (%0, %1, 4), %%mm2                \n\t" 
  748         "pxor %%mm1, %%mm0                      \n\t" 
  749         "pxor %%mm1, %%mm2                      \n\t" 
  750         "paddb %%mm4, %%mm0                     \n\t" 
  751         "psubb %%mm4, %%mm2                     \n\t" 
  752         "pxor %%mm1, %%mm0                      \n\t" 
  753         "pxor %%mm1, %%mm2                      \n\t" 
  754         "movq %%mm0, (%%"FF_REG_a
", %1, 2)      \n\t" 
  755         "movq %%mm2, (%0, %1, 4)                \n\t" 
  760         : 
"%"FF_REG_a, 
"%"FF_REG_c
 
  818 #elif TEMPLATE_PP_MMX 
  822         "pxor %%mm7, %%mm7                      \n\t" 
  827         "movq (%0), %%mm0                       \n\t" 
  828         "movq %%mm0, %%mm1                      \n\t" 
  829         "punpcklbw %%mm7, %%mm0                 \n\t"  
  830         "punpckhbw %%mm7, %%mm1                 \n\t"  
  832         "movq (%0, %1), %%mm2                   \n\t" 
  833         "lea (%0, %1, 2), %%"FF_REG_a
"          \n\t" 
  834         "movq %%mm2, %%mm3                      \n\t" 
  835         "punpcklbw %%mm7, %%mm2                 \n\t"  
  836         "punpckhbw %%mm7, %%mm3                 \n\t"  
  838         "movq (%%"FF_REG_a
"), %%mm4             \n\t" 
  839         "movq %%mm4, %%mm5                      \n\t" 
  840         "punpcklbw %%mm7, %%mm4                 \n\t"  
  841         "punpckhbw %%mm7, %%mm5                 \n\t"  
  843         "paddw %%mm0, %%mm0                     \n\t"  
  844         "paddw %%mm1, %%mm1                     \n\t"  
  845         "psubw %%mm4, %%mm2                     \n\t"  
  846         "psubw %%mm5, %%mm3                     \n\t"  
  847         "psubw %%mm2, %%mm0                     \n\t"  
  848         "psubw %%mm3, %%mm1                     \n\t"  
  850         "psllw $2, %%mm2                        \n\t"  
  851         "psllw $2, %%mm3                        \n\t"  
  852         "psubw %%mm2, %%mm0                     \n\t"  
  853         "psubw %%mm3, %%mm1                     \n\t"  
  855         "movq (%%"FF_REG_a
", %1), %%mm2         \n\t" 
  856         "movq %%mm2, %%mm3                      \n\t" 
  857         "punpcklbw %%mm7, %%mm2                 \n\t"  
  858         "punpckhbw %%mm7, %%mm3                 \n\t"  
  860         "psubw %%mm2, %%mm0                     \n\t"  
  861         "psubw %%mm3, %%mm1                     \n\t"  
  862         "psubw %%mm2, %%mm0                     \n\t"  
  863         "psubw %%mm3, %%mm1                     \n\t"  
  864         "movq %%mm0, (%3)                       \n\t"  
  865         "movq %%mm1, 8(%3)                      \n\t"  
  867         "movq (%%"FF_REG_a
", %1, 2), %%mm0      \n\t" 
  868         "movq %%mm0, %%mm1                      \n\t" 
  869         "punpcklbw %%mm7, %%mm0                 \n\t"  
  870         "punpckhbw %%mm7, %%mm1                 \n\t"  
  872         "psubw %%mm0, %%mm2                     \n\t"  
  873         "psubw %%mm1, %%mm3                     \n\t"  
  874         "movq %%mm2, 16(%3)                     \n\t"  
  875         "movq %%mm3, 24(%3)                     \n\t"  
  876         "paddw %%mm4, %%mm4                     \n\t"  
  877         "paddw %%mm5, %%mm5                     \n\t"  
  878         "psubw %%mm2, %%mm4                     \n\t"  
  879         "psubw %%mm3, %%mm5                     \n\t"  
  881         "lea (%%"FF_REG_a
", %1), %0             \n\t" 
  882         "psllw $2, %%mm2                        \n\t"  
  883         "psllw $2, %%mm3                        \n\t"  
  884         "psubw %%mm2, %%mm4                     \n\t"  
  885         "psubw %%mm3, %%mm5                     \n\t"  
  887         "movq (%0, %1, 2), %%mm2                \n\t" 
  888         "movq %%mm2, %%mm3                      \n\t" 
  889         "punpcklbw %%mm7, %%mm2                 \n\t"  
  890         "punpckhbw %%mm7, %%mm3                 \n\t"  
  891         "psubw %%mm2, %%mm4                     \n\t"  
  892         "psubw %%mm3, %%mm5                     \n\t"  
  893         "psubw %%mm2, %%mm4                     \n\t"  
  894         "psubw %%mm3, %%mm5                     \n\t"  
  896         "movq (%%"FF_REG_a
", %1, 4), %%mm6      \n\t" 
  897         "punpcklbw %%mm7, %%mm6                 \n\t"  
  898         "psubw %%mm6, %%mm2                     \n\t"  
  899         "movq (%%"FF_REG_a
", %1, 4), %%mm6      \n\t" 
  900         "punpckhbw %%mm7, %%mm6                 \n\t"  
  901         "psubw %%mm6, %%mm3                     \n\t"  
  903         "paddw %%mm0, %%mm0                     \n\t"  
  904         "paddw %%mm1, %%mm1                     \n\t"  
  905         "psubw %%mm2, %%mm0                     \n\t"  
  906         "psubw %%mm3, %%mm1                     \n\t"  
  908         "psllw $2, %%mm2                        \n\t"  
  909         "psllw $2, %%mm3                        \n\t"  
  910         "psubw %%mm2, %%mm0                     \n\t"  
  911         "psubw %%mm3, %%mm1                     \n\t"  
  913         "movq (%0, %1, 4), %%mm2                \n\t" 
  914         "movq %%mm2, %%mm3                      \n\t" 
  915         "punpcklbw %%mm7, %%mm2                 \n\t"  
  916         "punpckhbw %%mm7, %%mm3                 \n\t"  
  918         "paddw %%mm2, %%mm2                     \n\t"  
  919         "paddw %%mm3, %%mm3                     \n\t"  
  920         "psubw %%mm2, %%mm0                     \n\t"  
  921         "psubw %%mm3, %%mm1                     \n\t"  
  923         "movq (%3), %%mm2                       \n\t"  
  924         "movq 8(%3), %%mm3                      \n\t"  
  926 #if TEMPLATE_PP_MMXEXT 
  927         "movq %%mm7, %%mm6                      \n\t"  
  928         "psubw %%mm0, %%mm6                     \n\t" 
  929         "pmaxsw %%mm6, %%mm0                    \n\t"  
  930         "movq %%mm7, %%mm6                      \n\t"  
  931         "psubw %%mm1, %%mm6                     \n\t" 
  932         "pmaxsw %%mm6, %%mm1                    \n\t"  
  933         "movq %%mm7, %%mm6                      \n\t"  
  934         "psubw %%mm2, %%mm6                     \n\t" 
  935         "pmaxsw %%mm6, %%mm2                    \n\t"  
  936         "movq %%mm7, %%mm6                      \n\t"  
  937         "psubw %%mm3, %%mm6                     \n\t" 
  938         "pmaxsw %%mm6, %%mm3                    \n\t"  
  940         "movq %%mm7, %%mm6                      \n\t"  
  941         "pcmpgtw %%mm0, %%mm6                   \n\t" 
  942         "pxor %%mm6, %%mm0                      \n\t" 
  943         "psubw %%mm6, %%mm0                     \n\t"  
  944         "movq %%mm7, %%mm6                      \n\t"  
  945         "pcmpgtw %%mm1, %%mm6                   \n\t" 
  946         "pxor %%mm6, %%mm1                      \n\t" 
  947         "psubw %%mm6, %%mm1                     \n\t"  
  948         "movq %%mm7, %%mm6                      \n\t"  
  949         "pcmpgtw %%mm2, %%mm6                   \n\t" 
  950         "pxor %%mm6, %%mm2                      \n\t" 
  951         "psubw %%mm6, %%mm2                     \n\t"  
  952         "movq %%mm7, %%mm6                      \n\t"  
  953         "pcmpgtw %%mm3, %%mm6                   \n\t" 
  954         "pxor %%mm6, %%mm3                      \n\t" 
  955         "psubw %%mm6, %%mm3                     \n\t"  
  958 #if TEMPLATE_PP_MMXEXT 
  959         "pminsw %%mm2, %%mm0                    \n\t" 
  960         "pminsw %%mm3, %%mm1                    \n\t" 
  962         "movq %%mm0, %%mm6                      \n\t" 
  963         "psubusw %%mm2, %%mm6                   \n\t" 
  964         "psubw %%mm6, %%mm0                     \n\t" 
  965         "movq %%mm1, %%mm6                      \n\t" 
  966         "psubusw %%mm3, %%mm6                   \n\t" 
  967         "psubw %%mm6, %%mm1                     \n\t" 
  970         "movd %2, %%mm2                         \n\t"  
  971         "punpcklbw %%mm7, %%mm2                 \n\t" 
  973         "movq %%mm7, %%mm6                      \n\t"  
  974         "pcmpgtw %%mm4, %%mm6                   \n\t"  
  975         "pxor %%mm6, %%mm4                      \n\t" 
  976         "psubw %%mm6, %%mm4                     \n\t"  
  977         "pcmpgtw %%mm5, %%mm7                   \n\t"  
  978         "pxor %%mm7, %%mm5                      \n\t" 
  979         "psubw %%mm7, %%mm5                     \n\t"  
  981         "psllw $3, %%mm2                        \n\t"  
  982         "movq %%mm2, %%mm3                      \n\t"  
  983         "pcmpgtw %%mm4, %%mm2                   \n\t" 
  984         "pcmpgtw %%mm5, %%mm3                   \n\t" 
  985         "pand %%mm2, %%mm4                      \n\t" 
  986         "pand %%mm3, %%mm5                      \n\t" 
  989         "psubusw %%mm0, %%mm4                   \n\t"  
  990         "psubusw %%mm1, %%mm5                   \n\t"  
  993         "movq "MANGLE(w05)
", %%mm2              \n\t"  
  994         "pmullw %%mm2, %%mm4                    \n\t" 
  995         "pmullw %%mm2, %%mm5                    \n\t" 
  996         "movq "MANGLE(w20)
", %%mm2              \n\t"  
  997         "paddw %%mm2, %%mm4                     \n\t" 
  998         "paddw %%mm2, %%mm5                     \n\t" 
  999         "psrlw $6, %%mm4                        \n\t" 
 1000         "psrlw $6, %%mm5                        \n\t" 
 1002         "movq 16(%3), %%mm0                     \n\t"  
 1003         "movq 24(%3), %%mm1                     \n\t"  
 1005         "pxor %%mm2, %%mm2                      \n\t" 
 1006         "pxor %%mm3, %%mm3                      \n\t" 
 1008         "pcmpgtw %%mm0, %%mm2                   \n\t"  
 1009         "pcmpgtw %%mm1, %%mm3                   \n\t"  
 1010         "pxor %%mm2, %%mm0                      \n\t" 
 1011         "pxor %%mm3, %%mm1                      \n\t" 
 1012         "psubw %%mm2, %%mm0                     \n\t"  
 1013         "psubw %%mm3, %%mm1                     \n\t"  
 1014         "psrlw $1, %%mm0                        \n\t"  
 1015         "psrlw $1, %%mm1                        \n\t"  
 1017         "pxor %%mm6, %%mm2                      \n\t" 
 1018         "pxor %%mm7, %%mm3                      \n\t" 
 1019         "pand %%mm2, %%mm4                      \n\t" 
 1020         "pand %%mm3, %%mm5                      \n\t" 
 1022 #if TEMPLATE_PP_MMXEXT 
 1023         "pminsw %%mm0, %%mm4                    \n\t" 
 1024         "pminsw %%mm1, %%mm5                    \n\t" 
 1026         "movq %%mm4, %%mm2                      \n\t" 
 1027         "psubusw %%mm0, %%mm2                   \n\t" 
 1028         "psubw %%mm2, %%mm4                     \n\t" 
 1029         "movq %%mm5, %%mm2                      \n\t" 
 1030         "psubusw %%mm1, %%mm2                   \n\t" 
 1031         "psubw %%mm2, %%mm5                     \n\t" 
 1033         "pxor %%mm6, %%mm4                      \n\t" 
 1034         "pxor %%mm7, %%mm5                      \n\t" 
 1035         "psubw %%mm6, %%mm4                     \n\t" 
 1036         "psubw %%mm7, %%mm5                     \n\t" 
 1037         "packsswb %%mm5, %%mm4                  \n\t" 
 1038         "movq (%0), %%mm0                       \n\t" 
 1039         "paddb   %%mm4, %%mm0                   \n\t" 
 1040         "movq %%mm0, (%0)                       \n\t" 
 1041         "movq (%0, %1), %%mm0                   \n\t" 
 1042         "psubb %%mm4, %%mm0                     \n\t" 
 1043         "movq %%mm0, (%0, %1)                   \n\t" 
 1050 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1052     const int l2= 
stride + l1;
 
 1053     const int l3= 
stride + l2;
 
 1054     const int l4= 
stride + l3;
 
 1055     const int l5= 
stride + l4;
 
 1056     const int l6= 
stride + l5;
 
 1057     const int l7= 
stride + l6;
 
 1058     const int l8= 
stride + l7;
 
 1063         const int middleEnergy= 5*(
src[l5] - 
src[l4]) + 2*(
src[l3] - 
src[l6]);
 
 1064         if(
FFABS(middleEnergy) < 8*
c->QP){
 
 1065             const int q=(
src[l4] - 
src[l5])/2;
 
 1066             const int leftEnergy=  5*(
src[l3] - 
src[l2]) + 2*(
src[l1] - 
src[l4]);
 
 1067             const int rightEnergy= 5*(
src[l7] - 
src[l6]) + 2*(
src[l5] - 
src[l8]);
 
 1073             d*= 
FFSIGN(-middleEnergy);
 
 1088 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1090 #endif //TEMPLATE_PP_ALTIVEC 
 1092 #if !TEMPLATE_PP_ALTIVEC 
 1095 #if HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) 
 1098         "pxor %%mm6, %%mm6                      \n\t" 
 1099         "pcmpeqb %%mm7, %%mm7                   \n\t" 
 1100         "movq %2, %%mm0                         \n\t" 
 1101         "punpcklbw %%mm6, %%mm0                 \n\t" 
 1102         "psrlw $1, %%mm0                        \n\t" 
 1103         "psubw %%mm7, %%mm0                     \n\t" 
 1104         "packuswb %%mm0, %%mm0                  \n\t" 
 1105         "movq %%mm0, %3                         \n\t" 
 1107         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
 1108         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 
 /*
  * FIND_MIN_MAX(addr): asm-string fragment that folds the 8 bytes at memory
  * operand `addr` into two running per-byte accumulators:
  *     mm7 = min(mm7, [addr])   (unsigned bytes)
  *     mm6 = max(mm6, [addr])   (unsigned bytes)
  * mm0 is overwritten with the loaded data in both variants.
  */
 1113 #undef REAL_FIND_MIN_MAX 
 1115 #if TEMPLATE_PP_MMXEXT 
 /* MMXEXT variant: pminub/pmaxub compute the byte min/max directly. */
 1116 #define REAL_FIND_MIN_MAX(addr)\ 
 1117         "movq " #addr ", %%mm0                  \n\t"\ 
 1118         "pminub %%mm0, %%mm7                    \n\t"\ 
 1119         "pmaxub %%mm0, %%mm6                    \n\t" 
 /*
  * Variant without pminub/pmaxub, built from saturating arithmetic:
  *     max: mm6 = sat(mm6 - x) + x
  *     min: mm7 = mm7 - sat(mm7 - x)
  * Additionally clobbers mm1 (used as scratch for the min computation).
  */
 1121 #define REAL_FIND_MIN_MAX(addr)\ 
 1122         "movq " #addr ", %%mm0                  \n\t"\ 
 1123         "movq %%mm7, %%mm1                      \n\t"\ 
 1124         "psubusb %%mm0, %%mm6                   \n\t"\ 
 1125         "paddb %%mm0, %%mm6                     \n\t"\ 
 1126         "psubusb %%mm0, %%mm1                   \n\t"\ 
 1127         "psubb %%mm1, %%mm7                     \n\t" 
 /*
  * Extra level of indirection: the argument is macro-expanded here before
  * REAL_FIND_MIN_MAX stringifies it with #addr (presumably needed for the
  * FF_REGa / FF_REGd register macros used at the call sites).
  */
 1129 #define FIND_MIN_MAX(addr)  REAL_FIND_MIN_MAX(addr) 
 1131 FIND_MIN_MAX((%%FF_REGa))
 
 1132 FIND_MIN_MAX((%%FF_REGa, %1))
 
 1133 FIND_MIN_MAX((%%FF_REGa, %1, 2))
 
 1134 FIND_MIN_MAX((%0, %1, 4))
 
 1135 FIND_MIN_MAX((%%FF_REGd))
 
 1136 FIND_MIN_MAX((%%FF_REGd, %1))
 
 1137 FIND_MIN_MAX((%%FF_REGd, %1, 2))
 
 1138 FIND_MIN_MAX((%0, %1, 8))
 
 1140         "movq %%mm7, %%mm4                      \n\t" 
 1141         "psrlq $8, %%mm7                        \n\t" 
 1142 #if TEMPLATE_PP_MMXEXT 
 1143         "pminub %%mm4, %%mm7                    \n\t"  
 1144         "pshufw $0xF9, %%mm7, %%mm4             \n\t" 
 1145         "pminub %%mm4, %%mm7                    \n\t"  
 1146         "pshufw $0xFE, %%mm7, %%mm4             \n\t" 
 1147         "pminub %%mm4, %%mm7                    \n\t" 
 1149         "movq %%mm7, %%mm1                      \n\t" 
 1150         "psubusb %%mm4, %%mm1                   \n\t" 
 1151         "psubb %%mm1, %%mm7                     \n\t" 
 1152         "movq %%mm7, %%mm4                      \n\t" 
 1153         "psrlq $16, %%mm7                       \n\t" 
 1154         "movq %%mm7, %%mm1                      \n\t" 
 1155         "psubusb %%mm4, %%mm1                   \n\t" 
 1156         "psubb %%mm1, %%mm7                     \n\t" 
 1157         "movq %%mm7, %%mm4                      \n\t" 
 1158         "psrlq $32, %%mm7                       \n\t" 
 1159         "movq %%mm7, %%mm1                      \n\t" 
 1160         "psubusb %%mm4, %%mm1                   \n\t" 
 1161         "psubb %%mm1, %%mm7                     \n\t" 
 1165         "movq %%mm6, %%mm4                      \n\t" 
 1166         "psrlq $8, %%mm6                        \n\t" 
 1167 #if TEMPLATE_PP_MMXEXT 
 1168         "pmaxub %%mm4, %%mm6                    \n\t"  
 1169         "pshufw $0xF9, %%mm6, %%mm4             \n\t" 
 1170         "pmaxub %%mm4, %%mm6                    \n\t" 
 1171         "pshufw $0xFE, %%mm6, %%mm4             \n\t" 
 1172         "pmaxub %%mm4, %%mm6                    \n\t" 
 1174         "psubusb %%mm4, %%mm6                   \n\t" 
 1175         "paddb %%mm4, %%mm6                     \n\t" 
 1176         "movq %%mm6, %%mm4                      \n\t" 
 1177         "psrlq $16, %%mm6                       \n\t" 
 1178         "psubusb %%mm4, %%mm6                   \n\t" 
 1179         "paddb %%mm4, %%mm6                     \n\t" 
 1180         "movq %%mm6, %%mm4                      \n\t" 
 1181         "psrlq $32, %%mm6                       \n\t" 
 1182         "psubusb %%mm4, %%mm6                   \n\t" 
 1183         "paddb %%mm4, %%mm6                     \n\t" 
 1185         "movq %%mm6, %%mm0                      \n\t"  
 1186         "psubb %%mm7, %%mm6                     \n\t"  
 1188         "movd %%mm6, %k4                        \n\t" 
 1189         "cmpb "MANGLE(deringThreshold)
", %b4    \n\t" 
 1193         "punpcklbw %%mm7, %%mm7                 \n\t" 
 1194         "punpcklbw %%mm7, %%mm7                 \n\t" 
 1195         "punpcklbw %%mm7, %%mm7                 \n\t" 
 1196         "movq %%mm7, (%4)                       \n\t" 
 1198         "movq (%0), %%mm0                       \n\t"  
 1199         "movq %%mm0, %%mm1                      \n\t"  
 1200         "movq %%mm0, %%mm2                      \n\t"  
 1201         "psllq $8, %%mm1                        \n\t" 
 1202         "psrlq $8, %%mm2                        \n\t" 
 1203         "movd -4(%0), %%mm3                     \n\t" 
 1204         "movd 8(%0), %%mm4                      \n\t" 
 1205         "psrlq $24, %%mm3                       \n\t" 
 1206         "psllq $56, %%mm4                       \n\t" 
 1207         "por %%mm3, %%mm1                       \n\t"  
 1208         "por %%mm4, %%mm2                       \n\t"  
 1209         "movq %%mm1, %%mm3                      \n\t"  
 1212         "psubusb %%mm7, %%mm0                   \n\t" 
 1213         "psubusb %%mm7, %%mm2                   \n\t" 
 1214         "psubusb %%mm7, %%mm3                   \n\t" 
 1215         "pcmpeqb "MANGLE(b00)
", %%mm0           \n\t"  
 1216         "pcmpeqb "MANGLE(b00)
", %%mm2           \n\t"  
 1217         "pcmpeqb "MANGLE(b00)
", %%mm3           \n\t"  
 1218         "paddb %%mm2, %%mm0                     \n\t" 
 1219         "paddb %%mm3, %%mm0                     \n\t" 
 1221         "movq (%%"FF_REG_a
"), %%mm2             \n\t"  
 1222         "movq %%mm2, %%mm3                      \n\t"  
 1223         "movq %%mm2, %%mm4                      \n\t"  
 1224         "psllq $8, %%mm3                        \n\t" 
 1225         "psrlq $8, %%mm4                        \n\t" 
 1226         "movd -4(%%"FF_REG_a
"), %%mm5           \n\t" 
 1227         "movd 8(%%"FF_REG_a
"), %%mm6            \n\t" 
 1228         "psrlq $24, %%mm5                       \n\t" 
 1229         "psllq $56, %%mm6                       \n\t" 
 1230         "por %%mm5, %%mm3                       \n\t"  
 1231         "por %%mm6, %%mm4                       \n\t"  
 1232         "movq %%mm3, %%mm5                      \n\t"  
 1235         "psubusb %%mm7, %%mm2                   \n\t" 
 1236         "psubusb %%mm7, %%mm4                   \n\t" 
 1237         "psubusb %%mm7, %%mm5                   \n\t" 
 1238         "pcmpeqb "MANGLE(b00)
", %%mm2           \n\t"  
 1239         "pcmpeqb "MANGLE(b00)
", %%mm4           \n\t"  
 1240         "pcmpeqb "MANGLE(b00)
", %%mm5           \n\t"  
 1241         "paddb %%mm4, %%mm2                     \n\t" 
 1242         "paddb %%mm5, %%mm2                     \n\t" 
 1244 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ 
 1245         "movq " #src ", " #sx "                 \n\t" \ 
 1246         "movq " #sx ", " #lx "                  \n\t" \ 
 1247         "movq " #sx ", " #t0 "                  \n\t" \ 
 1248         "psllq $8, " #lx "                      \n\t"\ 
 1249         "psrlq $8, " #t0 "                      \n\t"\ 
 1250         "movd -4" #src ", " #t1 "               \n\t"\ 
 1251         "psrlq $24, " #t1 "                     \n\t"\ 
 1252         "por " #t1 ", " #lx "                   \n\t" \ 
 1253         "movd 8" #src ", " #t1 "                \n\t"\ 
 1254         "psllq $56, " #t1 "                     \n\t"\ 
 1255         "por " #t1 ", " #t0 "                   \n\t" \ 
 1256         "movq " #lx ", " #t1 "                  \n\t" \ 
 1260         "movq " #lx ", 8(%4)                    \n\t"\ 
 1261         "movq (%4), " #lx "                     \n\t"\ 
 1262         "psubusb " #lx ", " #t1 "               \n\t"\ 
 1263         "psubusb " #lx ", " #t0 "               \n\t"\ 
 1264         "psubusb " #lx ", " #sx "               \n\t"\ 
 1265         "movq "MANGLE(b00)", " #lx "            \n\t"\ 
 1266         "pcmpeqb " #lx ", " #t1 "               \n\t" \ 
 1267         "pcmpeqb " #lx ", " #t0 "               \n\t" \ 
 1268         "pcmpeqb " #lx ", " #sx "               \n\t" \ 
 1269         "paddb " #t1 ", " #t0 "                 \n\t"\ 
 1270         "paddb " #t0 ", " #sx "                 \n\t"\ 
 1273         "movq " #dst ", " #t0 "                 \n\t" \ 
 1274         "movq " #t0 ", " #t1 "                  \n\t" \ 
 1275         "psubusb %3, " #t0 "                    \n\t"\ 
 1276         "paddusb %3, " #t1 "                    \n\t"\ 
 1278         PMINUB(t1, pplx, t0)\ 
 1279         "paddb " #sx ", " #ppsx "               \n\t"\ 
 1280         "paddb " #psx ", " #ppsx "              \n\t"\ 
 1281         "#paddb "MANGLE(b02)", " #ppsx "        \n\t"\ 
 1282         "pand "MANGLE(b08)", " #ppsx "          \n\t"\ 
 1283         "pcmpeqb " #lx ", " #ppsx "             \n\t"\ 
 1284         "pand " #ppsx ", " #pplx "              \n\t"\ 
 1285         "pandn " #dst ", " #ppsx "              \n\t"\ 
 1286         "por " #pplx ", " #ppsx "               \n\t"\ 
 1287         "movq " #ppsx ", " #dst "               \n\t"\ 
 1288         "movq 8(%4), " #lx "                    \n\t" 
 1290 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ 
 1291    REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) 
 1308 DERING_CORE((%%FF_REGa)       ,(%%FF_REGa, %1)   ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
 
 1309 DERING_CORE((%%FF_REGa, %1)   ,(%%FF_REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
 
 1310 DERING_CORE((%%FF_REGa, %1, 2),(%0, %1, 4)       ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
 
 1311 DERING_CORE((%0, %1, 4)       ,(%%FF_REGd)       ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
 
 1312 DERING_CORE((%%FF_REGd)       ,(%%FF_REGd, %1)   ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
 
 1313 DERING_CORE((%%FF_REGd, %1)   ,(%%FF_REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
 
 1314 DERING_CORE((%%FF_REGd, %1, 2),(%0, %1, 8)       ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
 
 1315 DERING_CORE((%0, %1, 8)       ,(%%FF_REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
 
 1320         : 
"%"FF_REG_a, 
"%"FF_REG_d, 
"%"FF_REG_sp
 
 1322 #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) 
 1329     const int QP2= 
c->QP/2 + 1;
 
 1337             if(*p > max) max= *p;
 
 1338             if(*p < min) min= *p;
 
 1341     avg= (min + max + 1)>>1;
 
 1343     if(max - min <deringThreshold) 
return;
 
 1345     for(y=0; y<10; y++){
 
 1360         t &= (t<<1) & (t>>1);
 
 1365         int t = s[y-1] & s[y] & s[y+1];
 
 1379                       +2*(*(p     -1)) + 4*(*p         ) + 2*(*(p     +1))
 
 1383 #ifdef DEBUG_DERING_THRESHOLD 
 1384                     __asm__ 
volatile(
"emms\n\t":);
 
 1386                     static uint64_t numPixels=0;
 
 1387                     if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
 
 1392                         static int numSkipped=0;
 
 1393                         static int errorSum=0;
 
 1394                         static int worstQP=0;
 
 1395                         static int worstRange=0;
 
 1396                         static int worstDiff=0;
 
 1398                         int absDiff= 
FFABS(diff);
 
 1401                         if(x==1 || x==8 || y==1 || y==8) 
continue;
 
 1404                         if(absDiff > worstDiff){
 
 1407                             worstRange= max-
min;
 
 1411                         if(1024LL*1024LL*1024LL % numSkipped == 0){
 
 1413                                    "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
 
 1414                                    (
float)errorSum/numSkipped, numSkipped, worstQP, worstRange,
 
 1415                                    worstDiff, (
float)numSkipped/numPixels);
 
 1420                     if     (*p + QP2 < f) *p= *p + QP2;
 
 1421                     else if(*p - QP2 > f) *p= *p - QP2;
 
 1426 #ifdef DEBUG_DERING_THRESHOLD 
 1434                 *p = 
FFMIN(*p + 20, 255);
 
 1440 #endif // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) 
 1442 #endif //TEMPLATE_PP_ALTIVEC 
 1452 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1455         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
 1456         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 
 1460         "movq (%0), %%mm0                       \n\t" 
 1461         "movq (%%"FF_REG_a
", %1), %%mm1         \n\t" 
 1463         "movq %%mm0, (%%"FF_REG_a
")             \n\t" 
 1464         "movq (%0, %1, 4), %%mm0                \n\t" 
 1466         "movq %%mm1, (%%"FF_REG_a
", %1, 2)      \n\t" 
 1467         "movq (%%"FF_REG_c
", %1), %%mm1         \n\t" 
 1469         "movq %%mm0, (%%"FF_REG_c
")             \n\t" 
 1470         "movq (%0, %1, 8), %%mm0                \n\t" 
 1472         "movq %%mm1, (%%"FF_REG_c
", %1, 2)      \n\t" 
 1475         : 
"%"FF_REG_a, 
"%"FF_REG_c
 
 1484         *(uint32_t*)&
src[
stride*1]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
 
 1486         *(uint32_t*)&
src[
stride*3]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
 
 1488         *(uint32_t*)&
src[
stride*5]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
 
 1490         *(uint32_t*)&
src[
stride*7]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
 
 1505 #if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1508         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
 1509         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 
 1510         "lea (%%"FF_REG_d
", %1, 4), %%"FF_REG_c
"\n\t" 
 1511         "add %1, %%"FF_REG_c
"                   \n\t" 
 /*
  * DEINT_CUBIC(a,b,c,d,e): asm-string fragment that overwrites the 8 bytes at
  * memory operand c with a value interpolated from the 8-byte rows a, b, d, e:
  *     m = avg(b, d)            (rounding unsigned byte average)
  *     o = avg(a, e)
  *     c = sat_u8(m - ((o - m) >> 3))
  * i.e. approximately (-a + 9*b + 9*d - e) / 16, the same taps as the C
  * fallback below, up to the rounding of the intermediate averages.
  * The register zeroed just before each definition (xmm7 / mm7) must still be
  * zero when the macro is used; it supplies the high bytes when widening.
  */
 1512 #if TEMPLATE_PP_SSE2 
 1513         "pxor %%xmm7, %%xmm7                    \n\t" 
 /*
  * SSE2 variant: all 8 pixels are widened to words in one xmm register
  * (only the low 8 bytes are loaded, processed and stored).
  * Clobbers xmm0-xmm3.
  */
 1514 #define REAL_DEINT_CUBIC(a,b,c,d,e)\ 
 1515         "movq " #a ", %%xmm0                    \n\t"\ 
 1516         "movq " #b ", %%xmm1                    \n\t"\ 
 1517         "movq " #d ", %%xmm2                    \n\t"\ 
 1518         "movq " #e ", %%xmm3                    \n\t"\ 
 1519         "pavgb %%xmm2, %%xmm1                   \n\t"\ 
 1520         "pavgb %%xmm3, %%xmm0                   \n\t"\ 
 1521         "punpcklbw %%xmm7, %%xmm0               \n\t"\ 
 1522         "punpcklbw %%xmm7, %%xmm1               \n\t"\ 
 1523         "psubw %%xmm1, %%xmm0                   \n\t"\ 
 1524         "psraw $3, %%xmm0                       \n\t"\ 
 1525         "psubw %%xmm0, %%xmm1                   \n\t"\ 
 1526         "packuswb %%xmm1, %%xmm1                \n\t"\ 
 1527         "movlps %%xmm1, " #c "                  \n\t" 
 1528 #else //TEMPLATE_PP_SSE2 
 1529         "pxor %%mm7, %%mm7                      \n\t" 
 /*
  * MMX variant: same arithmetic, but the 8 pixels are split into a low and a
  * high group of four words (mm0/mm1 hold the low halves, mm2/mm3 the high
  * halves) and re-packed before the store. PAVGB selects pavgb or pavgusb
  * depending on the template flags. Clobbers mm0-mm3.
  */
 1533 #define REAL_DEINT_CUBIC(a,b,c,d,e)\ 
 1534         "movq " #a ", %%mm0                     \n\t"\ 
 1535         "movq " #b ", %%mm1                     \n\t"\ 
 1536         "movq " #d ", %%mm2                     \n\t"\ 
 1537         "movq " #e ", %%mm3                     \n\t"\ 
 1538         PAVGB(%%mm2, %%mm1)                             \ 
 1539         PAVGB(%%mm3, %%mm0)                             \ 
 1540         "movq %%mm0, %%mm2                      \n\t"\ 
 1541         "punpcklbw %%mm7, %%mm0                 \n\t"\ 
 1542         "punpckhbw %%mm7, %%mm2                 \n\t"\ 
 1543         "movq %%mm1, %%mm3                      \n\t"\ 
 1544         "punpcklbw %%mm7, %%mm1                 \n\t"\ 
 1545         "punpckhbw %%mm7, %%mm3                 \n\t"\ 
 1546         "psubw %%mm1, %%mm0                     \n\t"   \ 
 1547         "psubw %%mm3, %%mm2                     \n\t"   \ 
 1548         "psraw $3, %%mm0                        \n\t"   \ 
 1549         "psraw $3, %%mm2                        \n\t"   \ 
 1550         "psubw %%mm0, %%mm1                     \n\t"   \ 
 1551         "psubw %%mm2, %%mm3                     \n\t"   \ 
 1552         "packuswb %%mm3, %%mm1                  \n\t"\ 
 1553         "movq %%mm1, " #c "                     \n\t" 
 1554 #endif //TEMPLATE_PP_SSE2 
 /* Wrapper so the arguments are macro-expanded before being stringified. */
 1555 #define DEINT_CUBIC(a,b,c,d,e)  REAL_DEINT_CUBIC(a,b,c,d,e) 
 1557 DEINT_CUBIC((%0)           , (%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4)    , (%%FF_REGd, %1))
 
 1558 DEINT_CUBIC((%%FF_REGa, %1), (%0, %1, 4)    , (%%FF_REGd)       , (%%FF_REGd, %1), (%0, %1, 8))
 
 1559 DEINT_CUBIC((%0, %1, 4)    , (%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8)    , (%%FF_REGc))
 
 1560 DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8)    , (%%FF_REGd, %1, 4), (%%FF_REGc)    , (%%FF_REGc, %1, 2))
 
 1565         XMM_CLOBBERS(
"%xmm0", 
"%xmm1", 
"%xmm2", 
"%xmm3", 
"%xmm7",)
 
 1567         "%"FF_REG_a, 
"%"FF_REG_d, 
"%"FF_REG_c
 
 1569 #undef REAL_DEINT_CUBIC 
 1570 #else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1575         src[stride*5] = av_clip_uint8((-
src[stride*2] + 9*
src[stride*4] + 9*
src[stride*6] - 
src[stride*8])>>4);
 
 1576         src[stride*7] = av_clip_uint8((-
src[stride*4] + 9*
src[stride*6] + 9*
src[stride*8] - 
src[stride*10])>>4);
 
 1577         src[stride*9] = av_clip_uint8((-
src[stride*6] + 9*
src[stride*8] + 9*
src[stride*10] - 
src[stride*12])>>4);
 
 1580 #endif //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1592 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1595         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
 1596         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 
 1597         "pxor %%mm7, %%mm7                      \n\t" 
 1598         "movq (%2), %%mm0                       \n\t" 
 /*
  * DEINT_FF(a,b,c,d): asm-string fragment that filters the 8-byte row at
  * memory operand b in place, using rows a, c, d and the row carried in mm0:
  *     p = mm0 on entry (carried from the previous step)
  *     b = sat_u8((4*avg(a, c) + b - avg(p, d)) >> 2)
  * which is approximately (-p + 4*a + 2*b + 4*c - d) / 8; avg is the rounding
  * byte average selected by PAVGB.
  * On exit mm0 holds the ORIGINAL (unfiltered) contents of b, so consecutive
  * invocations chain through mm0.
  * Requires mm7 == 0 (used to widen bytes to words); clobbers mm1-mm5.
  */
 1602 #define REAL_DEINT_FF(a,b,c,d)\ 
 1603         "movq " #a ", %%mm1                     \n\t"\ 
 1604         "movq " #b ", %%mm2                     \n\t"\ 
 1605         "movq " #c ", %%mm3                     \n\t"\ 
 1606         "movq " #d ", %%mm4                     \n\t"\ 
 1607         PAVGB(%%mm3, %%mm1)                          \ 
 1608         PAVGB(%%mm4, %%mm0)                          \ 
 1609         "movq %%mm0, %%mm3                      \n\t"\ 
 1610         "punpcklbw %%mm7, %%mm0                 \n\t"\ 
 1611         "punpckhbw %%mm7, %%mm3                 \n\t"\ 
 1612         "movq %%mm1, %%mm4                      \n\t"\ 
 1613         "punpcklbw %%mm7, %%mm1                 \n\t"\ 
 1614         "punpckhbw %%mm7, %%mm4                 \n\t"\ 
 1615         "psllw $2, %%mm1                        \n\t"\ 
 1616         "psllw $2, %%mm4                        \n\t"\ 
 1617         "psubw %%mm0, %%mm1                     \n\t"\ 
 1618         "psubw %%mm3, %%mm4                     \n\t"\ 
 1619         "movq %%mm2, %%mm5                      \n\t"\ 
 1620         "movq %%mm2, %%mm0                      \n\t"\ 
 1621         "punpcklbw %%mm7, %%mm2                 \n\t"\ 
 1622         "punpckhbw %%mm7, %%mm5                 \n\t"\ 
 1623         "paddw %%mm2, %%mm1                     \n\t"\ 
 1624         "paddw %%mm5, %%mm4                     \n\t"\ 
 1625         "psraw $2, %%mm1                        \n\t"\ 
 1626         "psraw $2, %%mm4                        \n\t"\ 
 1627         "packuswb %%mm4, %%mm1                  \n\t"\ 
 1628         "movq %%mm1, " #b "                     \n\t"\ 
 /* Wrapper so the arguments are macro-expanded before being stringified. */
 1630 #define DEINT_FF(a,b,c,d)  REAL_DEINT_FF(a,b,c,d) 
 1632 DEINT_FF((%0)           , (%%FF_REGa)       , (%%FF_REGa, %1), (%%FF_REGa, %1, 2))
 
 1633 DEINT_FF((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4)    , (%%FF_REGd)       )
 
 1634 DEINT_FF((%0, %1, 4)    , (%%FF_REGd)       , (%%FF_REGd, %1), (%%FF_REGd, %1, 2))
 
 1635 DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8)    , (%%FF_REGd, %1, 4))
 
 1637         "movq %%mm0, (%2)                       \n\t" 
 1639         : 
"%"FF_REG_a, 
"%"FF_REG_d
 
 1641 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1659 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1671 #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 
 1674         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
 1675         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 
 1676         "pxor %%mm7, %%mm7                      \n\t" 
 1677         "movq (%2), %%mm0                       \n\t" 
 1678         "movq (%3), %%mm1                       \n\t" 
 1682 #define REAL_DEINT_L5(t1,t2,a,b,c)\ 
 1683         "movq " #a ", %%mm2                     \n\t"\ 
 1684         "movq " #b ", %%mm3                     \n\t"\ 
 1685         "movq " #c ", %%mm4                     \n\t"\ 
 1688         "movq %%mm2, %%mm5                      \n\t"\ 
 1689         "movq %%mm2, " #t1 "                    \n\t"\ 
 1690         "punpcklbw %%mm7, %%mm2                 \n\t"\ 
 1691         "punpckhbw %%mm7, %%mm5                 \n\t"\ 
 1692         "movq %%mm2, %%mm6                      \n\t"\ 
 1693         "paddw %%mm2, %%mm2                     \n\t"\ 
 1694         "paddw %%mm6, %%mm2                     \n\t"\ 
 1695         "movq %%mm5, %%mm6                      \n\t"\ 
 1696         "paddw %%mm5, %%mm5                     \n\t"\ 
 1697         "paddw %%mm6, %%mm5                     \n\t"\ 
 1698         "movq %%mm3, %%mm6                      \n\t"\ 
 1699         "punpcklbw %%mm7, %%mm3                 \n\t"\ 
 1700         "punpckhbw %%mm7, %%mm6                 \n\t"\ 
 1701         "paddw %%mm3, %%mm3                     \n\t"\ 
 1702         "paddw %%mm6, %%mm6                     \n\t"\ 
 1703         "paddw %%mm3, %%mm2                     \n\t"\ 
 1704         "paddw %%mm6, %%mm5                     \n\t"\ 
 1705         "movq %%mm4, %%mm6                      \n\t"\ 
 1706         "punpcklbw %%mm7, %%mm4                 \n\t"\ 
 1707         "punpckhbw %%mm7, %%mm6                 \n\t"\ 
 1708         "psubw %%mm4, %%mm2                     \n\t"\ 
 1709         "psubw %%mm6, %%mm5                     \n\t"\ 
 1710         "psraw $2, %%mm2                        \n\t"\ 
 1711         "psraw $2, %%mm5                        \n\t"\ 
 1712         "packuswb %%mm5, %%mm2                  \n\t"\ 
 1713         "movq %%mm2, " #a "                     \n\t"\ 
 1715 #define DEINT_L5(t1,t2,a,b,c)  REAL_DEINT_L5(t1,t2,a,b,c) 
 1717 DEINT_L5(%%mm0, %%mm1, (%0)              , (%%FF_REGa)       , (%%FF_REGa, %1)   )
 
 1718 DEINT_L5(%%mm1, %%mm0, (%%FF_REGa)       , (%%FF_REGa, %1)   , (%%FF_REGa, %1, 2))
 
 1719 DEINT_L5(%%mm0, %%mm1, (%%FF_REGa, %1)   , (%%FF_REGa, %1, 2), (%0, %1, 4)   )
 
 1720 DEINT_L5(%%mm1, %%mm0, (%%FF_REGa, %1, 2), (%0, %1, 4)       , (%%FF_REGd)       )
 
 1721 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)       , (%%FF_REGd)       , (%%FF_REGd, %1)   )
 
 1722 DEINT_L5(%%mm1, %%mm0, (%%FF_REGd)       , (%%FF_REGd, %1)   , (%%FF_REGd, %1, 2))
 
 1723 DEINT_L5(%%mm0, %%mm1, (%%FF_REGd, %1)   , (%%FF_REGd, %1, 2), (%0, %1, 8)   )
 
 1724 DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, 8)       , (%%FF_REGd, %1, 4))
 
 1726         "movq %%mm0, (%2)                       \n\t" 
 1727         "movq %%mm1, (%3)                       \n\t" 
 1729         : 
"%"FF_REG_a, 
"%"FF_REG_d
 
 1731 #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 
 1741         src[stride*1]= av_clip_uint8((-(t2 + 
src[stride*3]) + 2*(t3 + 
src[stride*2]) + 6*t1 + 4)>>3);
 
 1743         src[stride*2]= av_clip_uint8((-(t3 + 
src[stride*4]) + 2*(t1 + 
src[stride*3]) + 6*t2 + 4)>>3);
 
 1745         src[stride*3]= av_clip_uint8((-(t1 + 
src[stride*5]) + 2*(t2 + 
src[stride*4]) + 6*t3 + 4)>>3);
 
 1747         src[stride*4]= av_clip_uint8((-(t2 + 
src[stride*6]) + 2*(t3 + 
src[stride*5]) + 6*t1 + 4)>>3);
 
 1749         src[stride*5]= av_clip_uint8((-(t3 + 
src[stride*7]) + 2*(t1 + 
src[stride*6]) + 6*t2 + 4)>>3);
 
 1751         src[stride*6]= av_clip_uint8((-(t1 + 
src[stride*8]) + 2*(t2 + 
src[stride*7]) + 6*t3 + 4)>>3);
 
 1753         src[stride*7]= av_clip_uint8((-(t2 + 
src[stride*9]) + 2*(t3 + 
src[stride*8]) + 6*t1 + 4)>>3);
 
 1760 #endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 
 1772 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1775         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
 1776         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 
 1780         "movq (%2), %%mm0                       \n\t"  
 1781         "movq (%%"FF_REG_a
"), %%mm1             \n\t"  
 1783         "movq (%0), %%mm2                       \n\t"  
 1785         "movq %%mm0, (%0)                       \n\t" 
 1786         "movq (%%"FF_REG_a
", %1), %%mm0         \n\t"  
 1789         "movq %%mm2, (%%"FF_REG_a
")             \n\t" 
 1790         "movq (%%"FF_REG_a
", %1, 2), %%mm2      \n\t"  
 1793         "movq %%mm1, (%%"FF_REG_a
", %1)         \n\t" 
 1794         "movq (%0, %1, 4), %%mm1                \n\t"  
 1797         "movq %%mm0, (%%"FF_REG_a
", %1, 2)      \n\t" 
 1798         "movq (%%"FF_REG_d
"), %%mm0             \n\t"  
 1801         "movq %%mm2, (%0, %1, 4)                \n\t" 
 1802         "movq (%%"FF_REG_d
", %1), %%mm2         \n\t"  
 1805         "movq %%mm1, (%%"FF_REG_d
")             \n\t" 
 1806         "movq (%%"FF_REG_d
", %1, 2), %%mm1      \n\t"  
 1809         "movq %%mm0, (%%"FF_REG_d
", %1)         \n\t" 
 1810         "movq (%0, %1, 8), %%mm0                \n\t"  
 1813         "movq %%mm2, (%%"FF_REG_d
", %1, 2)      \n\t" 
 1814         "movq %%mm1, (%2)                       \n\t" 
 1817         : 
"%"FF_REG_a, 
"%"FF_REG_d
 
 1819 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1827         a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
 
 1828         *(uint32_t*)&
src[
stride*0]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
 
 1831         b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
 
 1832         *(uint32_t*)&
src[
stride*1]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
 
 1835         c= (b&
c) + (((b^c)&0xFEFEFEFEUL)>>1);
 
 1836         *(uint32_t*)&
src[
stride*2]= (c|a) - (((c^
a)&0xFEFEFEFEUL)>>1);
 
 1839         a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
 
 1840         *(uint32_t*)&
src[
stride*3]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
 
 1843         b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
 
 1844         *(uint32_t*)&
src[
stride*4]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
 
 1847         c= (b&
c) + (((b^c)&0xFEFEFEFEUL)>>1);
 
 1848         *(uint32_t*)&
src[
stride*5]= (c|a) - (((c^
a)&0xFEFEFEFEUL)>>1);
 
 1851         a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
 
 1852         *(uint32_t*)&
src[
stride*6]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
 
 1855         b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
 
 1856         *(uint32_t*)&
src[
stride*7]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
 
 1862 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
 1875 #if TEMPLATE_PP_MMXEXT 
 1877         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
 1878         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 
 1882         "movq (%0), %%mm0                       \n\t" 
 1883         "movq (%%"FF_REG_a
", %1), %%mm2         \n\t" 
 1884         "movq (%%"FF_REG_a
"), %%mm1             \n\t" 
 1885         "movq %%mm0, %%mm3                      \n\t" 
 1886         "pmaxub %%mm1, %%mm0                    \n\t" 
 1887         "pminub %%mm3, %%mm1                    \n\t" 
 1888         "pmaxub %%mm2, %%mm1                    \n\t" 
 1889         "pminub %%mm1, %%mm0                    \n\t" 
 1890         "movq %%mm0, (%%"FF_REG_a
")             \n\t" 
 1892         "movq (%0, %1, 4), %%mm0                \n\t" 
 1893         "movq (%%"FF_REG_a
", %1, 2), %%mm1      \n\t" 
 1894         "movq %%mm2, %%mm3                      \n\t" 
 1895         "pmaxub %%mm1, %%mm2                    \n\t" 
 1896         "pminub %%mm3, %%mm1                    \n\t" 
 1897         "pmaxub %%mm0, %%mm1                    \n\t" 
 1898         "pminub %%mm1, %%mm2                    \n\t" 
 1899         "movq %%mm2, (%%"FF_REG_a
", %1, 2)      \n\t" 
 1901         "movq (%%"FF_REG_d
"), %%mm2             \n\t" 
 1902         "movq (%%"FF_REG_d
", %1), %%mm1         \n\t" 
 1903         "movq %%mm2, %%mm3                      \n\t" 
 1904         "pmaxub %%mm0, %%mm2                    \n\t" 
 1905         "pminub %%mm3, %%mm0                    \n\t" 
 1906         "pmaxub %%mm1, %%mm0                    \n\t" 
 1907         "pminub %%mm0, %%mm2                    \n\t" 
 1908         "movq %%mm2, (%%"FF_REG_d
")             \n\t" 
 1910         "movq (%%"FF_REG_d
", %1, 2), %%mm2      \n\t" 
 1911         "movq (%0, %1, 8), %%mm0                \n\t" 
 1912         "movq %%mm2, %%mm3                      \n\t" 
 1913         "pmaxub %%mm0, %%mm2                    \n\t" 
 1914         "pminub %%mm3, %%mm0                    \n\t" 
 1915         "pmaxub %%mm1, %%mm0                    \n\t" 
 1916         "pminub %%mm0, %%mm2                    \n\t" 
 1917         "movq %%mm2, (%%"FF_REG_d
", %1, 2)      \n\t" 
 1921         : 
"%"FF_REG_a, 
"%"FF_REG_d
 
 1924 #else // MMX without MMX2 
 1926         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
 1927         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 
 1930         "pxor %%mm7, %%mm7                      \n\t" 
 /*
  * MEDIAN(a,b,c): asm-string fragment (variant without pminub/pmaxub) that
  * replaces the 8 bytes at memory operand b with the per-byte median of the
  * rows a, b and c (unsigned bytes).
  *
  * Method: three "less or equal" byte masks are built with psubusb + pcmpeqb
  * against zero (sat(x - y) == 0  <=>  x <= y):
  *     mm3 = (a <= c),  mm4 = (c <= b),  mm5 = (b <= a)
  * XOR-ing neighbouring masks yields, for each input, a mask that is all-ones
  * when that input is NOT the median; OR-ing the mask into the input forces it
  * to 0xFF, so the final AND of the three leaves only the median value.
  * This mirrors the C fallback (a|(d^f)) & (b|(d^e)) & (c|(e^f)).
  *
  * Requires mm7 == 0; clobbers mm0-mm6.
  */
 1932 #define REAL_MEDIAN(a,b,c)\ 
 1933         "movq " #a ", %%mm0                     \n\t"\ 
 1934         "movq " #b ", %%mm2                     \n\t"\ 
 1935         "movq " #c ", %%mm1                     \n\t"\ 
 1936         "movq %%mm0, %%mm3                      \n\t"\ 
 1937         "movq %%mm1, %%mm4                      \n\t"\ 
 1938         "movq %%mm2, %%mm5                      \n\t"\ 
 1939         "psubusb %%mm1, %%mm3                   \n\t"\ 
 1940         "psubusb %%mm2, %%mm4                   \n\t"\ 
 1941         "psubusb %%mm0, %%mm5                   \n\t"\ 
 1942         "pcmpeqb %%mm7, %%mm3                   \n\t"\ 
 1943         "pcmpeqb %%mm7, %%mm4                   \n\t"\ 
 1944         "pcmpeqb %%mm7, %%mm5                   \n\t"\ 
 1945         "movq %%mm3, %%mm6                      \n\t"\ 
 1946         "pxor %%mm4, %%mm3                      \n\t"\ 
 1947         "pxor %%mm5, %%mm4                      \n\t"\ 
 1948         "pxor %%mm6, %%mm5                      \n\t"\ 
 1949         "por %%mm3, %%mm1                       \n\t"\ 
 1950         "por %%mm4, %%mm2                       \n\t"\ 
 1951         "por %%mm5, %%mm0                       \n\t"\ 
 1952         "pand %%mm2, %%mm0                      \n\t"\ 
 1953         "pand %%mm1, %%mm0                      \n\t"\ 
 1954         "movq %%mm0, " #b "                     \n\t" 
 /* Wrapper so the arguments are macro-expanded before being stringified. */
 1955 #define MEDIAN(a,b,c)  REAL_MEDIAN(a,b,c) 
 1957 MEDIAN((%0)           , (%%FF_REGa)       , (%%FF_REGa, %1))
 
 1958 MEDIAN((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4))
 
 1959 MEDIAN((%0, %1, 4)    , (%%FF_REGd)       , (%%FF_REGd, %1))
 
 1960 MEDIAN((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8))
 
 1963         : 
"%"FF_REG_a, 
"%"FF_REG_d
 
 1965 #endif //TEMPLATE_PP_MMXEXT 
 1966 #else //TEMPLATE_PP_MMX 
 1972         for (y=0; y<4; y++){
 
 1973             int a, 
b, 
c, d, e, f;
 
 1980             colsrc[
stride  ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
 
 1985 #endif //TEMPLATE_PP_MMX 
 1995         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
 1998         "movq (%0), %%mm0                       \n\t"  
 1999         "movq (%%"FF_REG_a
"), %%mm1             \n\t"  
 2000         "movq %%mm0, %%mm2                      \n\t"  
 2001         "punpcklbw %%mm1, %%mm0                 \n\t"  
 2002         "punpckhbw %%mm1, %%mm2                 \n\t"  
 2004         "movq (%%"FF_REG_a
", %1), %%mm1         \n\t" 
 2005         "movq (%%"FF_REG_a
", %1, 2), %%mm3      \n\t" 
 2006         "movq %%mm1, %%mm4                      \n\t" 
 2007         "punpcklbw %%mm3, %%mm1                 \n\t" 
 2008         "punpckhbw %%mm3, %%mm4                 \n\t" 
 2010         "movq %%mm0, %%mm3                      \n\t" 
 2011         "punpcklwd %%mm1, %%mm0                 \n\t" 
 2012         "punpckhwd %%mm1, %%mm3                 \n\t" 
 2013         "movq %%mm2, %%mm1                      \n\t" 
 2014         "punpcklwd %%mm4, %%mm2                 \n\t" 
 2015         "punpckhwd %%mm4, %%mm1                 \n\t" 
 2017         "movd %%mm0, 128(%2)                    \n\t" 
 2018         "psrlq $32, %%mm0                       \n\t" 
 2019         "movd %%mm0, 144(%2)                    \n\t" 
 2020         "movd %%mm3, 160(%2)                    \n\t" 
 2021         "psrlq $32, %%mm3                       \n\t" 
 2022         "movd %%mm3, 176(%2)                    \n\t" 
 2023         "movd %%mm3, 48(%3)                     \n\t" 
 2024         "movd %%mm2, 192(%2)                    \n\t" 
 2025         "movd %%mm2, 64(%3)                     \n\t" 
 2026         "psrlq $32, %%mm2                       \n\t" 
 2027         "movd %%mm2, 80(%3)                     \n\t" 
 2028         "movd %%mm1, 96(%3)                     \n\t" 
 2029         "psrlq $32, %%mm1                       \n\t" 
 2030         "movd %%mm1, 112(%3)                    \n\t" 
 2032         "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_a
"\n\t" 
 2034         "movq (%0, %1, 4), %%mm0                \n\t"  
 2035         "movq (%%"FF_REG_a
"), %%mm1             \n\t"  
 2036         "movq %%mm0, %%mm2                      \n\t"  
 2037         "punpcklbw %%mm1, %%mm0                 \n\t"  
 2038         "punpckhbw %%mm1, %%mm2                 \n\t"  
 2040         "movq (%%"FF_REG_a
", %1), %%mm1         \n\t" 
 2041         "movq (%%"FF_REG_a
", %1, 2), %%mm3      \n\t" 
 2042         "movq %%mm1, %%mm4                      \n\t" 
 2043         "punpcklbw %%mm3, %%mm1                 \n\t" 
 2044         "punpckhbw %%mm3, %%mm4                 \n\t" 
 2046         "movq %%mm0, %%mm3                      \n\t" 
 2047         "punpcklwd %%mm1, %%mm0                 \n\t" 
 2048         "punpckhwd %%mm1, %%mm3                 \n\t" 
 2049         "movq %%mm2, %%mm1                      \n\t" 
 2050         "punpcklwd %%mm4, %%mm2                 \n\t" 
 2051         "punpckhwd %%mm4, %%mm1                 \n\t" 
 2053         "movd %%mm0, 132(%2)                    \n\t" 
 2054         "psrlq $32, %%mm0                       \n\t" 
 2055         "movd %%mm0, 148(%2)                    \n\t" 
 2056         "movd %%mm3, 164(%2)                    \n\t" 
 2057         "psrlq $32, %%mm3                       \n\t" 
 2058         "movd %%mm3, 180(%2)                    \n\t" 
 2059         "movd %%mm3, 52(%3)                     \n\t" 
 2060         "movd %%mm2, 196(%2)                    \n\t" 
 2061         "movd %%mm2, 68(%3)                     \n\t" 
 2062         "psrlq $32, %%mm2                       \n\t" 
 2063         "movd %%mm2, 84(%3)                     \n\t" 
 2064         "movd %%mm1, 100(%3)                    \n\t" 
 2065         "psrlq $32, %%mm1                       \n\t" 
 2066         "movd %%mm1, 116(%3)                    \n\t" 
 2069         :: 
"r" (
src), 
"r" ((
x86_reg)srcStride), 
"r" (dst1), 
"r" (dst2)
 
 2080         "lea (%0, %1), %%"FF_REG_a
"             \n\t" 
 2081         "lea (%%"FF_REG_a
",%1,4), %%"FF_REG_d
"  \n\t" 
 2084         "movq (%2), %%mm0                       \n\t"  
 2085         "movq 16(%2), %%mm1                     \n\t"  
 2086         "movq %%mm0, %%mm2                      \n\t"  
 2087         "punpcklbw %%mm1, %%mm0                 \n\t"  
 2088         "punpckhbw %%mm1, %%mm2                 \n\t"  
 2090         "movq 32(%2), %%mm1                     \n\t" 
 2091         "movq 48(%2), %%mm3                     \n\t" 
 2092         "movq %%mm1, %%mm4                      \n\t" 
 2093         "punpcklbw %%mm3, %%mm1                 \n\t" 
 2094         "punpckhbw %%mm3, %%mm4                 \n\t" 
 2096         "movq %%mm0, %%mm3                      \n\t" 
 2097         "punpcklwd %%mm1, %%mm0                 \n\t" 
 2098         "punpckhwd %%mm1, %%mm3                 \n\t" 
 2099         "movq %%mm2, %%mm1                      \n\t" 
 2100         "punpcklwd %%mm4, %%mm2                 \n\t" 
 2101         "punpckhwd %%mm4, %%mm1                 \n\t" 
 2103         "movd %%mm0, (%0)                       \n\t" 
 2104         "psrlq $32, %%mm0                       \n\t" 
 2105         "movd %%mm0, (%%"FF_REG_a
")             \n\t" 
 2106         "movd %%mm3, (%%"FF_REG_a
", %1)         \n\t" 
 2107         "psrlq $32, %%mm3                       \n\t" 
 2108         "movd %%mm3, (%%"FF_REG_a
", %1, 2)      \n\t" 
 2109         "movd %%mm2, (%0, %1, 4)                \n\t" 
 2110         "psrlq $32, %%mm2                       \n\t" 
 2111         "movd %%mm2, (%%"FF_REG_d
")             \n\t" 
 2112         "movd %%mm1, (%%"FF_REG_d
", %1)         \n\t" 
 2113         "psrlq $32, %%mm1                       \n\t" 
 2114         "movd %%mm1, (%%"FF_REG_d
", %1, 2)      \n\t" 
 2117         "movq 64(%2), %%mm0                     \n\t"  
 2118         "movq 80(%2), %%mm1                     \n\t"  
 2119         "movq %%mm0, %%mm2                      \n\t"  
 2120         "punpcklbw %%mm1, %%mm0                 \n\t"  
 2121         "punpckhbw %%mm1, %%mm2                 \n\t"  
 2123         "movq 96(%2), %%mm1                     \n\t" 
 2124         "movq 112(%2), %%mm3                    \n\t" 
 2125         "movq %%mm1, %%mm4                      \n\t" 
 2126         "punpcklbw %%mm3, %%mm1                 \n\t" 
 2127         "punpckhbw %%mm3, %%mm4                 \n\t" 
 2129         "movq %%mm0, %%mm3                      \n\t" 
 2130         "punpcklwd %%mm1, %%mm0                 \n\t" 
 2131         "punpckhwd %%mm1, %%mm3                 \n\t" 
 2132         "movq %%mm2, %%mm1                      \n\t" 
 2133         "punpcklwd %%mm4, %%mm2                 \n\t" 
 2134         "punpckhwd %%mm4, %%mm1                 \n\t" 
 2136         "movd %%mm0, 4(%0)                      \n\t" 
 2137         "psrlq $32, %%mm0                       \n\t" 
 2138         "movd %%mm0, 4(%%"FF_REG_a
")            \n\t" 
 2139         "movd %%mm3, 4(%%"FF_REG_a
", %1)        \n\t" 
 2140         "psrlq $32, %%mm3                       \n\t" 
 2141         "movd %%mm3, 4(%%"FF_REG_a
", %1, 2)     \n\t" 
 2142         "movd %%mm2, 4(%0, %1, 4)               \n\t" 
 2143         "psrlq $32, %%mm2                       \n\t" 
 2144         "movd %%mm2, 4(%%"FF_REG_d
")            \n\t" 
 2145         "movd %%mm1, 4(%%"FF_REG_d
", %1)        \n\t" 
 2146         "psrlq $32, %%mm1                       \n\t" 
 2147         "movd %%mm1, 4(%%"FF_REG_d
", %1, 2)     \n\t" 
 2149         :: 
"r" (dst), 
"r" ((
x86_reg)dstStride), 
"r" (
src)
 
 2150         : 
"%"FF_REG_a, 
"%"FF_REG_d
 
 2153 #endif //TEMPLATE_PP_MMX 
 2156 #if !TEMPLATE_PP_ALTIVEC 
 2158                                     uint8_t *tempBlurred, uint32_t *tempBlurredPast, 
const int *maxNoise)
 
 2161     tempBlurredPast[127]= maxNoise[0];
 
 2162     tempBlurredPast[128]= maxNoise[1];
 
 2163     tempBlurredPast[129]= maxNoise[2];
 
 2165 #define FAST_L2_DIFF 
 2167 #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 
 2169         "lea (%2, %2, 2), %%"FF_REG_a
"          \n\t"  
 2170         "lea (%2, %2, 4), %%"FF_REG_d
"          \n\t"  
 2171         "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t"  
 2175 #ifdef L1_DIFF //needs mmx2 
 2176         "movq (%0), %%mm0                       \n\t"  
 2177         "psadbw (%1), %%mm0                     \n\t"  
 2178         "movq (%0, %2), %%mm1                   \n\t"  
 2179         "psadbw (%1, %2), %%mm1                 \n\t"  
 2180         "movq (%0, %2, 2), %%mm2                \n\t"  
 2181         "psadbw (%1, %2, 2), %%mm2              \n\t"  
 2182         "movq (%0, %%"FF_REG_a
"), %%mm3         \n\t"  
 2183         "psadbw (%1, %%"FF_REG_a
"), %%mm3       \n\t"  
 2185         "movq (%0, %2, 4), %%mm4                \n\t"  
 2186         "paddw %%mm1, %%mm0                     \n\t" 
 2187         "psadbw (%1, %2, 4), %%mm4              \n\t"  
 2188         "movq (%0, %%"FF_REG_d
"), %%mm5         \n\t"  
 2189         "paddw %%mm2, %%mm0                     \n\t" 
 2190         "psadbw (%1, %%"FF_REG_d
"), %%mm5       \n\t"  
 2191         "movq (%0, %%"FF_REG_a
", 2), %%mm6      \n\t"  
 2192         "paddw %%mm3, %%mm0                     \n\t" 
 2193         "psadbw (%1, %%"FF_REG_a
", 2), %%mm6    \n\t"  
 2194         "movq (%0, %%"FF_REG_c
"), %%mm7         \n\t"  
 2195         "paddw %%mm4, %%mm0                     \n\t" 
 2196         "psadbw (%1, %%"FF_REG_c
"), %%mm7       \n\t"  
 2197         "paddw %%mm5, %%mm6                     \n\t" 
 2198         "paddw %%mm7, %%mm6                     \n\t" 
 2199         "paddw %%mm6, %%mm0                     \n\t" 
 2201 #if defined (FAST_L2_DIFF) 
 2202         "pcmpeqb %%mm7, %%mm7                   \n\t" 
 2203         "movq "MANGLE(b80)
", %%mm6              \n\t" 
 2204         "pxor %%mm0, %%mm0                      \n\t" 
 /*
  * REAL_L2_DIFF_CORE(a, b), FAST_L2_DIFF variant: processes the 8 bytes at
  * memory operands a and b and adds a value derived from their difference to
  * the running total in mm0.
  *
  * Relies on the setup emitted just before this macro: mm7 = all ones
  * (pcmpeqb mm7, mm7), mm6 = the b80 constant, mm0 = 0.
  * Overwrites mm2 and mm5.
  *
  * Steps: b is complemented (pxor with mm7) and averaged with a via PAVGB,
  * then mm6 is added byte-wise. The result is squared word-wise and
  * pair-summed with pmaddwd twice: once on an unshifted copy and once after
  * shifting each word left by 8. The two results are added, shifted right
  * by 14 and accumulated into mm0.
  * NOTE(review): presumably an approximation of the squared difference that
  * avoids unpacking bytes to words -- confirm against the non-fast variant.
  */
 2205 #define REAL_L2_DIFF_CORE(a, b)\ 
 2206         "movq " #a ", %%mm5                     \n\t"\ 
 2207         "movq " #b ", %%mm2                     \n\t"\ 
 2208         "pxor %%mm7, %%mm2                      \n\t"\ 
 2209         PAVGB(%%mm2, %%mm5)\ 
 2210         "paddb %%mm6, %%mm5                     \n\t"\ 
 2211         "movq %%mm5, %%mm2                      \n\t"\ 
 2212         "psllw $8, %%mm5                        \n\t"\ 
 2213         "pmaddwd %%mm5, %%mm5                   \n\t"\ 
 2214         "pmaddwd %%mm2, %%mm2                   \n\t"\ 
 2215         "paddd %%mm2, %%mm5                     \n\t"\ 
 2216         "psrld $14, %%mm5                       \n\t"\ 
 2217         "paddd %%mm5, %%mm0                     \n\t" 
 2219 #else //defined (FAST_L2_DIFF) 
 2220         "pxor %%mm7, %%mm7                      \n\t" 
 2221         "pxor %%mm0, %%mm0                      \n\t" 
 /*
  * REAL_L2_DIFF_CORE(a, b), exact variant: adds the squared differences of
  * the 8 bytes at memory operands a and b to the running total in mm0.
  *
  * Relies on the setup emitted just before this macro: mm7 = 0 and mm0 = 0.
  * Overwrites mm1, mm2, mm3 and mm5.
  *
  * Both operands are zero-extended to 16-bit words by unpacking against mm7
  * and subtracted. pmaddwd then squares the differences and sums adjacent
  * pairs; the resulting dwords are accumulated into the two 32-bit lanes
  * of mm0.
  */
 2222 #define REAL_L2_DIFF_CORE(a, b)\ 
 2223         "movq " #a ", %%mm5                     \n\t"\ 
 2224         "movq " #b ", %%mm2                     \n\t"\ 
 2225         "movq %%mm5, %%mm1                      \n\t"\ 
 2226         "movq %%mm2, %%mm3                      \n\t"\ 
 2227         "punpcklbw %%mm7, %%mm5                 \n\t"\ 
 2228         "punpckhbw %%mm7, %%mm1                 \n\t"\ 
 2229         "punpcklbw %%mm7, %%mm2                 \n\t"\ 
 2230         "punpckhbw %%mm7, %%mm3                 \n\t"\ 
 2231         "psubw %%mm2, %%mm5                     \n\t"\ 
 2232         "psubw %%mm3, %%mm1                     \n\t"\ 
 2233         "pmaddwd %%mm5, %%mm5                   \n\t"\ 
 2234         "pmaddwd %%mm1, %%mm1                   \n\t"\ 
 2235         "paddd %%mm1, %%mm5                     \n\t"\ 
 2236         "paddd %%mm5, %%mm0                     \n\t" 
 2238 #endif //defined (FAST_L2_DIFF) 
 /*
  * L2_DIFF_CORE forwards to REAL_L2_DIFF_CORE through one extra macro level,
  * so that the arguments are macro-expanded before being stringized with #.
  */
 2240 #define L2_DIFF_CORE(a, b)  REAL_L2_DIFF_CORE(a, b) 
 2242 L2_DIFF_CORE((%0)             , (%1))
 
 2243 L2_DIFF_CORE((%0, %2)         , (%1, %2))
 
 2244 L2_DIFF_CORE((%0, %2, 2)      , (%1, %2, 2))
 
 2245 L2_DIFF_CORE((%0, %%FF_REGa)  , (%1, %%FF_REGa))
 
 2246 L2_DIFF_CORE((%0, %2, 4)      , (%1, %2, 4))
 
 2247 L2_DIFF_CORE((%0, %%FF_REGd)  , (%1, %%FF_REGd))
 
 2248 L2_DIFF_CORE((%0, %%FF_REGa,2), (%1, %%FF_REGa,2))
 
 2249 L2_DIFF_CORE((%0, %%FF_REGc)  , (%1, %%FF_REGc))
 
 2253         "movq %%mm0, %%mm4                      \n\t" 
 2254         "psrlq $32, %%mm0                       \n\t" 
 2255         "paddd %%mm0, %%mm4                     \n\t" 
 2256         "movd %%mm4, %%ecx                      \n\t" 
 2257         "shll $2, %%ecx                         \n\t" 
 2258         "mov %3, %%"FF_REG_d
"                   \n\t" 
 2259         "addl -4(%%"FF_REG_d
"), %%ecx           \n\t" 
 2260         "addl 4(%%"FF_REG_d
"), %%ecx            \n\t" 
 2261         "addl -1024(%%"FF_REG_d
"), %%ecx        \n\t" 
 2262         "addl $4, %%ecx                         \n\t" 
 2263         "addl 1024(%%"FF_REG_d
"), %%ecx         \n\t" 
 2264         "shrl $3, %%ecx                         \n\t" 
 2265         "movl %%ecx, (%%"FF_REG_d
")             \n\t" 
 2270         "cmpl 512(%%"FF_REG_d
"), %%ecx          \n\t" 
 2272         "cmpl 516(%%"FF_REG_d
"), %%ecx          \n\t" 
 2275         "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t"  
 2276         "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t"  
 2277         "movq (%0), %%mm0                       \n\t"  
 2278         "movq (%0, %2), %%mm1                   \n\t"  
 2279         "movq (%0, %2, 2), %%mm2                \n\t"  
 2280         "movq (%0, %%"FF_REG_a
"), %%mm3         \n\t"  
 2281         "movq (%0, %2, 4), %%mm4                \n\t"  
 2282         "movq (%0, %%"FF_REG_d
"), %%mm5         \n\t"  
 2283         "movq (%0, %%"FF_REG_a
", 2), %%mm6      \n\t"  
 2284         "movq (%0, %%"FF_REG_c
"), %%mm7         \n\t"  
 2285         "movq %%mm0, (%1)                       \n\t"  
 2286         "movq %%mm1, (%1, %2)                   \n\t"  
 2287         "movq %%mm2, (%1, %2, 2)                \n\t"  
 2288         "movq %%mm3, (%1, %%"FF_REG_a
")         \n\t"  
 2289         "movq %%mm4, (%1, %2, 4)                \n\t"  
 2290         "movq %%mm5, (%1, %%"FF_REG_d
")         \n\t"  
 2291         "movq %%mm6, (%1, %%"FF_REG_a
", 2)      \n\t"  
 2292         "movq %%mm7, (%1, %%"FF_REG_c
")         \n\t"  
 2296         "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t"  
 2297         "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t"  
 2298         "movq (%0), %%mm0                       \n\t"  
 2300         "movq (%0, %2), %%mm1                   \n\t"  
 2301         PAVGB((%1, %2), %%mm1)                        
 
 2302         "movq (%0, %2, 2), %%mm2                \n\t"  
 2303         PAVGB((%1, %2, 2), %%mm2)                     
 
 2304         "movq (%0, %%"FF_REG_a
"), %%mm3         \n\t"  
 2305         PAVGB((%1, %%FF_REGa), %%mm3)                 
 
 2306         "movq (%0, %2, 4), %%mm4                \n\t"  
 2307         PAVGB((%1, %2, 4), %%mm4)                     
 
 2308         "movq (%0, %%"FF_REG_d
"), %%mm5         \n\t"  
 2309         PAVGB((%1, %%FF_REGd), %%mm5)                 
 
 2310         "movq (%0, %%"FF_REG_a
", 2), %%mm6      \n\t"  
 2311         PAVGB((%1, %%FF_REGa, 2), %%mm6)              
 
 2312         "movq (%0, %%"FF_REG_c
"), %%mm7         \n\t"  
 2313         PAVGB((%1, %%FF_REGc), %%mm7)                 
 
 2314         "movq %%mm0, (%1)                       \n\t"  
 2315         "movq %%mm1, (%1, %2)                   \n\t"  
 2316         "movq %%mm2, (%1, %2, 2)                \n\t"  
 2317         "movq %%mm3, (%1, %%"FF_REG_a
")         \n\t"  
 2318         "movq %%mm4, (%1, %2, 4)                \n\t"  
 2319         "movq %%mm5, (%1, %%"FF_REG_d
")         \n\t"  
 2320         "movq %%mm6, (%1, %%"FF_REG_a
", 2)      \n\t"  
 2321         "movq %%mm7, (%1, %%"FF_REG_c
")         \n\t"  
 2322         "movq %%mm0, (%0)                       \n\t"  
 2323         "movq %%mm1, (%0, %2)                   \n\t"  
 2324         "movq %%mm2, (%0, %2, 2)                \n\t"  
 2325         "movq %%mm3, (%0, %%"FF_REG_a
")         \n\t"  
 2326         "movq %%mm4, (%0, %2, 4)                \n\t"  
 2327         "movq %%mm5, (%0, %%"FF_REG_d
")         \n\t"  
 2328         "movq %%mm6, (%0, %%"FF_REG_a
", 2)      \n\t"  
 2329         "movq %%mm7, (%0, %%"FF_REG_c
")         \n\t"  
 2333         "cmpl 508(%%"FF_REG_d
"), %%ecx          \n\t" 
 2336         "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t"  
 2337         "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t"  
 2338         "movq (%0), %%mm0                       \n\t"  
 2339         "movq (%0, %2), %%mm1                   \n\t"  
 2340         "movq (%0, %2, 2), %%mm2                \n\t"  
 2341         "movq (%0, %%"FF_REG_a
"), %%mm3         \n\t"  
 2342         "movq (%1), %%mm4                       \n\t"  
 2343         "movq (%1, %2), %%mm5                   \n\t"  
 2344         "movq (%1, %2, 2), %%mm6                \n\t"  
 2345         "movq (%1, %%"FF_REG_a
"), %%mm7         \n\t"  
 2354         "movq %%mm0, (%1)                       \n\t"  
 2355         "movq %%mm1, (%1, %2)                   \n\t"  
 2356         "movq %%mm2, (%1, %2, 2)                \n\t"  
 2357         "movq %%mm3, (%1, %%"FF_REG_a
")         \n\t"  
 2358         "movq %%mm0, (%0)                       \n\t"  
 2359         "movq %%mm1, (%0, %2)                   \n\t"  
 2360         "movq %%mm2, (%0, %2, 2)                \n\t"  
 2361         "movq %%mm3, (%0, %%"FF_REG_a
")         \n\t"  
 2363         "movq (%0, %2, 4), %%mm0                \n\t"  
 2364         "movq (%0, %%"FF_REG_d
"), %%mm1         \n\t"  
 2365         "movq (%0, %%"FF_REG_a
", 2), %%mm2      \n\t"  
 2366         "movq (%0, %%"FF_REG_c
"), %%mm3         \n\t"  
 2367         "movq (%1, %2, 4), %%mm4                \n\t"  
 2368         "movq (%1, %%"FF_REG_d
"), %%mm5         \n\t"  
 2369         "movq (%1, %%"FF_REG_a
", 2), %%mm6      \n\t"  
 2370         "movq (%1, %%"FF_REG_c
"), %%mm7         \n\t"  
 2379         "movq %%mm0, (%1, %2, 4)                \n\t"  
 2380         "movq %%mm1, (%1, %%"FF_REG_d
")         \n\t"  
 2381         "movq %%mm2, (%1, %%"FF_REG_a
", 2)      \n\t"  
 2382         "movq %%mm3, (%1, %%"FF_REG_c
")         \n\t"  
 2383         "movq %%mm0, (%0, %2, 4)                \n\t"  
 2384         "movq %%mm1, (%0, %%"FF_REG_d
")         \n\t"  
 2385         "movq %%mm2, (%0, %%"FF_REG_a
", 2)      \n\t"  
 2386         "movq %%mm3, (%0, %%"FF_REG_c
")         \n\t"  
 2390         "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t"  
 2391         "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t"  
 2392         "movq (%0), %%mm0                       \n\t"  
 2393         "movq (%0, %2), %%mm1                   \n\t"  
 2394         "movq (%0, %2, 2), %%mm2                \n\t"  
 2395         "movq (%0, %%"FF_REG_a
"), %%mm3         \n\t"  
 2396         "movq (%1), %%mm4                       \n\t"  
 2397         "movq (%1, %2), %%mm5                   \n\t"  
 2398         "movq (%1, %2, 2), %%mm6                \n\t"  
 2399         "movq (%1, %%"FF_REG_a
"), %%mm7         \n\t"  
 2412         "movq %%mm0, (%1)                       \n\t"  
 2413         "movq %%mm1, (%1, %2)                   \n\t"  
 2414         "movq %%mm2, (%1, %2, 2)                \n\t"  
 2415         "movq %%mm3, (%1, %%"FF_REG_a
")         \n\t"  
 2416         "movq %%mm0, (%0)                       \n\t"  
 2417         "movq %%mm1, (%0, %2)                   \n\t"  
 2418         "movq %%mm2, (%0, %2, 2)                \n\t"  
 2419         "movq %%mm3, (%0, %%"FF_REG_a
")         \n\t"  
 2421         "movq (%0, %2, 4), %%mm0                \n\t"  
 2422         "movq (%0, %%"FF_REG_d
"), %%mm1         \n\t"  
 2423         "movq (%0, %%"FF_REG_a
", 2), %%mm2      \n\t"  
 2424         "movq (%0, %%"FF_REG_c
"), %%mm3         \n\t"  
 2425         "movq (%1, %2, 4), %%mm4                \n\t"  
 2426         "movq (%1, %%"FF_REG_d
"), %%mm5         \n\t"  
 2427         "movq (%1, %%"FF_REG_a
", 2), %%mm6      \n\t"  
 2428         "movq (%1, %%"FF_REG_c
"), %%mm7         \n\t"  
 2441         "movq %%mm0, (%1, %2, 4)                \n\t"  
 2442         "movq %%mm1, (%1, %%"FF_REG_d
")         \n\t"  
 2443         "movq %%mm2, (%1, %%"FF_REG_a
", 2)      \n\t"  
 2444         "movq %%mm3, (%1, %%"FF_REG_c
")         \n\t"  
 2445         "movq %%mm0, (%0, %2, 4)                \n\t"  
 2446         "movq %%mm1, (%0, %%"FF_REG_d
")         \n\t"  
 2447         "movq %%mm2, (%0, %%"FF_REG_a
", 2)      \n\t"  
 2448         "movq %%mm3, (%0, %%"FF_REG_c
")         \n\t"  
 2452         :: 
"r" (
src), 
"r" (tempBlurred), 
"r"((
x86_reg)stride), 
"m" (tempBlurredPast)
 
 2454         : 
"%"FF_REG_a, 
"%"FF_REG_d, 
"%"FF_REG_c, 
"memory" 
 2456 #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 
 2467             int cur= src[ x + y*
stride ];
 
 2479         +(*(tempBlurredPast-256))
 
 2480         +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
 
 2481         +(*(tempBlurredPast+256))
 
 2493     if(d > maxNoise[1]){
 
 2494         if(d < maxNoise[2]){
 
 2499                     int cur= src[ x + y*
stride ];
 
 2500                     tempBlurred[ x + y*
stride ]=
 
 2514         if(d < maxNoise[0]){
 
 2519                     int cur= src[ x + y*
stride ];
 
 2520                     tempBlurred[ x + y*
stride ]=
 
 2522                         (ref*7 + cur + 4)>>3;
 
 2530                     int cur= src[ x + y*
stride ];
 
 2531                     tempBlurred[ x + y*
stride ]=
 
 2533                         (ref*3 + cur + 2)>>2;
 
 2539 #endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 
 2541 #endif //TEMPLATE_PP_ALTIVEC 
 2548     int64_t dc_mask, eq_mask, both_masks;
 
 2549     int64_t sums[10*8*2];
 
 2553         "movq %0, %%mm7                         \n\t" 
 2554         "movq %1, %%mm6                         \n\t" 
 2555         : : 
"m" (c->mmxDcOffset[c->nonBQP]),  
"m" (c->mmxDcThreshold[c->nonBQP])
 
 2559         "lea (%2, %3), %%"FF_REG_a
"             \n\t" 
 2563         "movq (%2), %%mm0                       \n\t" 
 2564         "movq (%%"FF_REG_a
"), %%mm1             \n\t" 
 2565         "movq %%mm1, %%mm3                      \n\t" 
 2566         "movq %%mm1, %%mm4                      \n\t" 
 2567         "psubb %%mm1, %%mm0                     \n\t"  
 2568         "paddb %%mm7, %%mm0                     \n\t" 
 2569         "pcmpgtb %%mm6, %%mm0                   \n\t" 
 2571         "movq (%%"FF_REG_a
",%3), %%mm2          \n\t" 
 2572         PMAXUB(%%mm2, %%mm4)
 
 2573         PMINUB(%%mm2, %%mm3, %%mm5)
 
 2574         "psubb %%mm2, %%mm1                     \
n\t"
 
 2575         "paddb %%mm7, %%mm1                     \
n\t"
 
 2576         "pcmpgtb %%mm6, %%mm1                   \
n\t"
 
 2577         "paddb %%mm1, %%mm0                     \
n\t"
 
 2579         "movq (%%"FF_REG_a", %3, 2), %%mm1      \
n\t"
 
 2580         PMAXUB(%%mm1, %%mm4)
 
 2581         PMINUB(%%mm1, %%mm3, %%mm5)
 
 2582         "psubb %%mm1, %%mm2                     \
n\t"
 
 2583         "paddb %%mm7, %%mm2                     \
n\t"
 
 2584         "pcmpgtb %%mm6, %%mm2                   \
n\t"
 
 2585         "paddb %%mm2, %%mm0                     \
n\t"
 
 2587         "lea (%%"FF_REG_a", %3, 4), %%"FF_REG_a"\
n\t"
 
 2589         "movq (%2, %3, 4), %%mm2                \
n\t"
 
 2590         PMAXUB(%%mm2, %%mm4)
 
 2591         PMINUB(%%mm2, %%mm3, %%mm5)
 
 2592         "psubb %%mm2, %%mm1                     \
n\t"
 
 2593         "paddb %%mm7, %%mm1                     \
n\t"
 
 2594         "pcmpgtb %%mm6, %%mm1                   \
n\t"
 
 2595         "paddb %%mm1, %%mm0                     \
n\t"
 
 2597         "movq (%%"FF_REG_a"), %%mm1             \
n\t"
 
 2598         PMAXUB(%%mm1, %%mm4)
 
 2599         PMINUB(%%mm1, %%mm3, %%mm5)
 
 2600         "psubb %%mm1, %%mm2                     \
n\t"
 
 2601         "paddb %%mm7, %%mm2                     \
n\t"
 
 2602         "pcmpgtb %%mm6, %%mm2                   \
n\t"
 
 2603         "paddb %%mm2, %%mm0                     \
n\t"
 
 2605         "movq (%%"FF_REG_a", %3), %%mm2         \
n\t"
 
 2606         PMAXUB(%%mm2, %%mm4)
 
 2607         PMINUB(%%mm2, %%mm3, %%mm5)
 
 2608         "psubb %%mm2, %%mm1                     \
n\t"
 
 2609         "paddb %%mm7, %%mm1                     \
n\t"
 
 2610         "pcmpgtb %%mm6, %%mm1                   \
n\t"
 
 2611         "paddb %%mm1, %%mm0                     \
n\t"
 
 2613         "movq (%%"FF_REG_a", %3, 2), %%mm1      \
n\t"
 
 2614         PMAXUB(%%mm1, %%mm4)
 
 2615         PMINUB(%%mm1, %%mm3, %%mm5)
 
 2616         "psubb %%mm1, %%mm2                     \
n\t"
 
 2617         "paddb %%mm7, %%mm2                     \
n\t"
 
 2618         "pcmpgtb %%mm6, %%mm2                   \
n\t"
 
 2619         "paddb %%mm2, %%mm0                     \
n\t"
 
 2621         "movq (%2, %3, 8), %%mm2                \
n\t"
 
 2622         PMAXUB(%%mm2, %%mm4)
 
 2623         PMINUB(%%mm2, %%mm3, %%mm5)
 
 2624         "psubb %%mm2, %%mm1                     \
n\t"
 
 2625         "paddb %%mm7, %%mm1                     \
n\t"
 
 2626         "pcmpgtb %%mm6, %%mm1                   \
n\t"
 
 2627         "paddb %%mm1, %%mm0                     \
n\t"
 
 2629         "movq (%%"FF_REG_a", %3, 4), %%mm1      \
n\t"
 
 2630         "psubb %%mm1, %%mm2                     \
n\t"
 
 2631         "paddb %%mm7, %%mm2                     \
n\t"
 
 2632         "pcmpgtb %%mm6, %%mm2                   \
n\t"
 
 2633         "paddb %%mm2, %%mm0                     \
n\t"
 
 2634         "psubusb %%mm3, %%mm4                   \
n\t"
 
 2636         "pxor %%mm6, %%mm6                      \
n\t"
 
 2637         "movq %4, %%mm7                         \
n\t" 
 
 2638         "paddusb %%mm7, %%mm7                   \
n\t" 
 
 2639         "psubusb %%mm4, %%mm7                   \
n\t" 
 
 2640         "pcmpeqb %%mm6, %%mm7                   \
n\t" 
 
 2641         "pcmpeqb %%mm6, %%mm7                   \
n\t" 
 
 2642         "movq %%mm7, %1                         \
n\t"
 
 2644         "movq %5, %%mm7                         \
n\t"
 
 2645         "punpcklbw %%mm7, %%mm7                 \
n\t"
 
 2646         "punpcklbw %%mm7, %%mm7                 \
n\t"
 
 2647         "punpcklbw %%mm7, %%mm7                 \
n\t"
 
 2648         "psubb %%mm0, %%mm6                     \
n\t"
 
 2649         "pcmpgtb %%mm7, %%mm6                   \
n\t"
 
 2650         "movq %%mm6, %0                         \
n\t"
 
 2652         : "=m" (eq_mask), "=m" (dc_mask)
 
 2653         : "
r" (src), "
r" ((
x86_reg)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
 
 2657     both_masks = dc_mask & eq_mask;
 
 2661         int64_t *temp_sums= sums;
 
 2664             "movq %2, %%mm0                         \n\t"   
 2665             "pxor %%mm4, %%mm4                      \n\t" 
 2667             "movq (%0), %%mm6                       \n\t" 
 2668             "movq (%0, %1), %%mm5                   \n\t" 
 2669             "movq %%mm5, %%mm1                      \n\t" 
 2670             "movq %%mm6, %%mm2                      \n\t" 
 2671             "psubusb %%mm6, %%mm5                   \n\t" 
 2672             "psubusb %%mm1, %%mm2                   \n\t" 
 2673             "por %%mm5, %%mm2                       \n\t"  
 2674             "psubusb %%mm2, %%mm0                   \n\t"  
 2675             "pcmpeqb %%mm4, %%mm0                   \n\t"  
 2677             "pxor %%mm6, %%mm1                      \n\t" 
 2678             "pand %%mm0, %%mm1                      \n\t" 
 2679             "pxor %%mm1, %%mm6                      \n\t" 
 2682             "movq (%0, %1, 8), %%mm5                \n\t" 
 2684             "movq (%0, %1, 8), %%mm7                \n\t" 
 2685             "movq %%mm5, %%mm1                      \n\t" 
 2686             "movq %%mm7, %%mm2                      \n\t" 
 2687             "psubusb %%mm7, %%mm5                   \n\t" 
 2688             "psubusb %%mm1, %%mm2                   \n\t" 
 2689             "por %%mm5, %%mm2                       \n\t"  
 2690             "movq %2, %%mm0                         \n\t"   
 2691             "psubusb %%mm2, %%mm0                   \n\t"  
 2692             "pcmpeqb %%mm4, %%mm0                   \n\t"  
 2694             "pxor %%mm7, %%mm1                      \n\t" 
 2695             "pand %%mm0, %%mm1                      \n\t" 
 2696             "pxor %%mm1, %%mm7                      \n\t" 
 2698             "movq %%mm6, %%mm5                      \n\t" 
 2699             "punpckhbw %%mm4, %%mm6                 \n\t" 
 2700             "punpcklbw %%mm4, %%mm5                 \n\t" 
 2703             "movq %%mm5, %%mm0                      \n\t" 
 2704             "movq %%mm6, %%mm1                      \n\t" 
 2705             "psllw $2, %%mm0                        \n\t" 
 2706             "psllw $2, %%mm1                        \n\t" 
 2707             "paddw "MANGLE(w04)
", %%mm0             \n\t" 
 2708             "paddw "MANGLE(w04)
", %%mm1             \n\t" 
 2711             "movq (%0), %%mm2                       \n\t"\ 
 2712             "movq (%0), %%mm3                       \n\t"\ 
 2714             "punpcklbw %%mm4, %%mm2                 \n\t"\ 
 2715             "punpckhbw %%mm4, %%mm3                 \n\t"\ 
 2716             "paddw %%mm2, %%mm0                     \n\t"\ 
 2717             "paddw %%mm3, %%mm1                     \n\t" 
 2720             "movq (%0), %%mm2                       \n\t"\ 
 2721             "movq (%0), %%mm3                       \n\t"\ 
 2723             "punpcklbw %%mm4, %%mm2                 \n\t"\ 
 2724             "punpckhbw %%mm4, %%mm3                 \n\t"\ 
 2725             "psubw %%mm2, %%mm0                     \n\t"\ 
 2726             "psubw %%mm3, %%mm1                     \n\t" 
 2732             "movq %%mm0, (%3)                       \n\t" 
 2733             "movq %%mm1, 8(%3)                      \n\t" 
 2736             "psubw %%mm5, %%mm0                     \n\t" 
 2737             "psubw %%mm6, %%mm1                     \n\t" 
 2738             "movq %%mm0, 16(%3)                     \n\t" 
 2739             "movq %%mm1, 24(%3)                     \n\t" 
 2742             "psubw %%mm5, %%mm0                     \n\t" 
 2743             "psubw %%mm6, %%mm1                     \n\t" 
 2744             "movq %%mm0, 32(%3)                     \n\t" 
 2745             "movq %%mm1, 40(%3)                     \n\t" 
 2748             "psubw %%mm5, %%mm0                     \n\t" 
 2749             "psubw %%mm6, %%mm1                     \n\t" 
 2750             "movq %%mm0, 48(%3)                     \n\t" 
 2751             "movq %%mm1, 56(%3)                     \n\t" 
 2754             "psubw %%mm5, %%mm0                     \n\t" 
 2755             "psubw %%mm6, %%mm1                     \n\t" 
 2756             "movq %%mm0, 64(%3)                     \n\t" 
 2757             "movq %%mm1, 72(%3)                     \n\t" 
 2759             "movq %%mm7, %%mm6                      \n\t" 
 2760             "punpckhbw %%mm4, %%mm7                 \n\t" 
 2761             "punpcklbw %%mm4, %%mm6                 \n\t" 
 2767             "movq %%mm0, 80(%3)                     \n\t" 
 2768             "movq %%mm1, 88(%3)                     \n\t" 
 2771             "paddw %%mm6, %%mm0                     \n\t" 
 2772             "paddw %%mm7, %%mm1                     \n\t" 
 2773             "movq %%mm0, 96(%3)                     \n\t" 
 2774             "movq %%mm1, 104(%3)                    \n\t" 
 2777             "paddw %%mm6, %%mm0                     \n\t" 
 2778             "paddw %%mm7, %%mm1                     \n\t" 
 2779             "movq %%mm0, 112(%3)                    \n\t" 
 2780             "movq %%mm1, 120(%3)                    \n\t" 
 2783             "paddw %%mm6, %%mm0                     \n\t" 
 2784             "paddw %%mm7, %%mm1                     \n\t" 
 2785             "movq %%mm0, 128(%3)                    \n\t" 
 2786             "movq %%mm1, 136(%3)                    \n\t" 
 2789             "paddw %%mm6, %%mm0                     \n\t" 
 2790             "paddw %%mm7, %%mm1                     \n\t" 
 2791             "movq %%mm0, 144(%3)                    \n\t" 
 2792             "movq %%mm1, 152(%3)                    \n\t" 
 2797             : 
"r" ((
x86_reg)step), 
"m" (c->pQPb), 
"r"(sums), 
"g"(src)
 
 2804             "movq %4, %%mm6                         \n\t" 
 2805             "pcmpeqb %%mm5, %%mm5                   \n\t" 
 2806             "pxor %%mm6, %%mm5                      \n\t" 
 2807             "pxor %%mm7, %%mm7                      \n\t" 
 2810             "movq (%1), %%mm0                       \n\t" 
 2811             "movq 8(%1), %%mm1                      \n\t" 
 2812             "paddw 32(%1), %%mm0                    \n\t" 
 2813             "paddw 40(%1), %%mm1                    \n\t" 
 2814             "movq (%0, %3), %%mm2                   \n\t" 
 2815             "movq %%mm2, %%mm3                      \n\t" 
 2816             "movq %%mm2, %%mm4                      \n\t" 
 2817             "punpcklbw %%mm7, %%mm2                 \n\t" 
 2818             "punpckhbw %%mm7, %%mm3                 \n\t" 
 2819             "paddw %%mm2, %%mm0                     \n\t" 
 2820             "paddw %%mm3, %%mm1                     \n\t" 
 2821             "paddw %%mm2, %%mm0                     \n\t" 
 2822             "paddw %%mm3, %%mm1                     \n\t" 
 2823             "psrlw $4, %%mm0                        \n\t" 
 2824             "psrlw $4, %%mm1                        \n\t" 
 2825             "packuswb %%mm1, %%mm0                  \n\t" 
 2826             "pand %%mm6, %%mm0                      \n\t" 
 2827             "pand %%mm5, %%mm4                      \n\t" 
 2828             "por %%mm4, %%mm0                       \n\t" 
 2829             "movq %%mm0, (%0, %3)                   \n\t" 
 2834             : 
"+r"(
offset), 
"+r"(temp_sums)
 
 2840     if(eq_mask != -1LL){
 
 2844             "pxor %%mm7, %%mm7                      \n\t" 
 2848             "movq (%0), %%mm0                       \n\t" 
 2849             "movq %%mm0, %%mm1                      \n\t" 
 2850             "punpcklbw %%mm7, %%mm0                 \n\t"  
 2851             "punpckhbw %%mm7, %%mm1                 \n\t"  
 2853             "movq (%0, %1), %%mm2                   \n\t" 
 2854             "lea (%0, %1, 2), %%"FF_REG_a
"          \n\t" 
 2855             "movq %%mm2, %%mm3                      \n\t" 
 2856             "punpcklbw %%mm7, %%mm2                 \n\t"  
 2857             "punpckhbw %%mm7, %%mm3                 \n\t"  
 2859             "movq (%%"FF_REG_a
"), %%mm4             \n\t" 
 2860             "movq %%mm4, %%mm5                      \n\t" 
 2861             "punpcklbw %%mm7, %%mm4                 \n\t"  
 2862             "punpckhbw %%mm7, %%mm5                 \n\t"  
 2864             "paddw %%mm0, %%mm0                     \n\t"  
 2865             "paddw %%mm1, %%mm1                     \n\t"  
 2866             "psubw %%mm4, %%mm2                     \n\t"  
 2867             "psubw %%mm5, %%mm3                     \n\t"  
 2868             "psubw %%mm2, %%mm0                     \n\t"  
 2869             "psubw %%mm3, %%mm1                     \n\t"  
 2871             "psllw $2, %%mm2                        \n\t"  
 2872             "psllw $2, %%mm3                        \n\t"  
 2873             "psubw %%mm2, %%mm0                     \n\t"  
 2874             "psubw %%mm3, %%mm1                     \n\t"  
 2876             "movq (%%"FF_REG_a
", %1), %%mm2         \n\t" 
 2877             "movq %%mm2, %%mm3                      \n\t" 
 2878             "punpcklbw %%mm7, %%mm2                 \n\t"  
 2879             "punpckhbw %%mm7, %%mm3                 \n\t"  
 2881             "psubw %%mm2, %%mm0                     \n\t"  
 2882             "psubw %%mm3, %%mm1                     \n\t"  
 2883             "psubw %%mm2, %%mm0                     \n\t"  
 2884             "psubw %%mm3, %%mm1                     \n\t"  
 2885             "movq %%mm0, (%4)                       \n\t"  
 2886             "movq %%mm1, 8(%4)                      \n\t"  
 2888             "movq (%%"FF_REG_a
", %1, 2), %%mm0      \n\t" 
 2889             "movq %%mm0, %%mm1                      \n\t" 
 2890             "punpcklbw %%mm7, %%mm0                 \n\t"  
 2891             "punpckhbw %%mm7, %%mm1                 \n\t"  
 2893             "psubw %%mm0, %%mm2                     \n\t"  
 2894             "psubw %%mm1, %%mm3                     \n\t"  
 2895             "movq %%mm2, 16(%4)                     \n\t"  
 2896             "movq %%mm3, 24(%4)                     \n\t"  
 2897             "paddw %%mm4, %%mm4                     \n\t"  
 2898             "paddw %%mm5, %%mm5                     \n\t"  
 2899             "psubw %%mm2, %%mm4                     \n\t"  
 2900             "psubw %%mm3, %%mm5                     \n\t"  
 2902             "lea (%%"FF_REG_a
", %1), %0             \n\t" 
 2903             "psllw $2, %%mm2                        \n\t"  
 2904             "psllw $2, %%mm3                        \n\t"  
 2905             "psubw %%mm2, %%mm4                     \n\t"  
 2906             "psubw %%mm3, %%mm5                     \n\t"  
 2908             "movq (%0, %1, 2), %%mm2                \n\t" 
 2909             "movq %%mm2, %%mm3                      \n\t" 
 2910             "punpcklbw %%mm7, %%mm2                 \n\t"  
 2911             "punpckhbw %%mm7, %%mm3                 \n\t"  
 2912             "psubw %%mm2, %%mm4                     \n\t"  
 2913             "psubw %%mm3, %%mm5                     \n\t"  
 2914             "psubw %%mm2, %%mm4                     \n\t"  
 2915             "psubw %%mm3, %%mm5                     \n\t"  
 2917             "movq (%%"FF_REG_a
", %1, 4), %%mm6      \n\t" 
 2918             "punpcklbw %%mm7, %%mm6                 \n\t"  
 2919             "psubw %%mm6, %%mm2                     \n\t"  
 2920             "movq (%%"FF_REG_a
", %1, 4), %%mm6      \n\t" 
 2921             "punpckhbw %%mm7, %%mm6                 \n\t"  
 2922             "psubw %%mm6, %%mm3                     \n\t"  
 2924             "paddw %%mm0, %%mm0                     \n\t"  
 2925             "paddw %%mm1, %%mm1                     \n\t"  
 2926             "psubw %%mm2, %%mm0                     \n\t"  
 2927             "psubw %%mm3, %%mm1                     \n\t"  
 2929             "psllw $2, %%mm2                        \n\t"  
 2930             "psllw $2, %%mm3                        \n\t"  
 2931             "psubw %%mm2, %%mm0                     \n\t"  
 2932             "psubw %%mm3, %%mm1                     \n\t"  
 2934             "movq (%0, %1, 4), %%mm2                \n\t" 
 2935             "movq %%mm2, %%mm3                      \n\t" 
 2936             "punpcklbw %%mm7, %%mm2                 \n\t"  
 2937             "punpckhbw %%mm7, %%mm3                 \n\t"  
 2939             "paddw %%mm2, %%mm2                     \n\t"  
 2940             "paddw %%mm3, %%mm3                     \n\t"  
 2941             "psubw %%mm2, %%mm0                     \n\t"  
 2942             "psubw %%mm3, %%mm1                     \n\t"  
 2944             "movq (%4), %%mm2                       \n\t"  
 2945             "movq 8(%4), %%mm3                      \n\t"  
 2947 #if TEMPLATE_PP_MMXEXT 
 2948             "movq %%mm7, %%mm6                      \n\t"  
 2949             "psubw %%mm0, %%mm6                     \n\t" 
 2950             "pmaxsw %%mm6, %%mm0                    \n\t"  
 2951             "movq %%mm7, %%mm6                      \n\t"  
 2952             "psubw %%mm1, %%mm6                     \n\t" 
 2953             "pmaxsw %%mm6, %%mm1                    \n\t"  
 2954             "movq %%mm7, %%mm6                      \n\t"  
 2955             "psubw %%mm2, %%mm6                     \n\t" 
 2956             "pmaxsw %%mm6, %%mm2                    \n\t"  
 2957             "movq %%mm7, %%mm6                      \n\t"  
 2958             "psubw %%mm3, %%mm6                     \n\t" 
 2959             "pmaxsw %%mm6, %%mm3                    \n\t"  
 2961             "movq %%mm7, %%mm6                      \n\t"  
 2962             "pcmpgtw %%mm0, %%mm6                   \n\t" 
 2963             "pxor %%mm6, %%mm0                      \n\t" 
 2964             "psubw %%mm6, %%mm0                     \n\t"  
 2965             "movq %%mm7, %%mm6                      \n\t"  
 2966             "pcmpgtw %%mm1, %%mm6                   \n\t" 
 2967             "pxor %%mm6, %%mm1                      \n\t" 
 2968             "psubw %%mm6, %%mm1                     \n\t"  
 2969             "movq %%mm7, %%mm6                      \n\t"  
 2970             "pcmpgtw %%mm2, %%mm6                   \n\t" 
 2971             "pxor %%mm6, %%mm2                      \n\t" 
 2972             "psubw %%mm6, %%mm2                     \n\t"  
 2973             "movq %%mm7, %%mm6                      \n\t"  
 2974             "pcmpgtw %%mm3, %%mm6                   \n\t" 
 2975             "pxor %%mm6, %%mm3                      \n\t" 
 2976             "psubw %%mm6, %%mm3                     \n\t"  
 2979 #if TEMPLATE_PP_MMXEXT 
 2980             "pminsw %%mm2, %%mm0                    \n\t" 
 2981             "pminsw %%mm3, %%mm1                    \n\t" 
 2983             "movq %%mm0, %%mm6                      \n\t" 
 2984             "psubusw %%mm2, %%mm6                   \n\t" 
 2985             "psubw %%mm6, %%mm0                     \n\t" 
 2986             "movq %%mm1, %%mm6                      \n\t" 
 2987             "psubusw %%mm3, %%mm6                   \n\t" 
 2988             "psubw %%mm6, %%mm1                     \n\t" 
 2991             "movd %2, %%mm2                         \n\t"  
 2992             "punpcklbw %%mm7, %%mm2                 \n\t" 
 2994             "movq %%mm7, %%mm6                      \n\t"  
 2995             "pcmpgtw %%mm4, %%mm6                   \n\t"  
 2996             "pxor %%mm6, %%mm4                      \n\t" 
 2997             "psubw %%mm6, %%mm4                     \n\t"  
 2998             "pcmpgtw %%mm5, %%mm7                   \n\t"  
 2999             "pxor %%mm7, %%mm5                      \n\t" 
 3000             "psubw %%mm7, %%mm5                     \n\t"  
 3002             "psllw $3, %%mm2                        \n\t"  
 3003             "movq %%mm2, %%mm3                      \n\t"  
 3004             "pcmpgtw %%mm4, %%mm2                   \n\t" 
 3005             "pcmpgtw %%mm5, %%mm3                   \n\t" 
 3006             "pand %%mm2, %%mm4                      \n\t" 
 3007             "pand %%mm3, %%mm5                      \n\t" 
 3010             "psubusw %%mm0, %%mm4                   \n\t"  
 3011             "psubusw %%mm1, %%mm5                   \n\t"  
 3014             "movq "MANGLE(w05)
", %%mm2              \n\t"  
 3015             "pmullw %%mm2, %%mm4                    \n\t" 
 3016             "pmullw %%mm2, %%mm5                    \n\t" 
 3017             "movq "MANGLE(w20)
", %%mm2              \n\t"  
 3018             "paddw %%mm2, %%mm4                     \n\t" 
 3019             "paddw %%mm2, %%mm5                     \n\t" 
 3020             "psrlw $6, %%mm4                        \n\t" 
 3021             "psrlw $6, %%mm5                        \n\t" 
 3023             "movq 16(%4), %%mm0                     \n\t"  
 3024             "movq 24(%4), %%mm1                     \n\t"  
 3026             "pxor %%mm2, %%mm2                      \n\t" 
 3027             "pxor %%mm3, %%mm3                      \n\t" 
 3029             "pcmpgtw %%mm0, %%mm2                   \n\t"  
 3030             "pcmpgtw %%mm1, %%mm3                   \n\t"  
 3031             "pxor %%mm2, %%mm0                      \n\t" 
 3032             "pxor %%mm3, %%mm1                      \n\t" 
 3033             "psubw %%mm2, %%mm0                     \n\t"  
 3034             "psubw %%mm3, %%mm1                     \n\t"  
 3035             "psrlw $1, %%mm0                        \n\t"  
 3036             "psrlw $1, %%mm1                        \n\t"  
 3038             "pxor %%mm6, %%mm2                      \n\t" 
 3039             "pxor %%mm7, %%mm3                      \n\t" 
 3040             "pand %%mm2, %%mm4                      \n\t" 
 3041             "pand %%mm3, %%mm5                      \n\t" 
 3043 #if TEMPLATE_PP_MMXEXT 
 3044             "pminsw %%mm0, %%mm4                    \n\t" 
 3045             "pminsw %%mm1, %%mm5                    \n\t" 
 3047             "movq %%mm4, %%mm2                      \n\t" 
 3048             "psubusw %%mm0, %%mm2                   \n\t" 
 3049             "psubw %%mm2, %%mm4                     \n\t" 
 3050             "movq %%mm5, %%mm2                      \n\t" 
 3051             "psubusw %%mm1, %%mm2                   \n\t" 
 3052             "psubw %%mm2, %%mm5                     \n\t" 
 3054             "pxor %%mm6, %%mm4                      \n\t" 
 3055             "pxor %%mm7, %%mm5                      \n\t" 
 3056             "psubw %%mm6, %%mm4                     \n\t" 
 3057             "psubw %%mm7, %%mm5                     \n\t" 
 3058             "packsswb %%mm5, %%mm4                  \n\t" 
 3059             "movq %3, %%mm1                         \n\t" 
 3060             "pandn %%mm4, %%mm1                     \n\t" 
 3061             "movq (%0), %%mm0                       \n\t" 
 3062             "paddb   %%mm1, %%mm0                   \n\t" 
 3063             "movq %%mm0, (%0)                       \n\t" 
 3064             "movq (%0, %1), %%mm0                   \n\t" 
 3065             "psubb %%mm1, %%mm0                     \n\t" 
 3066             "movq %%mm0, (%0, %1)                   \n\t" 
 3069             : 
"r" ((
x86_reg)step), 
"m" (c->pQPb), 
"m"(eq_mask), 
"r"(
tmp)
 
 3081 #endif //TEMPLATE_PP_MMX 
 3084                                 const QP_STORE_T QPs[], 
int QPStride, 
int isColor, 
PPContext *
c);
 
 3090 #undef REAL_SCALED_CPY 
 /* Closing part of a parameter list; the opening line of the signature is not visible here.
  * NOTE(review): the call sites RENAME(blockCopy)(dst, dstStride, src, srcStride,
  * mode & LEVEL_FIX, &c.packedYOffset) later in this file suggest this is blockCopy - confirm.
  * Two code paths are visible below: a scaled copy (SCALED_CPY) and a plain copy (SIMPLE_CPY);
  * NOTE(review): presumably selected by levelFix - the selecting if/else is not visible. */
 3094                                      int levelFix, int64_t *packedOffsetAndScale)
 
 3096 #if !TEMPLATE_PP_MMX || !HAVE_6REGS 
 3100 #if TEMPLATE_PP_MMX && HAVE_6REGS 
 /* Register a enters the asm holding packedOffsetAndScale (see the "=&a" / "0" constraints
  * further down), so mm2 = packedOffsetAndScale[0] and mm3 = packedOffsetAndScale[1]. */
 3102         "movq (%%"FF_REG_a
"), %%mm2     \n\t"  
 3103         "movq 8(%%"FF_REG_a
"), %%mm3    \n\t"  
 /* a = %2 + %4 and d = %3 + %5; mm4 is zeroed for the unpacks of the non-MMXEXT variant.
  * NOTE(review): %2/%3 are presumably src/dst and %4/%5 their strides - the operand list
  * is not fully visible in this listing. */
 3104         "lea (%2,%4), %%"FF_REG_a
"      \n\t" 
 3105         "lea (%3,%5), %%"FF_REG_d
"      \n\t" 
 3106         "pxor %%mm4, %%mm4              \n\t" 
 3107 #if TEMPLATE_PP_MMXEXT 
 /* MMXEXT variant, two 8-byte rows per invocation: every byte is doubled into a 16-bit word
  * (punpck with itself), multiplied by mm3 keeping the unsigned high half (pmulhuw),
  * reduced by mm2, then packed back to bytes with unsigned saturation and stored. */
 3108 #define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                                \ 
 3109         "movq " #src1 ", %%mm0          \n\t"\ 
 3110         "movq " #src1 ", %%mm5          \n\t"\ 
 3111         "movq " #src2 ", %%mm1          \n\t"\ 
 3112         "movq " #src2 ", %%mm6          \n\t"\ 
 3113         "punpcklbw %%mm0, %%mm0         \n\t"\ 
 3114         "punpckhbw %%mm5, %%mm5         \n\t"\ 
 3115         "punpcklbw %%mm1, %%mm1         \n\t"\ 
 3116         "punpckhbw %%mm6, %%mm6         \n\t"\ 
 3117         "pmulhuw %%mm3, %%mm0           \n\t"\ 
 3118         "pmulhuw %%mm3, %%mm5           \n\t"\ 
 3119         "pmulhuw %%mm3, %%mm1           \n\t"\ 
 3120         "pmulhuw %%mm3, %%mm6           \n\t"\ 
 3121         "psubw %%mm2, %%mm0             \n\t"\ 
 3122         "psubw %%mm2, %%mm5             \n\t"\ 
 3123         "psubw %%mm2, %%mm1             \n\t"\ 
 3124         "psubw %%mm2, %%mm6             \n\t"\ 
 3125         "packuswb %%mm5, %%mm0          \n\t"\ 
 3126         "packuswb %%mm6, %%mm1          \n\t"\ 
 3127         "movq %%mm0, " #dst1 "          \n\t"\ 
 3128         "movq %%mm1, " #dst2 "          \n\t"\ 
 3130 #else //TEMPLATE_PP_MMXEXT 
 /* Non-MMXEXT variant: bytes are zero-extended against mm4, reduced by mm2, shifted left
  * by 6, multiplied by mm3 keeping the signed high half (pmulhw), then packed back to
  * bytes with unsigned saturation and stored. */
 3131 #define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                        \ 
 3132         "movq " #src1 ", %%mm0          \n\t"\ 
 3133         "movq " #src1 ", %%mm5          \n\t"\ 
 3134         "punpcklbw %%mm4, %%mm0         \n\t"\ 
 3135         "punpckhbw %%mm4, %%mm5         \n\t"\ 
 3136         "psubw %%mm2, %%mm0             \n\t"\ 
 3137         "psubw %%mm2, %%mm5             \n\t"\ 
 3138         "movq " #src2 ", %%mm1          \n\t"\ 
 3139         "psllw $6, %%mm0                \n\t"\ 
 3140         "psllw $6, %%mm5                \n\t"\ 
 3141         "pmulhw %%mm3, %%mm0            \n\t"\ 
 3142         "movq " #src2 ", %%mm6          \n\t"\ 
 3143         "pmulhw %%mm3, %%mm5            \n\t"\ 
 3144         "punpcklbw %%mm4, %%mm1         \n\t"\ 
 3145         "punpckhbw %%mm4, %%mm6         \n\t"\ 
 3146         "psubw %%mm2, %%mm1             \n\t"\ 
 3147         "psubw %%mm2, %%mm6             \n\t"\ 
 3148         "psllw $6, %%mm1                \n\t"\ 
 3149         "psllw $6, %%mm6                \n\t"\ 
 3150         "pmulhw %%mm3, %%mm1            \n\t"\ 
 3151         "pmulhw %%mm3, %%mm6            \n\t"\ 
 3152         "packuswb %%mm5, %%mm0          \n\t"\ 
 3153         "packuswb %%mm6, %%mm1          \n\t"\ 
 3154         "movq %%mm0, " #dst1 "          \n\t"\ 
 3155         "movq %%mm1, " #dst2 "          \n\t"\ 
 3157 #endif //TEMPLATE_PP_MMXEXT 
 /* Extra macro level in front of the stringifying REAL_SCALED_CPY.
  * NOTE(review): presumably so that FF_REGa / FF_REGd in the arguments are macro-expanded
  * before `#` turns them into text - their definitions are not visible here. */
 3158 #define SCALED_CPY(src1, src2, dst1, dst2)\ 
 3159    REAL_SCALED_CPY(src1, src2, dst1, dst2) 
 /* Four invocations, two rows each. With a = %2 + %4 the first three address rows
  * 0-1, 2-3 and 4-5; a and d are then advanced by four steps so that the last
  * invocation addresses rows 6-7. */
 3161 SCALED_CPY((%2)       , (%2, %4)      , (%3)       , (%3, %5))
 
 3162 SCALED_CPY((%2, %4, 2), (%%FF_REGa, %4, 2), (%3, %5, 2), (%%FF_REGd, %5, 2))
 
 3163 SCALED_CPY((%2, %4, 4), (%%FF_REGa, %4, 4), (%3, %5, 4), (%%FF_REGd, %5, 4))
 
 3164         "lea (%%"FF_REG_a
",%4,4), %%"FF_REG_a
"        \n\t" 
 3165         "lea (%%"FF_REG_d
",%5,4), %%"FF_REG_d
"        \n\t" 
 3166 SCALED_CPY((%%FF_REGa, %4), (%%FF_REGa, %4, 2), (%%FF_REGd, %5), (%%FF_REGd, %5, 2))
 
 /* Operand 0 is an early-clobber output in register a, and the first input is tied to it
  * with "0": the pointer is passed in through a, which the asm then overwrites. */
 3169         : 
"=&a" (packedOffsetAndScale)
 
 3170         : 
"0" (packedOffsetAndScale),
 
 3177 #else //TEMPLATE_PP_MMX && HAVE_6REGS 
 /* Fallback without the MMX path: row-wise memcpy into dst (the remaining memcpy
  * arguments and the surrounding loop are not visible in this listing). */
 3179         memcpy( &(dst[dstStride*i]),
 
 3181 #endif //TEMPLATE_PP_MMX && HAVE_6REGS 
 3183 #if TEMPLATE_PP_MMX && HAVE_6REGS 
 /* Plain (unscaled) copy. Here operands are numbered from 0: a = %0 + %2, d = %1 + %3. */
 3185         "lea (%0,%2), %%"FF_REG_a
"      \n\t" 
 3186         "lea (%1,%3), %%"FF_REG_d
"      \n\t" 
 /* Copies two 8-byte rows through mm0 / mm1 without modifying them. */
 3188 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)                              \ 
 3189         "movq " #src1 ", %%mm0          \n\t"\ 
 3190         "movq " #src2 ", %%mm1          \n\t"\ 
 3191         "movq %%mm0, " #dst1 "          \n\t"\ 
 3192         "movq %%mm1, " #dst2 "          \n\t"\ 
 3194 #define SIMPLE_CPY(src1, src2, dst1, dst2)\ 
 3195    REAL_SIMPLE_CPY(src1, src2, dst1, dst2) 
 /* Same row addressing scheme as the scaled path: rows 0-1, 2-3, 4-5, then 6-7 after
  * a and d have been advanced by four steps. */
 3197 SIMPLE_CPY((%0)       , (%0, %2)          , (%1)       , (%1, %3))
 
 3198 SIMPLE_CPY((%0, %2, 2), (%%FF_REGa, %2, 2), (%1, %3, 2), (%%FF_REGd, %3, 2))
 
 3199 SIMPLE_CPY((%0, %2, 4), (%%FF_REGa, %2, 4), (%1, %3, 4), (%%FF_REGd, %3, 4))
 
 3200         "lea (%%"FF_REG_a
",%2,4), %%"FF_REG_a
"        \n\t" 
 3201         "lea (%%"FF_REG_d
",%3,4), %%"FF_REG_d
"        \n\t" 
 3202 SIMPLE_CPY((%%FF_REGa, %2), (%%FF_REGa, %2, 2), (%%FF_REGd, %3), (%%FF_REGd, %3, 2))
 
 /* Registers a and d are used as scratch pointers above, hence listed as clobbered. */
 3208         : 
"%"FF_REG_a, 
"%"FF_REG_d
 
 3210 #else //TEMPLATE_PP_MMX && HAVE_6REGS 
 /* Fallback for the plain copy: row-wise memcpy (remaining arguments not visible). */
 3212         memcpy( &(dst[dstStride*i]),
 
 3214 #endif //TEMPLATE_PP_MMX && HAVE_6REGS 
 /* Inline-asm body fragment; the enclosing function header is not visible in this listing.
  * NOTE(review): presumably RENAME(duplicate), which is called later as
  * RENAME(duplicate)(dstBlock + dstStride*8, dstStride) - confirm.
  * Loads the 8 bytes at (%0) into mm0 and writes them to (%0 + 4*%1). */
 3225         "movq (%0), %%mm0               \n\t" 
 3226         "movq %%mm0, (%0, %1, 4)        \n\t" 
 /* Writes the same 8 bytes to %0, %0 + %1, %0 + 2*%1 and %0 + 4*%1.
  * NOTE(review): an instruction appears to be missing from the listing just before this
  * group (otherwise the first and last store would be redundant), so %0 has presumably
  * been changed in between - check against the real source. */
 3228         "movq %%mm0, (%0)               \n\t" 
 3229         "movq %%mm0, (%0, %1)           \n\t" 
 3230         "movq %%mm0, (%0, %1, 2)        \n\t" 
 3231         "movq %%mm0, (%0, %1, 4)        \n\t" 
 3245 #if ARCH_X86 && TEMPLATE_PP_MMXEXT 
 /* x86 variant (built when ARCH_X86 && TEMPLATE_PP_MMXEXT): emits the `prefetchnta`
  * instruction on the address held in asm operand 0.
  * NOTE(review): the operand list is not visible in this listing; operand 0 is presumably p. */
 3246 static inline void RENAME(prefetchnta)(
const void *p)
 
 3248     __asm__ 
volatile(   
"prefetchnta (%0)\n\t" 
 /* x86 variant (built when ARCH_X86 && TEMPLATE_PP_MMXEXT): emits the `prefetcht0`
  * instruction on the address held in asm operand 0.
  * NOTE(review): the operand list is not visible in this listing; operand 0 is presumably p. */
 3253 static inline void RENAME(prefetcht0)(
const void *p)
 
 3255     __asm__ 
volatile(   
"prefetcht0 (%0)\n\t" 
 /* x86 variant (built when ARCH_X86 && TEMPLATE_PP_MMXEXT): emits the `prefetcht1`
  * instruction on the address held in asm operand 0.
  * NOTE(review): the operand list is not visible in this listing; operand 0 is presumably p. */
 3260 static inline void RENAME(prefetcht1)(
const void *p)
 
 3262     __asm__ 
volatile(   
"prefetcht1 (%0)\n\t" 
 /* x86 variant (built when ARCH_X86 && TEMPLATE_PP_MMXEXT): emits the `prefetcht2`
  * instruction on the address held in asm operand 0.
  * NOTE(review): the operand list is not visible in this listing; operand 0 is presumably p. */
 3267 static inline void RENAME(prefetcht2)(
const void *p)
 
 3269     __asm__ 
volatile(   
"prefetcht2 (%0)\n\t" 
 3273 #elif !ARCH_X86 && AV_GCC_VERSION_AT_LEAST(3,2) 
 /* Non-x86 variant for GCC >= 3.2: prefetch p for reading (rw = 0) with locality hint 0,
  * the lowest temporal-locality level of __builtin_prefetch. */
 3274 static inline void RENAME(prefetchnta)(
const void *p)
 
 3276     __builtin_prefetch(p,0,0);
 
 /* Non-x86 variant for GCC >= 3.2: prefetch p for reading (rw = 0) with locality hint 1. */
 3278 static inline void RENAME(prefetcht0)(
const void *p)
 
 3280     __builtin_prefetch(p,0,1);
 
 /* Non-x86 variant for GCC >= 3.2: prefetch p for reading (rw = 0) with locality hint 2. */
 3282 static inline void RENAME(prefetcht1)(
const void *p)
 
 3284     __builtin_prefetch(p,0,2);
 
 /* Non-x86 variant for GCC >= 3.2: prefetch p for reading (rw = 0) with locality hint 3,
  * the highest temporal-locality level of __builtin_prefetch. */
 3286 static inline void RENAME(prefetcht2)(
const void *p)
 
 3288     __builtin_prefetch(p,0,3);
 
 /* Third definition of prefetchnta in this conditional chain.
  * NOTE(review): presumably the do-nothing fallback of an #else branch - neither the
  * #else line nor the function body is visible in this listing; confirm. */
 3291 static inline void RENAME(prefetchnta)(
const void *p)
 
 /* Third definition of prefetcht0 in this conditional chain.
  * NOTE(review): presumably the do-nothing fallback - the body is not visible here; confirm. */
 3295 static inline void RENAME(prefetcht0)(
const void *p)
 
 /* Third definition of prefetcht1 in this conditional chain.
  * NOTE(review): presumably the do-nothing fallback - the body is not visible here; confirm. */
 3299 static inline void RENAME(prefetcht1)(
const void *p)
 
 /* Third definition of prefetcht2 in this conditional chain.
  * NOTE(review): presumably the do-nothing fallback - the body is not visible here; confirm. */
 3303 static inline void RENAME(prefetcht2)(
const void *p)
 
 /* Closing part of a parameter list; the opening line of the signature is not visible here.
  * NOTE(review): presumably the definition of RENAME(postProcess) - confirm.
  * The body reads and writes `c` as a struct value (c.ppMode, c.QP, ...) and uses `c2`
  * only for logging in the visible lines; NOTE(review): `c` is presumably a local copy of
  * *c2 declared on a line that is missing from this listing. */
 3312                                 const QP_STORE_T QPs[], 
int QPStride, 
int isColor, 
PPContext *
c2)
 
 /* Filter mode: a compile-time constant when TEMPLATE_PP_TIME_MODE is defined, otherwise
  * the chroma or luma mode of the context depending on isColor. */
 3316 #ifdef TEMPLATE_PP_TIME_MODE 
 3317     const int mode= TEMPLATE_PP_TIME_MODE;
 
 3319     const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
 
 3321     int black=0, white=255; 
 
 /* 256*256 makes the later (QP*QPCorrecture + 256*128)>>16 an identity mapping. */
 3322     int QPCorrecture= 256*256;
 
 /* Right-shifts applied to x / y before indexing the QP tables: 4 for luma,
  * reduced by the chroma subsampling shift for chroma planes. */
 3329     const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
 
 3330     const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
 
 3333     uint64_t * 
const yHistogram= c.yHistogram;
 
 /* With a negative stride the temp pointers are moved 23 rows into their buffers.
  * NOTE(review): presumably so that stride-relative row addressing stays inside the
  * allocation - buffer sizes are not visible here. */
 3334     uint8_t * 
const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
 
 3335     uint8_t * 
const tempDst= (dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride) + 32;
 
 3340             av_log(
c2, 
AV_LOG_WARNING, 
"Visualization is currently only supported with the accurate deblock filter without SIMD\n");
 
 /* Fill the 57-entry DC tables: offset = ((i*baseDcDiff)>>8) + 1, threshold = 2*offset + 1;
  * each is stored as 0x7F - value and replicated into all 8 bytes of the 64-bit entry
  * by the multiplication with 0x0101010101010101. */
 3345     for(i=0; i<57; i++){
 
 3346         int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
 
 3347         int threshold= offset*2 + 1;
 
 3348         c.mmxDcOffset[i]= 0x7F - 
offset;
 
 3349         c.mmxDcThreshold[i]= 0x7F - threshold;
 
 3350         c.mmxDcOffset[i]*= 0x0101010101010101LL;
 
 3351         c.mmxDcThreshold[i]*= 0x0101010101010101LL;
 
 3365     else if(mode & 
DERING) copyAhead=9;
 
 3373         uint64_t maxClipped;
 
 /* Histogram-driven level estimation: bin 0 is seeded on frame 1, all 256 bins are summed,
  * and maxClipped is that sum rescaled by the maxClippedThreshold rational. */
 3379         if(c.frameNum == 1) yHistogram[0]= width*(uint64_t)height/64*15/256;
 
 3381         for(i=0; i<256; i++){
 
 3382             sum+= yHistogram[i];
 
 3386         maxClipped= 
av_rescale(sum, c.ppMode.maxClippedThreshold.num, c.ppMode.maxClippedThreshold.den);
 
 /* Walk `black` down from 255 and `white` up from 0, subtracting the visited bin from
  * `clipped` each step, until `clipped` falls below maxClipped (the initialisation of
  * `clipped` before each loop is not visible in this listing). */
 3389         for(black=255; black>0; black--){
 
 3390             if(clipped < maxClipped) 
break;
 
 3391             clipped-= yHistogram[black];
 
 3395         for(white=0; white<256; white++){
 
 3396             if(clipped < maxClipped) 
break;
 
 3397             clipped-= yHistogram[white];
 
 /* scale = (allowed output range) / (measured input range). */
 3400         scale = (
AVRational){c.ppMode.maxAllowedY - c.ppMode.minAllowedY, white - black};
 
 /* The offset is kept in 16 bits; the MMXEXT build pre-multiplies black by the scale
  * (>>8) because its copy macro multiplies before subtracting, the other build
  * subtracts first. */
 3402 #if TEMPLATE_PP_MMXEXT 
 3404         c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
 
 3407         c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
 
 /* Replicate the 16-bit offset and scale into all four 16-bit lanes of their 64-bit words. */
 3410         c.packedYOffset|= c.packedYOffset<<32;
 
 3411         c.packedYOffset|= c.packedYOffset<<16;
 
 3413         c.packedYScale|= c.packedYScale<<32;
 
 3414         c.packedYScale|= c.packedYScale<<16;
 
 3417         else                        QPCorrecture= 256*256;
 
 /* Default scale: 0x0100 in every 16-bit lane, and the identity QP correction. */
 3419         c.packedYScale= 0x0100010001000100LL;
 
 3421         QPCorrecture= 256*256;
 
 /* Pre-pass over the first rows: source rows starting at y are copied into tempDst
  * (one row down), 8 rows further on, then deinterlaced according to `mode`. */
 3427         const uint8_t *srcBlock= &(src[y*srcStride]);
 
 3428         uint8_t *dstBlock= tempDst + dstStride;
 
 3434             RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
 
 3435             RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
 
 3436             RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
 
 3437             RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 
 3439             RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
 
 3440                               srcBlock + srcStride*8, srcStride, mode & 
LEVEL_FIX, &c.packedYOffset);
 
 3442             RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
 
 /* Deinterlacer dispatch on the mode bits (some of the conditions are missing here). */
 3445                 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
 
 3446             else if(mode & LINEAR_BLEND_DEINT_FILTER)
 
 3447                 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
 
 3448             else if(mode & MEDIAN_DEINT_FILTER)
 
 3449                 RENAME(deInterlaceMedian)(dstBlock, dstStride);
 
 3450             else if(mode & CUBIC_IPOL_DEINT_FILTER)
 
 3451                 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
 
 3453                 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
 
 3454             else if(mode & LOWPASS5_DEINT_FILTER)
 
 3455                 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
 
 /* Copy copyAhead rows from tempDst (starting 9 rows in) to dst: one linecpy call when the
  * rows are exactly |dstStride| wide, otherwise one memcpy of `width` bytes per row. */
 3462         if(width==
FFABS(dstStride))
 
 3463             linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
 
 3466             for(i=0; i<copyAhead; i++){
 
 3467                 memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
 
 /* Main pass: per-iteration pointers into src / dst, the two temp blocks (8 bytes apart)
  * and the QP rows that belong to y. */
 3474         const uint8_t *srcBlock= &(src[y*srcStride]);
 
 3475         uint8_t *dstBlock= &(dst[y*dstStride]);
 
 3477         uint8_t *tempBlock1= c.tempBlocks;
 
 3478         uint8_t *tempBlock2= c.tempBlocks + 8;
 
 3480         const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
 
 3481         int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*
FFABS(QPStride)];
 
 /* Staging through tempSrc / tempDst: the remaining source rows are copied, the last
  * picture row is repeated for the missing ones, and dstBlock is redirected to tempDst.
  * NOTE(review): presumably only taken near the bottom edge - the guarding condition is
  * not visible in this listing. */
 3489             linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
 
 3490                     FFMAX(height-y-copyAhead, 0), srcStride);
 
 3493             for(i=
FFMAX(height-y, 8); i<copyAhead+8; i++)
 
 3494                     memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), 
FFABS(srcStride));
 
 3497             linecpy(tempDst, dstBlock - dstStride, 
FFMIN(height-y+1, copyAhead+1), dstStride);
 
 3500             for(i=height-y+1; i<=copyAhead; i++)
 
 3501                     memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), 
FFABS(dstStride));
 
 3503             dstBlock= tempDst + dstStride;
 
 /* x is advanced by the inner loops; each outer iteration handles a span that ends at
  * endx = min(width, x+32) and remembers the block pointers at the start of the span. */
 3510         for(x=0; x<
width; ){
 
 3512             int endx = 
FFMIN(width, x+32);
 
 3513             uint8_t *dstBlockStart = dstBlock;
 
 3514             const uint8_t *srcBlockStart = srcBlock;
 
 /* Per block of the span: fetch QP and nonBQP, apply the QP correction with rounding,
  * count one source sample (row 12, column 4 of the block) in the histogram, and cache
  * the values in the per-span arrays. */
 3516             for(qp_index=0; qp_index < (endx-startx)/
BLOCK_SIZE; qp_index++){
 
 3517                 QP = QPptr[(x+qp_index*
BLOCK_SIZE)>>qpHShift];
 
 3518                 nonBQP = nonBQPptr[(x+qp_index*
BLOCK_SIZE)>>qpHShift];
 
 3520                 QP= (QP* QPCorrecture + 256*128)>>16;
 
 3521                 nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
 
 3522                 yHistogram[(srcBlock+qp_index*8)[srcStride*12 + 4]]++;
 
 3524             c.QP_block[qp_index] = 
QP;
 
 3525             c.nonBQP_block[qp_index] = nonBQP;
 
 /* movd + three packuswb spread the low byte of operand 1 (for values that fit in a byte)
  * to all 8 bytes of mm7, which is stored to pQPb_block[qp_index].
  * NOTE(review): operand 1 is presumably QP - the input list is not visible here. */
 3528                 "movd %1, %%mm7         \n\t" 
 3529                 "packuswb %%mm7, %%mm7  \n\t"  
 3530                 "packuswb %%mm7, %%mm7  \n\t"  
 3531                 "packuswb %%mm7, %%mm7  \n\t"  
 3532                 "movq %%mm7, %0         \n\t" 
 3533                 : 
"=m" (c.pQPb_block[qp_index])
 
 /* Same prefetch / copy / deinterlace sequence as in the pre-pass, copyAhead rows ahead. */
 3539             RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
 
 3540             RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
 
 3541             RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
 
 3542             RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 
 3544             RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
 
 3545                               srcBlock + srcStride*copyAhead, srcStride, mode & 
LEVEL_FIX, &c.packedYOffset);
 
 3548                 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
 
 3549             else if(mode & LINEAR_BLEND_DEINT_FILTER)
 
 3550                 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
 
 3551             else if(mode & MEDIAN_DEINT_FILTER)
 
 3552                 RENAME(deInterlaceMedian)(dstBlock, dstStride);
 
 3553             else if(mode & CUBIC_IPOL_DEINT_FILTER)
 
 3554                 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
 
 3556                 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
 
 3557             else if(mode & LOWPASS5_DEINT_FILTER)
 
 3558                 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
 
 /* Second sweep over the same span: rewind the block pointers, reload the cached
  * per-block QP values into the context and select the vertical filter from the mode bits
  * (the filter calls themselves are not visible in this listing). */
 3566           dstBlock = dstBlockStart;
 
 3567           srcBlock = srcBlockStart;
 
 3569           for(x = startx, qp_index = 0; x < endx; x+=
BLOCK_SIZE, qp_index++){
 
 3570             const int stride= dstStride;
 
 3573             c.QP = c.QP_block[qp_index];
 
 3574             c.nonBQP = c.nonBQP_block[qp_index];
 
 3575             c.pQPb = c.pQPb_block[qp_index];
 
 3576             c.pQPb2 = c.pQPb2_block[qp_index];
 
 3580                 if(mode & V_X1_FILTER)
 
 3582                 else if(mode & V_DEBLOCK){
 
 /* Third sweep over the span: the block is transposed into the temp blocks, filtered there
  * with the vertical-filter routines at a stride of 16, and transposed back. */
 3598           dstBlock = dstBlockStart;
 
 3599           srcBlock = srcBlockStart;
 
 3601           for(x = startx, qp_index=0; x < endx; x+=
BLOCK_SIZE, qp_index++){
 
 3602             const int stride= dstStride;
 
 3604             c.QP = c.QP_block[qp_index];
 
 3605             c.nonBQP = c.nonBQP_block[qp_index];
 
 3606             c.pQPb = c.pQPb_block[qp_index];
 
 3607             c.pQPb2 = c.pQPb2_block[qp_index];
 
 3609             RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
 
 3615                         RENAME(vertX1Filter)(tempBlock1, 16, &
c);
 
 3617                     const int t= 
RENAME(vertClassify)(tempBlock1, 16, &
c);
 
 3619                         RENAME(doVertLowPass)(tempBlock1, 16, &
c);
 
 3621                         RENAME(doVertDefFilter)(tempBlock1, 16, &
c);
 
 3623                         RENAME(do_a_deblock)(tempBlock1, 16, 1, &
c, 
mode);
 
 3626                 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
 
 3629                 if(mode & H_X1_FILTER)
 
 3631                 else if(mode & H_DEBLOCK){
 
 3632 #if TEMPLATE_PP_ALTIVEC 
 3654                 }
else if(mode & H_A_DEBLOCK){
 
 3657 #endif //TEMPLATE_PP_MMX 
 3666                             c.tempBlurred[isColor] + y*dstStride + x,
 
 3667                             c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
 
 3668                             c.ppMode.maxTmpNoise);
 
 /* Swap the two temp blocks for the next block. */
 3676             tmpXchg= tempBlock1;
 
 3677             tempBlock1= tempBlock2;
 
 3678             tempBlock2 = tmpXchg;
 
 /* Dering runs one row up and 8 bytes to the left of dstBlock and is skipped when y == 0. */
 3684             if(y > 0) 
RENAME(dering)(dstBlock - dstStride - 8, dstStride, &
c);
 
 3688             RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
 
 3689                     c.tempBlurred[isColor] + y*dstStride + x,
 
 3690                     c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
 
 3691                     c.ppMode.maxTmpNoise);
 
 /* Copy the remaining height-y rows from tempDst (one row in) back to dst: one linecpy
  * call when rows are exactly |dstStride| wide, otherwise one memcpy per row. */
 3696             uint8_t *dstBlock= &(dst[y*dstStride]);
 
 3697             if(width==
FFABS(dstStride))
 
 3698                 linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
 
 3701                 for(i=0; i<height-y; i++){
 
 3702                     memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
 
 /* Leave MMX state: femms in 3DNow builds, emms in other MMX builds. */
 3707 #if   TEMPLATE_PP_3DNOW 
 3708     __asm__ 
volatile(
"femms");
 
 3709 #elif TEMPLATE_PP_MMX 
 3710     __asm__ 
volatile(
"emms");
 
 /* Debug aid: finds the largest histogram bin, then adds 128 to dst pixels along a curve
  * scaled by max/256+1, and to every second pixel of the first 100 columns in the rows
  * numbered `white` and `black`. */
 3713 #ifdef DEBUG_BRIGHTNESS 
 3717         for(i=0; i<256; i++)
 
 3718             if(yHistogram[i] > max) max=yHistogram[i];
 
 3720         for(i=1; i<256; i++){
 
 3722             int start=yHistogram[i-1]/(max/256+1);
 
 3723             int end=yHistogram[i]/(max/256+1);
 
 3724             int inc= end > start ? 1 : -1;
 
 3725             for(x=start; x!=end+inc; x+=inc)
 
 3726                 dst[ i*dstStride + x]+=128;
 
 3729         for(i=0; i<100; i+=2){
 
 3730             dst[ (white)*dstStride + i]+=128;
 
 3731             dst[ (black)*dstStride + i]+=128;
 
 3741 #undef TEMPLATE_PP_C 
 3742 #undef TEMPLATE_PP_ALTIVEC 
 3743 #undef TEMPLATE_PP_MMX 
 3744 #undef TEMPLATE_PP_MMXEXT 
 3745 #undef TEMPLATE_PP_3DNOW 
 3746 #undef TEMPLATE_PP_SSE2 
static int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
#define AV_LOG_WARNING
Something somehow does not look correct. 
static void transpose_16x8_char_toPackedAlign_altivec(unsigned char *dst, unsigned char *src, int stride)
#define LINEAR_BLEND_DEINT_FILTER
static av_cold int end(AVCodecContext *avctx)
static void transpose_8x16_char_fromPackedAlign_altivec(unsigned char *dst, unsigned char *src, int stride)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory. 
#define LOWPASS5_DEINT_FILTER
static void horizX1Filter(uint8_t *src, int stride, int QP)
Experimental Filter 1 (Horizontal). Will not damage linear gradients. Flat blocks should look like they...
static void linecpy(void *dest, const void *src, int lines, int stride)
#define MEDIAN_DEINT_FILTER
#define CUBIC_IPOL_DEINT_FILTER
static const uint8_t offset[127][2]
int64_t av_rescale(int64_t a, int64_t b, int64_t c)
Rescale a 64-bit integer with rounding to nearest. 
#define LINEAR_IPOL_DEINT_FILTER
#define XMM_CLOBBERS(...)
#define FFMPEG_DEINT_FILTER
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c)
static void error(const char *err)
#define AV_LOG_INFO
Standard information. 
Rational number (pair of numerator and denominator). 
GLint GLenum GLboolean GLsizei stride
static int ref[MAX_W *MAX_W]
static void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
static av_always_inline int diff(const uint32_t a, const uint32_t b)
#define TEMP_NOISE_FILTER
#define LEVEL_FIX
Brightness & Contrast. 
mode
Use these values in ebur128_init (or'ed). 
static void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
#define NAMED_CONSTRAINTS_ADD(...)