92 #include "libavutil/ffversion.h" 
  103     return FFMPEG_CONFIGURATION;
 
  108 #define LICENSE_PREFIX "libpostproc license: " 
  116 #define GET_MODE_BUFFER_SIZE 500 
  117 #define OPTIONS_ARRAY_SIZE 10 
  119 #define TEMP_STRIDE 8 
  122 #if ARCH_X86 && HAVE_INLINE_ASM 
  146     {
"dr", 
"dering",                1, 5, 6, 
DERING},
 
  147     {
"al", 
"autolevels",            0, 1, 2, 
LEVEL_FIX},
 
  156     {
"be", 
"bitexact",              1, 0, 0, 
BITEXACT},
 
  163     "default",      
"hb:a,vb:a,dr:a",
 
  164     "de",           
"hb:a,vb:a,dr:a",
 
  165     "fast",         
"h1:a,v1:a,dr:a",
 
  166     "fa",           
"h1:a,v1:a,dr:a",
 
  167     "ac",           
"ha:a:128:7,va:a,dr:a",
 
  172 #if ARCH_X86 && HAVE_INLINE_ASM 
  /* Four inline-asm wrappers follow, one per x86 prefetch variant named in
   * the asm template string. Presumably %0 is bound to p -- the operand
   * lists and closing lines are not visible in this excerpt. */
  173 static inline void prefetchnta(
const void *p)
 
  175     __asm__ 
volatile(   
"prefetchnta (%0)\n\t" 
  /* Same wrapper shape, template "prefetcht0 (%0)". */
  180 static inline void prefetcht0(
const void *p)
 
  182     __asm__ 
volatile(   
"prefetcht0 (%0)\n\t" 
  /* Same wrapper shape, template "prefetcht1 (%0)". */
  187 static inline void prefetcht1(
const void *p)
 
  189     __asm__ 
volatile(   
"prefetcht1 (%0)\n\t" 
  /* Same wrapper shape, template "prefetcht2 (%0)". */
  194 static inline void prefetcht2(
const void *p)
 
  196     __asm__ 
volatile(   
"prefetcht2 (%0)\n\t" 
  /* A pair of samples counts as "equal" when their difference lies within
   * [-dcOffset, +dcOffset]: adding dcOffset and comparing as unsigned against
   * 2*dcOffset+1 checks both bounds with a single comparison
   * (assumes dcOffset >= 0 -- TODO confirm against its definition). */
  213     const int dcThreshold= dcOffset*2 + 1;
 
  /* One test per adjacent pair src[k], src[k+1] for k = 0..6; each pair that
   * passes adds 1 to numEq. */
  216         numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
 
  217         numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
 
  218         numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
 
  219         numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
 
  220         numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
 
  221         numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
 
  222         numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
 
  /* Same biased unsigned comparison as a two-sided range test: a difference
   * in [-dcOffset, +dcOffset] passes (assumes dcOffset >= 0 -- TODO confirm). */
  236     const int dcThreshold= dcOffset*2 + 1;
 
  /* Compares each of the eight samples src[0..7] with the sample one stride
   * further on (src[k+stride]); each pair that passes adds 1 to numEq. */
  240         numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
 
  241         numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
 
  242         numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
 
  243         numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
 
  244         numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
 
  245         numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
 
  246         numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
 
  247         numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
 
  /* Returns 0 as soon as one of four sample pairs differs by more than 2*QP
   * in either direction: adding 2*QP and comparing as unsigned against 4*QP
   * rejects any difference outside [-2*QP, +2*QP]
   * (assumes QP >= 0 -- TODO confirm). Pairs tested: (0,5), (2,7), (4,1), (6,3). */
  257         if((
unsigned)(src[0] - src[5] + 2*QP) > 4*QP) 
return 0;
 
  259         if((
unsigned)(src[2] - src[7] + 2*QP) > 4*QP) 
return 0;
 
  261         if((
unsigned)(src[4] - src[1] + 2*QP) > 4*QP) 
return 0;
 
  263         if((
unsigned)(src[6] - src[3] + 2*QP) > 4*QP) 
return 0;
 
  /* Stride-based form of the +/-2*QP range test: each line compares two
   * samples in the same column (x, x+1, x+2, x+3) taken from different rows
   * (row pairs 0/5, 2/7, 4/1, 6/3) and returns 0 when their difference falls
   * outside [-2*QP, +2*QP] (assumes QP >= 0 -- TODO confirm). */
  274         if((
unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) 
return 0;
 
  275         if((
unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) 
return 0;
 
  276         if((
unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) 
return 0;
 
  277         if((
unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) 
return 0;
 
  /* Weighted difference (weights 5 and 2) around the dst[3]/dst[4] boundary. */
  304         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 
  /* The correction is only attempted when |middleEnergy| is below 8*c->QP. */
  306         if(
FFABS(middleEnergy) < 8*c->
QP){
 
  /* Half of the step across the boundary. */
  307             const int q=(dst[3] - dst[4])/2;
 
  /* The same 5/2-weighted expression evaluated two samples to the left and
   * two samples to the right of the boundary. */
  308             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
 
  309             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 
  /* d is scaled by FFSIGN(-middleEnergy); the lines that compute d are not
   * visible in this excerpt. */
  315             d*= 
FFSIGN(-middleEnergy);
 
  /* Edge padding values: use the neighbour just outside the 8-sample run
   * (dst[-1] / dst[8]) when it differs from the edge sample by less than
   * c->QP, otherwise repeat the edge sample itself. */
  343         const int first= 
FFABS(dst[-1] - dst[0]) < c->
QP ? dst[-1] : dst[0];
 
  344         const int last= 
FFABS(dst[8] - dst[7]) < c->
QP ? dst[8] : dst[7];
 
  /* Sliding window sums: sums[0] holds seven terms (4*first + dst[0..2]) plus
   * a bias of 4; each following entry drops the oldest term and adds the next
   * one, padding with first on the left and last on the right. */
  347         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 
  348         sums[1] = sums[0] - first  + dst[3];
 
  349         sums[2] = sums[1] - first  + dst[4];
 
  350         sums[3] = sums[2] - first  + dst[5];
 
  351         sums[4] = sums[3] - first  + dst[6];
 
  352         sums[5] = sums[4] - dst[0] + dst[7];
 
  353         sums[6] = sums[5] - dst[1] + last;
 
  354         sums[7] = sums[6] - dst[2] + last;
 
  355         sums[8] = sums[7] - dst[3] + last;
 
  356         sums[9] = sums[8] - dst[4] + last;
 
  /* Each output combines two window sums (7 terms each) with twice the
   * original sample: 16 weights in total, the two +4 biases give a rounding
   * term of 8, and >>4 divides by 16. All sums were computed above, so every
   * dst[k] read here on the right-hand side is still the unfiltered value. */
  358         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 
  359         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 
  360         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 
  361         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 
  362         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 
  363         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 
  364         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 
  365         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 
  /* 256-entry table with static storage duration; the lines below fill
   * entry i (the enclosing loop header is not visible in this excerpt). */
  382     static uint64_t lut[256];
 
  /* Maps i to twice its value when read as a signed byte:
   * 0..127 -> 0..254, 128..255 -> -256..-2. */
  388             int v= i < 128 ? 2*i : 2*(i-256);
 
  /* a..d: low byte of v scaled by 1/16, 3/16, 5/16 and 7/16. */
  397             uint64_t 
a= (v/16)   & 0xFF;
 
  398             uint64_t 
b= (v*3/16) & 0xFF;
 
  399             uint64_t 
c= (v*5/16) & 0xFF;
 
  400             uint64_t d= (7*v/16) & 0xFF;
 
  /* A..C: the byte-wise negation (modulo 256) of a..c. */
  401             uint64_t 
A= (0x100 - 
a)&0xFF;
 
  402             uint64_t 
B= (0x100 - 
b)&0xFF;
 
  403             uint64_t 
C= (0x100 - 
c)&0xFF;
 
  /* NOTE(review): D is derived from c, not d, which breaks the A/B/C pattern
   * above -- looks like a possible copy-paste slip; confirm before changing. */
  404             uint64_t 
D= (0x100 - 
c)&0xFF;
 
  /* Packs the eight bytes a,b,c,d,D,C,B,A from most to least significant. */
  406             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 
  407                        (D<<24) | (C<<16) | (B<<8)  | (A);
 
  /* Differences across three sample pairs: (1,2), (3,4) and (5,6). */
  413         int a= src[1] - src[2];
 
  414         int b= src[3] - src[4];
 
  415         int c= src[5] - src[6];
 
  /* Two-sided range test via a biased unsigned comparison: a difference in
   * [-dcOffset, +dcOffset] passes (assumes dcOffset >= 0 -- TODO confirm). */
  442     const int dcThreshold= dcOffset*2 + 1;
 
  /* Nine adjacent pairs, from src[-1*step]/src[0] up to src[7*step]/src[8*step];
   * each pair within the range adds 1 to numEq. */
  448         numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
 
  449         numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
 
  450         numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
 
  451         numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
 
  452         numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
 
  453         numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
 
  454         numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
 
  455         numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
 
  456         numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
 
  /* Running min/max over the samples, examined pair-wise: the larger sample
   * of a pair is checked against max, the smaller against min. The
   * initialisation and the else-lines between these fragments are not
   * visible in this excerpt. */
  460             if(src[0] > src[step]){
 
  468                 if(src[x*step] > src[(x+1)*step]){
 
  469                         if(src[x    *step] > max) max= src[ x   *step];
 
  470                         if(src[(x+1)*step] < 
min) min= src[(x+1)*step];
 
  472                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
 
  473                         if(src[ x   *step] < min) min= src[ x   *step];
 
  /* Edge padding: take the sample just outside the run when it is closer
   * than QP to the edge sample, otherwise repeat the edge sample. */
  477                 const int first= 
FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
 
  478                 const int last= 
FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 
  /* Sliding sums of seven terms each (plus a bias of 4), padded with
   * first/last at the ends. */
  481                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
 
  482                 sums[1] = sums[0] - first       + src[3*step];
 
  483                 sums[2] = sums[1] - first       + src[4*step];
 
  484                 sums[3] = sums[2] - first       + src[5*step];
 
  485                 sums[4] = sums[3] - first       + src[6*step];
 
  486                 sums[5] = sums[4] - src[0*step] + src[7*step];
 
  487                 sums[6] = sums[5] - src[1*step] + last;
 
  488                 sums[7] = sums[6] - src[2*step] + last;
 
  489                 sums[8] = sums[7] - src[3*step] + last;
 
  490                 sums[9] = sums[8] - src[4*step] + last;
 
  /* Output k = (sums[k] + sums[k+2] + 2*src[k*step]) / 16, with rounding
   * supplied by the two +4 biases. */
  502                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
 
  503                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
 
  504                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
 
  505                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
 
  506                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
 
  507                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
 
  508                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
 
  509                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
 
  /* 5/2-weighted difference around the src[3*step]/src[4*step] boundary. */
  512             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 
  /* The correction is only attempted when |middleEnergy| is below 8*QP. */
  514             if(
FFABS(middleEnergy) < 8*
QP){
 
  515                 const int q=(src[3*step] - src[4*step])/2;
 
  /* Same weighted expression two samples to the left / right of the boundary. */
  516                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
 
  517                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 
  /* d is scaled by FFSIGN(-middleEnergy); its computation is not visible here. */
  523                 d*= 
FFSIGN(-middleEnergy);
 
  /* Replaces d by a fixed magnitude of 32 with the opposite sign, then
   * applies it to the two boundary samples with clipping to 0..255. The
   * condition guarding these lines is not visible in this excerpt. */
  534                     d= (d < 0) ? 32 : -32;
 
  535                     src[3*step]= av_clip_uint8(src[3*step] - d);
 
  536                     src[4*step]= av_clip_uint8(src[4*step] + d);
 
  557 #define TEMPLATE_PP_C 1 
  561 #   define TEMPLATE_PP_ALTIVEC 1 
  566 #if ARCH_X86 && HAVE_INLINE_ASM 
  567 #    if CONFIG_RUNTIME_CPUDETECT 
  568 #        define TEMPLATE_PP_MMX 1 
  570 #        define TEMPLATE_PP_MMXEXT 1 
  572 #        define TEMPLATE_PP_3DNOW 1 
  574 #        define TEMPLATE_PP_SSE2 1 
  577 #        if HAVE_SSE2_INLINE 
  578 #            define TEMPLATE_PP_SSE2 1 
  580 #        elif HAVE_MMXEXT_INLINE 
  581 #            define TEMPLATE_PP_MMXEXT 1 
  583 #        elif HAVE_AMD3DNOW_INLINE 
  584 #            define TEMPLATE_PP_3DNOW 1 
  586 #        elif HAVE_MMX_INLINE 
  587 #            define TEMPLATE_PP_MMX 1 
  /* Function pointer for the implementation to run; defaults to the C version. */
  599     pp_fn pp = postProcess_C;
 
  /* The assignments below select a different implementation depending on the
   * build configuration macros; any run-time conditions surrounding them are
   * not visible in this excerpt. */
  605 #if CONFIG_RUNTIME_CPUDETECT 
  606 #if ARCH_X86 && HAVE_INLINE_ASM 
  617         pp = postProcess_SSE2;
 
  618 #elif   HAVE_MMXEXT_INLINE 
  619         pp = postProcess_MMX2;
 
  620 #elif HAVE_AMD3DNOW_INLINE 
  621         pp = postProcess_3DNow;
 
  622 #elif HAVE_MMX_INLINE 
  623         pp = postProcess_MMX;
 
  625         pp = postProcess_altivec;
 
  /* Forwards every argument unchanged to the selected implementation. */
  630     pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
  636 "Available postprocessing filters:\n" 
  638 "short  long name       short   long option     Description\n" 
  639 "*      *               a       autoq           CPU power dependent enabler\n" 
  640 "                       c       chrom           chrominance filtering enabled\n" 
  641 "                       y       nochrom         chrominance filtering disabled\n" 
  642 "                       n       noluma          luma filtering disabled\n" 
  643 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n" 
  644 "       1. difference factor: default=32, higher -> more deblocking\n" 
  645 "       2. flatness threshold: default=39, lower -> more deblocking\n" 
  646 "                       the h & v deblocking filters share these\n" 
  647 "                       so you can't set different thresholds for h / v\n" 
  648 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n" 
  649 "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n" 
  650 "va     vadeblock       (2 threshold)           vertical deblocking filter\n" 
  651 "h1     x1hdeblock                              experimental h deblock filter 1\n" 
  652 "v1     x1vdeblock                              experimental v deblock filter 1\n" 
  653 "dr     dering                                  deringing filter\n" 
  654 "al     autolevels                              automatic brightness / contrast\n" 
  655 "                       f        fullyrange     stretch luminance to (0..255)\n" 
  656 "lb     linblenddeint                           linear blend deinterlacer\n" 
  657 "li     linipoldeint                            linear interpolating deinterlace\n" 
  658 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n" 
  659 "md     mediandeint                             median deinterlacer\n" 
  660 "fd     ffmpegdeint                             ffmpeg deinterlacer\n" 
  661 "l5     lowpass5                                FIR lowpass deinterlacer\n" 
  662 "de     default                                 hb:a,vb:a,dr:a\n" 
  663 "fa     fast                                    h1:a,v1:a,dr:a\n" 
  664 "ac                                             ha:a:128:7,va:a,dr:a\n" 
  665 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n" 
  666 "                     1. <= 2. <= 3.            larger -> stronger filtering\n" 
  667 "fq     forceQuant      <quantizer>             force quantizer\n" 
  669 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n" 
  670 "long form example:\n" 
  671 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n" 
  672 "short form example:\n" 
  673 "vb:a/hb:a/lb                                   de,-vb\n" 
  /* Filters are separated by ',' or '/', options within a filter by ':' or '|'. */
  683     static const char filterDelimiters[] = 
",/";
 
  684     static const char optionDelimiters[] = 
":|";
 
  /* Special name "help": walks pp_help one newline-terminated line at a time. */
  693     if (!strcmp(name, 
"help")) {
 
  695         for (p = 
pp_help; strchr(p, 
'\n'); p = strchr(p, 
'\n') + 1) {
 
  724         const char *filterName;
 
  /* Counts options not recognised so far; added to the error count below. */
  732         int numOfUnknownOptions=0;
 
  /* Take the next filter token; stop when none is left. */
  736         filterToken= 
av_strtok(p, filterDelimiters, &tokstate);
 
  737         if(!filterToken) 
break;
 
  /* Advance p past the token and one following character. */
  738         p+= strlen(filterToken) + 1; 
 
  /* The first option-delimited piece of the token is the filter name. */
  739         filterName= 
av_strtok(filterToken, optionDelimiters, &tokstate);
 
  /* A leading '-' on the name is handled specially (body not visible here). */
  746         if(*filterName == 
'-'){
 
  /* Generic options, long and short spelling: autoq/a, nochrom/y, chrom/c,
   * noluma/n. Anything else is stored in options[] for the filter-specific
   * handling further down. */
  756             if(!strcmp(
"autoq", option) || !strcmp(
"a", option)) q= quality;
 
  757             else if(!strcmp(
"nochrom", option) || !strcmp(
"y", option)) chrom=0;
 
  758             else if(!strcmp(
"chrom", option) || !strcmp(
"c", option)) chrom=1;
 
  759             else if(!strcmp(
"noluma", option) || !strcmp(
"n", option)) luma=0;
 
  761                 options[numOfUnknownOptions] = 
option;
 
  762                 numOfUnknownOptions++;
 
  /* NULL-terminate the list so the loops below can stop on options[o]. */
  766         options[numOfUnknownOptions] = 
NULL;
 
  /* Fragment of an in-place string expansion: the tail starting at p
   * (plen+1 bytes, i.e. including the terminator) is moved newlen bytes
   * further on. The surrounding logic is not visible in this excerpt. */
  778                 spaceLeft= p - temp + plen;
 
  783                 memmove(p + newlen, p, plen+1);
 
  /* A filter matches on either its long or its short name. */
  790             if(   !strcmp(filters[i].longName, filterName)
 
  791                || !strcmp(filters[i].shortName, filterName)){
 
  /* Luma part requires q to reach the filter's minimum and luma not disabled. */
  798                 if(q >= filters[i].minLumQuality && luma)
 
  /* Chroma part: explicitly enabled (1), or left at -1 with the filter's
   * chromDefault set; additionally q must reach minChromQuality. */
  800                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 
  801                     if(q >= filters[i].minChromQuality)
 
  /* Filter-specific option "fullyrange"/"f": recognising it removes one from
   * the unknown-option count. */
  808                     for(o=0; options[o]; o++){
 
  809                         if(  !strcmp(options[o],
"fullyrange")
 
  810                            ||!strcmp(options[o],
"f")){
 
  813                             numOfUnknownOptions--;
 
  /* Numeric options: strtol with base 0; tail != options[o] means at least
   * one character was consumed. Stops once numOfNoises reaches 3. */
  822                     for(o=0; options[o]; o++){
 
  825                             strtol(options[o], &tail, 0);
 
  826                         if(tail!=options[o]){
 
  828                             numOfUnknownOptions--;
 
  829                             if(numOfNoises >= 3) 
break;
 
  /* At most two numeric options; stops at the first non-numeric one. */
  837                     for(o=0; options[o] && o<2; o++){
 
  839                         int val= strtol(options[o], &tail, 0);
 
  840                         if(tail==options[o]) 
break;
 
  842                         numOfUnknownOptions--;
 
  /* At most one numeric option. */
  851                     for(o=0; options[o] && o<1; o++){
 
  853                         int val= strtol(options[o], &tail, 0);
 
  854                         if(tail==options[o]) 
break;
 
  856                         numOfUnknownOptions--;
 
  /* An unmatched filter name and every option still unknown each add to the
   * error count in ppMode. */
  862         if(!filterNameOk) ppMode->
error++;
 
  863         ppMode->
error += numOfUnknownOptions;
 
  885     int mbWidth = (width+15)>>4;
 
  886     int mbHeight= (height+15)>>4;
 
  920     int qpStride= (width+15)/16 + 2; 
 
  974                      uint8_t * dst[3], 
const int dstStride[3],
 
  977                      pp_mode *vm,  
void *vc, 
int pict_type)
 
  /* Picture size in 16x16 units, rounded up. */
  979     int mbWidth = (width+15)>>4;
 
  980     int mbHeight= (height+15)>>4;
 
  /* Magnitude of the caller's QP stride. */
  984     int absQPStride = 
FFABS(QPStride);
 
  /* Both strides are forced to 0 here; the guarding condition is not visible
   * in this excerpt. */
  995         absQPStride = QPStride = 0;
 
  /* Fills c->stdQPTable from QP_store four bytes at a time through uint32_t
   * accesses: the word is shifted right by one and masked with 0x7F per byte,
   * which halves every byte independently. The second loop starts at i*4 and
   * covers the remaining count%4 entries (its body is not visible here).
   * NOTE(review): the uint32_t casts presume suitably aligned buffers -- confirm. */
 1004         const int count= 
FFMAX(mbHeight * absQPStride, mbWidth);
 
 1005         for(i=0; i<(count>>2); i++){
 
 1006             ((uint32_t*)c->
stdQPTable)[i] = (((
const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
 
 1008         for(i<<=2; i<
count; i++){
 
 1012         QPStride= absQPStride;
 
  /* Loop over every 16x16 unit; the loop body is not visible in this excerpt. */
 1017         for(y=0; y<mbHeight; y++){
 
 1018             for(x=0; x<mbWidth; x++){
 
  /* Runs unless the low three bits of pict_type equal 3 (presumably the
   * B-picture type, given the table name nonBQPTable -- TODO confirm). */
 1026     if((pict_type&7)!=3){
 
  /* Word-wise copy into c->nonBQPTable keeping the low 6 bits of each byte,
   * followed by a tail loop for the remaining entries. */
 1029             const int count= 
FFMAX(mbHeight * QPStride, mbWidth);
 
 1030             for(i=0; i<(count>>2); i++){
 
 1031                 ((uint32_t*)c->
nonBQPTable)[i] = ((
const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
 
 1033             for(i<<=2; i<
count; i++){
 
  /* Byte-wise alternative: the source is indexed with QPStride, the
   * destination with absQPStride. */
 1038             for(i=0; i<mbHeight; i++) {
 
 1039                 for(j=0; j<absQPStride; j++) {
 
 1040                     c->
nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
 
  /* Plane 0 is always processed; the ninth argument (0) is the plane selector. */
 1049     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
 
 1050                 width, height, QP_store, QPStride, 0, mode, c);
 
  /* Tests whether any of the four plane 1/2 pointers is NULL; the statement
   * guarded by this test is not visible in this excerpt. */
 1052     if (!(src[1] && src[2] && dst[1] && dst[2]))
 
  /* Planes 1 and 2 are processed with plane selectors 1 and 2. */
 1059         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
 
 1060                     width, height, QP_store, QPStride, 1, mode, c);
 
 1061         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
 
 1062                     width, height, QP_store, QPStride, 2, mode, c);
 
  /* Otherwise the planes are copied unfiltered: via linecpy when source and
   * destination strides match, else row by row with memcpy of width bytes. */
 1064     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
 
 1065         linecpy(dst[1], src[1], height, srcStride[1]);
 
 1066         linecpy(dst[2], src[2], height, srcStride[2]);
 
 1070             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
 
 1071             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);