00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include <inttypes.h>
00079 #include <stdio.h>
00080 #include <stdlib.h>
00081 #include <string.h>
00082
00083
00084
00085
00086
00087 #include "postprocess.h"
00088 #include "postprocess_internal.h"
00089
00090 unsigned postproc_version(void)
00091 {
00092 return LIBPOSTPROC_VERSION_INT;
00093 }
00094
00095 #if HAVE_ALTIVEC_H
00096 #include <altivec.h>
00097 #endif
00098
/* Size in bytes of the scratch buffer into which
 * pp_get_mode_by_name_and_quality() copies the filter string. */
00099 #define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array of unrecognised option strings built by
 * pp_get_mode_by_name_and_quality(); one slot is used for the NULL terminator. */
00100 #define OPTIONS_ARRAY_SIZE 10
/* Loop bound (rows / columns) used by the block filters in this file. */
00101 #define BLOCK_SIZE 8
/* Not referenced in the visible part of this file; presumably used by the
 * included templates -- TODO confirm. */
00102 #define TEMP_STRIDE 8
00103
00104
/* 64-bit constants declared through DECLARE_ASM_CONST (macro defined outside
 * this file).  The wNN values repeat a 16-bit word four times, the bNN values
 * repeat one byte eight times.  They are not referenced by the C code visible
 * here; presumably they are operands for the inline asm in the included
 * templates -- TODO confirm. */
00105 #if ARCH_X86
00106 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
00107 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
00108 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
00109 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
00110 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
00111 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
00112 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
00113 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
00114 #endif
00115
/* Declared on every architecture; not referenced in the visible part of this
 * file (presumably the dering filter in the templates uses it -- TODO confirm). */
00116 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00117
00118
/* Table of the filters pp_get_mode_by_name_and_quality() recognises.  The
 * parser compares a filter name against both shortName and longName and then
 * consults chromDefault, minLumQuality, minChromQuality and mask.
 * Column order is presumably
 *   shortName, longName, chromDefault, minLumQuality, minChromQuality, mask
 * -- confirm against the struct PPFilter declaration.
 * The entry with a NULL shortName terminates the table. */
00119 static struct PPFilter filters[]=
00120 {
00121 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
00122 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
00123
00124
00125 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
00126 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
00127 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
00128 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
00129 {"dr", "dering", 1, 5, 6, DERING},
00130 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
00131 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00132 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00133 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00134 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
00135 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
00136 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
00137 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
00138 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
00139 {NULL, NULL,0,0,0,0}
00140 };
00141
/* Alias table used by pp_get_mode_by_name_and_quality(): entries come in
 * pairs, element 2*i is an alias name and element 2*i+1 is the filter string
 * that is spliced into the input in its place.  A NULL in an alias slot ends
 * the table. */
00142 static const char *replaceTable[]=
00143 {
00144 "default", "hb:a,vb:a,dr:a",
00145 "de", "hb:a,vb:a,dr:a",
00146 "fast", "h1:a,v1:a,dr:a",
00147 "fa", "h1:a,v1:a,dr:a",
00148 "ac", "ha:a:128:7,va:a,dr:a",
00149 NULL
00150 };
00151
00152
/* x86-only helpers.  Each one emits a single prefetch instruction for the
 * address held in p (passed to the asm as a register input); the four
 * variants differ only in the instruction mnemonic.  None of them has an
 * output operand. */
00153 #if ARCH_X86
/* Emit "prefetchnta" for address p. */
00154 static inline void prefetchnta(void *p)
00155 {
00156 __asm__ volatile( "prefetchnta (%0)\n\t"
00157 : : "r" (p)
00158 );
00159 }
00160
/* Emit "prefetcht0" for address p. */
00161 static inline void prefetcht0(void *p)
00162 {
00163 __asm__ volatile( "prefetcht0 (%0)\n\t"
00164 : : "r" (p)
00165 );
00166 }
00167
/* Emit "prefetcht1" for address p. */
00168 static inline void prefetcht1(void *p)
00169 {
00170 __asm__ volatile( "prefetcht1 (%0)\n\t"
00171 : : "r" (p)
00172 );
00173 }
00174
/* Emit "prefetcht2" for address p. */
00175 static inline void prefetcht2(void *p)
00176 {
00177 __asm__ volatile( "prefetcht2 (%0)\n\t"
00178 : : "r" (p)
00179 );
00180 }
00181 #endif
00182
00183
00184
00185
00189 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00190 {
00191 int numEq= 0;
00192 int y;
00193 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00194 const int dcThreshold= dcOffset*2 + 1;
00195
00196 for(y=0; y<BLOCK_SIZE; y++){
00197 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00198 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00199 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00200 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00201 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00202 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00203 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00204 src+= stride;
00205 }
00206 return numEq > c->ppMode.flatnessThreshold;
00207 }
00208
00212 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00213 {
00214 int numEq= 0;
00215 int y;
00216 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00217 const int dcThreshold= dcOffset*2 + 1;
00218
00219 src+= stride*4;
00220 for(y=0; y<BLOCK_SIZE-1; y++){
00221 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00222 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00223 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00224 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00225 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00226 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00227 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00228 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00229 src+= stride;
00230 }
00231 return numEq > c->ppMode.flatnessThreshold;
00232 }
00233
/**
 * Sampled horizontal range check over eight rows.
 *
 * One pixel pair is sampled per row, using the column pairs
 * (0,5), (2,7), (4,1), (6,3) for rows 0..3 and again for rows 4..7.
 * The check fails as soon as (unsigned)(a - b + 2*QP) > 4*QP for a sampled
 * pair (a, b).
 *
 * @param src    first pixel of the first row
 * @param stride offset between consecutive rows
 * @param QP     quantiser used to scale the tolerance
 * @return 1 if every sampled pair passes, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int colA[4] = { 0, 2, 4, 6 };
    static const int colB[4] = { 5, 7, 1, 3 };
    const uint8_t *row = src;
    int line;

    for (line = 0; line < 8; line++, row += stride) {
        const int a = row[colA[line & 3]];
        const int b = row[colB[line & 3]];

        if ((unsigned)(a - b + 2 * QP) > 4 * QP)
            return 0;
    }

    return 1;
}
00256
00257 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00258 {
00259 #if 1
00260 #if 1
00261 int x;
00262 src+= stride*4;
00263 for(x=0; x<BLOCK_SIZE; x+=4){
00264 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
00265 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00266 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00267 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00268 }
00269 #else
00270 int x;
00271 src+= stride*3;
00272 for(x=0; x<BLOCK_SIZE; x++){
00273 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
00274 }
00275 #endif
00276 return 1;
00277 #else
00278 int x;
00279 src+= stride*4;
00280 for(x=0; x<BLOCK_SIZE; x++){
00281 int min=255;
00282 int max=0;
00283 int y;
00284 for(y=0; y<8; y++){
00285 int v= src[x + y*stride];
00286 if(v>max) max=v;
00287 if(v<min) min=v;
00288 }
00289 if(max-min > 2*QP) return 0;
00290 }
00291 return 1;
00292 #endif
00293 }
00294
00295 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00296 {
00297 if( isHorizDC_C(src, stride, c) ){
00298 if( isHorizMinMaxOk_C(src, stride, c->QP) )
00299 return 1;
00300 else
00301 return 0;
00302 }else{
00303 return 2;
00304 }
00305 }
00306
00307 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00308 {
00309 if( isVertDC_C(src, stride, c) ){
00310 if( isVertMinMaxOk_C(src, stride, c->QP) )
00311 return 1;
00312 else
00313 return 0;
00314 }else{
00315 return 2;
00316 }
00317 }
00318
00319 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00320 {
00321 int y;
00322 for(y=0; y<BLOCK_SIZE; y++){
00323 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00324
00325 if(FFABS(middleEnergy) < 8*c->QP){
00326 const int q=(dst[3] - dst[4])/2;
00327 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00328 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00329
00330 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00331 d= FFMAX(d, 0);
00332
00333 d= (5*d + 32) >> 6;
00334 d*= FFSIGN(-middleEnergy);
00335
00336 if(q>0)
00337 {
00338 d= d<0 ? 0 : d;
00339 d= d>q ? q : d;
00340 }
00341 else
00342 {
00343 d= d>0 ? 0 : d;
00344 d= d<q ? q : d;
00345 }
00346
00347 dst[3]-= d;
00348 dst[4]+= d;
00349 }
00350 dst+= stride;
00351 }
00352 }
00353
00358 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00359 {
00360 int y;
00361 for(y=0; y<BLOCK_SIZE; y++){
00362 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00363 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00364
00365 int sums[10];
00366 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00367 sums[1] = sums[0] - first + dst[3];
00368 sums[2] = sums[1] - first + dst[4];
00369 sums[3] = sums[2] - first + dst[5];
00370 sums[4] = sums[3] - first + dst[6];
00371 sums[5] = sums[4] - dst[0] + dst[7];
00372 sums[6] = sums[5] - dst[1] + last;
00373 sums[7] = sums[6] - dst[2] + last;
00374 sums[8] = sums[7] - dst[3] + last;
00375 sums[9] = sums[8] - dst[4] + last;
00376
00377 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00378 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00379 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00380 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00381 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00382 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00383 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00384 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00385
00386 dst+= stride;
00387 }
00388 }
00389
00398 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00399 {
00400 int y;
00401 static uint64_t *lut= NULL;
00402 if(lut==NULL)
00403 {
00404 int i;
00405 lut = av_malloc(256*8);
00406 for(i=0; i<256; i++)
00407 {
00408 int v= i < 128 ? 2*i : 2*(i-256);
00409
00410
00411
00412
00413
00414
00415
00416
00417 uint64_t a= (v/16) & 0xFF;
00418 uint64_t b= (v*3/16) & 0xFF;
00419 uint64_t c= (v*5/16) & 0xFF;
00420 uint64_t d= (7*v/16) & 0xFF;
00421 uint64_t A= (0x100 - a)&0xFF;
00422 uint64_t B= (0x100 - b)&0xFF;
00423 uint64_t C= (0x100 - c)&0xFF;
00424 uint64_t D= (0x100 - c)&0xFF;
00425
00426 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00427 (D<<24) | (C<<16) | (B<<8) | (A);
00428
00429 }
00430 }
00431
00432 for(y=0; y<BLOCK_SIZE; y++){
00433 int a= src[1] - src[2];
00434 int b= src[3] - src[4];
00435 int c= src[5] - src[6];
00436
00437 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00438
00439 if(d < QP){
00440 int v = d * FFSIGN(-b);
00441
00442 src[1] +=v/8;
00443 src[2] +=v/4;
00444 src[3] +=3*v/8;
00445 src[4] -=3*v/8;
00446 src[5] -=v/4;
00447 src[6] -=v/8;
00448 }
00449 src+=stride;
00450 }
00451 }
00452
00456 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00457 int y;
00458 const int QP= c->QP;
00459 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00460 const int dcThreshold= dcOffset*2 + 1;
00461
00462 src+= step*4;
00463 for(y=0; y<8; y++){
00464 int numEq= 0;
00465
00466 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00467 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00468 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00469 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00470 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00471 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00472 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00473 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00474 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00475 if(numEq > c->ppMode.flatnessThreshold){
00476 int min, max, x;
00477
00478 if(src[0] > src[step]){
00479 max= src[0];
00480 min= src[step];
00481 }else{
00482 max= src[step];
00483 min= src[0];
00484 }
00485 for(x=2; x<8; x+=2){
00486 if(src[x*step] > src[(x+1)*step]){
00487 if(src[x *step] > max) max= src[ x *step];
00488 if(src[(x+1)*step] < min) min= src[(x+1)*step];
00489 }else{
00490 if(src[(x+1)*step] > max) max= src[(x+1)*step];
00491 if(src[ x *step] < min) min= src[ x *step];
00492 }
00493 }
00494 if(max-min < 2*QP){
00495 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00496 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00497
00498 int sums[10];
00499 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00500 sums[1] = sums[0] - first + src[3*step];
00501 sums[2] = sums[1] - first + src[4*step];
00502 sums[3] = sums[2] - first + src[5*step];
00503 sums[4] = sums[3] - first + src[6*step];
00504 sums[5] = sums[4] - src[0*step] + src[7*step];
00505 sums[6] = sums[5] - src[1*step] + last;
00506 sums[7] = sums[6] - src[2*step] + last;
00507 sums[8] = sums[7] - src[3*step] + last;
00508 sums[9] = sums[8] - src[4*step] + last;
00509
00510 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00511 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00512 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00513 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00514 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00515 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00516 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00517 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00518 }
00519 }else{
00520 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00521
00522 if(FFABS(middleEnergy) < 8*QP){
00523 const int q=(src[3*step] - src[4*step])/2;
00524 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00525 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00526
00527 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00528 d= FFMAX(d, 0);
00529
00530 d= (5*d + 32) >> 6;
00531 d*= FFSIGN(-middleEnergy);
00532
00533 if(q>0){
00534 d= d<0 ? 0 : d;
00535 d= d>q ? q : d;
00536 }else{
00537 d= d>0 ? 0 : d;
00538 d= d<q ? q : d;
00539 }
00540
00541 src[3*step]-= d;
00542 src[4*step]+= d;
00543 }
00544 }
00545
00546 src += stride;
00547 }
00548
00549
00550
00551
00552
00553 }
00554
00555
00556
/* Template instantiation.
 * Step 1: decide which variants to build, based on the HAVE_xxx flags as they
 * are at this point and on CONFIG_RUNTIME_CPUDETECT (which enables every
 * variant that applies to the architecture).
 * Step 2: reset all HAVE_xxx flags to 0.
 * Step 3: for each selected variant, set the flags it needs, define RENAME()
 * to append the variant suffix, and include the template once.
 * The order matters: later sections rely on the flag values left behind by
 * earlier ones, and the HAVE_xxx values after this block are those of the
 * last variant that was compiled. */

/* Plain C is built when neither MMX nor AltiVec is available, or when the CPU
 * is detected at run time. */
00557 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
00558 #define COMPILE_C
00559 #endif
00560
00561 #if HAVE_ALTIVEC
00562 #define COMPILE_ALTIVEC
00563 #endif //HAVE_ALTIVEC
00564
00565 #if ARCH_X86
00566
00567 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00568 #define COMPILE_MMX
00569 #endif
00570
00571 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
00572 #define COMPILE_MMX2
00573 #endif
00574
00575 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00576 #define COMPILE_3DNOW
00577 #endif
00578 #endif
00579
/* Start every variant from "no SIMD"; each section below turns on only what
 * it needs. */
00580 #undef HAVE_MMX
00581 #define HAVE_MMX 0
00582 #undef HAVE_MMX2
00583 #define HAVE_MMX2 0
00584 #undef HAVE_AMD3DNOW
00585 #define HAVE_AMD3DNOW 0
00586 #undef HAVE_ALTIVEC
00587 #define HAVE_ALTIVEC 0
00588
/* C variant: symbols get the suffix _C. */
00589 #ifdef COMPILE_C
00590 #define RENAME(a) a ## _C
00591 #include "postprocess_template.c"
00592 #endif
00593
/* AltiVec variant: symbols get the suffix _altivec; the AltiVec-specific
 * template is included before the generic one. */
00594 #ifdef COMPILE_ALTIVEC
00595 #undef RENAME
00596 #undef HAVE_ALTIVEC
00597 #define HAVE_ALTIVEC 1
00598 #define RENAME(a) a ## _altivec
00599 #include "postprocess_altivec_template.c"
00600 #include "postprocess_template.c"
00601 #endif
00602
00603
/* MMX variant: symbols get the suffix _MMX. */
00604 #ifdef COMPILE_MMX
00605 #undef RENAME
00606 #undef HAVE_MMX
00607 #define HAVE_MMX 1
00608 #define RENAME(a) a ## _MMX
00609 #include "postprocess_template.c"
00610 #endif
00611
00612
/* MMX2 variant (MMX and MMX2 both on): symbols get the suffix _MMX2. */
00613 #ifdef COMPILE_MMX2
00614 #undef RENAME
00615 #undef HAVE_MMX
00616 #undef HAVE_MMX2
00617 #define HAVE_MMX 1
00618 #define HAVE_MMX2 1
00619 #define RENAME(a) a ## _MMX2
00620 #include "postprocess_template.c"
00621 #endif
00622
00623
/* 3DNow! variant (MMX and 3DNow on, MMX2 off): symbols get the suffix _3DNow. */
00624 #ifdef COMPILE_3DNOW
00625 #undef RENAME
00626 #undef HAVE_MMX
00627 #undef HAVE_MMX2
00628 #undef HAVE_AMD3DNOW
00629 #define HAVE_MMX 1
00630 #define HAVE_MMX2 0
00631 #define HAVE_AMD3DNOW 1
00632 #define RENAME(a) a ## _3DNow
00633 #include "postprocess_template.c"
00634 #endif
00635
00636
00637
/* Process one plane by forwarding to one of the postProcess_<variant>()
 * functions generated from the template.
 *
 * The mode pointed to by vm is first copied into the context (c->ppMode).
 * With CONFIG_RUNTIME_CPUDETECT the variant is chosen from c->cpuCaps at run
 * time (MMX2, then 3DNow, then MMX, then C on x86; AltiVec or C elsewhere);
 * without it the choice is fixed at compile time by the HAVE_xxx flags.
 * All other arguments are passed through unchanged. */
00638 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00639 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00640 {
00641 PPContext *c= (PPContext *)vc;
00642 PPMode *ppMode= (PPMode *)vm;
/* struct copy: later changes to *vm do not affect the context */
00643 c->ppMode= *ppMode;
00644
00645
00646
00647
00648 #if CONFIG_RUNTIME_CPUDETECT
00649 #if ARCH_X86
00650
00651 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00652 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00653 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00654 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00655 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00656 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00657 else
00658 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00659 #else
00660 #if HAVE_ALTIVEC
00661 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00662 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00663 else
00664 #endif
00665 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00666 #endif
00667 #else //CONFIG_RUNTIME_CPUDETECT
00668 #if HAVE_MMX2
00669 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00670 #elif HAVE_AMD3DNOW
00671 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00672 #elif HAVE_MMX
00673 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00674 #elif HAVE_ALTIVEC
00675 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00676 #else
00677 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00678 #endif
00679 #endif
00680 }
00681
00682
00683
00684
00685
00686
/* Help text describing the filter string accepted by
 * pp_get_mode_by_name_and_quality().  The filter and alias names listed here
 * must match the entries of the filters[] and replaceTable[] tables exactly,
 * because the parser compares names with strcmp() (case-sensitive).
 * Before major version 52 the symbol is a pointer, afterwards an array. */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00734
00735 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00736 {
00737 char temp[GET_MODE_BUFFER_SIZE];
00738 char *p= temp;
00739 static const char filterDelimiters[] = ",/";
00740 static const char optionDelimiters[] = ":";
00741 struct PPMode *ppMode;
00742 char *filterToken;
00743
00744 ppMode= av_malloc(sizeof(PPMode));
00745
00746 ppMode->lumMode= 0;
00747 ppMode->chromMode= 0;
00748 ppMode->maxTmpNoise[0]= 700;
00749 ppMode->maxTmpNoise[1]= 1500;
00750 ppMode->maxTmpNoise[2]= 3000;
00751 ppMode->maxAllowedY= 234;
00752 ppMode->minAllowedY= 16;
00753 ppMode->baseDcDiff= 256/8;
00754 ppMode->flatnessThreshold= 56-16-1;
00755 ppMode->maxClippedThreshold= 0.01;
00756 ppMode->error=0;
00757
00758 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
00759
00760 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00761
00762 for(;;){
00763 char *filterName;
00764 int q= 1000000;
00765 int chrom=-1;
00766 int luma=-1;
00767 char *option;
00768 char *options[OPTIONS_ARRAY_SIZE];
00769 int i;
00770 int filterNameOk=0;
00771 int numOfUnknownOptions=0;
00772 int enable=1;
00773
00774 filterToken= strtok(p, filterDelimiters);
00775 if(filterToken == NULL) break;
00776 p+= strlen(filterToken) + 1;
00777 filterName= strtok(filterToken, optionDelimiters);
00778 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00779
00780 if(*filterName == '-'){
00781 enable=0;
00782 filterName++;
00783 }
00784
00785 for(;;){
00786 option= strtok(NULL, optionDelimiters);
00787 if(option == NULL) break;
00788
00789 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00790 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00791 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00792 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00793 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00794 else{
00795 options[numOfUnknownOptions] = option;
00796 numOfUnknownOptions++;
00797 }
00798 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00799 }
00800 options[numOfUnknownOptions] = NULL;
00801
00802
00803 for(i=0; replaceTable[2*i]!=NULL; i++){
00804 if(!strcmp(replaceTable[2*i], filterName)){
00805 int newlen= strlen(replaceTable[2*i + 1]);
00806 int plen;
00807 int spaceLeft;
00808
00809 if(p==NULL) p= temp, *p=0;
00810 else p--, *p=',';
00811
00812 plen= strlen(p);
00813 spaceLeft= p - temp + plen;
00814 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
00815 ppMode->error++;
00816 break;
00817 }
00818 memmove(p + newlen, p, plen+1);
00819 memcpy(p, replaceTable[2*i + 1], newlen);
00820 filterNameOk=1;
00821 }
00822 }
00823
00824 for(i=0; filters[i].shortName!=NULL; i++){
00825 if( !strcmp(filters[i].longName, filterName)
00826 || !strcmp(filters[i].shortName, filterName)){
00827 ppMode->lumMode &= ~filters[i].mask;
00828 ppMode->chromMode &= ~filters[i].mask;
00829
00830 filterNameOk=1;
00831 if(!enable) break;
00832
00833 if(q >= filters[i].minLumQuality && luma)
00834 ppMode->lumMode|= filters[i].mask;
00835 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00836 if(q >= filters[i].minChromQuality)
00837 ppMode->chromMode|= filters[i].mask;
00838
00839 if(filters[i].mask == LEVEL_FIX){
00840 int o;
00841 ppMode->minAllowedY= 16;
00842 ppMode->maxAllowedY= 234;
00843 for(o=0; options[o]!=NULL; o++){
00844 if( !strcmp(options[o],"fullyrange")
00845 ||!strcmp(options[o],"f")){
00846 ppMode->minAllowedY= 0;
00847 ppMode->maxAllowedY= 255;
00848 numOfUnknownOptions--;
00849 }
00850 }
00851 }
00852 else if(filters[i].mask == TEMP_NOISE_FILTER)
00853 {
00854 int o;
00855 int numOfNoises=0;
00856
00857 for(o=0; options[o]!=NULL; o++){
00858 char *tail;
00859 ppMode->maxTmpNoise[numOfNoises]=
00860 strtol(options[o], &tail, 0);
00861 if(tail!=options[o]){
00862 numOfNoises++;
00863 numOfUnknownOptions--;
00864 if(numOfNoises >= 3) break;
00865 }
00866 }
00867 }
00868 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
00869 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00870 int o;
00871
00872 for(o=0; options[o]!=NULL && o<2; o++){
00873 char *tail;
00874 int val= strtol(options[o], &tail, 0);
00875 if(tail==options[o]) break;
00876
00877 numOfUnknownOptions--;
00878 if(o==0) ppMode->baseDcDiff= val;
00879 else ppMode->flatnessThreshold= val;
00880 }
00881 }
00882 else if(filters[i].mask == FORCE_QUANT){
00883 int o;
00884 ppMode->forcedQuant= 15;
00885
00886 for(o=0; options[o]!=NULL && o<1; o++){
00887 char *tail;
00888 int val= strtol(options[o], &tail, 0);
00889 if(tail==options[o]) break;
00890
00891 numOfUnknownOptions--;
00892 ppMode->forcedQuant= val;
00893 }
00894 }
00895 }
00896 }
00897 if(!filterNameOk) ppMode->error++;
00898 ppMode->error += numOfUnknownOptions;
00899 }
00900
00901 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00902 if(ppMode->error){
00903 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00904 av_free(ppMode);
00905 return NULL;
00906 }
00907 return ppMode;
00908 }
00909
00910 void pp_free_mode(pp_mode *mode){
00911 av_free(mode);
00912 }
00913
/**
 * Replace the buffer *p with a fresh zero-filled one of the given size.
 * The old buffer is freed first; its contents are not preserved.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
00918
00919 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00920 int mbWidth = (width+15)>>4;
00921 int mbHeight= (height+15)>>4;
00922 int i;
00923
00924 c->stride= stride;
00925 c->qpStride= qpStride;
00926
00927 reallocAlign((void **)&c->tempDst, 8, stride*24);
00928 reallocAlign((void **)&c->tempSrc, 8, stride*24);
00929 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00930 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00931 for(i=0; i<256; i++)
00932 c->yHistogram[i]= width*height/64*15/256;
00933
00934 for(i=0; i<3; i++){
00935
00936 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00937 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);
00938 }
00939
00940 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00941 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00942 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00943 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00944 }
00945
/**
 * Name callback stored in av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char name[] = "postproc";

    return name;
}
00949
/* AVClass instance that pp_get_context() stores in every context
 * (c->av_class); pp_postprocess() passes the context to av_log().  The
 * initialiser supplies the string "Postproc", the context_to_name callback
 * and a NULL third member. */
00950 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00951
00952 pp_context *pp_get_context(int width, int height, int cpuCaps){
00953 PPContext *c= av_malloc(sizeof(PPContext));
00954 int stride= (width+15)&(~15);
00955 int qpStride= (width+15)/16 + 2;
00956
00957 memset(c, 0, sizeof(PPContext));
00958 c->av_class = &av_codec_context_class;
00959 c->cpuCaps= cpuCaps;
00960 if(cpuCaps&PP_FORMAT){
00961 c->hChromaSubSample= cpuCaps&0x3;
00962 c->vChromaSubSample= (cpuCaps>>4)&0x3;
00963 }else{
00964 c->hChromaSubSample= 1;
00965 c->vChromaSubSample= 1;
00966 }
00967
00968 reallocBuffers(c, width, height, stride, qpStride);
00969
00970 c->frameNum=-1;
00971
00972 return c;
00973 }
00974
00975 void pp_free_context(void *vc){
00976 PPContext *c = (PPContext*)vc;
00977 int i;
00978
00979 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00980 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00981
00982 av_free(c->tempBlocks);
00983 av_free(c->yHistogram);
00984 av_free(c->tempDst);
00985 av_free(c->tempSrc);
00986 av_free(c->deintTemp);
00987 av_free(c->stdQPTable);
00988 av_free(c->nonBQPTable);
00989 av_free(c->forcedQPTable);
00990
00991 memset(c, 0, sizeof(PPContext));
00992
00993 av_free(c);
00994 }
00995
/* Postprocess one picture.
 *
 * Plane 0 is always run through postProcess().  Planes 1 and 2 are run
 * through postProcess() when mode->chromMode is non-zero and are otherwise
 * copied from src to dst unchanged.
 *
 * src / srcStride : the three source planes and their strides
 * dst / dstStride : the three destination planes and their strides
 * width, height   : size of plane 0; planes 1 and 2 use these values shifted
 *                   right by the context's chroma subsampling shifts
 * QP_store        : quantiser table, or NULL
 * QPStride        : stride of QP_store (may be negative)
 * vm              : mode (PPMode)
 * vc              : context (PPContext)
 * pict_type       : picture type; the PP_PICT_TYPE_QP2 flag and the low three
 *                   bits are examined here */
00996 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
00997 uint8_t * dst[3], const int dstStride[3],
00998 int width, int height,
00999 const QP_STORE_T *QP_store, int QPStride,
01000 pp_mode *vm, void *vc, int pict_type)
01001 {
01002 int mbWidth = (width+15)>>4;
01003 int mbHeight= (height+15)>>4;
01004 PPMode *mode = (PPMode*)vm;
01005 PPContext *c = (PPContext*)vc;
01006 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
01007 int absQPStride = FFABS(QPStride);
01008
01009
/* grow the work buffers if this call needs larger strides than the context
 * was allocated for */
01010 if(c->stride < minStride || c->qpStride < absQPStride)
01011 reallocBuffers(c, width, height,
01012 FFMAX(minStride, c->stride),
01013 FFMAX(c->qpStride, absQPStride));
01014
/* no QP table supplied, or quantiser forced: use a single row of constant
 * values (stride 0 makes every row read the same entries) */
01015 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
01016 int i;
01017 QP_store= c->forcedQPTable;
01018 absQPStride = QPStride = 0;
01019 if(mode->lumMode & FORCE_QUANT)
01020 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
01021 else
01022 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01023 }
01024
/* PP_PICT_TYPE_QP2: store the QP values halved in stdQPTable and use that
 * table from here on.  The bulk is done four bytes at a time through a
 * uint32_t view (the mask clears the bit shifted in from the neighbouring
 * byte); the remaining entries are handled one by one. */
01025 if(pict_type & PP_PICT_TYPE_QP2){
01026 int i;
01027 const int count= mbHeight * absQPStride;
01028 for(i=0; i<(count>>2); i++){
01029 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01030 }
01031 for(i<<=2; i<count; i++){
01032 c->stdQPTable[i] = QP_store[i]>>1;
01033 }
01034 QP_store= c->stdQPTable;
01035 QPStride= absQPStride;
01036 }
01037
/* disabled debug dump of the QP table */
01038 if(0){
01039 int x,y;
01040 for(y=0; y<mbHeight; y++){
01041 for(x=0; x<mbWidth; x++){
01042 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01043 }
01044 av_log(c, AV_LOG_INFO, "\n");
01045 }
01046 av_log(c, AV_LOG_INFO, "\n");
01047 }
01048
/* unless the low three bits of pict_type equal 3 (presumably a B picture,
 * given the table name -- TODO confirm), remember the QP values masked to
 * six bits in nonBQPTable */
01049 if((pict_type&7)!=3){
01050 if (QPStride >= 0){
01051 int i;
01052 const int count= mbHeight * QPStride;
01053 for(i=0; i<(count>>2); i++){
01054 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01055 }
01056 for(i<<=2; i<count; i++){
01057 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01058 }
01059 } else {
/* negative stride: copy row by row so nonBQPTable is laid out with the
 * positive stride absQPStride */
01060 int i,j;
01061 for(i=0; i<mbHeight; i++) {
01062 for(j=0; j<absQPStride; j++) {
01063 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01064 }
01065 }
01066 }
01067 }
01068
01069 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01070 mode->lumMode, mode->chromMode);
01071
/* plane 0 */
01072 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01073 width, height, QP_store, QPStride, 0, mode, c);
01074
/* planes 1 and 2 use the subsampled size */
01075 width = (width )>>c->hChromaSubSample;
01076 height = (height)>>c->vChromaSubSample;
01077
01078 if(mode->chromMode){
01079 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01080 width, height, QP_store, QPStride, 1, mode, c);
01081 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01082 width, height, QP_store, QPStride, 2, mode, c);
01083 }
/* no chroma filtering: copy the planes; equal strides go through linecpy()
 * (defined outside the visible part of this file), otherwise each row is
 * copied separately */
01084 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01085 linecpy(dst[1], src[1], height, srcStride[1]);
01086 linecpy(dst[2], src[2], height, srcStride[2]);
01087 }else{
01088 int y;
01089 for(y=0; y<height; y++){
01090 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01091 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01092 }
01093 }
01094 }
01095