00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "vorbis.h"
00040 #include "diracdsp.h"
00041
/* Clip lookup table: index (x + MAX_NEG_CROP) maps x to the 0..255 range.
 * Zero here; NOTE(review): filling is not visible in this file — presumably
 * done during DSP init elsewhere. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square lookup biased by 256 (used as ff_squareTbl + 256 so signed
 * differences in [-256,255] can index it).  Same init caveat as above. */
uint32_t ff_squareTbl[512] = {0, };
00044
/*
 * Instantiate the bit-depth templated pixel primitives.  dsputil_template.c
 * is included once per BIT_DEPTH value; pixeltmp selects an intermediate
 * accumulator type wide enough for that depth (int16_t through 10 bits,
 * int32_t for 12 and 14 bits).
 */
#define pixeltmp int16_t
#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

#undef pixeltmp
#define pixeltmp int32_t
#define BIT_DEPTH 12
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 14
#include "dsputil_template.c"
#undef BIT_DEPTH

/* 8 bit is instantiated last. */
#undef pixeltmp
#define pixeltmp int16_t
#define BIT_DEPTH 8
#include "dsputil_template.c"
#undef pixeltmp
00069
00070
/* Byte-replicated word constants for SIMD-within-a-register tricks:
 * ~0UL/255 is 0x0101...01, so each byte of the word is 0x7f resp. 0x80. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00073
/* Classic 8x8 zigzag scan order (raster index for each scan position). */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00084
00085
00086
/* Zigzag scan variant for 2:4:8 interlaced (field) DCT blocks. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00097
00098
/* Inverse zigzag table, 16-byte aligned for SIMD consumers.
 * NOTE(review): only declared/zeroed here — confirm it is filled during
 * DSP initialization elsewhere. */
DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];

/* Alternate scan, horizontal-first variant. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

/* Alternate scan, vertical-first variant. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00122
00123
/* Coefficient permutation expected by the "simple" MMX IDCT
 * (FF_SIMPLE_IDCT_PERM in ff_init_scantable_permutation below). */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Within-row element order used by the SSE2 IDCT permutation. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00136
00137 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00138 int i;
00139 int end;
00140
00141 st->scantable= src_scantable;
00142
00143 for(i=0; i<64; i++){
00144 int j;
00145 j = src_scantable[i];
00146 st->permutated[i] = permutation[j];
00147 }
00148
00149 end=-1;
00150 for(i=0; i<64; i++){
00151 int j;
00152 j = st->permutated[i];
00153 if(j>end) end=j;
00154 st->raster_end[i]= end;
00155 }
00156 }
00157
00158 void ff_init_scantable_permutation(uint8_t *idct_permutation,
00159 int idct_permutation_type)
00160 {
00161 int i;
00162
00163 switch(idct_permutation_type){
00164 case FF_NO_IDCT_PERM:
00165 for(i=0; i<64; i++)
00166 idct_permutation[i]= i;
00167 break;
00168 case FF_LIBMPEG2_IDCT_PERM:
00169 for(i=0; i<64; i++)
00170 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00171 break;
00172 case FF_SIMPLE_IDCT_PERM:
00173 for(i=0; i<64; i++)
00174 idct_permutation[i]= simple_mmx_permutation[i];
00175 break;
00176 case FF_TRANSPOSE_IDCT_PERM:
00177 for(i=0; i<64; i++)
00178 idct_permutation[i]= ((i&7)<<3) | (i>>3);
00179 break;
00180 case FF_PARTTRANS_IDCT_PERM:
00181 for(i=0; i<64; i++)
00182 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
00183 break;
00184 case FF_SSE2_IDCT_PERM:
00185 for(i=0; i<64; i++)
00186 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
00187 break;
00188 default:
00189 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
00190 }
00191 }
00192
/** Sum of all 256 pixel values of a 16x16 block with the given line size. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int row, col, sum = 0;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += pix[col];
        pix += line_size;
    }
    return sum;
}
00214
/**
 * Sum of squares of all pixels of a 16x16 block.
 * Reads pixels a machine word at a time and squares each byte via the
 * ff_squareTbl lookup (table pointer biased by 256; byte values 0..255
 * index the upper half).
 */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* Straightforward byte-at-a-time reference, kept disabled. */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if HAVE_FAST_64BIT
            /* NOTE(review): the word loads below type-pun pix through
             * uint64_t/uint32_t — assumes suitable alignment and relies on
             * the build's aliasing settings; verify before touching. */
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;  /* advance to the next row */
    }
    return s;
}
00262
/** Byte-swap w 32-bit words from src into dst (dst may equal src). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i = 0;

    /* Main loop, unrolled by 8 words. */
    while (w - i >= 8) {
        dst[i]     = av_bswap32(src[i]);
        dst[i + 1] = av_bswap32(src[i + 1]);
        dst[i + 2] = av_bswap32(src[i + 2]);
        dst[i + 3] = av_bswap32(src[i + 3]);
        dst[i + 4] = av_bswap32(src[i + 4]);
        dst[i + 5] = av_bswap32(src[i + 5]);
        dst[i + 6] = av_bswap32(src[i + 6]);
        dst[i + 7] = av_bswap32(src[i + 7]);
        i += 8;
    }
    /* Remaining tail words. */
    while (i < w) {
        dst[i] = av_bswap32(src[i]);
        i++;
    }
}
00280
/** Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
00286
00287 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00288 {
00289 int s, i;
00290 uint32_t *sq = ff_squareTbl + 256;
00291
00292 s = 0;
00293 for (i = 0; i < h; i++) {
00294 s += sq[pix1[0] - pix2[0]];
00295 s += sq[pix1[1] - pix2[1]];
00296 s += sq[pix1[2] - pix2[2]];
00297 s += sq[pix1[3] - pix2[3]];
00298 pix1 += line_size;
00299 pix2 += line_size;
00300 }
00301 return s;
00302 }
00303
00304 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00305 {
00306 int s, i;
00307 uint32_t *sq = ff_squareTbl + 256;
00308
00309 s = 0;
00310 for (i = 0; i < h; i++) {
00311 s += sq[pix1[0] - pix2[0]];
00312 s += sq[pix1[1] - pix2[1]];
00313 s += sq[pix1[2] - pix2[2]];
00314 s += sq[pix1[3] - pix2[3]];
00315 s += sq[pix1[4] - pix2[4]];
00316 s += sq[pix1[5] - pix2[5]];
00317 s += sq[pix1[6] - pix2[6]];
00318 s += sq[pix1[7] - pix2[7]];
00319 pix1 += line_size;
00320 pix2 += line_size;
00321 }
00322 return s;
00323 }
00324
00325 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00326 {
00327 int s, i;
00328 uint32_t *sq = ff_squareTbl + 256;
00329
00330 s = 0;
00331 for (i = 0; i < h; i++) {
00332 s += sq[pix1[ 0] - pix2[ 0]];
00333 s += sq[pix1[ 1] - pix2[ 1]];
00334 s += sq[pix1[ 2] - pix2[ 2]];
00335 s += sq[pix1[ 3] - pix2[ 3]];
00336 s += sq[pix1[ 4] - pix2[ 4]];
00337 s += sq[pix1[ 5] - pix2[ 5]];
00338 s += sq[pix1[ 6] - pix2[ 6]];
00339 s += sq[pix1[ 7] - pix2[ 7]];
00340 s += sq[pix1[ 8] - pix2[ 8]];
00341 s += sq[pix1[ 9] - pix2[ 9]];
00342 s += sq[pix1[10] - pix2[10]];
00343 s += sq[pix1[11] - pix2[11]];
00344 s += sq[pix1[12] - pix2[12]];
00345 s += sq[pix1[13] - pix2[13]];
00346 s += sq[pix1[14] - pix2[14]];
00347 s += sq[pix1[15] - pix2[15]];
00348
00349 pix1 += line_size;
00350 pix2 += line_size;
00351 }
00352 return s;
00353 }
00354
00355 static void diff_pixels_c(DCTELEM *av_restrict block, const uint8_t *s1,
00356 const uint8_t *s2, int stride){
00357 int i;
00358
00359
00360 for(i=0;i<8;i++) {
00361 block[0] = s1[0] - s2[0];
00362 block[1] = s1[1] - s2[1];
00363 block[2] = s1[2] - s2[2];
00364 block[3] = s1[3] - s2[3];
00365 block[4] = s1[4] - s2[4];
00366 block[5] = s1[5] - s2[5];
00367 block[6] = s1[6] - s2[6];
00368 block[7] = s1[7] - s2[7];
00369 s1 += stride;
00370 s2 += stride;
00371 block += 8;
00372 }
00373 }
00374
00375
00376 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00377 int line_size)
00378 {
00379 int i;
00380
00381
00382 for(i=0;i<8;i++) {
00383 pixels[0] = av_clip_uint8(block[0]);
00384 pixels[1] = av_clip_uint8(block[1]);
00385 pixels[2] = av_clip_uint8(block[2]);
00386 pixels[3] = av_clip_uint8(block[3]);
00387 pixels[4] = av_clip_uint8(block[4]);
00388 pixels[5] = av_clip_uint8(block[5]);
00389 pixels[6] = av_clip_uint8(block[6]);
00390 pixels[7] = av_clip_uint8(block[7]);
00391
00392 pixels += line_size;
00393 block += 8;
00394 }
00395 }
00396
00397 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00398 int line_size)
00399 {
00400 int i;
00401
00402
00403 for(i=0;i<4;i++) {
00404 pixels[0] = av_clip_uint8(block[0]);
00405 pixels[1] = av_clip_uint8(block[1]);
00406 pixels[2] = av_clip_uint8(block[2]);
00407 pixels[3] = av_clip_uint8(block[3]);
00408
00409 pixels += line_size;
00410 block += 8;
00411 }
00412 }
00413
00414 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00415 int line_size)
00416 {
00417 int i;
00418
00419
00420 for(i=0;i<2;i++) {
00421 pixels[0] = av_clip_uint8(block[0]);
00422 pixels[1] = av_clip_uint8(block[1]);
00423
00424 pixels += line_size;
00425 block += 8;
00426 }
00427 }
00428
00429 static void put_signed_pixels_clamped_c(const DCTELEM *block,
00430 uint8_t *av_restrict pixels,
00431 int line_size)
00432 {
00433 int i, j;
00434
00435 for (i = 0; i < 8; i++) {
00436 for (j = 0; j < 8; j++) {
00437 if (*block < -128)
00438 *pixels = 0;
00439 else if (*block > 127)
00440 *pixels = 255;
00441 else
00442 *pixels = (uint8_t)(*block + 128);
00443 block++;
00444 pixels++;
00445 }
00446 pixels += (line_size - 8);
00447 }
00448 }
00449
00450 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00451 int line_size)
00452 {
00453 int i;
00454
00455
00456 for(i=0;i<8;i++) {
00457 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00458 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00459 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00460 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00461 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
00462 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
00463 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
00464 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
00465 pixels += line_size;
00466 block += 8;
00467 }
00468 }
00469
00470 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00471 int line_size)
00472 {
00473 int i;
00474
00475
00476 for(i=0;i<4;i++) {
00477 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00478 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00479 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00480 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00481 pixels += line_size;
00482 block += 8;
00483 }
00484 }
00485
00486 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00487 int line_size)
00488 {
00489 int i;
00490
00491
00492 for(i=0;i<2;i++) {
00493 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00494 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00495 pixels += line_size;
00496 block += 8;
00497 }
00498 }
00499
00500 static int sum_abs_dctelem_c(DCTELEM *block)
00501 {
00502 int sum=0, i;
00503 for(i=0; i<64; i++)
00504 sum+= FFABS(block[i]);
00505 return sum;
00506 }
00507
/** Fill a 16-pixel-wide block of height h with a constant byte value. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
00517
/** Fill an 8-pixel-wide block of height h with a constant byte value. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
00527
/**
 * Rounded 2- and 4-sample averages used by the pel interpolation code.
 * Arguments and the whole expansion are parenthesized so operands with
 * lower-precedence operators (e.g. avg2(a ^ b, c)) expand correctly.
 */
#define avg2(a,b) (((a) + (b) + 1) >> 1)
#define avg4(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2)
00530
/**
 * 1/16-pel bilinear interpolation of an 8-pixel-wide block ("GMC1").
 * The four tap weights are the bilinear products of the 4-bit fractional
 * offsets x16 and y16; weights sum to 256, hence the >>8.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B =       x16  * (16 - y16);
    const int C = (16 - x16) *       y16;
    const int D =       x16  *       y16;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[col + stride] + D * src[col + stride + 1] +
                        rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
00553
/**
 * Global motion compensation: for each destination pixel, follow an affine
 * motion field (dxx,dxy,dyx,dyy in 1/(1<<shift) units, start offset ox,oy
 * in 16.16 fixed point) and bilinearly interpolate from src, clamping
 * sample coordinates to [0,width]x[0,height].
 * Processes an 8-pixel-wide strip of height h.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;      /* one full pel in the fractional scale */

    /* Convert to the last valid coordinate for the clamp tests below. */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* Integer position in 1/s units, then split into the
             * fractional part (low shift bits) and the pel position. */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* Fully inside: 2-D bilinear blend of the 4 neighbours. */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* Vertically outside: clamp y, interpolate in x only. */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index  ]*(s-frac_x)
                                          + src[index+1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* Horizontally outside: clamp x, interpolate in y only. */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index       ]*(s-frac_y)
                                          + src[index+stride]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* Outside on both axes: nearest clamped sample. */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index];
                }
            }

            /* Step the motion field along the row ... */
            vx+= dxx;
            vy+= dyx;
        }
        /* ... and down to the next row. */
        ox += dxy;
        oy += dyy;
    }
}
00611
/** Thirdpel MC, zero motion: plain block copy for width 2/4/8/16. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
00620
/** Thirdpel MC (1/3, 0): dst = round((2*p0 + p1) / 3) via 683/2048 fixed point. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
}
00631
/** Thirdpel MC (2/3, 0): dst = round((p0 + 2*p1) / 3) via 683/2048 fixed point. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
}
00642
/** Thirdpel MC (0, 1/3): vertical blend round((2*top + bottom) / 3). */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
}
00653
/** Thirdpel MC (1/3, 1/3): 4-tap bilinear-style blend, weights 4/3/3/2 /12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (4 * src[col] + 3 * src[col + 1] +
                                3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
}
00664
/** Thirdpel MC (1/3, 2/3): 4-tap blend, weights 3/2/4/3 /12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 2 * src[col + 1] +
                                4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
}
00675
/** Thirdpel MC (0, 2/3): vertical blend round((top + 2*bottom) / 3). */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
}
00686
/** Thirdpel MC (2/3, 1/3): 4-tap blend, weights 3/4/2/3 /12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 4 * src[col + 1] +
                                2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
}
00697
/** Thirdpel MC (2/3, 2/3): 4-tap blend, weights 2/3/3/4 /12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (2 * src[col] + 3 * src[col + 1] +
                                3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
}
00708
/** Thirdpel MC, zero motion, averaging with dst: width 2/4/8/16. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
00717
/** Thirdpel MC (1/3, 0), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00728
/** Thirdpel MC (2/3, 0), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00739
/** Thirdpel MC (0, 1/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00750
/** Thirdpel MC (1/3, 1/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (2731 * (4 * src[col] + 3 * src[col + 1] +
                                   3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00761
/** Thirdpel MC (1/3, 2/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (2731 * (3 * src[col] + 2 * src[col + 1] +
                                   4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00772
/** Thirdpel MC (0, 2/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00783
/** Thirdpel MC (2/3, 1/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (2731 * (3 * src[col] + 4 * src[col + 1] +
                                   2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00794
/** Thirdpel MC (2/3, 2/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (2731 * (2 * src[col] + 3 * src[col + 1] +
                                   3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00805
00806 #define QPEL_MC(r, OPNAME, RND, OP) \
00807 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00808 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00809 int i;\
00810 for(i=0; i<h; i++)\
00811 {\
00812 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00813 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00814 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00815 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00816 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00817 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00818 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00819 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00820 dst+=dstStride;\
00821 src+=srcStride;\
00822 }\
00823 }\
00824 \
00825 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00826 const int w=8;\
00827 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00828 int i;\
00829 for(i=0; i<w; i++)\
00830 {\
00831 const int src0= src[0*srcStride];\
00832 const int src1= src[1*srcStride];\
00833 const int src2= src[2*srcStride];\
00834 const int src3= src[3*srcStride];\
00835 const int src4= src[4*srcStride];\
00836 const int src5= src[5*srcStride];\
00837 const int src6= src[6*srcStride];\
00838 const int src7= src[7*srcStride];\
00839 const int src8= src[8*srcStride];\
00840 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00841 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00842 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00843 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00844 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00845 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00846 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00847 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00848 dst++;\
00849 src++;\
00850 }\
00851 }\
00852 \
00853 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00854 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00855 int i;\
00856 \
00857 for(i=0; i<h; i++)\
00858 {\
00859 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00860 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00861 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00862 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00863 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00864 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00865 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00866 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00867 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00868 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00869 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00870 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00871 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00872 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00873 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00874 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00875 dst+=dstStride;\
00876 src+=srcStride;\
00877 }\
00878 }\
00879 \
00880 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00881 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00882 int i;\
00883 const int w=16;\
00884 for(i=0; i<w; i++)\
00885 {\
00886 const int src0= src[0*srcStride];\
00887 const int src1= src[1*srcStride];\
00888 const int src2= src[2*srcStride];\
00889 const int src3= src[3*srcStride];\
00890 const int src4= src[4*srcStride];\
00891 const int src5= src[5*srcStride];\
00892 const int src6= src[6*srcStride];\
00893 const int src7= src[7*srcStride];\
00894 const int src8= src[8*srcStride];\
00895 const int src9= src[9*srcStride];\
00896 const int src10= src[10*srcStride];\
00897 const int src11= src[11*srcStride];\
00898 const int src12= src[12*srcStride];\
00899 const int src13= src[13*srcStride];\
00900 const int src14= src[14*srcStride];\
00901 const int src15= src[15*srcStride];\
00902 const int src16= src[16*srcStride];\
00903 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00904 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00905 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00906 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00907 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00908 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00909 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00910 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00911 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00912 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00913 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00914 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00915 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00916 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00917 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00918 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00919 dst++;\
00920 src++;\
00921 }\
00922 }\
00923 \
00924 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00925 uint8_t half[64];\
00926 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00927 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00928 }\
00929 \
00930 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00931 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00932 }\
00933 \
00934 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00935 uint8_t half[64];\
00936 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00937 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00938 }\
00939 \
00940 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00941 uint8_t full[16*9];\
00942 uint8_t half[64];\
00943 copy_block9(full, src, 16, stride, 9);\
00944 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00945 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00946 }\
00947 \
00948 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00949 uint8_t full[16*9];\
00950 copy_block9(full, src, 16, stride, 9);\
00951 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00952 }\
00953 \
00954 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00955 uint8_t full[16*9];\
00956 uint8_t half[64];\
00957 copy_block9(full, src, 16, stride, 9);\
00958 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00959 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00960 }\
00961 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00962 uint8_t full[16*9];\
00963 uint8_t halfH[72];\
00964 uint8_t halfV[64];\
00965 uint8_t halfHV[64];\
00966 copy_block9(full, src, 16, stride, 9);\
00967 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00968 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00969 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00970 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00971 }\
00972 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00973 uint8_t full[16*9];\
00974 uint8_t halfH[72];\
00975 uint8_t halfHV[64];\
00976 copy_block9(full, src, 16, stride, 9);\
00977 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00978 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
00979 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00980 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00981 }\
00982 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00983 uint8_t full[16*9];\
00984 uint8_t halfH[72];\
00985 uint8_t halfV[64];\
00986 uint8_t halfHV[64];\
00987 copy_block9(full, src, 16, stride, 9);\
00988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00989 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00991 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00992 }\
00993 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00994 uint8_t full[16*9];\
00995 uint8_t halfH[72];\
00996 uint8_t halfHV[64];\
00997 copy_block9(full, src, 16, stride, 9);\
00998 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00999 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01000 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01001 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01002 }\
01003 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01004 uint8_t full[16*9];\
01005 uint8_t halfH[72];\
01006 uint8_t halfV[64];\
01007 uint8_t halfHV[64];\
01008 copy_block9(full, src, 16, stride, 9);\
01009 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01010 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01011 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01012 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01013 }\
01014 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01015 uint8_t full[16*9];\
01016 uint8_t halfH[72];\
01017 uint8_t halfHV[64];\
01018 copy_block9(full, src, 16, stride, 9);\
01019 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01020 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01021 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01022 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01023 }\
01024 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01025 uint8_t full[16*9];\
01026 uint8_t halfH[72];\
01027 uint8_t halfV[64];\
01028 uint8_t halfHV[64];\
01029 copy_block9(full, src, 16, stride, 9);\
01030 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01031 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01032 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01033 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01034 }\
01035 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01036 uint8_t full[16*9];\
01037 uint8_t halfH[72];\
01038 uint8_t halfHV[64];\
01039 copy_block9(full, src, 16, stride, 9);\
01040 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01041 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01042 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01043 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01044 }\
01045 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01046 uint8_t halfH[72];\
01047 uint8_t halfHV[64];\
01048 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01049 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01050 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01051 }\
01052 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01053 uint8_t halfH[72];\
01054 uint8_t halfHV[64];\
01055 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01056 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01057 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01058 }\
01059 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01060 uint8_t full[16*9];\
01061 uint8_t halfH[72];\
01062 uint8_t halfV[64];\
01063 uint8_t halfHV[64];\
01064 copy_block9(full, src, 16, stride, 9);\
01065 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01066 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01067 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01068 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01069 }\
01070 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01071 uint8_t full[16*9];\
01072 uint8_t halfH[72];\
01073 copy_block9(full, src, 16, stride, 9);\
01074 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01075 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01076 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01077 }\
01078 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01079 uint8_t full[16*9];\
01080 uint8_t halfH[72];\
01081 uint8_t halfV[64];\
01082 uint8_t halfHV[64];\
01083 copy_block9(full, src, 16, stride, 9);\
01084 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01085 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01086 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01087 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01088 }\
01089 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01090 uint8_t full[16*9];\
01091 uint8_t halfH[72];\
01092 copy_block9(full, src, 16, stride, 9);\
01093 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01094 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01095 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01096 }\
01097 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01098 uint8_t halfH[72];\
01099 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01100 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01101 }\
01102 \
01103 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01104 uint8_t half[256];\
01105 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01106 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01107 }\
01108 \
01109 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01110 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01111 }\
01112 \
01113 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01114 uint8_t half[256];\
01115 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01116 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01117 }\
01118 \
01119 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01120 uint8_t full[24*17];\
01121 uint8_t half[256];\
01122 copy_block17(full, src, 24, stride, 17);\
01123 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01124 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01125 }\
01126 \
01127 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01128 uint8_t full[24*17];\
01129 copy_block17(full, src, 24, stride, 17);\
01130 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01131 }\
01132 \
01133 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01134 uint8_t full[24*17];\
01135 uint8_t half[256];\
01136 copy_block17(full, src, 24, stride, 17);\
01137 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01138 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01139 }\
01140 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01141 uint8_t full[24*17];\
01142 uint8_t halfH[272];\
01143 uint8_t halfV[256];\
01144 uint8_t halfHV[256];\
01145 copy_block17(full, src, 24, stride, 17);\
01146 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01147 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01148 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01149 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01150 }\
01151 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01152 uint8_t full[24*17];\
01153 uint8_t halfH[272];\
01154 uint8_t halfHV[256];\
01155 copy_block17(full, src, 24, stride, 17);\
01156 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01157 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01158 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01159 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01160 }\
01161 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01162 uint8_t full[24*17];\
01163 uint8_t halfH[272];\
01164 uint8_t halfV[256];\
01165 uint8_t halfHV[256];\
01166 copy_block17(full, src, 24, stride, 17);\
01167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01168 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01169 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01170 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01171 }\
01172 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01173 uint8_t full[24*17];\
01174 uint8_t halfH[272];\
01175 uint8_t halfHV[256];\
01176 copy_block17(full, src, 24, stride, 17);\
01177 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01178 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01179 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01180 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01181 }\
01182 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01183 uint8_t full[24*17];\
01184 uint8_t halfH[272];\
01185 uint8_t halfV[256];\
01186 uint8_t halfHV[256];\
01187 copy_block17(full, src, 24, stride, 17);\
01188 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01189 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01190 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01191 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01192 }\
01193 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01194 uint8_t full[24*17];\
01195 uint8_t halfH[272];\
01196 uint8_t halfHV[256];\
01197 copy_block17(full, src, 24, stride, 17);\
01198 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01199 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01200 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01201 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01202 }\
01203 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01204 uint8_t full[24*17];\
01205 uint8_t halfH[272];\
01206 uint8_t halfV[256];\
01207 uint8_t halfHV[256];\
01208 copy_block17(full, src, 24, stride, 17);\
01209 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01210 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01211 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01212 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01213 }\
01214 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01215 uint8_t full[24*17];\
01216 uint8_t halfH[272];\
01217 uint8_t halfHV[256];\
01218 copy_block17(full, src, 24, stride, 17);\
01219 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01220 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01221 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01222 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01223 }\
01224 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01225 uint8_t halfH[272];\
01226 uint8_t halfHV[256];\
01227 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01228 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01229 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01230 }\
01231 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01232 uint8_t halfH[272];\
01233 uint8_t halfHV[256];\
01234 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01235 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01236 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01237 }\
01238 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01239 uint8_t full[24*17];\
01240 uint8_t halfH[272];\
01241 uint8_t halfV[256];\
01242 uint8_t halfHV[256];\
01243 copy_block17(full, src, 24, stride, 17);\
01244 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01245 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01246 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01247 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01248 }\
01249 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01250 uint8_t full[24*17];\
01251 uint8_t halfH[272];\
01252 copy_block17(full, src, 24, stride, 17);\
01253 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01254 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01255 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01256 }\
01257 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01258 uint8_t full[24*17];\
01259 uint8_t halfH[272];\
01260 uint8_t halfV[256];\
01261 uint8_t halfHV[256];\
01262 copy_block17(full, src, 24, stride, 17);\
01263 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01264 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01265 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01266 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01267 }\
01268 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01269 uint8_t full[24*17];\
01270 uint8_t halfH[272];\
01271 copy_block17(full, src, 24, stride, 17);\
01272 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01273 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01274 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01275 }\
01276 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01277 uint8_t halfH[272];\
01278 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01279 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01280 }
01281
/* Per-pixel output operators for the QPEL_MC template above: 'b' is the raw
 * filter sum (scaled by 32 -> the >>5); it is rounded (+16) or truncation
 * biased (+15, the "no_rnd" flavour), shifted down and clipped through the
 * cm[] crop table, then either stored (put) or averaged into dst (avg). */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the full quarter-pel MC function set for each combination of
 * output mode (put/avg) and rounding. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
01295
/* Full-pel (mc00) positions need no interpolation: the qpel functions
 * degenerate to plain pixel copy/average, so map them straight onto the
 * generic pixel helpers. Rounding mode is irrelevant for a plain copy,
 * hence the no_rnd variants share the rounding helpers. */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
/* Was ff_put_pixels16x16_8_c, which does not follow the ff_* wrapper naming
 * used by every sibling alias above and was inconsistent with
 * put_qpel16_mc00_c; use the same helper as the rounding variant. */
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
01302
01303 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01304 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01305 int i;
01306
01307 for(i=0; i<h; i++){
01308 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01309 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01310 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01311 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01312 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01313 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01314 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01315 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01316 dst+=dstStride;
01317 src+=srcStride;
01318 }
01319 }
01320
#if CONFIG_RV40_DECODER
/* RV40 mc33 wrappers: the (3,3) sub-pel case is served by the generic
 * 8-bit xy2 pixel helpers (diagonal two-by-two average) rather than a
 * dedicated RV40 filter routine. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif
01335
#if CONFIG_DIRAC_DECODER
/* Dirac motion-compensation wrappers: adapt the generic 8-bit pixel
 * copy/average helpers (and their 2- and 4-source blend variants) to the
 * Dirac calling convention, which passes up to five source pointers in
 * src[]. The 32-pixel-wide variants are composed from two 16-wide calls.
 * NOTE(review): only src[0..3] are read here; the fifth pointer is
 * presumably for other sub-pel code paths — confirm at the call sites. */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
/* Instantiate for plain stores (put) and destination averaging (avg). */
DIRAC_MC(put)
DIRAC_MC(avg)
#endif
01380
01381 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01382 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01383 int i;
01384
01385 for(i=0; i<w; i++){
01386 const int src_1= src[ -srcStride];
01387 const int src0 = src[0 ];
01388 const int src1 = src[ srcStride];
01389 const int src2 = src[2*srcStride];
01390 const int src3 = src[3*srcStride];
01391 const int src4 = src[4*srcStride];
01392 const int src5 = src[5*srcStride];
01393 const int src6 = src[6*srcStride];
01394 const int src7 = src[7*srcStride];
01395 const int src8 = src[8*srcStride];
01396 const int src9 = src[9*srcStride];
01397 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01398 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01399 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01400 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01401 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01402 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01403 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01404 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01405 src++;
01406 dst++;
01407 }
01408 }
01409
/* MSPEL 8x8 MC, horizontal sub-pel position 1: average of the source block
 * and its horizontally half-pel filtered version. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
01415
/* MSPEL 8x8 MC, horizontal sub-pel position 2: pure horizontal half-pel
 * filter, written directly to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01419
/* MSPEL 8x8 MC, horizontal sub-pel position 3: average of the filtered
 * block and the source shifted one pixel to the right. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
01425
/* MSPEL 8x8 MC, vertical sub-pel position 2: pure vertical half-pel
 * filter, written directly to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01429
/* MSPEL 8x8 MC, position (1,2): average of the vertically filtered source
 * (halfV) and the horizontally-then-vertically filtered block (halfHV).
 * The horizontal pass covers 11 rows starting one row above (src-stride)
 * so the vertical pass has its -1 and +2 taps; halfH+8 skips that extra
 * top context row. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* MSPEL 8x8 MC, position (3,2): like mc12 but the vertical-only branch
 * (halfV) is taken from the source shifted one pixel right (src+1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* MSPEL 8x8 MC, position (2,2): horizontal then vertical half-pel filter.
 * 11 rows are filtered horizontally (one extra above, two below) so the
 * vertical pass has full tap context; halfH+8 skips the extra top row. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
01453
/* H.263 deblocking across a horizontal block edge. src points at the first
 * row of the lower block; for each of 8 columns the two pixels on either
 * side of the edge (p0,p1 above; p2,p3 below) are filtered in place.
 * Filter strength is derived from the quantizer via a lookup table. The
 * whole body is compiled out unless an H.263 decoder/encoder is enabled. */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];  /* two rows above the edge */
            int p1= src[x-1*stride];  /* row just above the edge */
            int p2= src[x+0*stride];  /* row just below the edge */
            int p3= src[x+1*stride];  /* two rows below the edge */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;  /* edge activity measure */

            /* Piecewise-linear response: proportional correction for small
             * |d|, tapering back to zero once |d| reaches 2*strength so
             * genuine image edges are left untouched. */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless clamp to 0..255 — valid for values in -256..511 */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker secondary correction applied to the outer pixel pair */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
01490
/* H.263 deblocking across a vertical block edge; identical response to
 * h263_v_loop_filter_c but operating on the two pixels either side of the
 * edge within each of the 8 rows. Compiled out unless an H.263
 * decoder/encoder is enabled. */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];  /* two pixels left of the edge */
            int p1= src[y*stride-1];  /* just left of the edge */
            int p2= src[y*stride+0];  /* just right of the edge */
            int p3= src[y*stride+1];  /* two pixels right of the edge */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;  /* edge activity measure */

            /* Piecewise-linear response: proportional for small |d|,
             * tapering to zero beyond 2*strength (see v filter above). */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless clamp to 0..255 — valid for values in -256..511 */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker secondary correction applied to the outer pixel pair */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
01527
/* H.261 in-loop filter: separable [1 2 1]/4 low-pass applied in place to an
 * 8x8 block. Border rows/columns only pass through the scaling of the other
 * pass (copied at 4x in the vertical pass, then rounded back down). */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int temp[64];
    int row, col;

    /* Vertical pass into temp[], all values scaled by 4; the first and
     * last rows are copied (scaled) unfiltered. */
    for (col = 0; col < 8; col++) {
        temp[col]         = 4 * src[col];
        temp[col + 7 * 8] = 4 * src[col + 7 * stride];
    }
    for (row = 1; row < 7; row++) {
        for (col = 0; col < 8; col++) {
            const int p = row * stride + col;
            temp[row * 8 + col] = src[p - stride] + 2 * src[p] + src[p + stride];
        }
    }

    /* Horizontal pass back into src[] with rounding; the first and last
     * columns are only scaled back down (+2 >> 2). */
    for (row = 0; row < 8; row++) {
        src[row * stride]     = (temp[row * 8]     + 2) >> 2;
        src[row * stride + 7] = (temp[row * 8 + 7] + 2) >> 2;
        for (col = 1; col < 7; col++) {
            const int t = row * 8 + col;
            src[row * stride + col] = (temp[t - 1] + 2 * temp[t] + temp[t + 1] + 8) >> 4;
        }
    }
}
01554
/* Sum of absolute differences (SAD) over a 16-wide block of height h.
 * The context pointer v is unused; line_size is the stride of both blocks. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01582
/* SAD of a 16-wide block against the horizontal half-pel interpolation of
 * pix2 (avg2 of each pixel and its right neighbour; reads pix2[16]). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01610
/* SAD of a 16-wide block against the vertical half-pel interpolation of
 * pix2 (avg2 of each pixel and the one directly below). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sad;
}
01640
/* SAD of a 16-wide block against the diagonal half-pel interpolation of
 * pix2 (avg4 of the 2x2 neighbourhood; reads one extra column and row). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                        below[col], below[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sad;
}
01670
/* Sum of absolute differences (SAD) over an 8-wide block of height h.
 * The context pointer v is unused. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01690
/* SAD of an 8-wide block against the horizontal half-pel interpolation of
 * pix2 (avg2 of each pixel and its right neighbour; reads pix2[8]). */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01710
/* SAD of an 8-wide block against the vertical half-pel interpolation of
 * pix2 (avg2 of each pixel and the one directly below). */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sad;
}
01732
/* SAD of an 8-wide block against the diagonal half-pel interpolation of
 * pix2 (avg4 of the 2x2 neighbourhood; reads one extra column and row). */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                        below[col], below[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sad;
}
01754
/*
 * "Noise shaping" SSE for a 16xh block: the plain sum of squared errors
 * (score1) plus the difference in local 2x2 gradient magnitudes between
 * the two blocks (score2).  score2 penalizes a reconstruction whose
 * texture differs from the source even when the pixel-wise error is
 * small.  The weight comes from avctx->nsse_weight, or 8 when called
 * without a context.
 */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;   /* plain SSE */
    int score2=0;   /* signed gradient-difference accumulator */
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
        }
        if(y+1<h){  /* gradients need the next line */
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                             - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                             - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}

/* 8-wide variant of nsse16_c; identical structure. */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
        }
        if(y+1<h){
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                             - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                             - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
01806
/*
 * Encoder helpers for basis-vector refinement of an 8x8 residual.
 * BASIS_SHIFT / RECON_SHIFT are fixed-point shift amounts defined
 * elsewhere in the project.
 */

/* Return the weighted squared error that would result from adding
 * scale * basis to the residual rem (nothing is modified). */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        /* rounded fixed-point scale of the basis coefficient */
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}

/* Commit the update: rem += scale * basis (same rounding as above). */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
01829
01838 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01839 {
01840 int i;
01841 DCTELEM temp[64];
01842
01843 if(last<=0) return;
01844
01845
01846 for(i=0; i<=last; i++){
01847 const int j= scantable[i];
01848 temp[j]= block[j];
01849 block[j]=0;
01850 }
01851
01852 for(i=0; i<=last; i++){
01853 const int j= scantable[i];
01854 const int perm_j= permutation[j];
01855 block[perm_j]= temp[j];
01856 }
01857 }
01858
/* Dummy comparison function: every pair of blocks scores 0 (FF_CMP_ZERO). */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
01862
/*
 * Fill cmp[0..5] with the comparison functions of the requested metric,
 * taken from the DSPContext.  Only the low byte of 'type' selects the
 * metric (FF_CMP_*); an unknown value leaves the slots zeroed (from the
 * initial memset) and logs an error.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
01922
/**
 * dst[i] += src[i] for i in [0, w), each byte wrapping modulo 256.
 *
 * The main loop processes sizeof(long) bytes at once, SWAR style:
 * ((a & 0x7f..) + (b & 0x7f..)) ^ ((a ^ b) & 0x80..) adds every byte
 * without letting carries cross byte boundaries.
 *
 * Fix: the previous loop condition 'i <= w - sizeof(long)' promoted the
 * subtraction to size_t, so any w < sizeof(long) wrapped to a huge
 * value and the loop read and wrote far past both buffers.  Casting
 * sizeof to long keeps the comparison signed.
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i = 0;
    const unsigned long m7f = (~0UL / 255) * 0x7f; /* 0x7f in every byte */
    const unsigned long m80 = (~0UL / 255) * 0x80; /* 0x80 in every byte */

    for (; i <= w - (long)sizeof(long); i += sizeof(long)) {
        unsigned long a = *(long*)(src + i);
        unsigned long b = *(long*)(dst + i);
        *(long*)(dst + i) = (long)(((a & m7f) + (b & m7f)) ^ ((a ^ b) & m80));
    }
    for (; i < w; i++)          /* byte-wise tail */
        dst[i] += src[i];
}
01933
/**
 * dst[i] = src1[i] - src2[i] for i in [0, w), each byte modulo 256.
 *
 * SWAR main loop: ((a | 0x80..) - (b & 0x7f..)) ^ ((a^b^0x80..) & 0x80..)
 * subtracts every byte without borrows crossing byte boundaries.  On
 * targets without fast unaligned loads, a byte-wise loop handles a
 * misaligned src2.
 *
 * Fix: 'i <= w - sizeof(long)' compared against a size_t, so a w
 * smaller than sizeof(long) wrapped around and the SWAR loop overran
 * all three buffers; the sizeof operand is now cast to long.
 */
static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
    long i = 0;
    const unsigned long m7f = (~0UL / 255) * 0x7f; /* 0x7f in every byte */
    const unsigned long m80 = (~0UL / 255) * 0x80; /* 0x80 in every byte */

#if !HAVE_FAST_UNALIGNED
    if ((long)src2 & (sizeof(long) - 1)) {
        for (i = 0; i + 7 < w; i += 8) {
            dst[i + 0] = src1[i + 0] - src2[i + 0];
            dst[i + 1] = src1[i + 1] - src2[i + 1];
            dst[i + 2] = src1[i + 2] - src2[i + 2];
            dst[i + 3] = src1[i + 3] - src2[i + 3];
            dst[i + 4] = src1[i + 4] - src2[i + 4];
            dst[i + 5] = src1[i + 5] - src2[i + 5];
            dst[i + 6] = src1[i + 6] - src2[i + 6];
            dst[i + 7] = src1[i + 7] - src2[i + 7];
        }
    } else
#endif
    for (; i <= w - (long)sizeof(long); i += sizeof(long)) {
        unsigned long a = *(long*)(src1 + i);
        unsigned long b = *(long*)(src2 + i);
        *(long*)(dst + i) = (long)(((a | m80) - (b & m7f)) ^ ((a ^ b ^ m80) & m80));
    }
    for (; i < w; i++)          /* byte-wise tail */
        dst[i] = src1[i] - src2[i];
}
01958
/*
 * HuffYUV median prediction: each pixel is predicted from its left
 * neighbour (l), the pixel above (src1[i]) and the one above-left (lt)
 * via mid_pred() from mathops.h, with the gradient term clamped to
 * 8 bits.  The running l / lt state is carried across calls through
 * *left and *left_top.
 */

/* Decode direction: dst[i] = prediction + residual diff[i] (mod 256). */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt= src1[i];
        dst[i]= l;
    }

    *left= l;
    *left_top= lt;
}

/* Encode direction: dst[i] = src2[i] - prediction (mod 256);
 * src1 is presumably the previous line, src2 the current one --
 * confirm against callers. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}
01993
/**
 * HuffYUV left prediction: running sum of src into dst.
 *
 * Each output byte is the low 8 bits of the accumulator; the full
 * (un-truncated) accumulator is returned so it can seed the next call.
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    /* unrolled by two, then a tail for odd w */
    for (i = 0; i + 1 < w; i += 2) {
        acc += src[i];
        dst[i] = acc;
        acc += src[i + 1];
        dst[i + 1] = acc;
    }
    for (; i < w; i++) {
        acc += src[i];
        dst[i] = acc;
    }

    return acc;
}
02012
/* Byte offsets of the four channels inside a packed 32-bit BGRA pixel. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/* HuffYUV left prediction over packed BGRA pixels: per-channel running
 * sums; the stores truncate to 8 bits, the full int accumulators are
 * handed back through the pointer arguments. */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int r = *red, g = *green, b = *blue, a = *alpha;

    for (i = 0; i < w; i++) {
        const uint8_t *in  = src + 4 * i;
        uint8_t       *out = dst + 4 * i;

        b += in[B];
        g += in[G];
        r += in[R];
        a += in[A];

        out[B] = b;
        out[G] = g;
        out[R] = r;
        out[A] = a;
    }

    *red   = r;
    *green = g;
    *blue  = b;
    *alpha = a;
}
#undef B
#undef G
#undef R
#undef A
02053
/* Butterfly into two outputs: o1 = i1 + i2, o2 = i1 - i2. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place butterfly: (x, y) <- (x + y, x - y). */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* Final butterfly stage folded into the score: |x+y| + |x-y|. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
02068
/*
 * SATD: 8x8 Hadamard transform of the difference (src - dst), then the
 * sum of the absolute transform coefficients.  The first loop runs the
 * row transforms, the second the column transforms with the last
 * butterfly stage folded into BUTTERFLYA.
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    av_assert2(h==8);

    for(i=0; i<8; i++){
        /* row transform on the pixel differences */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* column transform; BUTTERFLYA performs the last stage and
         * accumulates the absolute values */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
02113
/*
 * Intra SATD: Hadamard transform of the source block itself (no
 * reference), sum of absolute coefficients, with the DC contribution
 * subtracted at the end so the score is independent of the block's
 * mean brightness.
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    av_assert2(h==8);

    for(i=0; i<8; i++){
        /* row transform directly on the source pixels */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* temp[0] + temp[32] is the DC term of column 0; remove it */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
02161
/* DCT-domain SAD: forward DCT of the 8x8 pixel difference, then the sum
 * of the absolute transform coefficients. */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    av_assert2(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
02172
#if CONFIG_GPL
/* One 8-point integer DCT pass in the H.264 high-profile style: even
 * part from sums, odd part from differences with >>1 scaling terms.
 * SRC()/DST() are redefined by the caller to select row or column
 * access. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/* SAD in the H.264 8x8 transform domain: rows are transformed in
 * place, then the column pass accumulates absolute values directly
 * through the DST() redefinition. */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
02225
/* Maximum absolute DCT coefficient of the 8x8 pixel difference. */
static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int sum=0, i;

    av_assert2(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}
02241
/*
 * Quantization-error metric: the pixel difference is run through the
 * encoder's quantize / dequantize / IDCT pipeline and compared (SSE)
 * against the untouched copy, i.e. the distortion introduced by
 * quantization at the current qscale.
 */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;  /* pristine copy of the difference */
    int sum=0, i;

    av_assert2(h==8);
    s->mb_intra=0;  /* force the inter quantizer path */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp); //FIXME

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
02264
/*
 * Rate-distortion score of coding the 8x8 difference at the current
 * qscale: the bit cost of the quantized coefficients (estimated from
 * the codec's VLC length tables) plus the SSE distortion of the
 * reconstructed block.  The lambda factor 109/128 * qscale^2 converts
 * bits into the distortion domain.
 */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    av_assert2(h==8);

    /* work on local 8-stride copies so the IDCT can reconstruct in place */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;    /* DC is coded separately for intra blocks */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* sum the VLC lengths of all (run, level) pairs; levels outside
         * the table (|level| > 63 after the +64 bias) cost esc_length */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        /* the last coefficient uses the "last" VLC table */
        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    /* reconstruct into lsrc2 and measure the remaining error */
    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
02340
/*
 * Rate-only metric: the estimated number of bits needed to code the
 * quantized 8x8 difference, using the same VLC length tables as
 * rd8x8_c() but without reconstructing or measuring distortion.
 */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    av_assert2(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;    /* DC coded separately for intra blocks */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* VLC length of each (run, level); escape cost outside the table */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
02399
/* Intra vertical SAD: total absolute difference between each line and
 * the one below it -- a measure of vertical activity within a single
 * block.  Instantiated for widths 8 and 16. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])         \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);        \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02417
/* Inter vertical SAD over a 16-wide block: the absolute difference of
 * the vertical gradients of s1 and s2, summed over all rows past the
 * first. */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int col, row, score = 0;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            score += d >= 0 ? d : -d;
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
02432
/* Squared-value helper for the VSSE metrics below. */
#define SQ(a) ((a)*(a))

/* Intra vertical SSE: squared difference between each line and the one
 * below it.  Instantiated for widths 8 and 16. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])               \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);              \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02451
/* Inter vertical SSE over a 16-wide block: squared difference of the
 * vertical gradients of s1 and s2, summed over all rows past the first. */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int col, row, score = 0;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            score += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
02466
/* Sum of squared differences between an int8 vector and an int16 vector. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int acc = 0, k;
    for (k = 0; k < size; k++) {
        const int d = pix1[k] - pix2[k];
        acc += d * d;
    }
    return acc;
}
02475
/* Build the 16x16 comparison functions from their 8x8 kernels.
 * WRAPPER8_16_SQ is defined earlier in this file -- presumably it sums
 * the scores of the four 8x8 quadrants; verify against its definition. */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02486
/* dst[i] = src0[i] * src1 read back to front (src1[len-1-i]). */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    for (i = 0; i < len; i++)
        dst[i] = src0[i] * src1[len - 1 - i];
}
02493
/* dst[i] = src0[i] * src1[i] + src2[i] (fused multiply-add, elementwise). */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int i;
    for (i = 0; i < len; i++) {
        const float prod = src0[i] * src1[i];
        dst[i] = prod + src2[i];
    }
}
02499
/*
 * Overlap-add windowing (as used by MDCT-based codecs): combine src0
 * and src1 through the 2*len-sample window 'win', writing 2*len
 * outputs.  Index k walks the first half, r = 2*len-1-k its mirror in
 * the second half.
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int r = 2 * len - 1 - k;          /* mirrored index */
        const float s0 = src0[k];
        const float s1 = src1[len - 1 - k];
        dst[k] = s0 * win[r] - s1 * win[k];
        dst[r] = s0 * win[k] + s1 * win[r];
    }
}
02516
/* Scale a float vector: dst[i] = src[i] * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    const float *in = src;
    float *out = dst;
    int n = len;

    while (n-- > 0)
        *out++ = *in++ * mul;
}
02524
02525 static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
02526 int len)
02527 {
02528 int i;
02529 for (i = 0; i < len; i++) {
02530 float t = v1[i] - v2[i];
02531 v1[i] += v2[i];
02532 v2[i] = t;
02533 }
02534 }
02535
/* Butterfly with interleaved output: dst[2i] = src0[i] + src1[i],
 * dst[2i+1] = src0[i] - src1[i]. */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int i;
    for (i = 0; i < len; i++) {
        const float a = src0[i], b = src1[i];
        dst[2 * i]     = a + b;
        dst[2 * i + 1] = a - b;
    }
}
02547
/* Dot product of two float vectors, accumulated in source order. */
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;

    while (len-- > 0)
        acc += *v1++ * *v2++;

    return acc;
}
02558
/* Clip one float, operating on raw bit patterns (used when min < 0 < max):
 * values "above" mini as unsigned bits are negative and below the minimum;
 * positive values past maxisign exceed the maximum. */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    return (a ^ (1U << 31)) > maxisign ? maxi : a;
}
02567
/* Clip a float vector whose bounds have opposite signs, 8 elements per
 * step using the bit-pattern compare in clipf_c_one().
 * NOTE(review): a len that is not a multiple of 8 would run past the
 * end -- presumably callers guarantee len % 8 == 0; confirm. */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i, j;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U << 31);
    uint32_t *dsti = (uint32_t *)dst;
    const uint32_t *srci = (const uint32_t *)src;

    for (i = 0; i < len; i += 8)
        for (j = 0; j < 8; j++)
            dsti[i + j] = clipf_c_one(srci[i + j], mini, maxi, maxisign);
}
/* Clip a float vector to [min, max], 8 elements per step.  Bounds of
 * opposite sign take the bit-twiddling fast path above. */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i, j;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }
    for (i = 0; i < len; i += 8)
        for (j = 0; j < 8; j++)
            dst[i + j] = av_clipf(src[i + j], min, max);
}
02603
/* Dot product of two int16 vectors, accumulated as int. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int i, res = 0;

    for (i = 0; i < order; i++)
        res += v1[i] * v2[i];

    return res;
}
02613
/* Dot product of v1 and v2, while simultaneously updating
 * v1[i] += mul * v3[i].  The product uses the value of v1 *before*
 * the update. */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int i, res = 0;

    for (i = 0; i < order; i++) {
        res   += v1[i] * v2[i];      /* old v1 value */
        v1[i] += mul * v3[i];
    }
    return res;
}
02623
/* Apply a symmetric Q15 window to int16 samples: sample i and its
 * mirror len-1-i share window[i]; products are rounded and shifted
 * back down by 15 bits. */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    const int half = len >> 1;

    for (i = 0; i < half; i++) {
        const int w = window[i];
        output[i]           = (input[i]           * w + (1 << 14)) >> 15;
        output[len - i - 1] = (input[len - i - 1] * w + (1 << 14)) >> 15;
    }
}
02636
/* Clip each int32 element to [min, max], eight per iteration.
 * NOTE(review): len is decremented in steps of 8 under an unsigned
 * compare, so a len that is not a positive multiple of 8 would wrap
 * and overrun -- presumably callers guarantee len % 8 == 0; confirm. */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
02652
/* 11-bit fixed-point IDCT cosine constants (2048 * sqrt(2) * cos(k*pi/16)
 * for k = 1..7; W0 = W4 = 2048). */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/* One horizontal 8-point IDCT pass of the WMV2 transform, in place. */
static void wmv2_idct_row(short * b)
{
    int r1, r2;
    int e0, e2, e4, e6;   /* even-coefficient terms */
    int o1, o3, o5, o7;   /* odd-coefficient terms  */

    o1 = W1 * b[1] + W7 * b[7];
    o7 = W7 * b[1] - W1 * b[7];
    o5 = W5 * b[5] + W3 * b[3];
    o3 = W3 * b[5] - W5 * b[3];
    e2 = W2 * b[2] + W6 * b[6];
    e6 = W6 * b[2] - W2 * b[6];
    e0 = W0 * b[0] + W0 * b[4];
    e4 = W0 * b[0] - W0 * b[4];

    /* 181/256 ~= 1/sqrt(2) */
    r1 = (181 * (o1 - o5 + o7 - o3) + 128) >> 8;
    r2 = (181 * (o1 - o5 - o7 + o3) + 128) >> 8;

    /* final butterflies with rounding, back down 8 bits */
    b[0] = (e0 + e2 + o1 + o5 + (1 << 7)) >> 8;
    b[1] = (e4 + e6 + r1      + (1 << 7)) >> 8;
    b[2] = (e4 - e6 + r2      + (1 << 7)) >> 8;
    b[3] = (e0 - e2 + o7 + o3 + (1 << 7)) >> 8;
    b[4] = (e0 - e2 - o7 - o3 + (1 << 7)) >> 8;
    b[5] = (e4 - e6 - r2      + (1 << 7)) >> 8;
    b[6] = (e4 + e6 - r1      + (1 << 7)) >> 8;
    b[7] = (e0 + e2 - o1 - o5 + (1 << 7)) >> 8;
}

/* One vertical 8-point IDCT pass (stride 8), with an extra >>3
 * pre-scale on the products and a >>14 final down-shift. */
static void wmv2_idct_col(short * b)
{
    int r1, r2;
    int e0, e2, e4, e6;
    int o1, o3, o5, o7;

    o1 = (W1 * b[8*1] + W7 * b[8*7] + 4) >> 3;
    o7 = (W7 * b[8*1] - W1 * b[8*7] + 4) >> 3;
    o5 = (W5 * b[8*5] + W3 * b[8*3] + 4) >> 3;
    o3 = (W3 * b[8*5] - W5 * b[8*3] + 4) >> 3;
    e2 = (W2 * b[8*2] + W6 * b[8*6] + 4) >> 3;
    e6 = (W6 * b[8*2] - W2 * b[8*6] + 4) >> 3;
    e0 = (W0 * b[8*0] + W0 * b[8*4]    ) >> 3;
    e4 = (W0 * b[8*0] - W0 * b[8*4]    ) >> 3;

    r1 = (181 * (o1 - o5 + o7 - o3) + 128) >> 8;
    r2 = (181 * (o1 - o5 - o7 + o3) + 128) >> 8;

    b[8*0] = (e0 + e2 + o1 + o5 + (1 << 13)) >> 14;
    b[8*1] = (e4 + e6 + r1      + (1 << 13)) >> 14;
    b[8*2] = (e4 - e6 + r2      + (1 << 13)) >> 14;
    b[8*3] = (e0 - e2 + o7 + o3 + (1 << 13)) >> 14;

    b[8*4] = (e0 - e2 - o7 - o3 + (1 << 13)) >> 14;
    b[8*5] = (e4 - e6 - r2      + (1 << 13)) >> 14;
    b[8*6] = (e4 + e6 - r1      + (1 << 13)) >> 14;
    b[8*7] = (e0 + e2 - o1 - o5 + (1 << 13)) >> 14;
}
/* Full 8x8 WMV2 IDCT: a 1-D pass over each row, then over each column. */
void ff_wmv2_idct_c(short * block){
    int n;

    for (n = 0; n < 8; n++)
        wmv2_idct_row(block + 8 * n);
    for (n = 0; n < 8; n++)
        wmv2_idct_col(block + n);
}
02725
02726
/* IDCT wrappers: run the transform, then either store (put) the
 * clamped result or add it to the destination (add). */

/* WMV2 IDCT, result stored clamped to 0..255. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* WMV2 IDCT, result added to the destination with clamping. */
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    add_pixels_clamped_c(block, dest, line_size);
}
/* libjpeg-style reverse DCT, stored clamped. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* libjpeg-style reverse DCT, added with clamping. */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
02747
/* Reduced-resolution IDCT wrappers used for the lowres decoding modes:
 * 4x4, 2x2 and 1x1 variants of the jref IDCT. */

static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}

/* 1x1: only the DC coefficient survives; descale by 8 with rounding. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}
02778
02779 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02780
02781
/* One-time initialization of the shared lookup tables. */
av_cold void ff_dsputil_static_init(void)
{
    int i;

    /* ff_cropTbl: identity over 0..255 in the middle, clamped to 0 and
     * 255 in the MAX_NEG_CROP guard bands on either side */
    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    /* squares of -256..255, indexed with a +256 bias */
    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    /* inverse zigzag scan order, stored 1-based */
    for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
02798
/*
 * Verify that the compiler honours 16-byte stack alignment (required by
 * the SIMD code).  Returns 0 on success, -1 if a supposedly aligned
 * stack variable is misaligned; the error is logged only once.
 */
int ff_check_alignment(void){
    static int did_fail=0;
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if((intptr_t)aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
02818
02819 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
02820 {
02821 int i, j;
02822
02823 ff_check_alignment();
02824
02825 #if CONFIG_ENCODERS
02826 if (avctx->bits_per_raw_sample == 10) {
02827 c->fdct = ff_jpeg_fdct_islow_10;
02828 c->fdct248 = ff_fdct248_islow_10;
02829 } else {
02830 if(avctx->dct_algo==FF_DCT_FASTINT) {
02831 c->fdct = ff_fdct_ifast;
02832 c->fdct248 = ff_fdct_ifast248;
02833 }
02834 else if(avctx->dct_algo==FF_DCT_FAAN) {
02835 c->fdct = ff_faandct;
02836 c->fdct248 = ff_faandct248;
02837 }
02838 else {
02839 c->fdct = ff_jpeg_fdct_islow_8;
02840 c->fdct248 = ff_fdct248_islow_8;
02841 }
02842 }
02843 #endif //CONFIG_ENCODERS
02844
02845 if(avctx->lowres==1){
02846 c->idct_put= ff_jref_idct4_put;
02847 c->idct_add= ff_jref_idct4_add;
02848 c->idct = ff_j_rev_dct4;
02849 c->idct_permutation_type= FF_NO_IDCT_PERM;
02850 }else if(avctx->lowres==2){
02851 c->idct_put= ff_jref_idct2_put;
02852 c->idct_add= ff_jref_idct2_add;
02853 c->idct = ff_j_rev_dct2;
02854 c->idct_permutation_type= FF_NO_IDCT_PERM;
02855 }else if(avctx->lowres==3){
02856 c->idct_put= ff_jref_idct1_put;
02857 c->idct_add= ff_jref_idct1_add;
02858 c->idct = ff_j_rev_dct1;
02859 c->idct_permutation_type= FF_NO_IDCT_PERM;
02860 }else{
02861 if (avctx->bits_per_raw_sample == 10) {
02862 c->idct_put = ff_simple_idct_put_10;
02863 c->idct_add = ff_simple_idct_add_10;
02864 c->idct = ff_simple_idct_10;
02865 c->idct_permutation_type = FF_NO_IDCT_PERM;
02866 } else {
02867 if(avctx->idct_algo==FF_IDCT_INT){
02868 c->idct_put= ff_jref_idct_put;
02869 c->idct_add= ff_jref_idct_add;
02870 c->idct = ff_j_rev_dct;
02871 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02872 }else if(avctx->idct_algo==FF_IDCT_WMV2){
02873 c->idct_put= ff_wmv2_idct_put_c;
02874 c->idct_add= ff_wmv2_idct_add_c;
02875 c->idct = ff_wmv2_idct_c;
02876 c->idct_permutation_type= FF_NO_IDCT_PERM;
02877 }else if(avctx->idct_algo==FF_IDCT_FAAN){
02878 c->idct_put= ff_faanidct_put;
02879 c->idct_add= ff_faanidct_add;
02880 c->idct = ff_faanidct;
02881 c->idct_permutation_type= FF_NO_IDCT_PERM;
02882 }else{
02883 c->idct_put = ff_simple_idct_put_8;
02884 c->idct_add = ff_simple_idct_add_8;
02885 c->idct = ff_simple_idct_8;
02886 c->idct_permutation_type= FF_NO_IDCT_PERM;
02887 }
02888 }
02889 }
02890
02891 c->diff_pixels = diff_pixels_c;
02892 c->put_pixels_clamped = put_pixels_clamped_c;
02893 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
02894 c->add_pixels_clamped = add_pixels_clamped_c;
02895 c->sum_abs_dctelem = sum_abs_dctelem_c;
02896 c->gmc1 = gmc1_c;
02897 c->gmc = ff_gmc_c;
02898 c->pix_sum = pix_sum_c;
02899 c->pix_norm1 = pix_norm1_c;
02900
02901 c->fill_block_tab[0] = fill_block16_c;
02902 c->fill_block_tab[1] = fill_block8_c;
02903
02904
02905 c->pix_abs[0][0] = pix_abs16_c;
02906 c->pix_abs[0][1] = pix_abs16_x2_c;
02907 c->pix_abs[0][2] = pix_abs16_y2_c;
02908 c->pix_abs[0][3] = pix_abs16_xy2_c;
02909 c->pix_abs[1][0] = pix_abs8_c;
02910 c->pix_abs[1][1] = pix_abs8_x2_c;
02911 c->pix_abs[1][2] = pix_abs8_y2_c;
02912 c->pix_abs[1][3] = pix_abs8_xy2_c;
02913
02914 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02915 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02916 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02917 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02918 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02919 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02920 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02921 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02922 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02923
02924 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02925 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02926 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02927 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02928 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02929 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02930 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02931 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02932 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02933
02934 #define dspfunc(PFX, IDX, NUM) \
02935 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02936 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02937 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02938 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02939 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02940 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02941 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02942 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02943 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02944 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02945 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02946 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02947 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02948 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02949 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02950 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02951
02952 dspfunc(put_qpel, 0, 16);
02953 dspfunc(put_no_rnd_qpel, 0, 16);
02954
02955 dspfunc(avg_qpel, 0, 16);
02956
02957
02958 dspfunc(put_qpel, 1, 8);
02959 dspfunc(put_no_rnd_qpel, 1, 8);
02960
02961 dspfunc(avg_qpel, 1, 8);
02962
02963
02964 #undef dspfunc
02965
02966 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
02967 ff_mlp_init(c, avctx);
02968 #endif
02969
02970 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
02971 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
02972 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
02973 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
02974 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
02975 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
02976 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
02977 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
02978
02979 #define SET_CMP_FUNC(name) \
02980 c->name[0]= name ## 16_c;\
02981 c->name[1]= name ## 8x8_c;
02982
02983 SET_CMP_FUNC(hadamard8_diff)
02984 c->hadamard8_diff[4]= hadamard8_intra16_c;
02985 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
02986 SET_CMP_FUNC(dct_sad)
02987 SET_CMP_FUNC(dct_max)
02988 #if CONFIG_GPL
02989 SET_CMP_FUNC(dct264_sad)
02990 #endif
02991 c->sad[0]= pix_abs16_c;
02992 c->sad[1]= pix_abs8_c;
02993 c->sse[0]= sse16_c;
02994 c->sse[1]= sse8_c;
02995 c->sse[2]= sse4_c;
02996 SET_CMP_FUNC(quant_psnr)
02997 SET_CMP_FUNC(rd)
02998 SET_CMP_FUNC(bit)
02999 c->vsad[0]= vsad16_c;
03000 c->vsad[4]= vsad_intra16_c;
03001 c->vsad[5]= vsad_intra8_c;
03002 c->vsse[0]= vsse16_c;
03003 c->vsse[4]= vsse_intra16_c;
03004 c->vsse[5]= vsse_intra8_c;
03005 c->nsse[0]= nsse16_c;
03006 c->nsse[1]= nsse8_c;
03007 #if CONFIG_DWT
03008 ff_dsputil_init_dwt(c);
03009 #endif
03010
03011 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
03012
03013 c->add_bytes= add_bytes_c;
03014 c->diff_bytes= diff_bytes_c;
03015 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
03016 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03017 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
03018 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03019 c->bswap_buf= bswap_buf;
03020 c->bswap16_buf = bswap16_buf;
03021
03022 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03023 c->h263_h_loop_filter= h263_h_loop_filter_c;
03024 c->h263_v_loop_filter= h263_v_loop_filter_c;
03025 }
03026
03027 c->h261_loop_filter= h261_loop_filter_c;
03028
03029 c->try_8x8basis= try_8x8basis_c;
03030 c->add_8x8basis= add_8x8basis_c;
03031
03032 #if CONFIG_VORBIS_DECODER
03033 c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
03034 #endif
03035 c->vector_fmul_reverse = vector_fmul_reverse_c;
03036 c->vector_fmul_add = vector_fmul_add_c;
03037 c->vector_fmul_window = vector_fmul_window_c;
03038 c->vector_clipf = vector_clipf_c;
03039 c->scalarproduct_int16 = scalarproduct_int16_c;
03040 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03041 c->apply_window_int16 = apply_window_int16_c;
03042 c->vector_clip_int32 = vector_clip_int32_c;
03043 c->scalarproduct_float = ff_scalarproduct_float_c;
03044 c->butterflies_float = butterflies_float_c;
03045 c->butterflies_float_interleave = butterflies_float_interleave_c;
03046 c->vector_fmul_scalar = vector_fmul_scalar_c;
03047
03048 c->shrink[0]= av_image_copy_plane;
03049 c->shrink[1]= ff_shrink22;
03050 c->shrink[2]= ff_shrink44;
03051 c->shrink[3]= ff_shrink88;
03052
03053 c->prefetch= just_return;
03054
03055 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03056 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03057
03058 #undef FUNC
03059 #undef FUNCC
03060 #define FUNC(f, depth) f ## _ ## depth
03061 #define FUNCC(f, depth) f ## _ ## depth ## _c
03062
03063 #define dspfunc1(PFX, IDX, NUM, depth)\
03064 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
03065 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03066 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03067 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03068
03069 #define dspfunc2(PFX, IDX, NUM, depth)\
03070 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03071 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03072 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03073 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03074 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03075 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03076 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03077 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03078 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03079 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03080 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03081 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03082 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03083 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03084 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03085 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03086
03087
03088 #define BIT_DEPTH_FUNCS(depth, dct)\
03089 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
03090 c->draw_edges = FUNCC(draw_edges , depth);\
03091 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
03092 c->clear_block = FUNCC(clear_block ## dct , depth);\
03093 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
03094 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
03095 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
03096 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
03097 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03098 \
03099 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
03100 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
03101 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
03102 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
03103 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
03104 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
03105 \
03106 dspfunc1(put , 0, 16, depth);\
03107 dspfunc1(put , 1, 8, depth);\
03108 dspfunc1(put , 2, 4, depth);\
03109 dspfunc1(put , 3, 2, depth);\
03110 dspfunc1(put_no_rnd, 0, 16, depth);\
03111 dspfunc1(put_no_rnd, 1, 8, depth);\
03112 dspfunc1(avg , 0, 16, depth);\
03113 dspfunc1(avg , 1, 8, depth);\
03114 dspfunc1(avg , 2, 4, depth);\
03115 dspfunc1(avg , 3, 2, depth);\
03116 dspfunc1(avg_no_rnd, 0, 16, depth);\
03117 dspfunc1(avg_no_rnd, 1, 8, depth);\
03118 \
03119 dspfunc2(put_h264_qpel, 0, 16, depth);\
03120 dspfunc2(put_h264_qpel, 1, 8, depth);\
03121 dspfunc2(put_h264_qpel, 2, 4, depth);\
03122 dspfunc2(put_h264_qpel, 3, 2, depth);\
03123 dspfunc2(avg_h264_qpel, 0, 16, depth);\
03124 dspfunc2(avg_h264_qpel, 1, 8, depth);\
03125 dspfunc2(avg_h264_qpel, 2, 4, depth);
03126
03127 switch (avctx->bits_per_raw_sample) {
03128 case 9:
03129 if (c->dct_bits == 32) {
03130 BIT_DEPTH_FUNCS(9, _32);
03131 } else {
03132 BIT_DEPTH_FUNCS(9, _16);
03133 }
03134 break;
03135 case 10:
03136 if (c->dct_bits == 32) {
03137 BIT_DEPTH_FUNCS(10, _32);
03138 } else {
03139 BIT_DEPTH_FUNCS(10, _16);
03140 }
03141 break;
03142 case 12:
03143 if (c->dct_bits == 32) {
03144 BIT_DEPTH_FUNCS(12, _32);
03145 } else {
03146 BIT_DEPTH_FUNCS(12, _16);
03147 }
03148 break;
03149 case 14:
03150 if (c->dct_bits == 32) {
03151 BIT_DEPTH_FUNCS(14, _32);
03152 } else {
03153 BIT_DEPTH_FUNCS(14, _16);
03154 }
03155 break;
03156 default:
03157 if(avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
03158 BIT_DEPTH_FUNCS(8, _16);
03159 }
03160 break;
03161 }
03162
03163
03164 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
03165 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
03166 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
03167 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
03168 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
03169 if (HAVE_MMI) ff_dsputil_init_mmi (c, avctx);
03170 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
03171 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
03172 if (HAVE_MIPSFPU) ff_dsputil_init_mips (c, avctx);
03173
03174 for (i = 0; i < 4; i++) {
03175 for (j = 0; j < 16; j++) {
03176 if(!c->put_2tap_qpel_pixels_tab[i][j])
03177 c->put_2tap_qpel_pixels_tab[i][j] =
03178 c->put_h264_qpel_pixels_tab[i][j];
03179 if(!c->avg_2tap_qpel_pixels_tab[i][j])
03180 c->avg_2tap_qpel_pixels_tab[i][j] =
03181 c->avg_h264_qpel_pixels_tab[i][j];
03182 }
03183 }
03184
03185 ff_init_scantable_permutation(c->idct_permutation,
03186 c->idct_permutation_type);
03187 }
03188
/**
 * Initialize a DSPContext by forwarding to ff_dsputil_init(), which fills in
 * the function-pointer tables (IDCT/FDCT, motion-compensation, comparison
 * functions, etc.) based on the codec context's parameters.
 *
 * NOTE(review): presumably kept as a public/legacy alias of ff_dsputil_init
 * for ABI/API compatibility — confirm against the public header.
 *
 * @param c     DSPContext to initialize
 * @param avctx codec context whose settings (bit depth, dct/idct algo,
 *              lowres, ...) select the implementations
 */
av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    ff_dsputil_init(c, avctx);
}