00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "vorbis.h"
00040 #include "diracdsp.h"
00041
/* Clip lookup table: index (x + MAX_NEG_CROP) maps x to the 0..255 range.
 * Zero here; NOTE(review): filling is not visible in this file — presumably
 * done during DSP init elsewhere. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square lookup biased by 256 (used as ff_squareTbl + 256 so signed
 * differences in [-256,255] can index it).  Same init caveat as above. */
uint32_t ff_squareTbl[512] = {0, };
00044
/*
 * Instantiate the bit-depth templated pixel primitives.  dsputil_template.c
 * is included once per BIT_DEPTH value; pixeltmp selects an intermediate
 * accumulator type wide enough for that depth (int16_t through 10 bits,
 * int32_t for 12 and 14 bits).
 */
#define pixeltmp int16_t
#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

#undef pixeltmp
#define pixeltmp int32_t
#define BIT_DEPTH 12
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 14
#include "dsputil_template.c"
#undef BIT_DEPTH

/* 8 bit is instantiated last. */
#undef pixeltmp
#define pixeltmp int16_t
#define BIT_DEPTH 8
#include "dsputil_template.c"
#undef pixeltmp
00069
00070
/* Byte-replicated word constants for SIMD-within-a-register tricks:
 * ~0UL/255 is 0x0101...01, so each byte of the word is 0x7f resp. 0x80. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00073
/* Classic 8x8 zigzag scan order (raster index for each scan position). */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00084
00085
00086
/* Zigzag scan variant for 2:4:8 interlaced (field) DCT blocks. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00097
00098
/* Inverse zigzag table, 16-byte aligned for SIMD consumers.
 * NOTE(review): only declared/zeroed here — confirm it is filled during
 * DSP initialization elsewhere. */
DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];

/* Alternate scan, horizontal-first variant. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

/* Alternate scan, vertical-first variant. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00122
00123
/* Coefficient permutation expected by the "simple" MMX IDCT
 * (FF_SIMPLE_IDCT_PERM in ff_init_scantable_permutation below). */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Within-row element order used by the SSE2 IDCT permutation. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00136
00137 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00138 int i;
00139 int end;
00140
00141 st->scantable= src_scantable;
00142
00143 for(i=0; i<64; i++){
00144 int j;
00145 j = src_scantable[i];
00146 st->permutated[i] = permutation[j];
00147 }
00148
00149 end=-1;
00150 for(i=0; i<64; i++){
00151 int j;
00152 j = st->permutated[i];
00153 if(j>end) end=j;
00154 st->raster_end[i]= end;
00155 }
00156 }
00157
00158 void ff_init_scantable_permutation(uint8_t *idct_permutation,
00159 int idct_permutation_type)
00160 {
00161 int i;
00162
00163 switch(idct_permutation_type){
00164 case FF_NO_IDCT_PERM:
00165 for(i=0; i<64; i++)
00166 idct_permutation[i]= i;
00167 break;
00168 case FF_LIBMPEG2_IDCT_PERM:
00169 for(i=0; i<64; i++)
00170 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00171 break;
00172 case FF_SIMPLE_IDCT_PERM:
00173 for(i=0; i<64; i++)
00174 idct_permutation[i]= simple_mmx_permutation[i];
00175 break;
00176 case FF_TRANSPOSE_IDCT_PERM:
00177 for(i=0; i<64; i++)
00178 idct_permutation[i]= ((i&7)<<3) | (i>>3);
00179 break;
00180 case FF_PARTTRANS_IDCT_PERM:
00181 for(i=0; i<64; i++)
00182 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
00183 break;
00184 case FF_SSE2_IDCT_PERM:
00185 for(i=0; i<64; i++)
00186 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
00187 break;
00188 default:
00189 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
00190 }
00191 }
00192
/** Sum of all 256 pixel values of a 16x16 block with the given line size. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int row, col, sum = 0;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += pix[col];
        pix += line_size;
    }
    return sum;
}
00214
/**
 * Sum of squares of all pixels of a 16x16 block.
 * Reads pixels a machine word at a time and squares each byte via the
 * ff_squareTbl lookup (table pointer biased by 256; byte values 0..255
 * index the upper half).
 */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* Straightforward byte-at-a-time reference, kept disabled. */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if HAVE_FAST_64BIT
            /* NOTE(review): the word loads below type-pun pix through
             * uint64_t/uint32_t — assumes suitable alignment and relies on
             * the build's aliasing settings; verify before touching. */
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;  /* advance to the next row */
    }
    return s;
}
00262
/** Byte-swap w 32-bit words from src into dst (dst may equal src). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i = 0;

    /* Main loop, unrolled by 8 words. */
    while (w - i >= 8) {
        dst[i]     = av_bswap32(src[i]);
        dst[i + 1] = av_bswap32(src[i + 1]);
        dst[i + 2] = av_bswap32(src[i + 2]);
        dst[i + 3] = av_bswap32(src[i + 3]);
        dst[i + 4] = av_bswap32(src[i + 4]);
        dst[i + 5] = av_bswap32(src[i + 5]);
        dst[i + 6] = av_bswap32(src[i + 6]);
        dst[i + 7] = av_bswap32(src[i + 7]);
        i += 8;
    }
    /* Remaining tail words. */
    while (i < w) {
        dst[i] = av_bswap32(src[i]);
        i++;
    }
}
00280
/** Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
00286
00287 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00288 {
00289 int s, i;
00290 uint32_t *sq = ff_squareTbl + 256;
00291
00292 s = 0;
00293 for (i = 0; i < h; i++) {
00294 s += sq[pix1[0] - pix2[0]];
00295 s += sq[pix1[1] - pix2[1]];
00296 s += sq[pix1[2] - pix2[2]];
00297 s += sq[pix1[3] - pix2[3]];
00298 pix1 += line_size;
00299 pix2 += line_size;
00300 }
00301 return s;
00302 }
00303
00304 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00305 {
00306 int s, i;
00307 uint32_t *sq = ff_squareTbl + 256;
00308
00309 s = 0;
00310 for (i = 0; i < h; i++) {
00311 s += sq[pix1[0] - pix2[0]];
00312 s += sq[pix1[1] - pix2[1]];
00313 s += sq[pix1[2] - pix2[2]];
00314 s += sq[pix1[3] - pix2[3]];
00315 s += sq[pix1[4] - pix2[4]];
00316 s += sq[pix1[5] - pix2[5]];
00317 s += sq[pix1[6] - pix2[6]];
00318 s += sq[pix1[7] - pix2[7]];
00319 pix1 += line_size;
00320 pix2 += line_size;
00321 }
00322 return s;
00323 }
00324
00325 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00326 {
00327 int s, i;
00328 uint32_t *sq = ff_squareTbl + 256;
00329
00330 s = 0;
00331 for (i = 0; i < h; i++) {
00332 s += sq[pix1[ 0] - pix2[ 0]];
00333 s += sq[pix1[ 1] - pix2[ 1]];
00334 s += sq[pix1[ 2] - pix2[ 2]];
00335 s += sq[pix1[ 3] - pix2[ 3]];
00336 s += sq[pix1[ 4] - pix2[ 4]];
00337 s += sq[pix1[ 5] - pix2[ 5]];
00338 s += sq[pix1[ 6] - pix2[ 6]];
00339 s += sq[pix1[ 7] - pix2[ 7]];
00340 s += sq[pix1[ 8] - pix2[ 8]];
00341 s += sq[pix1[ 9] - pix2[ 9]];
00342 s += sq[pix1[10] - pix2[10]];
00343 s += sq[pix1[11] - pix2[11]];
00344 s += sq[pix1[12] - pix2[12]];
00345 s += sq[pix1[13] - pix2[13]];
00346 s += sq[pix1[14] - pix2[14]];
00347 s += sq[pix1[15] - pix2[15]];
00348
00349 pix1 += line_size;
00350 pix2 += line_size;
00351 }
00352 return s;
00353 }
00354
00355 static void diff_pixels_c(DCTELEM *av_restrict block, const uint8_t *s1,
00356 const uint8_t *s2, int stride){
00357 int i;
00358
00359
00360 for(i=0;i<8;i++) {
00361 block[0] = s1[0] - s2[0];
00362 block[1] = s1[1] - s2[1];
00363 block[2] = s1[2] - s2[2];
00364 block[3] = s1[3] - s2[3];
00365 block[4] = s1[4] - s2[4];
00366 block[5] = s1[5] - s2[5];
00367 block[6] = s1[6] - s2[6];
00368 block[7] = s1[7] - s2[7];
00369 s1 += stride;
00370 s2 += stride;
00371 block += 8;
00372 }
00373 }
00374
00375
00376 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00377 int line_size)
00378 {
00379 int i;
00380
00381
00382 for(i=0;i<8;i++) {
00383 pixels[0] = av_clip_uint8(block[0]);
00384 pixels[1] = av_clip_uint8(block[1]);
00385 pixels[2] = av_clip_uint8(block[2]);
00386 pixels[3] = av_clip_uint8(block[3]);
00387 pixels[4] = av_clip_uint8(block[4]);
00388 pixels[5] = av_clip_uint8(block[5]);
00389 pixels[6] = av_clip_uint8(block[6]);
00390 pixels[7] = av_clip_uint8(block[7]);
00391
00392 pixels += line_size;
00393 block += 8;
00394 }
00395 }
00396
00397 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00398 int line_size)
00399 {
00400 int i;
00401
00402
00403 for(i=0;i<4;i++) {
00404 pixels[0] = av_clip_uint8(block[0]);
00405 pixels[1] = av_clip_uint8(block[1]);
00406 pixels[2] = av_clip_uint8(block[2]);
00407 pixels[3] = av_clip_uint8(block[3]);
00408
00409 pixels += line_size;
00410 block += 8;
00411 }
00412 }
00413
00414 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00415 int line_size)
00416 {
00417 int i;
00418
00419
00420 for(i=0;i<2;i++) {
00421 pixels[0] = av_clip_uint8(block[0]);
00422 pixels[1] = av_clip_uint8(block[1]);
00423
00424 pixels += line_size;
00425 block += 8;
00426 }
00427 }
00428
00429 static void put_signed_pixels_clamped_c(const DCTELEM *block,
00430 uint8_t *av_restrict pixels,
00431 int line_size)
00432 {
00433 int i, j;
00434
00435 for (i = 0; i < 8; i++) {
00436 for (j = 0; j < 8; j++) {
00437 if (*block < -128)
00438 *pixels = 0;
00439 else if (*block > 127)
00440 *pixels = 255;
00441 else
00442 *pixels = (uint8_t)(*block + 128);
00443 block++;
00444 pixels++;
00445 }
00446 pixels += (line_size - 8);
00447 }
00448 }
00449
00450 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00451 int line_size)
00452 {
00453 int i;
00454
00455
00456 for(i=0;i<8;i++) {
00457 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00458 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00459 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00460 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00461 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
00462 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
00463 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
00464 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
00465 pixels += line_size;
00466 block += 8;
00467 }
00468 }
00469
00470 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00471 int line_size)
00472 {
00473 int i;
00474
00475
00476 for(i=0;i<4;i++) {
00477 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00478 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00479 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00480 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00481 pixels += line_size;
00482 block += 8;
00483 }
00484 }
00485
00486 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *av_restrict pixels,
00487 int line_size)
00488 {
00489 int i;
00490
00491
00492 for(i=0;i<2;i++) {
00493 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00494 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00495 pixels += line_size;
00496 block += 8;
00497 }
00498 }
00499
00500 static int sum_abs_dctelem_c(DCTELEM *block)
00501 {
00502 int sum=0, i;
00503 for(i=0; i<64; i++)
00504 sum+= FFABS(block[i]);
00505 return sum;
00506 }
00507
/** Fill a 16-pixel-wide block of height h with a constant byte value. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
00517
/** Fill an 8-pixel-wide block of height h with a constant byte value. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
00527
/**
 * Rounded 2- and 4-sample averages used by the pel interpolation code.
 * Arguments and the whole expansion are parenthesized so operands with
 * lower-precedence operators (e.g. avg2(a ^ b, c)) expand correctly.
 */
#define avg2(a,b) (((a) + (b) + 1) >> 1)
#define avg4(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2)
00530
/**
 * 1/16-pel bilinear interpolation of an 8-pixel-wide block ("GMC1").
 * The four tap weights are the bilinear products of the 4-bit fractional
 * offsets x16 and y16; weights sum to 256, hence the >>8.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B =       x16  * (16 - y16);
    const int C = (16 - x16) *       y16;
    const int D =       x16  *       y16;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[col + stride] + D * src[col + stride + 1] +
                        rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
00553
/**
 * Global motion compensation: for each destination pixel, follow an affine
 * motion field (dxx,dxy,dyx,dyy in 1/(1<<shift) units, start offset ox,oy
 * in 16.16 fixed point) and bilinearly interpolate from src, clamping
 * sample coordinates to [0,width]x[0,height].
 * Processes an 8-pixel-wide strip of height h.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;      /* one full pel in the fractional scale */

    /* Convert to the last valid coordinate for the clamp tests below. */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* Integer position in 1/s units, then split into the
             * fractional part (low shift bits) and the pel position. */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* Fully inside: 2-D bilinear blend of the 4 neighbours. */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* Vertically outside: clamp y, interpolate in x only. */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index  ]*(s-frac_x)
                                          + src[index+1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* Horizontally outside: clamp x, interpolate in y only. */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index       ]*(s-frac_y)
                                          + src[index+stride]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* Outside on both axes: nearest clamped sample. */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index];
                }
            }

            /* Step the motion field along the row ... */
            vx+= dxx;
            vy+= dyx;
        }
        /* ... and down to the next row. */
        ox += dxy;
        oy += dyy;
    }
}
00611
/** Thirdpel MC, zero motion: plain block copy for width 2/4/8/16. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
00620
/** Thirdpel MC (1/3, 0): dst = round((2*p0 + p1) / 3) via 683/2048 fixed point. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
}
00631
/** Thirdpel MC (2/3, 0): dst = round((p0 + 2*p1) / 3) via 683/2048 fixed point. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
}
00642
/** Thirdpel MC (0, 1/3): vertical blend round((2*top + bottom) / 3). */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
}
00653
/** Thirdpel MC (1/3, 1/3): 4-tap bilinear-style blend, weights 4/3/3/2 /12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (4 * src[col] + 3 * src[col + 1] +
                                3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
}
00664
/** Thirdpel MC (1/3, 2/3): 4-tap blend, weights 3/2/4/3 /12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 2 * src[col + 1] +
                                4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
}
00675
/** Thirdpel MC (0, 2/3): vertical blend round((top + 2*bottom) / 3). */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
}
00686
/** Thirdpel MC (2/3, 1/3): 4-tap blend, weights 3/4/2/3 /12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 4 * src[col + 1] +
                                2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
}
00697
/** Thirdpel MC (2/3, 2/3): 4-tap blend, weights 2/3/3/4 /12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (2 * src[col] + 3 * src[col + 1] +
                                3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
}
00708
/** Thirdpel MC, zero motion, averaging with dst: width 2/4/8/16. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
00717
/** Thirdpel MC (1/3, 0), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00728
/** Thirdpel MC (2/3, 0), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00739
/** Thirdpel MC (0, 1/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00750
/** Thirdpel MC (1/3, 1/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (2731 * (4 * src[col] + 3 * src[col + 1] +
                                   3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00761
/** Thirdpel MC (1/3, 2/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (2731 * (3 * src[col] + 2 * src[col + 1] +
                                   4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00772
/** Thirdpel MC (0, 2/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00783
/** Thirdpel MC (2/3, 1/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (2731 * (3 * src[col] + 4 * src[col + 1] +
                                   2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00794
/** Thirdpel MC (2/3, 2/3), rounding-averaged with the existing dst contents. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride)
        for (col = 0; col < width; col++) {
            const int t = (2731 * (2 * src[col] + 3 * src[col + 1] +
                                   3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
}
00805
00806 #define QPEL_MC(r, OPNAME, RND, OP) \
00807 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00808 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00809 int i;\
00810 for(i=0; i<h; i++)\
00811 {\
00812 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00813 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00814 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00815 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00816 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00817 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00818 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00819 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00820 dst+=dstStride;\
00821 src+=srcStride;\
00822 }\
00823 }\
00824 \
00825 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00826 const int w=8;\
00827 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00828 int i;\
00829 for(i=0; i<w; i++)\
00830 {\
00831 const int src0= src[0*srcStride];\
00832 const int src1= src[1*srcStride];\
00833 const int src2= src[2*srcStride];\
00834 const int src3= src[3*srcStride];\
00835 const int src4= src[4*srcStride];\
00836 const int src5= src[5*srcStride];\
00837 const int src6= src[6*srcStride];\
00838 const int src7= src[7*srcStride];\
00839 const int src8= src[8*srcStride];\
00840 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00841 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00842 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00843 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00844 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00845 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00846 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00847 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00848 dst++;\
00849 src++;\
00850 }\
00851 }\
00852 \
00853 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00854 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00855 int i;\
00856 \
00857 for(i=0; i<h; i++)\
00858 {\
00859 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00860 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00861 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00862 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00863 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00864 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00865 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00866 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00867 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00868 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00869 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00870 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00871 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00872 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00873 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00874 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00875 dst+=dstStride;\
00876 src+=srcStride;\
00877 }\
00878 }\
00879 \
00880 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00881 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00882 int i;\
00883 const int w=16;\
00884 for(i=0; i<w; i++)\
00885 {\
00886 const int src0= src[0*srcStride];\
00887 const int src1= src[1*srcStride];\
00888 const int src2= src[2*srcStride];\
00889 const int src3= src[3*srcStride];\
00890 const int src4= src[4*srcStride];\
00891 const int src5= src[5*srcStride];\
00892 const int src6= src[6*srcStride];\
00893 const int src7= src[7*srcStride];\
00894 const int src8= src[8*srcStride];\
00895 const int src9= src[9*srcStride];\
00896 const int src10= src[10*srcStride];\
00897 const int src11= src[11*srcStride];\
00898 const int src12= src[12*srcStride];\
00899 const int src13= src[13*srcStride];\
00900 const int src14= src[14*srcStride];\
00901 const int src15= src[15*srcStride];\
00902 const int src16= src[16*srcStride];\
00903 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00904 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00905 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00906 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00907 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00908 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00909 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00910 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00911 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00912 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00913 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00914 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00915 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00916 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00917 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00918 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00919 dst++;\
00920 src++;\
00921 }\
00922 }\
00923 \
00924 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00925 uint8_t half[64];\
00926 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00927 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00928 }\
00929 \
00930 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00931 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00932 }\
00933 \
00934 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00935 uint8_t half[64];\
00936 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00937 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00938 }\
00939 \
00940 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00941 uint8_t full[16*9];\
00942 uint8_t half[64];\
00943 copy_block9(full, src, 16, stride, 9);\
00944 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00945 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00946 }\
00947 \
00948 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00949 uint8_t full[16*9];\
00950 copy_block9(full, src, 16, stride, 9);\
00951 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00952 }\
00953 \
00954 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00955 uint8_t full[16*9];\
00956 uint8_t half[64];\
00957 copy_block9(full, src, 16, stride, 9);\
00958 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00959 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00960 }\
00961 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00962 uint8_t full[16*9];\
00963 uint8_t halfH[72];\
00964 uint8_t halfV[64];\
00965 uint8_t halfHV[64];\
00966 copy_block9(full, src, 16, stride, 9);\
00967 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00968 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00969 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00970 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00971 }\
00972 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00973 uint8_t full[16*9];\
00974 uint8_t halfH[72];\
00975 uint8_t halfHV[64];\
00976 copy_block9(full, src, 16, stride, 9);\
00977 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00978 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
00979 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00980 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00981 }\
00982 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00983 uint8_t full[16*9];\
00984 uint8_t halfH[72];\
00985 uint8_t halfV[64];\
00986 uint8_t halfHV[64];\
00987 copy_block9(full, src, 16, stride, 9);\
00988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00989 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00991 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00992 }\
00993 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00994 uint8_t full[16*9];\
00995 uint8_t halfH[72];\
00996 uint8_t halfHV[64];\
00997 copy_block9(full, src, 16, stride, 9);\
00998 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00999 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01000 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01001 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01002 }\
01003 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01004 uint8_t full[16*9];\
01005 uint8_t halfH[72];\
01006 uint8_t halfV[64];\
01007 uint8_t halfHV[64];\
01008 copy_block9(full, src, 16, stride, 9);\
01009 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01010 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01011 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01012 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01013 }\
01014 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01015 uint8_t full[16*9];\
01016 uint8_t halfH[72];\
01017 uint8_t halfHV[64];\
01018 copy_block9(full, src, 16, stride, 9);\
01019 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01020 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01021 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01022 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01023 }\
01024 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01025 uint8_t full[16*9];\
01026 uint8_t halfH[72];\
01027 uint8_t halfV[64];\
01028 uint8_t halfHV[64];\
01029 copy_block9(full, src, 16, stride, 9);\
01030 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01031 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01032 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01033 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01034 }\
01035 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01036 uint8_t full[16*9];\
01037 uint8_t halfH[72];\
01038 uint8_t halfHV[64];\
01039 copy_block9(full, src, 16, stride, 9);\
01040 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01041 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01042 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01043 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01044 }\
01045 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01046 uint8_t halfH[72];\
01047 uint8_t halfHV[64];\
01048 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01049 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01050 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01051 }\
01052 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01053 uint8_t halfH[72];\
01054 uint8_t halfHV[64];\
01055 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01056 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01057 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01058 }\
01059 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01060 uint8_t full[16*9];\
01061 uint8_t halfH[72];\
01062 uint8_t halfV[64];\
01063 uint8_t halfHV[64];\
01064 copy_block9(full, src, 16, stride, 9);\
01065 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01066 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01067 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01068 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01069 }\
01070 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01071 uint8_t full[16*9];\
01072 uint8_t halfH[72];\
01073 copy_block9(full, src, 16, stride, 9);\
01074 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01075 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01076 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01077 }\
01078 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01079 uint8_t full[16*9];\
01080 uint8_t halfH[72];\
01081 uint8_t halfV[64];\
01082 uint8_t halfHV[64];\
01083 copy_block9(full, src, 16, stride, 9);\
01084 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01085 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01086 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01087 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01088 }\
01089 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01090 uint8_t full[16*9];\
01091 uint8_t halfH[72];\
01092 copy_block9(full, src, 16, stride, 9);\
01093 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01094 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01095 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01096 }\
01097 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01098 uint8_t halfH[72];\
01099 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01100 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01101 }\
01102 \
01103 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01104 uint8_t half[256];\
01105 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01106 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01107 }\
01108 \
01109 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01110 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01111 }\
01112 \
01113 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01114 uint8_t half[256];\
01115 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01116 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01117 }\
01118 \
01119 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01120 uint8_t full[24*17];\
01121 uint8_t half[256];\
01122 copy_block17(full, src, 24, stride, 17);\
01123 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01124 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01125 }\
01126 \
01127 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01128 uint8_t full[24*17];\
01129 copy_block17(full, src, 24, stride, 17);\
01130 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01131 }\
01132 \
01133 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01134 uint8_t full[24*17];\
01135 uint8_t half[256];\
01136 copy_block17(full, src, 24, stride, 17);\
01137 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01138 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01139 }\
01140 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01141 uint8_t full[24*17];\
01142 uint8_t halfH[272];\
01143 uint8_t halfV[256];\
01144 uint8_t halfHV[256];\
01145 copy_block17(full, src, 24, stride, 17);\
01146 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01147 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01148 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01149 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01150 }\
01151 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01152 uint8_t full[24*17];\
01153 uint8_t halfH[272];\
01154 uint8_t halfHV[256];\
01155 copy_block17(full, src, 24, stride, 17);\
01156 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01157 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01158 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01159 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01160 }\
01161 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01162 uint8_t full[24*17];\
01163 uint8_t halfH[272];\
01164 uint8_t halfV[256];\
01165 uint8_t halfHV[256];\
01166 copy_block17(full, src, 24, stride, 17);\
01167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01168 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01169 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01170 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01171 }\
01172 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01173 uint8_t full[24*17];\
01174 uint8_t halfH[272];\
01175 uint8_t halfHV[256];\
01176 copy_block17(full, src, 24, stride, 17);\
01177 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01178 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01179 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01180 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01181 }\
01182 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01183 uint8_t full[24*17];\
01184 uint8_t halfH[272];\
01185 uint8_t halfV[256];\
01186 uint8_t halfHV[256];\
01187 copy_block17(full, src, 24, stride, 17);\
01188 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01189 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01190 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01191 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01192 }\
01193 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01194 uint8_t full[24*17];\
01195 uint8_t halfH[272];\
01196 uint8_t halfHV[256];\
01197 copy_block17(full, src, 24, stride, 17);\
01198 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01199 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01200 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01201 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01202 }\
01203 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01204 uint8_t full[24*17];\
01205 uint8_t halfH[272];\
01206 uint8_t halfV[256];\
01207 uint8_t halfHV[256];\
01208 copy_block17(full, src, 24, stride, 17);\
01209 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01210 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01211 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01212 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01213 }\
01214 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01215 uint8_t full[24*17];\
01216 uint8_t halfH[272];\
01217 uint8_t halfHV[256];\
01218 copy_block17(full, src, 24, stride, 17);\
01219 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01220 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01221 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01222 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01223 }\
01224 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01225 uint8_t halfH[272];\
01226 uint8_t halfHV[256];\
01227 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01228 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01229 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01230 }\
01231 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01232 uint8_t halfH[272];\
01233 uint8_t halfHV[256];\
01234 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01235 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01236 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01237 }\
01238 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01239 uint8_t full[24*17];\
01240 uint8_t halfH[272];\
01241 uint8_t halfV[256];\
01242 uint8_t halfHV[256];\
01243 copy_block17(full, src, 24, stride, 17);\
01244 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01245 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01246 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01247 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01248 }\
01249 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01250 uint8_t full[24*17];\
01251 uint8_t halfH[272];\
01252 copy_block17(full, src, 24, stride, 17);\
01253 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01254 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01255 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01256 }\
01257 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01258 uint8_t full[24*17];\
01259 uint8_t halfH[272];\
01260 uint8_t halfV[256];\
01261 uint8_t halfHV[256];\
01262 copy_block17(full, src, 24, stride, 17);\
01263 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01264 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01265 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01266 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01267 }\
01268 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01269 uint8_t full[24*17];\
01270 uint8_t halfH[272];\
01271 copy_block17(full, src, 24, stride, 17);\
01272 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01273 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01274 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01275 }\
01276 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01277 uint8_t halfH[272];\
01278 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01279 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01280 }
01281
/* Per-pixel output operators for the QPEL_MC template above: 'b' is the raw
 * filter sum (scaled by 32 -> the >>5); it is rounded (+16) or truncation
 * biased (+15, the "no_rnd" flavour), shifted down and clipped through the
 * cm[] crop table, then either stored (put) or averaged into dst (avg). */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the full quarter-pel MC function set for each combination of
 * output mode (put/avg) and rounding. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
01295
/* Full-pel (mc00) positions need no interpolation: the qpel functions
 * degenerate to plain pixel copy/average, so map them straight onto the
 * generic pixel helpers. Rounding mode is irrelevant for a plain copy,
 * hence the no_rnd variants share the rounding helpers. */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
/* Was ff_put_pixels16x16_8_c, which does not follow the ff_* wrapper naming
 * used by every sibling alias above and was inconsistent with
 * put_qpel16_mc00_c; use the same helper as the rounding variant. */
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
01302
01303 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01304 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01305 int i;
01306
01307 for(i=0; i<h; i++){
01308 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01309 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01310 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01311 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01312 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01313 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01314 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01315 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01316 dst+=dstStride;
01317 src+=srcStride;
01318 }
01319 }
01320
#if CONFIG_RV40_DECODER
/* RV40 mc33 wrappers: the (3,3) sub-pel case is served by the generic
 * 8-bit xy2 pixel helpers (diagonal two-by-two average) rather than a
 * dedicated RV40 filter routine. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif
01335
#if CONFIG_DIRAC_DECODER
/* Dirac motion-compensation wrappers: adapt the generic 8-bit pixel
 * copy/average helpers (and their 2- and 4-source blend variants) to the
 * Dirac calling convention, which passes up to five source pointers in
 * src[]. The 32-pixel-wide variants are composed from two 16-wide calls.
 * NOTE(review): only src[0..3] are read here; the fifth pointer is
 * presumably for other sub-pel code paths — confirm at the call sites. */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
/* Instantiate for plain stores (put) and destination averaging (avg). */
DIRAC_MC(put)
DIRAC_MC(avg)
#endif
01380
01381 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01382 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01383 int i;
01384
01385 for(i=0; i<w; i++){
01386 const int src_1= src[ -srcStride];
01387 const int src0 = src[0 ];
01388 const int src1 = src[ srcStride];
01389 const int src2 = src[2*srcStride];
01390 const int src3 = src[3*srcStride];
01391 const int src4 = src[4*srcStride];
01392 const int src5 = src[5*srcStride];
01393 const int src6 = src[6*srcStride];
01394 const int src7 = src[7*srcStride];
01395 const int src8 = src[8*srcStride];
01396 const int src9 = src[9*srcStride];
01397 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01398 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01399 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01400 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01401 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01402 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01403 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01404 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01405 src++;
01406 dst++;
01407 }
01408 }
01409
/* MSPEL 8x8 MC, horizontal sub-pel position 1: average of the source block
 * and its horizontally half-pel filtered version. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
01415
/* MSPEL 8x8 MC, horizontal sub-pel position 2: pure horizontal half-pel
 * filter, written directly to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01419
/* MSPEL 8x8 MC, horizontal sub-pel position 3: average of the filtered
 * block and the source shifted one pixel to the right. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
01425
/* MSPEL 8x8 MC, vertical sub-pel position 2: pure vertical half-pel
 * filter, written directly to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01429
/* MSPEL 8x8 MC, position (1,2): average of the vertically filtered source
 * (halfV) and the horizontally-then-vertically filtered block (halfHV).
 * The horizontal pass covers 11 rows starting one row above (src-stride)
 * so the vertical pass has its -1 and +2 taps; halfH+8 skips that extra
 * top context row. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* MSPEL 8x8 MC, position (3,2): like mc12 but the vertical-only branch
 * (halfV) is taken from the source shifted one pixel right (src+1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* MSPEL 8x8 MC, position (2,2): horizontal then vertical half-pel filter.
 * 11 rows are filtered horizontally (one extra above, two below) so the
 * vertical pass has full tap context; halfH+8 skips the extra top row. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
01453
/* H.263 deblocking across a horizontal block edge. src points at the first
 * row of the lower block; for each of 8 columns the two pixels on either
 * side of the edge (p0,p1 above; p2,p3 below) are filtered in place.
 * Filter strength is derived from the quantizer via a lookup table. The
 * whole body is compiled out unless an H.263 decoder/encoder is enabled. */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];  /* two rows above the edge */
            int p1= src[x-1*stride];  /* row just above the edge */
            int p2= src[x+0*stride];  /* row just below the edge */
            int p3= src[x+1*stride];  /* two rows below the edge */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;  /* edge activity measure */

            /* Piecewise-linear response: proportional correction for small
             * |d|, tapering back to zero once |d| reaches 2*strength so
             * genuine image edges are left untouched. */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless clamp to 0..255 — valid for values in -256..511 */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker secondary correction applied to the outer pixel pair */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
01490
/* H.263 deblocking across a vertical block edge; identical response to
 * h263_v_loop_filter_c but operating on the two pixels either side of the
 * edge within each of the 8 rows. Compiled out unless an H.263
 * decoder/encoder is enabled. */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];  /* two pixels left of the edge */
            int p1= src[y*stride-1];  /* just left of the edge */
            int p2= src[y*stride+0];  /* just right of the edge */
            int p3= src[y*stride+1];  /* two pixels right of the edge */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;  /* edge activity measure */

            /* Piecewise-linear response: proportional for small |d|,
             * tapering to zero beyond 2*strength (see v filter above). */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless clamp to 0..255 — valid for values in -256..511 */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker secondary correction applied to the outer pixel pair */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
01527
/* H.261 in-loop filter: separable [1 2 1]/4 low-pass applied in place to an
 * 8x8 block. Border rows/columns only pass through the scaling of the other
 * pass (copied at 4x in the vertical pass, then rounded back down). */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int temp[64];
    int row, col;

    /* Vertical pass into temp[], all values scaled by 4; the first and
     * last rows are copied (scaled) unfiltered. */
    for (col = 0; col < 8; col++) {
        temp[col]         = 4 * src[col];
        temp[col + 7 * 8] = 4 * src[col + 7 * stride];
    }
    for (row = 1; row < 7; row++) {
        for (col = 0; col < 8; col++) {
            const int p = row * stride + col;
            temp[row * 8 + col] = src[p - stride] + 2 * src[p] + src[p + stride];
        }
    }

    /* Horizontal pass back into src[] with rounding; the first and last
     * columns are only scaled back down (+2 >> 2). */
    for (row = 0; row < 8; row++) {
        src[row * stride]     = (temp[row * 8]     + 2) >> 2;
        src[row * stride + 7] = (temp[row * 8 + 7] + 2) >> 2;
        for (col = 1; col < 7; col++) {
            const int t = row * 8 + col;
            src[row * stride + col] = (temp[t - 1] + 2 * temp[t] + temp[t + 1] + 8) >> 4;
        }
    }
}
01554
/* Sum of absolute differences (SAD) over a 16-wide block of height h.
 * The context pointer v is unused; line_size is the stride of both blocks. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01582
/* SAD of a 16-wide block against the horizontal half-pel interpolation of
 * pix2 (avg2 of each pixel and its right neighbour; reads pix2[16]). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01610
/* SAD of a 16-wide block against the vertical half-pel interpolation of
 * pix2 (avg2 of each pixel and the one directly below). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sad;
}
01640
/* SAD of a 16-wide block against the diagonal half-pel interpolation of
 * pix2 (avg4 of the 2x2 neighbourhood; reads one extra column and row). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                        below[col], below[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sad;
}
01670
/* Sum of absolute differences (SAD) over an 8-wide block of height h.
 * The context pointer v is unused. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01690
/* SAD of an 8-wide block against the horizontal half-pel interpolation of
 * pix2 (avg2 of each pixel and its right neighbour; reads pix2[8]). */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01710
/* SAD of an 8-wide block against the vertical half-pel interpolation of
 * pix2 (avg2 of each pixel and the one directly below). */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sad;
}
01732
/* SAD of an 8-wide block against the diagonal half-pel interpolation of
 * pix2 (avg4 of the 2x2 neighbourhood; reads one extra column and row). */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                        below[col], below[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sad;
}
01754
/*
 * "Noise shaping" SSE for a 16xh block: the plain sum of squared errors
 * (score1) plus the difference in local 2x2 gradient magnitudes between
 * the two blocks (score2).  score2 penalizes a reconstruction whose
 * texture differs from the source even when the pixel-wise error is
 * small.  The weight comes from avctx->nsse_weight, or 8 when called
 * without a context.
 */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;   /* plain SSE */
    int score2=0;   /* signed gradient-difference accumulator */
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
        }
        if(y+1<h){  /* gradients need the next line */
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                             - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                             - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}

/* 8-wide variant of nsse16_c; identical structure. */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
        }
        if(y+1<h){
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                             - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                             - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
01806
/*
 * Encoder helpers for basis-vector refinement of an 8x8 residual.
 * BASIS_SHIFT / RECON_SHIFT are fixed-point shift amounts defined
 * elsewhere in the project.
 */

/* Return the weighted squared error that would result from adding
 * scale * basis to the residual rem (nothing is modified). */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        /* rounded fixed-point scale of the basis coefficient */
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}

/* Commit the update: rem += scale * basis (same rounding as above). */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
01829
01838 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01839 {
01840 int i;
01841 DCTELEM temp[64];
01842
01843 if(last<=0) return;
01844
01845
01846 for(i=0; i<=last; i++){
01847 const int j= scantable[i];
01848 temp[j]= block[j];
01849 block[j]=0;
01850 }
01851
01852 for(i=0; i<=last; i++){
01853 const int j= scantable[i];
01854 const int perm_j= permutation[j];
01855 block[perm_j]= temp[j];
01856 }
01857 }
01858
/* Dummy comparison function: every pair of blocks scores 0 (FF_CMP_ZERO). */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
01862
/*
 * Fill cmp[0..5] with the comparison functions of the requested metric,
 * taken from the DSPContext.  Only the low byte of 'type' selects the
 * metric (FF_CMP_*); an unknown value leaves the slots zeroed (from the
 * initial memset) and logs an error.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
01922
/**
 * dst[i] += src[i] for i in [0, w), each byte wrapping modulo 256.
 *
 * The main loop processes sizeof(long) bytes at once, SWAR style:
 * ((a & 0x7f..) + (b & 0x7f..)) ^ ((a ^ b) & 0x80..) adds every byte
 * without letting carries cross byte boundaries.
 *
 * Fix: the previous loop condition 'i <= w - sizeof(long)' promoted the
 * subtraction to size_t, so any w < sizeof(long) wrapped to a huge
 * value and the loop read and wrote far past both buffers.  Casting
 * sizeof to long keeps the comparison signed.
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i = 0;
    const unsigned long m7f = (~0UL / 255) * 0x7f; /* 0x7f in every byte */
    const unsigned long m80 = (~0UL / 255) * 0x80; /* 0x80 in every byte */

    for (; i <= w - (long)sizeof(long); i += sizeof(long)) {
        unsigned long a = *(long*)(src + i);
        unsigned long b = *(long*)(dst + i);
        *(long*)(dst + i) = (long)(((a & m7f) + (b & m7f)) ^ ((a ^ b) & m80));
    }
    for (; i < w; i++)          /* byte-wise tail */
        dst[i] += src[i];
}
01933
/**
 * dst[i] = src1[i] - src2[i] for i in [0, w), each byte modulo 256.
 *
 * SWAR main loop: ((a | 0x80..) - (b & 0x7f..)) ^ ((a^b^0x80..) & 0x80..)
 * subtracts every byte without borrows crossing byte boundaries.  On
 * targets without fast unaligned loads, a byte-wise loop handles a
 * misaligned src2.
 *
 * Fix: 'i <= w - sizeof(long)' compared against a size_t, so a w
 * smaller than sizeof(long) wrapped around and the SWAR loop overran
 * all three buffers; the sizeof operand is now cast to long.
 */
static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
    long i = 0;
    const unsigned long m7f = (~0UL / 255) * 0x7f; /* 0x7f in every byte */
    const unsigned long m80 = (~0UL / 255) * 0x80; /* 0x80 in every byte */

#if !HAVE_FAST_UNALIGNED
    if ((long)src2 & (sizeof(long) - 1)) {
        for (i = 0; i + 7 < w; i += 8) {
            dst[i + 0] = src1[i + 0] - src2[i + 0];
            dst[i + 1] = src1[i + 1] - src2[i + 1];
            dst[i + 2] = src1[i + 2] - src2[i + 2];
            dst[i + 3] = src1[i + 3] - src2[i + 3];
            dst[i + 4] = src1[i + 4] - src2[i + 4];
            dst[i + 5] = src1[i + 5] - src2[i + 5];
            dst[i + 6] = src1[i + 6] - src2[i + 6];
            dst[i + 7] = src1[i + 7] - src2[i + 7];
        }
    } else
#endif
    for (; i <= w - (long)sizeof(long); i += sizeof(long)) {
        unsigned long a = *(long*)(src1 + i);
        unsigned long b = *(long*)(src2 + i);
        *(long*)(dst + i) = (long)(((a | m80) - (b & m7f)) ^ ((a ^ b ^ m80) & m80));
    }
    for (; i < w; i++)          /* byte-wise tail */
        dst[i] = src1[i] - src2[i];
}
01958
/*
 * HuffYUV median prediction: each pixel is predicted from its left
 * neighbour (l), the pixel above (src1[i]) and the one above-left (lt)
 * via mid_pred() from mathops.h, with the gradient term clamped to
 * 8 bits.  The running l / lt state is carried across calls through
 * *left and *left_top.
 */

/* Decode direction: dst[i] = prediction + residual diff[i] (mod 256). */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt= src1[i];
        dst[i]= l;
    }

    *left= l;
    *left_top= lt;
}

/* Encode direction: dst[i] = src2[i] - prediction (mod 256);
 * src1 is presumably the previous line, src2 the current one --
 * confirm against callers. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}
01993
/**
 * HuffYUV left prediction: running sum of src into dst.
 *
 * Each output byte is the low 8 bits of the accumulator; the full
 * (un-truncated) accumulator is returned so it can seed the next call.
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    /* unrolled by two, then a tail for odd w */
    for (i = 0; i + 1 < w; i += 2) {
        acc += src[i];
        dst[i] = acc;
        acc += src[i + 1];
        dst[i + 1] = acc;
    }
    for (; i < w; i++) {
        acc += src[i];
        dst[i] = acc;
    }

    return acc;
}
02012
/* Byte offsets of the four channels inside a packed 32-bit BGRA pixel. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/* HuffYUV left prediction over packed BGRA pixels: per-channel running
 * sums; the stores truncate to 8 bits, the full int accumulators are
 * handed back through the pointer arguments. */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int r = *red, g = *green, b = *blue, a = *alpha;

    for (i = 0; i < w; i++) {
        const uint8_t *in  = src + 4 * i;
        uint8_t       *out = dst + 4 * i;

        b += in[B];
        g += in[G];
        r += in[R];
        a += in[A];

        out[B] = b;
        out[G] = g;
        out[R] = r;
        out[A] = a;
    }

    *red   = r;
    *green = g;
    *blue  = b;
    *alpha = a;
}
#undef B
#undef G
#undef R
#undef A
02053
/* Butterfly into two outputs: o1 = i1 + i2, o2 = i1 - i2. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place butterfly: (x, y) <- (x + y, x - y). */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* Final butterfly stage folded into the score: |x+y| + |x-y|. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
02068
/*
 * SATD: 8x8 Hadamard transform of the difference (src - dst), then the
 * sum of the absolute transform coefficients.  The first loop runs the
 * row transforms, the second the column transforms with the last
 * butterfly stage folded into BUTTERFLYA.
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    av_assert2(h==8);

    for(i=0; i<8; i++){
        /* row transform on the pixel differences */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* column transform; BUTTERFLYA performs the last stage and
         * accumulates the absolute values */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
02113
/*
 * Intra SATD: Hadamard transform of the source block itself (no
 * reference), sum of absolute coefficients, with the DC contribution
 * subtracted at the end so the score is independent of the block's
 * mean brightness.
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    av_assert2(h==8);

    for(i=0; i<8; i++){
        /* row transform directly on the source pixels */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* temp[0] + temp[32] is the DC term of column 0; remove it */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
02161
/* DCT-domain SAD: forward DCT of the 8x8 pixel difference, then the sum
 * of the absolute transform coefficients. */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    av_assert2(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
02172
#if CONFIG_GPL
/* One 8-point integer DCT pass in the H.264 high-profile style: even
 * part from sums, odd part from differences with >>1 scaling terms.
 * SRC()/DST() are redefined by the caller to select row or column
 * access. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/* SAD in the H.264 8x8 transform domain: rows are transformed in
 * place, then the column pass accumulates absolute values directly
 * through the DST() redefinition. */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
02225
/* Maximum absolute DCT coefficient of the 8x8 pixel difference. */
static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int sum=0, i;

    av_assert2(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}
02241
/*
 * Quantization-error metric: the pixel difference is run through the
 * encoder's quantize / dequantize / IDCT pipeline and compared (SSE)
 * against the untouched copy, i.e. the distortion introduced by
 * quantization at the current qscale.
 */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;  /* pristine copy of the difference */
    int sum=0, i;

    av_assert2(h==8);
    s->mb_intra=0;  /* force the inter quantizer path */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp); //FIXME

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
02264
/*
 * Rate-distortion score of coding the 8x8 difference at the current
 * qscale: the bit cost of the quantized coefficients (estimated from
 * the codec's VLC length tables) plus the SSE distortion of the
 * reconstructed block.  The lambda factor 109/128 * qscale^2 converts
 * bits into the distortion domain.
 */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    av_assert2(h==8);

    /* work on local 8-stride copies so the IDCT can reconstruct in place */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;    /* DC is coded separately for intra blocks */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* sum the VLC lengths of all (run, level) pairs; levels outside
         * the table (|level| > 63 after the +64 bias) cost esc_length */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        /* the last coefficient uses the "last" VLC table */
        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    /* reconstruct into lsrc2 and measure the remaining error */
    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
02340
/*
 * Rate-only metric: the estimated number of bits needed to code the
 * quantized 8x8 difference, using the same VLC length tables as
 * rd8x8_c() but without reconstructing or measuring distortion.
 */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    av_assert2(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;    /* DC coded separately for intra blocks */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* VLC length of each (run, level); escape cost outside the table */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
02399
/* Intra vertical SAD: total absolute difference between each line and
 * the one below it -- a measure of vertical activity within a single
 * block.  Instantiated for widths 8 and 16. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])         \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);        \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02417
/* Inter vertical SAD over a 16-wide block: the absolute difference of
 * the vertical gradients of s1 and s2, summed over all rows past the
 * first. */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int col, row, score = 0;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            score += d >= 0 ? d : -d;
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
02432
/* Squared-value helper for the VSSE metrics below. */
#define SQ(a) ((a)*(a))

/* Intra vertical SSE: squared difference between each line and the one
 * below it.  Instantiated for widths 8 and 16. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])               \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);              \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02451
/* Inter vertical SSE over a 16-wide block: squared difference of the
 * vertical gradients of s1 and s2, summed over all rows past the first. */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int col, row, score = 0;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            score += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
02466
/* Sum of squared differences between an int8 vector and an int16 vector. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int acc = 0, k;
    for (k = 0; k < size; k++) {
        const int d = pix1[k] - pix2[k];
        acc += d * d;
    }
    return acc;
}
02475
/* Build the 16x16 comparison functions from their 8x8 kernels.
 * WRAPPER8_16_SQ is defined earlier in this file -- presumably it sums
 * the scores of the four 8x8 quadrants; verify against its definition. */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02486
/* dst[i] = src0[i] * src1 read back to front (src1[len-1-i]). */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    for (i = 0; i < len; i++)
        dst[i] = src0[i] * src1[len - 1 - i];
}
02493
/* dst[i] = src0[i] * src1[i] + src2[i] (fused multiply-add, elementwise). */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int i;
    for (i = 0; i < len; i++) {
        const float prod = src0[i] * src1[i];
        dst[i] = prod + src2[i];
    }
}
02499
/*
 * Overlap-add windowing (as used by MDCT-based codecs): combine src0
 * and src1 through the 2*len-sample window 'win', writing 2*len
 * outputs.  Index k walks the first half, r = 2*len-1-k its mirror in
 * the second half.
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int r = 2 * len - 1 - k;          /* mirrored index */
        const float s0 = src0[k];
        const float s1 = src1[len - 1 - k];
        dst[k] = s0 * win[r] - s1 * win[k];
        dst[r] = s0 * win[k] + s1 * win[r];
    }
}
02516
/* Scale a float vector: dst[i] = src[i] * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    const float *in = src;
    float *out = dst;
    int n = len;

    while (n-- > 0)
        *out++ = *in++ * mul;
}
02524
02525 static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
02526 int len)
02527 {
02528 int i;
02529 for (i = 0; i < len; i++) {
02530 float t = v1[i] - v2[i];
02531 v1[i] += v2[i];
02532 v2[i] = t;
02533 }
02534 }
02535
/* Butterfly with interleaved output: dst[2i] = src0[i] + src1[i],
 * dst[2i+1] = src0[i] - src1[i]. */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int i;
    for (i = 0; i < len; i++) {
        const float a = src0[i], b = src1[i];
        dst[2 * i]     = a + b;
        dst[2 * i + 1] = a - b;
    }
}
02547
/* Dot product of two float vectors, accumulated in source order. */
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;

    while (len-- > 0)
        acc += *v1++ * *v2++;

    return acc;
}
02558
/* Clip one float, operating on raw bit patterns (used when min < 0 < max):
 * values "above" mini as unsigned bits are negative and below the minimum;
 * positive values past maxisign exceed the maximum. */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    return (a ^ (1U << 31)) > maxisign ? maxi : a;
}
02567
/* Clip a float vector whose bounds have opposite signs, 8 elements per
 * step using the bit-pattern compare in clipf_c_one().
 * NOTE(review): a len that is not a multiple of 8 would run past the
 * end -- presumably callers guarantee len % 8 == 0; confirm. */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i, j;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U << 31);
    uint32_t *dsti = (uint32_t *)dst;
    const uint32_t *srci = (const uint32_t *)src;

    for (i = 0; i < len; i += 8)
        for (j = 0; j < 8; j++)
            dsti[i + j] = clipf_c_one(srci[i + j], mini, maxi, maxisign);
}
/* Clip a float vector to [min, max], 8 elements per step.  Bounds of
 * opposite sign take the bit-twiddling fast path above. */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i, j;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }
    for (i = 0; i < len; i += 8)
        for (j = 0; j < 8; j++)
            dst[i + j] = av_clipf(src[i + j], min, max);
}
02603
/* Dot product of two int16 vectors, accumulated as int. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int i, res = 0;

    for (i = 0; i < order; i++)
        res += v1[i] * v2[i];

    return res;
}
02613
/* Dot product of v1 and v2, while simultaneously updating
 * v1[i] += mul * v3[i].  The product uses the value of v1 *before*
 * the update. */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int i, res = 0;

    for (i = 0; i < order; i++) {
        res   += v1[i] * v2[i];      /* old v1 value */
        v1[i] += mul * v3[i];
    }
    return res;
}
02623
/* Apply a symmetric Q15 window to int16 samples: sample i and its
 * mirror len-1-i share window[i]; products are rounded and shifted
 * back down by 15 bits. */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    const int half = len >> 1;

    for (i = 0; i < half; i++) {
        const int w = window[i];
        output[i]           = (input[i]           * w + (1 << 14)) >> 15;
        output[len - i - 1] = (input[len - i - 1] * w + (1 << 14)) >> 15;
    }
}
02636
/* Clip each int32 element to [min, max], eight per iteration.
 * NOTE(review): len is decremented in steps of 8 under an unsigned
 * compare, so a len that is not a positive multiple of 8 would wrap
 * and overrun -- presumably callers guarantee len % 8 == 0; confirm. */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
02652
/* 11-bit fixed-point IDCT cosine constants (2048 * sqrt(2) * cos(k*pi/16)
 * for k = 1..7; W0 = W4 = 2048). */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/* One horizontal 8-point IDCT pass of the WMV2 transform, in place. */
static void wmv2_idct_row(short * b)
{
    int r1, r2;
    int e0, e2, e4, e6;   /* even-coefficient terms */
    int o1, o3, o5, o7;   /* odd-coefficient terms  */

    o1 = W1 * b[1] + W7 * b[7];
    o7 = W7 * b[1] - W1 * b[7];
    o5 = W5 * b[5] + W3 * b[3];
    o3 = W3 * b[5] - W5 * b[3];
    e2 = W2 * b[2] + W6 * b[6];
    e6 = W6 * b[2] - W2 * b[6];
    e0 = W0 * b[0] + W0 * b[4];
    e4 = W0 * b[0] - W0 * b[4];

    /* 181/256 ~= 1/sqrt(2) */
    r1 = (181 * (o1 - o5 + o7 - o3) + 128) >> 8;
    r2 = (181 * (o1 - o5 - o7 + o3) + 128) >> 8;

    /* final butterflies with rounding, back down 8 bits */
    b[0] = (e0 + e2 + o1 + o5 + (1 << 7)) >> 8;
    b[1] = (e4 + e6 + r1      + (1 << 7)) >> 8;
    b[2] = (e4 - e6 + r2      + (1 << 7)) >> 8;
    b[3] = (e0 - e2 + o7 + o3 + (1 << 7)) >> 8;
    b[4] = (e0 - e2 - o7 - o3 + (1 << 7)) >> 8;
    b[5] = (e4 - e6 - r2      + (1 << 7)) >> 8;
    b[6] = (e4 + e6 - r1      + (1 << 7)) >> 8;
    b[7] = (e0 + e2 - o1 - o5 + (1 << 7)) >> 8;
}

/* One vertical 8-point IDCT pass (stride 8), with an extra >>3
 * pre-scale on the products and a >>14 final down-shift. */
static void wmv2_idct_col(short * b)
{
    int r1, r2;
    int e0, e2, e4, e6;
    int o1, o3, o5, o7;

    o1 = (W1 * b[8*1] + W7 * b[8*7] + 4) >> 3;
    o7 = (W7 * b[8*1] - W1 * b[8*7] + 4) >> 3;
    o5 = (W5 * b[8*5] + W3 * b[8*3] + 4) >> 3;
    o3 = (W3 * b[8*5] - W5 * b[8*3] + 4) >> 3;
    e2 = (W2 * b[8*2] + W6 * b[8*6] + 4) >> 3;
    e6 = (W6 * b[8*2] - W2 * b[8*6] + 4) >> 3;
    e0 = (W0 * b[8*0] + W0 * b[8*4]    ) >> 3;
    e4 = (W0 * b[8*0] - W0 * b[8*4]    ) >> 3;

    r1 = (181 * (o1 - o5 + o7 - o3) + 128) >> 8;
    r2 = (181 * (o1 - o5 - o7 + o3) + 128) >> 8;

    b[8*0] = (e0 + e2 + o1 + o5 + (1 << 13)) >> 14;
    b[8*1] = (e4 + e6 + r1      + (1 << 13)) >> 14;
    b[8*2] = (e4 - e6 + r2      + (1 << 13)) >> 14;
    b[8*3] = (e0 - e2 + o7 + o3 + (1 << 13)) >> 14;

    b[8*4] = (e0 - e2 - o7 - o3 + (1 << 13)) >> 14;
    b[8*5] = (e4 - e6 - r2      + (1 << 13)) >> 14;
    b[8*6] = (e4 + e6 - r1      + (1 << 13)) >> 14;
    b[8*7] = (e0 + e2 - o1 - o5 + (1 << 13)) >> 14;
}
/* Full 8x8 WMV2 IDCT: a 1-D pass over each row, then over each column. */
void ff_wmv2_idct_c(short * block){
    int n;

    for (n = 0; n < 8; n++)
        wmv2_idct_row(block + 8 * n);
    for (n = 0; n < 8; n++)
        wmv2_idct_col(block + n);
}
02725
02726
/* IDCT wrappers: run the transform, then either store (put) the
 * clamped result or add it to the destination (add). */

/* WMV2 IDCT, result stored clamped to 0..255. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* WMV2 IDCT, result added to the destination with clamping. */
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    add_pixels_clamped_c(block, dest, line_size);
}
/* libjpeg-style reverse DCT, stored clamped. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* libjpeg-style reverse DCT, added with clamping. */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
02747
/* Reduced-resolution IDCT wrappers used for the lowres decoding modes:
 * 4x4, 2x2 and 1x1 variants of the jref IDCT. */

static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}

/* 1x1: only the DC coefficient survives; descale by 8 with rounding. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}
02778
02779 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02780
02781
/* One-time initialization of the shared lookup tables. */
av_cold void ff_dsputil_static_init(void)
{
    int i;

    /* ff_cropTbl: identity over 0..255 in the middle, clamped to 0 and
     * 255 in the MAX_NEG_CROP guard bands on either side */
    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    /* squares of -256..255, indexed with a +256 bias */
    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    /* inverse zigzag scan order, stored 1-based */
    for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
02798
/*
 * Verify that the compiler honours 16-byte stack alignment (required by
 * the SIMD code).  Returns 0 on success, -1 if a supposedly aligned
 * stack variable is misaligned; the error is logged only once.
 */
int ff_check_alignment(void){
    static int did_fail=0;
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if((intptr_t)aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
02818
02819 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
02820 {
02821 int i, j;
02822
02823 ff_check_alignment();
02824
02825 #if CONFIG_ENCODERS
02826 if (avctx->bits_per_raw_sample == 10) {
02827 c->fdct = ff_jpeg_fdct_islow_10;
02828 c->fdct248 = ff_fdct248_islow_10;
02829 } else {
02830 if(avctx->dct_algo==FF_DCT_FASTINT) {
02831 c->fdct = ff_fdct_ifast;
02832 c->fdct248 = ff_fdct_ifast248;
02833 }
02834 else if(avctx->dct_algo==FF_DCT_FAAN) {
02835 c->fdct = ff_faandct;
02836 c->fdct248 = ff_faandct248;
02837 }
02838 else {
02839 c->fdct = ff_jpeg_fdct_islow_8;
02840 c->fdct248 = ff_fdct248_islow_8;
02841 }
02842 }
02843 #endif //CONFIG_ENCODERS
02844
02845 if(avctx->lowres==1){
02846 c->idct_put= ff_jref_idct4_put;
02847 c->idct_add= ff_jref_idct4_add;
02848 c->idct = ff_j_rev_dct4;
02849 c->idct_permutation_type= FF_NO_IDCT_PERM;
02850 }else if(avctx->lowres==2){
02851 c->idct_put= ff_jref_idct2_put;
02852 c->idct_add= ff_jref_idct2_add;
02853 c->idct = ff_j_rev_dct2;
02854 c->idct_permutation_type= FF_NO_IDCT_PERM;
02855 }else if(avctx->lowres==3){
02856 c->idct_put= ff_jref_idct1_put;
02857 c->idct_add= ff_jref_idct1_add;
02858 c->idct = ff_j_rev_dct1;
02859 c->idct_permutation_type= FF_NO_IDCT_PERM;
02860 }else{
02861 if (avctx->bits_per_raw_sample == 10) {
02862 c->idct_put = ff_simple_idct_put_10;
02863 c->idct_add = ff_simple_idct_add_10;
02864 c->idct = ff_simple_idct_10;
02865 c->idct_permutation_type = FF_NO_IDCT_PERM;
02866 } else {
02867 if(avctx->idct_algo==FF_IDCT_INT){
02868 c->idct_put= ff_jref_idct_put;
02869 c->idct_add= ff_jref_idct_add;
02870 c->idct = ff_j_rev_dct;
02871 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02872 }else if(avctx->idct_algo==FF_IDCT_WMV2){
02873 c->idct_put= ff_wmv2_idct_put_c;
02874 c->idct_add= ff_wmv2_idct_add_c;
02875 c->idct = ff_wmv2_idct_c;
02876 c->idct_permutation_type= FF_NO_IDCT_PERM;
02877 }else if(avctx->idct_algo==FF_IDCT_FAAN){
02878 c->idct_put= ff_faanidct_put;
02879 c->idct_add= ff_faanidct_add;
02880 c->idct = ff_faanidct;
02881 c->idct_permutation_type= FF_NO_IDCT_PERM;
02882 }else{
02883 c->idct_put = ff_simple_idct_put_8;
02884 c->idct_add = ff_simple_idct_add_8;
02885 c->idct = ff_simple_idct_8;
02886 c->idct_permutation_type= FF_NO_IDCT_PERM;
02887 }
02888 }
02889 }
02890
02891 c->diff_pixels = diff_pixels_c;
02892 c->put_pixels_clamped = put_pixels_clamped_c;
02893 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
02894 c->add_pixels_clamped = add_pixels_clamped_c;
02895 c->sum_abs_dctelem = sum_abs_dctelem_c;
02896 c->gmc1 = gmc1_c;
02897 c->gmc = ff_gmc_c;
02898 c->pix_sum = pix_sum_c;
02899 c->pix_norm1 = pix_norm1_c;
02900
02901 c->fill_block_tab[0] = fill_block16_c;
02902 c->fill_block_tab[1] = fill_block8_c;
02903
02904
02905 c->pix_abs[0][0] = pix_abs16_c;
02906 c->pix_abs[0][1] = pix_abs16_x2_c;
02907 c->pix_abs[0][2] = pix_abs16_y2_c;
02908 c->pix_abs[0][3] = pix_abs16_xy2_c;
02909 c->pix_abs[1][0] = pix_abs8_c;
02910 c->pix_abs[1][1] = pix_abs8_x2_c;
02911 c->pix_abs[1][2] = pix_abs8_y2_c;
02912 c->pix_abs[1][3] = pix_abs8_xy2_c;
02913
02914 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02915 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02916 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02917 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02918 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02919 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02920 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02921 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02922 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02923
02924 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02925 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02926 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02927 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02928 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02929 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02930 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02931 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02932 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02933
02934 #define dspfunc(PFX, IDX, NUM) \
02935 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02936 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02937 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02938 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02939 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02940 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02941 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02942 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02943 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02944 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02945 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02946 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02947 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02948 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02949 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02950 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02951
02952 dspfunc(put_qpel, 0, 16);
02953 dspfunc(put_no_rnd_qpel, 0, 16);
02954
02955 dspfunc(avg_qpel, 0, 16);
02956
02957
02958 dspfunc(put_qpel, 1, 8);
02959 dspfunc(put_no_rnd_qpel, 1, 8);
02960
02961 dspfunc(avg_qpel, 1, 8);
02962
02963
02964 #undef dspfunc
02965
02966 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
02967 ff_mlp_init(c, avctx);
02968 #endif
02969
02970 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
02971 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
02972 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
02973 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
02974 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
02975 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
02976 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
02977 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
02978
02979 #define SET_CMP_FUNC(name) \
02980 c->name[0]= name ## 16_c;\
02981 c->name[1]= name ## 8x8_c;
02982
02983 SET_CMP_FUNC(hadamard8_diff)
02984 c->hadamard8_diff[4]= hadamard8_intra16_c;
02985 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
02986 SET_CMP_FUNC(dct_sad)
02987 SET_CMP_FUNC(dct_max)
02988 #if CONFIG_GPL
02989 SET_CMP_FUNC(dct264_sad)
02990 #endif
02991 c->sad[0]= pix_abs16_c;
02992 c->sad[1]= pix_abs8_c;
02993 c->sse[0]= sse16_c;
02994 c->sse[1]= sse8_c;
02995 c->sse[2]= sse4_c;
02996 SET_CMP_FUNC(quant_psnr)
02997 SET_CMP_FUNC(rd)
02998 SET_CMP_FUNC(bit)
02999 c->vsad[0]= vsad16_c;
03000 c->vsad[4]= vsad_intra16_c;
03001 c->vsad[5]= vsad_intra8_c;
03002 c->vsse[0]= vsse16_c;
03003 c->vsse[4]= vsse_intra16_c;
03004 c->vsse[5]= vsse_intra8_c;
03005 c->nsse[0]= nsse16_c;
03006 c->nsse[1]= nsse8_c;
03007 #if CONFIG_DWT
03008 ff_dsputil_init_dwt(c);
03009 #endif
03010
03011 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
03012
03013 c->add_bytes= add_bytes_c;
03014 c->diff_bytes= diff_bytes_c;
03015 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
03016 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03017 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
03018 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03019 c->bswap_buf= bswap_buf;
03020 c->bswap16_buf = bswap16_buf;
03021
03022 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03023 c->h263_h_loop_filter= h263_h_loop_filter_c;
03024 c->h263_v_loop_filter= h263_v_loop_filter_c;
03025 }
03026
03027 c->h261_loop_filter= h261_loop_filter_c;
03028
03029 c->try_8x8basis= try_8x8basis_c;
03030 c->add_8x8basis= add_8x8basis_c;
03031
03032 #if CONFIG_VORBIS_DECODER
03033 c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
03034 #endif
03035 c->vector_fmul_reverse = vector_fmul_reverse_c;
03036 c->vector_fmul_add = vector_fmul_add_c;
03037 c->vector_fmul_window = vector_fmul_window_c;
03038 c->vector_clipf = vector_clipf_c;
03039 c->scalarproduct_int16 = scalarproduct_int16_c;
03040 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03041 c->apply_window_int16 = apply_window_int16_c;
03042 c->vector_clip_int32 = vector_clip_int32_c;
03043 c->scalarproduct_float = ff_scalarproduct_float_c;
03044 c->butterflies_float = butterflies_float_c;
03045 c->butterflies_float_interleave = butterflies_float_interleave_c;
03046 c->vector_fmul_scalar = vector_fmul_scalar_c;
03047
03048 c->shrink[0]= av_image_copy_plane;
03049 c->shrink[1]= ff_shrink22;
03050 c->shrink[2]= ff_shrink44;
03051 c->shrink[3]= ff_shrink88;
03052
03053 c->prefetch= just_return;
03054
03055 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03056 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03057
03058 #undef FUNC
03059 #undef FUNCC
03060 #define FUNC(f, depth) f ## _ ## depth
03061 #define FUNCC(f, depth) f ## _ ## depth ## _c
03062
03063 #define dspfunc1(PFX, IDX, NUM, depth)\
03064 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
03065 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03066 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03067 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03068
03069 #define dspfunc2(PFX, IDX, NUM, depth)\
03070 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03071 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03072 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03073 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03074 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03075 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03076 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03077 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03078 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03079 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03080 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03081 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03082 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03083 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03084 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03085 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03086
03087
03088 #define BIT_DEPTH_FUNCS(depth, dct)\
03089 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
03090 c->draw_edges = FUNCC(draw_edges , depth);\
03091 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
03092 c->clear_block = FUNCC(clear_block ## dct , depth);\
03093 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
03094 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
03095 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
03096 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
03097 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03098 \
03099 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
03100 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
03101 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
03102 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
03103 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
03104 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
03105 \
03106 dspfunc1(put , 0, 16, depth);\
03107 dspfunc1(put , 1, 8, depth);\
03108 dspfunc1(put , 2, 4, depth);\
03109 dspfunc1(put , 3, 2, depth);\
03110 dspfunc1(put_no_rnd, 0, 16, depth);\
03111 dspfunc1(put_no_rnd, 1, 8, depth);\
03112 dspfunc1(avg , 0, 16, depth);\
03113 dspfunc1(avg , 1, 8, depth);\
03114 dspfunc1(avg , 2, 4, depth);\
03115 dspfunc1(avg , 3, 2, depth);\
03116 dspfunc1(avg_no_rnd, 0, 16, depth);\
03117 dspfunc1(avg_no_rnd, 1, 8, depth);\
03118 \
03119 dspfunc2(put_h264_qpel, 0, 16, depth);\
03120 dspfunc2(put_h264_qpel, 1, 8, depth);\
03121 dspfunc2(put_h264_qpel, 2, 4, depth);\
03122 dspfunc2(put_h264_qpel, 3, 2, depth);\
03123 dspfunc2(avg_h264_qpel, 0, 16, depth);\
03124 dspfunc2(avg_h264_qpel, 1, 8, depth);\
03125 dspfunc2(avg_h264_qpel, 2, 4, depth);
03126
03127 switch (avctx->bits_per_raw_sample) {
03128 case 9:
03129 if (c->dct_bits == 32) {
03130 BIT_DEPTH_FUNCS(9, _32);
03131 } else {
03132 BIT_DEPTH_FUNCS(9, _16);
03133 }
03134 break;
03135 case 10:
03136 if (c->dct_bits == 32) {
03137 BIT_DEPTH_FUNCS(10, _32);
03138 } else {
03139 BIT_DEPTH_FUNCS(10, _16);
03140 }
03141 break;
03142 case 12:
03143 if (c->dct_bits == 32) {
03144 BIT_DEPTH_FUNCS(12, _32);
03145 } else {
03146 BIT_DEPTH_FUNCS(12, _16);
03147 }
03148 break;
03149 case 14:
03150 if (c->dct_bits == 32) {
03151 BIT_DEPTH_FUNCS(14, _32);
03152 } else {
03153 BIT_DEPTH_FUNCS(14, _16);
03154 }
03155 break;
03156 default:
03157 if(avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
03158 BIT_DEPTH_FUNCS(8, _16);
03159 }
03160 break;
03161 }
03162
03163
03164 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
03165 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
03166 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
03167 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
03168 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
03169 if (HAVE_MMI) ff_dsputil_init_mmi (c, avctx);
03170 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
03171 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
03172 if (HAVE_MIPSFPU) ff_dsputil_init_mips (c, avctx);
03173
03174 for (i = 0; i < 4; i++) {
03175 for (j = 0; j < 16; j++) {
03176 if(!c->put_2tap_qpel_pixels_tab[i][j])
03177 c->put_2tap_qpel_pixels_tab[i][j] =
03178 c->put_h264_qpel_pixels_tab[i][j];
03179 if(!c->avg_2tap_qpel_pixels_tab[i][j])
03180 c->avg_2tap_qpel_pixels_tab[i][j] =
03181 c->avg_h264_qpel_pixels_tab[i][j];
03182 }
03183 }
03184
03185 ff_init_scantable_permutation(c->idct_permutation,
03186 c->idct_permutation_type);
03187 }
03188
/**
 * Initialize a DSPContext by forwarding to ff_dsputil_init(), which fills in
 * the function-pointer tables (IDCT/FDCT, motion-compensation, comparison
 * functions, etc.) based on the codec context's parameters.
 *
 * NOTE(review): presumably kept as a public/legacy alias of ff_dsputil_init
 * for ABI/API compatibility — confirm against the public header.
 *
 * @param c     DSPContext to initialize
 * @param avctx codec context whose settings (bit depth, dct/idct algo,
 *              lowres, ...) select the implementations
 */
av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    ff_dsputil_init(c, avctx);
}