Go to the documentation of this file.
   39 #if HAVE_SSE2_EXTERNAL 
   41 void ff_cavs_idct8_sse2(int16_t *
out, 
const int16_t *in);
 
   43 static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *
block, ptrdiff_t 
stride)
 
   52 #if HAVE_MMXEXT_INLINE 
   61 #define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ 
   62         "movd (%0), "#F"            \n\t"\ 
   63         "movq "#C", %%mm6           \n\t"\ 
   64         "pmullw "MANGLE(MUL1)", %%mm6\n\t"\ 
   65         "movq "#D", %%mm7           \n\t"\ 
   66         "pmullw "MANGLE(MUL2)", %%mm7\n\t"\ 
   67         "psllw $3, "#E"             \n\t"\ 
   68         "psubw "#E", %%mm6          \n\t"\ 
   69         "psraw $3, "#E"             \n\t"\ 
   70         "paddw %%mm7, %%mm6         \n\t"\ 
   71         "paddw "#E", %%mm6          \n\t"\ 
   72         "paddw "#B", "#B"           \n\t"\ 
   73         "pxor %%mm7, %%mm7          \n\t"\ 
   75         "punpcklbw %%mm7, "#F"      \n\t"\ 
   76         "psubw "#B", %%mm6          \n\t"\ 
   77         "psraw $1, "#B"             \n\t"\ 
   78         "psubw "#A", %%mm6          \n\t"\ 
   79         "paddw "MANGLE(ADD)", %%mm6 \n\t"\ 
   80         "psraw $7, %%mm6            \n\t"\ 
   81         "packuswb %%mm6, %%mm6      \n\t"\ 
   82         OP(%%mm6, (%1), A, d)            \ 
   86 #define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ 
   87         "movd (%0), "#F"            \n\t"\ 
   88         "movq "#C", %%mm6           \n\t"\ 
   89         "paddw "#D", %%mm6          \n\t"\ 
   90         "pmullw "MANGLE(MUL1)", %%mm6\n\t"\ 
   92         "punpcklbw %%mm7, "#F"      \n\t"\ 
   93         "psubw "#B", %%mm6          \n\t"\ 
   94         "psubw "#E", %%mm6          \n\t"\ 
   95         "paddw "MANGLE(ADD)", %%mm6 \n\t"\ 
   96         "psraw $3, %%mm6            \n\t"\ 
   97         "packuswb %%mm6, %%mm6      \n\t"\ 
   98         OP(%%mm6, (%1), A, d)            \ 
  102 #define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ 
  103         "movd (%0), "#F"            \n\t"\ 
  104         "movq "#C", %%mm6           \n\t"\ 
  105         "pmullw "MANGLE(MUL2)", %%mm6\n\t"\ 
  106         "movq "#D", %%mm7           \n\t"\ 
  107         "pmullw "MANGLE(MUL1)", %%mm7\n\t"\ 
  108         "psllw $3, "#B"             \n\t"\ 
  109         "psubw "#B", %%mm6          \n\t"\ 
  110         "psraw $3, "#B"             \n\t"\ 
  111         "paddw %%mm7, %%mm6         \n\t"\ 
  112         "paddw "#B", %%mm6          \n\t"\ 
  113         "paddw "#E", "#E"           \n\t"\ 
  114         "pxor %%mm7, %%mm7          \n\t"\ 
  116         "punpcklbw %%mm7, "#F"      \n\t"\ 
  117         "psubw "#E", %%mm6          \n\t"\ 
  118         "psraw $1, "#E"             \n\t"\ 
  119         "psubw "#F", %%mm6          \n\t"\ 
  120         "paddw "MANGLE(ADD)", %%mm6 \n\t"\ 
  121         "psraw $7, %%mm6            \n\t"\ 
  122         "packuswb %%mm6, %%mm6      \n\t"\ 
  123         OP(%%mm6, (%1), A, d)            \ 
  127 #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\ 
  133         "pxor %%mm7, %%mm7          \n\t"\ 
  134         "movd (%0), %%mm0           \n\t"\ 
  136         "movd (%0), %%mm1           \n\t"\ 
  138         "movd (%0), %%mm2           \n\t"\ 
  140         "movd (%0), %%mm3           \n\t"\ 
  142         "movd (%0), %%mm4           \n\t"\ 
  144         "punpcklbw %%mm7, %%mm0     \n\t"\ 
  145         "punpcklbw %%mm7, %%mm1     \n\t"\ 
  146         "punpcklbw %%mm7, %%mm2     \n\t"\ 
  147         "punpcklbw %%mm7, %%mm3     \n\t"\ 
  148         "punpcklbw %%mm7, %%mm4     \n\t"\ 
  149         VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ 
  150         VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ 
  151         VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ 
  152         VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ 
  153         VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\ 
  154         VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\ 
  155         VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ 
  156         VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ 
  158         : "+a"(src), "+c"(dst)\ 
  159         : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\ 
  160           NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\ 
  165             VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ 
  166             VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ 
  167             VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\ 
  168             VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\ 
  169             VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ 
  170             VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ 
  171             VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ 
  172             VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ 
  174            : "+a"(src), "+c"(dst)\ 
  175            : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\ 
  176              NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\ 
  180      src += 4-(h+5)*srcStride;\ 
  181      dst += 4-h*dstStride;\ 
  184 #define QPEL_CAVS(OPNAME, OP, MMX)\ 
  185 static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 
  189         "pxor %%mm7, %%mm7          \n\t"\ 
  190         "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\ 
  192         "movq    (%0), %%mm0        \n\t"\ 
  193         "movq   1(%0), %%mm2        \n\t"\ 
  194         "movq %%mm0, %%mm1          \n\t"\ 
  195         "movq %%mm2, %%mm3          \n\t"\ 
  196         "punpcklbw %%mm7, %%mm0     \n\t"\ 
  197         "punpckhbw %%mm7, %%mm1     \n\t"\ 
  198         "punpcklbw %%mm7, %%mm2     \n\t"\ 
  199         "punpckhbw %%mm7, %%mm3     \n\t"\ 
  200         "paddw %%mm2, %%mm0         \n\t"\ 
  201         "paddw %%mm3, %%mm1         \n\t"\ 
  202         "pmullw %%mm6, %%mm0        \n\t"\ 
  203         "pmullw %%mm6, %%mm1        \n\t"\ 
  204         "movq   -1(%0), %%mm2       \n\t"\ 
  205         "movq    2(%0), %%mm4       \n\t"\ 
  206         "movq %%mm2, %%mm3          \n\t"\ 
  207         "movq %%mm4, %%mm5          \n\t"\ 
  208         "punpcklbw %%mm7, %%mm2     \n\t"\ 
  209         "punpckhbw %%mm7, %%mm3     \n\t"\ 
  210         "punpcklbw %%mm7, %%mm4     \n\t"\ 
  211         "punpckhbw %%mm7, %%mm5     \n\t"\ 
  212         "paddw %%mm4, %%mm2         \n\t"\ 
  213         "paddw %%mm3, %%mm5         \n\t"\ 
  214         "psubw %%mm2, %%mm0         \n\t"\ 
  215         "psubw %%mm5, %%mm1         \n\t"\ 
  216         "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\ 
  217         "paddw %%mm5, %%mm0         \n\t"\ 
  218         "paddw %%mm5, %%mm1         \n\t"\ 
  219         "psraw $3, %%mm0            \n\t"\ 
  220         "psraw $3, %%mm1            \n\t"\ 
  221         "packuswb %%mm1, %%mm0      \n\t"\ 
  222         OP(%%mm0, (%1),%%mm5, q)         \ 
  227         : "+a"(src), "+c"(dst), "+m"(h)\ 
  228         : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\ 
  229           NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\ 
  234 static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ 
  236   QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42)      \ 
  239 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ 
  241   QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_42)        \ 
  244 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ 
  246   QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42)      \ 
  249 static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 
  251     OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 8);\ 
  253 static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 
  255     OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 16);\ 
  256     OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 
  259 static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 
  261     OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 8);\ 
  263 static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 
  265     OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 16);\ 
  266     OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 
  269 static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 
  271     OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 8);\ 
  273 static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 
  275     OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 16);\ 
  276     OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 
  279 static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 
  281     OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\ 
  282     OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 
  285     OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\ 
  286     OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 
  289 #define CAVS_MC(OPNAME, SIZE, MMX) \ 
  290 static void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 
  292     OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\ 
  295 static void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 
  297     OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\ 
  300 static void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 
  302     OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\ 
  305 static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 
  307     OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\ 
  310 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b "    \n\t" 
  311 #define AVG_MMXEXT_OP(a, b, temp, size) \ 
  312 "mov" #size " " #b ", " #temp "   \n\t"\ 
  313 "pavgb " #temp ", " #a "          \n\t"\ 
  314 "mov" #size " " #a ", " #b "      \n\t" 
  318 #if HAVE_MMX_EXTERNAL 
  319 static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, 
const uint8_t *
src,
 
  325 static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, 
const uint8_t *
src,
 
  331 static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, 
const uint8_t *
src,
 
  337 static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, 
const uint8_t *
src,
 
  346 #if HAVE_MMX_EXTERNAL 
  347     c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
 
  351 #define DSPFUNC(PFX, IDX, NUM, EXT)                                                       \ 
  352     c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \ 
  353     c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \ 
  354     c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \ 
  355     c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT; \ 
  357 #if HAVE_MMXEXT_INLINE 
  358 QPEL_CAVS(put_,        PUT_OP, mmxext)
 
  359 QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)
 
  374 #if HAVE_MMXEXT_INLINE 
  382 #if HAVE_MMX_EXTERNAL 
  384         c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext;
 
  387 #if HAVE_SSE2_EXTERNAL 
  389         c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2;
 
  390         c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2;
 
  392         c->cavs_idct8_add = cavs_idct8_add_sse2;
 
  
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
 
static atomic_int cpu_flags
 
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c)
 
#define LOCAL_ALIGNED(a, t, v,...)
 
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 
static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c)
 
void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
 
Undefined Behavior: in the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing memory outside the allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. The unsafety may seem like nit-picking, but optimizing compilers have in fact optimized code on the assumption that no Undefined Behavior occurs. Optimizing code based on wrong assumptions can, and has in some cases, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c[text truncated in extraction]
 
#define CAVS_MC(OPNAME, SIZE)
 
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 
static double b2(void *priv, double x, double y)
 
#define EXTERNAL_SSE2(flags)
 
#define DSPFUNC(PFX, IDX, NUM, EXT)
 
#define INLINE_MMXEXT(flags)
 
The exact code depends on how similar the blocks are and how related they are to the block
 
#define EXTERNAL_MMXEXT(flags)