80 #if HAVE_MMXEXT_INLINE
102 for(i=0; i<64; i++) {
106 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
108 int16_t *
block, int16_t *qmat);
110 static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst){
121 for(i=0; i<64; i++) {
138 #if HAVE_MMXEXT_INLINE
143 #if ARCH_X86_64 && HAVE_YASM
158 #if HAVE_NEON && ARCH_ARM
165 #define AANSCALE_BITS 12
168 #define NB_ITS_SPEED 50000
173 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
174 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
175 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
176 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
177 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
178 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
179 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
180 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
190 for (i = 0; i < 64; i++) {
191 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
202 memset(block, 0, 64 *
sizeof(*block));
206 for (i = 0; i < 64; i++)
207 block[i] = (
av_lfg_get(prng) % (2*vals)) -vals;
210 for (i = 0; i < 64; i++)
216 for (i = 0; i < j; i++) {
218 block[idx] =
av_lfg_get(prng) % (2*vals) -vals;
222 block[ 0] =
av_lfg_get(prng) % (16*vals) - (8*vals);
223 block[63] = (block[0] & 1) ^ 1;
233 for (i = 0; i < 64; i++)
234 dst[idct_mmx_perm[i]] = src[i];
236 for (i = 0; i < 64; i++)
237 dst[idct_simple_mmx_perm[i]] = src[i];
239 for (i = 0; i < 64; i++)
240 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
242 for (i = 0; i < 64; i++)
243 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
245 for (i = 0; i < 64; i++)
246 dst[(i>>3) | ((i<<3)&0x38)] = src[i];
248 for (i = 0; i < 64; i++)
258 int64_t err2, ti, ti1, it1, err_sum = 0;
259 int64_t sysErr[64], sysErrMax = 0;
261 int blockSumErrMax = 0, blockSumErr;
263 const int vals=1<<
bits;
271 for (i = 0; i < 64; i++)
273 for (it = 0; it <
NB_ITS; it++) {
281 for (i = 0; i < 64; i++) {
288 if (!strcmp(dct->
name,
"PR-SSE2"))
289 for (i = 0; i < 64; i++)
293 for (i = 0; i < 64; i++) {
300 sysErr[i] +=
block[i] - block1[i];
302 if (abs(
block[i]) > maxout)
303 maxout = abs(
block[i]);
305 if (blockSumErrMax < blockSumErr)
306 blockSumErrMax = blockSumErr;
308 for (i = 0; i < 64; i++)
309 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
311 for (i = 0; i < 64; i++) {
314 printf(
"%7d ", (
int) sysErr[i]);
318 omse = (double) err2 / NB_ITS / 64;
319 ome = (double) err_sum / NB_ITS / 64;
321 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
323 printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
324 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
325 omse, ome, (
double) sysErrMax / NB_ITS,
326 maxout, blockSumErrMax);
349 }
while (ti1 < 1000000);
351 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
352 (
double) it1 * 1000.0 / (
double) ti1);
363 static double c8[8][8];
364 static double c4[4][4];
365 double block1[64], block2[64], block3[64];
372 for (i = 0; i < 8; i++) {
374 for (j = 0; j < 8; j++) {
375 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
376 c8[i][j] = s * cos(
M_PI * i * (j + 0.5) / 8.0);
377 sum += c8[i][j] * c8[i][j];
381 for (i = 0; i < 4; i++) {
383 for (j = 0; j < 4; j++) {
384 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
385 c4[i][j] = s * cos(
M_PI * i * (j + 0.5) / 4.0);
386 sum += c4[i][j] * c4[i][j];
393 for (i = 0; i < 4; i++) {
394 for (j = 0; j < 8; j++) {
395 block1[8 * (2 * i) + j] =
396 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) *
s;
397 block1[8 * (2 * i + 1) + j] =
398 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) *
s;
403 for (i = 0; i < 8; i++) {
404 for (j = 0; j < 8; j++) {
406 for (k = 0; k < 8; k++)
407 sum += c8[k][j] * block1[8 * i + k];
408 block2[8 * i + j] = sum;
413 for (i = 0; i < 8; i++) {
414 for (j = 0; j < 4; j++) {
417 for (k = 0; k < 4; k++)
418 sum += c4[k][j] * block2[8 * (2 * k) + i];
419 block3[8 * (2 * j) + i] = sum;
423 for (k = 0; k < 4; k++)
424 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
425 block3[8 * (2 * j + 1) + i] = sum;
430 for (i = 0; i < 8; i++) {
431 for (j = 0; j < 8; j++) {
432 v = block3[8 * i + j];
434 else if (v > 255) v = 255;
435 dest[i * linesize + j] = (int)
rint(v);
441 void (*idct248_put)(
uint8_t *dest,
int line_size,
445 int it, i, it1, ti, ti1, err_max,
v;
453 for (it = 0; it <
NB_ITS; it++) {
455 for (i = 0; i < 64; i++)
459 for (i = 0; i < 64; i++)
463 for (i = 0; i < 64; i++)
467 for (i = 0; i < 64; i++) {
494 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
503 for (i = 0; i < 64; i++)
510 }
while (ti1 < 1000000);
512 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
513 (
double) it1 * 1000.0 / (
double) ti1);
518 printf(
"dct-test [-i] [<test-number>] [<bits>]\n"
519 "test-number 0 -> test with random matrixes\n"
520 " 1 -> test with random sparse matrixes\n"
521 " 2 -> do 3. test from mpeg4 std\n"
522 "bits Number of time domain bits to use, 8 is default\n"
523 "-i test IDCT implementations\n"
524 "-4 test IDCT248 implementations\n"
532 int main(
int argc,
char **argv)
534 int test_idct = 0, test_248_dct = 0;
547 c =
getopt(argc, argv,
"ih4t");
568 test = atoi(argv[
optind]);
569 if(optind+1 < argc) bits= atoi(argv[optind+1]);
571 printf(
"ffmpeg DCT/IDCT test\n");
576 const struct algo *algos = test_idct ? idct_tab :
fdct_tab;
577 for (i = 0; algos[i].
name; i++)
579 err |=
dct_error(&algos[i], test, test_idct, speed, bits);
584 printf(
"Error: %d.\n", err);