FFmpeg
me_cmp.c
Go to the documentation of this file.
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "libavutil/attributes.h"
24 #include "libavutil/internal.h"
25 #include "libavutil/mem_internal.h"
26 #include "avcodec.h"
27 #include "copy_block.h"
28 #include "simple_idct.h"
29 #include "me_cmp.h"
30 #include "mpegvideoenc.h"
31 #include "config.h"
32 #include "config_components.h"
33 
34 /* Lookup table of squares: ff_square_tab[i] == (i - 256) * (i - 256) for
 * i in [0, 511]. The SSE routines in this file index it through a pointer
 * advanced by 256, so a signed difference d in [-255, 255] yields d * d. */
35 const uint32_t ff_square_tab[512] = {
36  65536, 65025, 64516, 64009, 63504, 63001, 62500, 62001, 61504, 61009, 60516, 60025, 59536, 59049, 58564, 58081,
37  57600, 57121, 56644, 56169, 55696, 55225, 54756, 54289, 53824, 53361, 52900, 52441, 51984, 51529, 51076, 50625,
38  50176, 49729, 49284, 48841, 48400, 47961, 47524, 47089, 46656, 46225, 45796, 45369, 44944, 44521, 44100, 43681,
39  43264, 42849, 42436, 42025, 41616, 41209, 40804, 40401, 40000, 39601, 39204, 38809, 38416, 38025, 37636, 37249,
40  36864, 36481, 36100, 35721, 35344, 34969, 34596, 34225, 33856, 33489, 33124, 32761, 32400, 32041, 31684, 31329,
41  30976, 30625, 30276, 29929, 29584, 29241, 28900, 28561, 28224, 27889, 27556, 27225, 26896, 26569, 26244, 25921,
42  25600, 25281, 24964, 24649, 24336, 24025, 23716, 23409, 23104, 22801, 22500, 22201, 21904, 21609, 21316, 21025,
43  20736, 20449, 20164, 19881, 19600, 19321, 19044, 18769, 18496, 18225, 17956, 17689, 17424, 17161, 16900, 16641,
44  16384, 16129, 15876, 15625, 15376, 15129, 14884, 14641, 14400, 14161, 13924, 13689, 13456, 13225, 12996, 12769,
45  12544, 12321, 12100, 11881, 11664, 11449, 11236, 11025, 10816, 10609, 10404, 10201, 10000, 9801, 9604, 9409,
46  9216, 9025, 8836, 8649, 8464, 8281, 8100, 7921, 7744, 7569, 7396, 7225, 7056, 6889, 6724, 6561,
47  6400, 6241, 6084, 5929, 5776, 5625, 5476, 5329, 5184, 5041, 4900, 4761, 4624, 4489, 4356, 4225,
48  4096, 3969, 3844, 3721, 3600, 3481, 3364, 3249, 3136, 3025, 2916, 2809, 2704, 2601, 2500, 2401,
49  2304, 2209, 2116, 2025, 1936, 1849, 1764, 1681, 1600, 1521, 1444, 1369, 1296, 1225, 1156, 1089,
50  1024, 961, 900, 841, 784, 729, 676, 625, 576, 529, 484, 441, 400, 361, 324, 289,
51  256, 225, 196, 169, 144, 121, 100, 81, 64, 49, 36, 25, 16, 9, 4, 1,
52  0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225,
53  256, 289, 324, 361, 400, 441, 484, 529, 576, 625, 676, 729, 784, 841, 900, 961,
54  1024, 1089, 1156, 1225, 1296, 1369, 1444, 1521, 1600, 1681, 1764, 1849, 1936, 2025, 2116, 2209,
55  2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025, 3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969,
56  4096, 4225, 4356, 4489, 4624, 4761, 4900, 5041, 5184, 5329, 5476, 5625, 5776, 5929, 6084, 6241,
57  6400, 6561, 6724, 6889, 7056, 7225, 7396, 7569, 7744, 7921, 8100, 8281, 8464, 8649, 8836, 9025,
58  9216, 9409, 9604, 9801, 10000, 10201, 10404, 10609, 10816, 11025, 11236, 11449, 11664, 11881, 12100, 12321,
59  12544, 12769, 12996, 13225, 13456, 13689, 13924, 14161, 14400, 14641, 14884, 15129, 15376, 15625, 15876, 16129,
60  16384, 16641, 16900, 17161, 17424, 17689, 17956, 18225, 18496, 18769, 19044, 19321, 19600, 19881, 20164, 20449,
61  20736, 21025, 21316, 21609, 21904, 22201, 22500, 22801, 23104, 23409, 23716, 24025, 24336, 24649, 24964, 25281,
62  25600, 25921, 26244, 26569, 26896, 27225, 27556, 27889, 28224, 28561, 28900, 29241, 29584, 29929, 30276, 30625,
63  30976, 31329, 31684, 32041, 32400, 32761, 33124, 33489, 33856, 34225, 34596, 34969, 35344, 35721, 36100, 36481,
64  36864, 37249, 37636, 38025, 38416, 38809, 39204, 39601, 40000, 40401, 40804, 41209, 41616, 42025, 42436, 42849,
65  43264, 43681, 44100, 44521, 44944, 45369, 45796, 46225, 46656, 47089, 47524, 47961, 48400, 48841, 49284, 49729,
66  50176, 50625, 51076, 51529, 51984, 52441, 52900, 53361, 53824, 54289, 54756, 55225, 55696, 56169, 56644, 57121,
67  57600, 58081, 58564, 59049, 59536, 60025, 60516, 61009, 61504, 62001, 62500, 63001, 63504, 64009, 64516, 65025,
68 };
69 
70 static int sse4_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
71  ptrdiff_t stride, int h)
72 {
73  int s = 0, i;
74  const uint32_t *sq = ff_square_tab + 256;
75 
76  for (i = 0; i < h; i++) {
77  s += sq[pix1[0] - pix2[0]];
78  s += sq[pix1[1] - pix2[1]];
79  s += sq[pix1[2] - pix2[2]];
80  s += sq[pix1[3] - pix2[3]];
81  pix1 += stride;
82  pix2 += stride;
83  }
84  return s;
85 }
86 
87 static int sse8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
88  ptrdiff_t stride, int h)
89 {
90  int s = 0, i;
91  const uint32_t *sq = ff_square_tab + 256;
92 
93  for (i = 0; i < h; i++) {
94  s += sq[pix1[0] - pix2[0]];
95  s += sq[pix1[1] - pix2[1]];
96  s += sq[pix1[2] - pix2[2]];
97  s += sq[pix1[3] - pix2[3]];
98  s += sq[pix1[4] - pix2[4]];
99  s += sq[pix1[5] - pix2[5]];
100  s += sq[pix1[6] - pix2[6]];
101  s += sq[pix1[7] - pix2[7]];
102  pix1 += stride;
103  pix2 += stride;
104  }
105  return s;
106 }
107 
108 static int sse16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
109  ptrdiff_t stride, int h)
110 {
111  int s = 0, i;
112  const uint32_t *sq = ff_square_tab + 256;
113 
114  for (i = 0; i < h; i++) {
115  s += sq[pix1[0] - pix2[0]];
116  s += sq[pix1[1] - pix2[1]];
117  s += sq[pix1[2] - pix2[2]];
118  s += sq[pix1[3] - pix2[3]];
119  s += sq[pix1[4] - pix2[4]];
120  s += sq[pix1[5] - pix2[5]];
121  s += sq[pix1[6] - pix2[6]];
122  s += sq[pix1[7] - pix2[7]];
123  s += sq[pix1[8] - pix2[8]];
124  s += sq[pix1[9] - pix2[9]];
125  s += sq[pix1[10] - pix2[10]];
126  s += sq[pix1[11] - pix2[11]];
127  s += sq[pix1[12] - pix2[12]];
128  s += sq[pix1[13] - pix2[13]];
129  s += sq[pix1[14] - pix2[14]];
130  s += sq[pix1[15] - pix2[15]];
131 
132  pix1 += stride;
133  pix2 += stride;
134  }
135  return s;
136 }
137 
/**
 * Add up the absolute values of the 64 coefficients in a block.
 *
 * @param block pointer to 64 consecutive int16_t values
 * @return sum of |block[k]| for k in [0, 63]
 */
static int sum_abs_dctelem_c(const int16_t *block)
{
    const int16_t *const end = block + 64;
    int total = 0;

    while (block < end) {
        /* Widen to int first so that negating -32768 is well defined. */
        const int coeff = *block++;

        total += coeff >= 0 ? coeff : -coeff;
    }
    return total;
}
146 
/* Average of two values; the +1 bias before the shift makes a .5 result round up. */
147 #define avg2(a, b) (((a) + (b) + 1) >> 1)
/* Average of four values; the +2 bias before the shift rounds to nearest (.5 up). */
148 #define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
149 
150 static inline int pix_abs16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
151  ptrdiff_t stride, int h)
152 {
153  int s = 0, i;
154 
155  for (i = 0; i < h; i++) {
156  s += abs(pix1[0] - pix2[0]);
157  s += abs(pix1[1] - pix2[1]);
158  s += abs(pix1[2] - pix2[2]);
159  s += abs(pix1[3] - pix2[3]);
160  s += abs(pix1[4] - pix2[4]);
161  s += abs(pix1[5] - pix2[5]);
162  s += abs(pix1[6] - pix2[6]);
163  s += abs(pix1[7] - pix2[7]);
164  s += abs(pix1[8] - pix2[8]);
165  s += abs(pix1[9] - pix2[9]);
166  s += abs(pix1[10] - pix2[10]);
167  s += abs(pix1[11] - pix2[11]);
168  s += abs(pix1[12] - pix2[12]);
169  s += abs(pix1[13] - pix2[13]);
170  s += abs(pix1[14] - pix2[14]);
171  s += abs(pix1[15] - pix2[15]);
172  pix1 += stride;
173  pix2 += stride;
174  }
175  return s;
176 }
177 
178 static inline int pix_median_abs16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
179  ptrdiff_t stride, int h)
180 {
181  int s = 0, i, j;
182 
183 #define V(x) (pix1[x] - pix2[x])
184 
185  s += abs(V(0));
186  s += abs(V(1) - V(0));
187  s += abs(V(2) - V(1));
188  s += abs(V(3) - V(2));
189  s += abs(V(4) - V(3));
190  s += abs(V(5) - V(4));
191  s += abs(V(6) - V(5));
192  s += abs(V(7) - V(6));
193  s += abs(V(8) - V(7));
194  s += abs(V(9) - V(8));
195  s += abs(V(10) - V(9));
196  s += abs(V(11) - V(10));
197  s += abs(V(12) - V(11));
198  s += abs(V(13) - V(12));
199  s += abs(V(14) - V(13));
200  s += abs(V(15) - V(14));
201 
202  pix1 += stride;
203  pix2 += stride;
204 
205  for (i = 1; i < h; i++) {
206  s += abs(V(0) - V(-stride));
207  for (j = 1; j < 16; j++)
208  s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1)));
209  pix1 += stride;
210  pix2 += stride;
211 
212  }
213 #undef V
214  return s;
215 }
216 
217 static int pix_abs16_x2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
218  ptrdiff_t stride, int h)
219 {
220  int s = 0, i;
221 
222  for (i = 0; i < h; i++) {
223  s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
224  s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
225  s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
226  s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
227  s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
228  s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
229  s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
230  s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
231  s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
232  s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
233  s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
234  s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
235  s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
236  s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
237  s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
238  s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
239  pix1 += stride;
240  pix2 += stride;
241  }
242  return s;
243 }
244 
245 static int pix_abs16_y2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
246  ptrdiff_t stride, int h)
247 {
248  int s = 0, i;
249  const uint8_t *pix3 = pix2 + stride;
250 
251  for (i = 0; i < h; i++) {
252  s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
253  s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
254  s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
255  s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
256  s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
257  s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
258  s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
259  s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
260  s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
261  s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
262  s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
263  s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
264  s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
265  s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
266  s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
267  s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
268  pix1 += stride;
269  pix2 += stride;
270  pix3 += stride;
271  }
272  return s;
273 }
274 
275 static int pix_abs16_xy2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
276  ptrdiff_t stride, int h)
277 {
278  int s = 0, i;
279  const uint8_t *pix3 = pix2 + stride;
280 
281  for (i = 0; i < h; i++) {
282  s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
283  s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
284  s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
285  s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
286  s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
287  s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
288  s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
289  s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
290  s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
291  s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
292  s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
293  s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
294  s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
295  s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
296  s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
297  s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
298  pix1 += stride;
299  pix2 += stride;
300  pix3 += stride;
301  }
302  return s;
303 }
304 
305 static inline int pix_abs8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
306  ptrdiff_t stride, int h)
307 {
308  int s = 0, i;
309 
310  for (i = 0; i < h; i++) {
311  s += abs(pix1[0] - pix2[0]);
312  s += abs(pix1[1] - pix2[1]);
313  s += abs(pix1[2] - pix2[2]);
314  s += abs(pix1[3] - pix2[3]);
315  s += abs(pix1[4] - pix2[4]);
316  s += abs(pix1[5] - pix2[5]);
317  s += abs(pix1[6] - pix2[6]);
318  s += abs(pix1[7] - pix2[7]);
319  pix1 += stride;
320  pix2 += stride;
321  }
322  return s;
323 }
324 
325 static inline int pix_median_abs8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
326  ptrdiff_t stride, int h)
327 {
328  int s = 0, i, j;
329 
330 #define V(x) (pix1[x] - pix2[x])
331 
332  s += abs(V(0));
333  s += abs(V(1) - V(0));
334  s += abs(V(2) - V(1));
335  s += abs(V(3) - V(2));
336  s += abs(V(4) - V(3));
337  s += abs(V(5) - V(4));
338  s += abs(V(6) - V(5));
339  s += abs(V(7) - V(6));
340 
341  pix1 += stride;
342  pix2 += stride;
343 
344  for (i = 1; i < h; i++) {
345  s += abs(V(0) - V(-stride));
346  for (j = 1; j < 8; j++)
347  s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1)));
348  pix1 += stride;
349  pix2 += stride;
350 
351  }
352 #undef V
353  return s;
354 }
355 
356 static int pix_abs8_x2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
357  ptrdiff_t stride, int h)
358 {
359  int s = 0, i;
360 
361  for (i = 0; i < h; i++) {
362  s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
363  s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
364  s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
365  s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
366  s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
367  s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
368  s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
369  s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
370  pix1 += stride;
371  pix2 += stride;
372  }
373  return s;
374 }
375 
376 static int pix_abs8_y2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
377  ptrdiff_t stride, int h)
378 {
379  int s = 0, i;
380  const uint8_t *pix3 = pix2 + stride;
381 
382  for (i = 0; i < h; i++) {
383  s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
384  s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
385  s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
386  s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
387  s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
388  s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
389  s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
390  s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
391  pix1 += stride;
392  pix2 += stride;
393  pix3 += stride;
394  }
395  return s;
396 }
397 
398 static int pix_abs8_xy2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
399  ptrdiff_t stride, int h)
400 {
401  int s = 0, i;
402  const uint8_t *pix3 = pix2 + stride;
403 
404  for (i = 0; i < h; i++) {
405  s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
406  s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
407  s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
408  s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
409  s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
410  s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
411  s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
412  s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
413  pix1 += stride;
414  pix2 += stride;
415  pix3 += stride;
416  }
417  return s;
418 }
419 
420 static int nsse16_c(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
421  ptrdiff_t stride, int h)
422 {
423  int score1 = 0, score2 = 0, x, y;
424 
425  for (y = 0; y < h; y++) {
426  for (x = 0; x < 16; x++)
427  score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
428  if (y + 1 < h) {
429  for (x = 0; x < 15; x++)
430  score2 += FFABS(s1[x] - s1[x + stride] -
431  s1[x + 1] + s1[x + stride + 1]) -
432  FFABS(s2[x] - s2[x + stride] -
433  s2[x + 1] + s2[x + stride + 1]);
434  }
435  s1 += stride;
436  s2 += stride;
437  }
438 
439  if (c)
440  return score1 + FFABS(score2) * c->avctx->nsse_weight;
441  else
442  return score1 + FFABS(score2) * 8;
443 }
444 
445 static int nsse8_c(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
446  ptrdiff_t stride, int h)
447 {
448  int score1 = 0, score2 = 0, x, y;
449 
450  for (y = 0; y < h; y++) {
451  for (x = 0; x < 8; x++)
452  score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
453  if (y + 1 < h) {
454  for (x = 0; x < 7; x++)
455  score2 += FFABS(s1[x] - s1[x + stride] -
456  s1[x + 1] + s1[x + stride + 1]) -
457  FFABS(s2[x] - s2[x + stride] -
458  s2[x + 1] + s2[x + stride + 1]);
459  }
460  s1 += stride;
461  s2 += stride;
462  }
463 
464  if (c)
465  return score1 + FFABS(score2) * c->avctx->nsse_weight;
466  else
467  return score1 + FFABS(score2) * 8;
468 }
469 
470 static int zero_cmp(MpegEncContext *s, const uint8_t *a, const uint8_t *b,
471  ptrdiff_t stride, int h)
472 {
473  return 0;
474 }
475 
477 {
478  int ret = 0;
479  int i;
480 
481  memset(cmp, 0, sizeof(void *) * 6);
482 
483  for (i = 0; i < 6; i++) {
484  switch (type & 0xFF) {
485  case FF_CMP_SAD:
486  cmp[i] = c->sad[i];
487  break;
488  case FF_CMP_MEDIAN_SAD:
489  cmp[i] = c->median_sad[i];
490  break;
491  case FF_CMP_SATD:
492  cmp[i] = c->hadamard8_diff[i];
493  break;
494  case FF_CMP_SSE:
495  cmp[i] = c->sse[i];
496  break;
497  case FF_CMP_DCT:
498  cmp[i] = c->dct_sad[i];
499  break;
500  case FF_CMP_DCT264:
501  cmp[i] = c->dct264_sad[i];
502  break;
503  case FF_CMP_DCTMAX:
504  cmp[i] = c->dct_max[i];
505  break;
506  case FF_CMP_PSNR:
507  cmp[i] = c->quant_psnr[i];
508  break;
509  case FF_CMP_BIT:
510  cmp[i] = c->bit[i];
511  break;
512  case FF_CMP_RD:
513  cmp[i] = c->rd[i];
514  break;
515  case FF_CMP_VSAD:
516  cmp[i] = c->vsad[i];
517  break;
518  case FF_CMP_VSSE:
519  cmp[i] = c->vsse[i];
520  break;
521  case FF_CMP_ZERO:
522  cmp[i] = zero_cmp;
523  break;
524  case FF_CMP_NSSE:
525  cmp[i] = c->nsse[i];
526  break;
527 #if CONFIG_DWT
528  case FF_CMP_W53:
529  cmp[i]= c->w53[i];
530  break;
531  case FF_CMP_W97:
532  cmp[i]= c->w97[i];
533  break;
534 #endif
535  default:
537  "invalid cmp function selection\n");
538  ret = -1;
539  break;
540  }
541  }
542 
543  return ret;
544 }
545 
/*
 * Butterfly helpers.
 *
 * BUTTERFLY2 and BUTTERFLY1 are wrapped in do { } while (0) so that each
 * invocation followed by a semicolon is exactly one statement and can be
 * used safely as the body of an unbraced if/else or loop.
 */

/* Store i1 + i2 in o1 and i1 - i2 in o2. i1 and i2 are each evaluated twice. */
#define BUTTERFLY2(o1, o2, i1, i2) \
    do {                           \
        (o1) = (i1) + (i2);        \
        (o2) = (i1) - (i2);        \
    } while (0)

/* In-place butterfly: x becomes x + y and y becomes x - y. The temporaries
 * have suffixed names so they cannot capture identifiers used in the
 * arguments. */
#define BUTTERFLY1(x, y)             \
    do {                             \
        const int bfly_a_ = (x);     \
        const int bfly_b_ = (y);     \
        (x) = bfly_a_ + bfly_b_;     \
        (y) = bfly_a_ - bfly_b_;     \
    } while (0)

/* |x + y| + |x - y|: a final butterfly stage folded into an absolute sum. */
#define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))
560 
561 static int hadamard8_diff8x8_c(MpegEncContext *s, const uint8_t *dst,
562  const uint8_t *src, ptrdiff_t stride, int h)
563 {
564  int i, temp[64], sum = 0;
565 
566  for (i = 0; i < 8; i++) {
567  // FIXME: try pointer walks
568  BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
569  src[stride * i + 0] - dst[stride * i + 0],
570  src[stride * i + 1] - dst[stride * i + 1]);
571  BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
572  src[stride * i + 2] - dst[stride * i + 2],
573  src[stride * i + 3] - dst[stride * i + 3]);
574  BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
575  src[stride * i + 4] - dst[stride * i + 4],
576  src[stride * i + 5] - dst[stride * i + 5]);
577  BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
578  src[stride * i + 6] - dst[stride * i + 6],
579  src[stride * i + 7] - dst[stride * i + 7]);
580 
581  BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
582  BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
583  BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
584  BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);
585 
586  BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
587  BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
588  BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
589  BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
590  }
591 
592  for (i = 0; i < 8; i++) {
593  BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
594  BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
595  BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
596  BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);
597 
598  BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
599  BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
600  BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
601  BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);
602 
603  sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) +
604  BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) +
605  BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) +
606  BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
607  }
608  return sum;
609 }
610 
611 static int hadamard8_intra8x8_c(MpegEncContext *s, const uint8_t *src,
612  const uint8_t *dummy, ptrdiff_t stride, int h)
613 {
614  int i, temp[64], sum = 0;
615 
616  for (i = 0; i < 8; i++) {
617  // FIXME: try pointer walks
618  BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
619  src[stride * i + 0], src[stride * i + 1]);
620  BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
621  src[stride * i + 2], src[stride * i + 3]);
622  BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
623  src[stride * i + 4], src[stride * i + 5]);
624  BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
625  src[stride * i + 6], src[stride * i + 7]);
626 
627  BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
628  BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
629  BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
630  BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);
631 
632  BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
633  BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
634  BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
635  BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
636  }
637 
638  for (i = 0; i < 8; i++) {
639  BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
640  BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
641  BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
642  BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);
643 
644  BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
645  BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
646  BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
647  BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);
648 
649  sum +=
650  BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i])
651  + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i])
652  + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i])
653  + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
654  }
655 
656  sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean
657 
658  return sum;
659 }
660 
661 static int dct_sad8x8_c(MpegEncContext *s, const uint8_t *src1,
662  const uint8_t *src2, ptrdiff_t stride, int h)
663 {
664  LOCAL_ALIGNED_16(int16_t, temp, [64]);
665 
666  s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
667  s->fdsp.fdct(temp);
668  return s->mecc.sum_abs_dctelem(temp);
669 }
670 
#if CONFIG_GPL
/*
 * One 8-point integer transform, built from adds, subtracts and shifts.
 * Inputs are read through SRC(n) and results delivered through
 * DST(n, value); both must be defined by the user before expansion.
 */
#define DCT8_1D                                         \
    {                                                   \
        const int s07 = SRC(0) + SRC(7);                \
        const int s16 = SRC(1) + SRC(6);                \
        const int s25 = SRC(2) + SRC(5);                \
        const int s34 = SRC(3) + SRC(4);                \
        const int a0  = s07 + s34;                      \
        const int a1  = s16 + s25;                      \
        const int a2  = s07 - s34;                      \
        const int a3  = s16 - s25;                      \
        const int d07 = SRC(0) - SRC(7);                \
        const int d16 = SRC(1) - SRC(6);                \
        const int d25 = SRC(2) - SRC(5);                \
        const int d34 = SRC(3) - SRC(4);                \
        const int a4  = d16 + d25 + (d07 + (d07 >> 1)); \
        const int a5  = d07 - d34 - (d25 + (d25 >> 1)); \
        const int a6  = d07 + d34 - (d16 + (d16 >> 1)); \
        const int a7  = d16 - d25 + (d34 + (d34 >> 1)); \
        DST(0, a0 + a1);                                \
        DST(1, a4 + (a7 >> 2));                         \
        DST(2, a2 + (a3 >> 1));                         \
        DST(3, a5 + (a6 >> 2));                         \
        DST(4, a0 - a1);                                \
        DST(5, a6 - (a5 >> 2));                         \
        DST(6, (a2 >> 1) - a3);                         \
        DST(7, (a4 >> 2) - a7);                         \
    }

/**
 * Sum of absolute coefficients after applying DCT8_1D along both
 * dimensions of the 8x8 difference block.
 *
 * @param s      encoder context providing pdsp.diff_pixels_unaligned()
 * @param src1   top-left pixel of the first block
 * @param src2   top-left pixel of the second block
 * @param stride row offset passed to diff_pixels_unaligned()
 * @param h      not referenced
 * @return accumulated absolute second-pass outputs
 */
static int dct264_sad8x8_c(MpegEncContext *s, const uint8_t *src1,
                           const uint8_t *src2, ptrdiff_t stride, int h)
{
    int16_t blk[8][8];
    int total = 0;
    int k;

    s->pdsp.diff_pixels_unaligned(blk[0], src1, src2, stride);

    /* First pass: transform each blk[k][0..7] in place. */
#define SRC(x) blk[k][x]
#define DST(x, v) blk[k][x] = v
    for (k = 0; k < 8; k++) {
        DCT8_1D
    }
#undef SRC
#undef DST

    /* Second pass: transform along the other dimension. Outputs are not
     * stored; only their absolute values are accumulated. */
#define SRC(x) blk[x][k]
#define DST(x, v) total += FFABS(v)
    for (k = 0; k < 8; k++) {
        DCT8_1D
    }
#undef SRC
#undef DST

    return total;
}
#endif
724 
725 static int dct_max8x8_c(MpegEncContext *s, const uint8_t *src1,
726  const uint8_t *src2, ptrdiff_t stride, int h)
727 {
728  LOCAL_ALIGNED_16(int16_t, temp, [64]);
729  int sum = 0, i;
730 
731  s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
732  s->fdsp.fdct(temp);
733 
734  for (i = 0; i < 64; i++)
735  sum = FFMAX(sum, FFABS(temp[i]));
736 
737  return sum;
738 }
739 
740 static int quant_psnr8x8_c(MpegEncContext *s, const uint8_t *src1,
741  const uint8_t *src2, ptrdiff_t stride, int h)
742 {
743  LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]);
744  int16_t *const bak = temp + 64;
745  int sum = 0, i;
746 
747  s->mb_intra = 0;
748 
749  s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
750 
751  memcpy(bak, temp, 64 * sizeof(int16_t));
752 
753  s->block_last_index[0 /* FIXME */] =
754  s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
755  s->dct_unquantize_inter(s, temp, 0, s->qscale);
757 
758  for (i = 0; i < 64; i++)
759  sum += (temp[i] - bak[i]) * (temp[i] - bak[i]);
760 
761  return sum;
762 }
763 
764 static int rd8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2,
765  ptrdiff_t stride, int h)
766 {
767  const uint8_t *scantable = s->intra_scantable.permutated;
768  LOCAL_ALIGNED_16(int16_t, temp, [64]);
769  LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
770  LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
771  int i, last, run, bits, level, distortion, start_i;
772  const int esc_length = s->ac_esc_length;
773  uint8_t *length, *last_length;
774 
775  copy_block8(lsrc1, src1, 8, stride, 8);
776  copy_block8(lsrc2, src2, 8, stride, 8);
777 
778  s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8);
779 
780  s->block_last_index[0 /* FIXME */] =
781  last =
782  s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
783 
784  bits = 0;
785 
786  if (s->mb_intra) {
787  start_i = 1;
788  length = s->intra_ac_vlc_length;
789  last_length = s->intra_ac_vlc_last_length;
790  bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
791  } else {
792  start_i = 0;
793  length = s->inter_ac_vlc_length;
794  last_length = s->inter_ac_vlc_last_length;
795  }
796 
797  if (last >= start_i) {
798  run = 0;
799  for (i = start_i; i < last; i++) {
800  int j = scantable[i];
801  level = temp[j];
802 
803  if (level) {
804  level += 64;
805  if ((level & (~127)) == 0)
806  bits += length[UNI_AC_ENC_INDEX(run, level)];
807  else
808  bits += esc_length;
809  run = 0;
810  } else
811  run++;
812  }
813  i = scantable[last];
814 
815  level = temp[i] + 64;
816 
817  av_assert2(level - 64);
818 
819  if ((level & (~127)) == 0) {
820  bits += last_length[UNI_AC_ENC_INDEX(run, level)];
821  } else
822  bits += esc_length;
823  }
824 
825  if (last >= 0) {
826  if (s->mb_intra)
827  s->dct_unquantize_intra(s, temp, 0, s->qscale);
828  else
829  s->dct_unquantize_inter(s, temp, 0, s->qscale);
830  }
831 
832  s->idsp.idct_add(lsrc2, 8, temp);
833 
834  distortion = s->mecc.sse[1](NULL, lsrc2, lsrc1, 8, 8);
835 
836  return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7);
837 }
838 
839 static int bit8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2,
840  ptrdiff_t stride, int h)
841 {
842  const uint8_t *scantable = s->intra_scantable.permutated;
843  LOCAL_ALIGNED_16(int16_t, temp, [64]);
844  int i, last, run, bits, level, start_i;
845  const int esc_length = s->ac_esc_length;
846  uint8_t *length, *last_length;
847 
848  s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
849 
850  s->block_last_index[0 /* FIXME */] =
851  last =
852  s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
853 
854  bits = 0;
855 
856  if (s->mb_intra) {
857  start_i = 1;
858  length = s->intra_ac_vlc_length;
859  last_length = s->intra_ac_vlc_last_length;
860  bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
861  } else {
862  start_i = 0;
863  length = s->inter_ac_vlc_length;
864  last_length = s->inter_ac_vlc_last_length;
865  }
866 
867  if (last >= start_i) {
868  run = 0;
869  for (i = start_i; i < last; i++) {
870  int j = scantable[i];
871  level = temp[j];
872 
873  if (level) {
874  level += 64;
875  if ((level & (~127)) == 0)
876  bits += length[UNI_AC_ENC_INDEX(run, level)];
877  else
878  bits += esc_length;
879  run = 0;
880  } else
881  run++;
882  }
883  i = scantable[last];
884 
885  level = temp[i] + 64;
886 
887  av_assert2(level - 64);
888 
889  if ((level & (~127)) == 0)
890  bits += last_length[UNI_AC_ENC_INDEX(run, level)];
891  else
892  bits += esc_length;
893  }
894 
895  return bits;
896 }
897 
898 #define VSAD_INTRA(size) \
899 static int vsad_intra ## size ## _c(MpegEncContext *c, \
900  const uint8_t *s, const uint8_t *dummy, \
901  ptrdiff_t stride, int h) \
902 { \
903  int score = 0, x, y; \
904  \
905  for (y = 1; y < h; y++) { \
906  for (x = 0; x < size; x += 4) { \
907  score += FFABS(s[x] - s[x + stride]) + \
908  FFABS(s[x + 1] - s[x + stride + 1]) + \
909  FFABS(s[x + 2] - s[x + 2 + stride]) + \
910  FFABS(s[x + 3] - s[x + 3 + stride]); \
911  } \
912  s += stride; \
913  } \
914  \
915  return score; \
916 }
917 VSAD_INTRA(8)
918 VSAD_INTRA(16)
919 
920 #define VSAD(size) \
921 static int vsad ## size ## _c(MpegEncContext *c, \
922  const uint8_t *s1, const uint8_t *s2, \
923  ptrdiff_t stride, int h) \
924 { \
925  int score = 0, x, y; \
926  \
927  for (y = 1; y < h; y++) { \
928  for (x = 0; x < size; x++) \
929  score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \
930  s1 += stride; \
931  s2 += stride; \
932  } \
933  \
934  return score; \
935 }
936 VSAD(8)
937 VSAD(16)
938 
939 #define SQ(a) ((a) * (a))
940 #define VSSE_INTRA(size) \
941 static int vsse_intra ## size ## _c(MpegEncContext *c, \
942  const uint8_t *s, const uint8_t *dummy, \
943  ptrdiff_t stride, int h) \
944 { \
945  int score = 0, x, y; \
946  \
947  for (y = 1; y < h; y++) { \
948  for (x = 0; x < size; x += 4) { \
949  score += SQ(s[x] - s[x + stride]) + \
950  SQ(s[x + 1] - s[x + stride + 1]) + \
951  SQ(s[x + 2] - s[x + stride + 2]) + \
952  SQ(s[x + 3] - s[x + stride + 3]); \
953  } \
954  s += stride; \
955  } \
956  \
957  return score; \
958 }
959 VSSE_INTRA(8)
960 VSSE_INTRA(16)
961 
962 #define VSSE(size) \
963 static int vsse ## size ## _c(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, \
964  ptrdiff_t stride, int h) \
965 { \
966  int score = 0, x, y; \
967  \
968  for (y = 1; y < h; y++) { \
969  for (x = 0; x < size; x++) \
970  score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \
971  s1 += stride; \
972  s2 += stride; \
973  } \
974  \
975  return score; \
976 }
977 VSSE(8)
978 VSSE(16)
979 
980 #define WRAPPER8_16_SQ(name8, name16) \
981 static int name16(MpegEncContext *s, const uint8_t *dst, const uint8_t *src, \
982  ptrdiff_t stride, int h) \
983 { \
984  int score = 0; \
985  \
986  score += name8(s, dst, src, stride, 8); \
987  score += name8(s, dst + 8, src + 8, stride, 8); \
988  if (h == 16) { \
989  dst += 8 * stride; \
990  src += 8 * stride; \
991  score += name8(s, dst, src, stride, 8); \
992  score += name8(s, dst + 8, src + 8, stride, 8); \
993  } \
994  return score; \
995 }
996 
997 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
998 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
999 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
1000 #if CONFIG_GPL
1001 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
1002 #endif
1003 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
1004 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
1005 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
1006 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
1007 
1009 {
1010  c->sum_abs_dctelem = sum_abs_dctelem_c;
1011 
1012  /* TODO [0] 16 [1] 8 */
1013  c->pix_abs[0][0] = pix_abs16_c;
1014  c->pix_abs[0][1] = pix_abs16_x2_c;
1015  c->pix_abs[0][2] = pix_abs16_y2_c;
1016  c->pix_abs[0][3] = pix_abs16_xy2_c;
1017  c->pix_abs[1][0] = pix_abs8_c;
1018  c->pix_abs[1][1] = pix_abs8_x2_c;
1019  c->pix_abs[1][2] = pix_abs8_y2_c;
1020  c->pix_abs[1][3] = pix_abs8_xy2_c;
1021 
1022 #define SET_CMP_FUNC(name) \
1023  c->name[0] = name ## 16_c; \
1024  c->name[1] = name ## 8x8_c;
1025 
1026  SET_CMP_FUNC(hadamard8_diff)
1027  c->hadamard8_diff[4] = hadamard8_intra16_c;
1028  c->hadamard8_diff[5] = hadamard8_intra8x8_c;
1029  SET_CMP_FUNC(dct_sad)
1030  SET_CMP_FUNC(dct_max)
1031 #if CONFIG_GPL
1032  SET_CMP_FUNC(dct264_sad)
1033 #endif
1034  c->sad[0] = pix_abs16_c;
1035  c->sad[1] = pix_abs8_c;
1036  c->sse[0] = sse16_c;
1037  c->sse[1] = sse8_c;
1038  c->sse[2] = sse4_c;
1039  SET_CMP_FUNC(quant_psnr)
1040  SET_CMP_FUNC(rd)
1041  SET_CMP_FUNC(bit)
1042  c->vsad[0] = vsad16_c;
1043  c->vsad[1] = vsad8_c;
1044  c->vsad[4] = vsad_intra16_c;
1045  c->vsad[5] = vsad_intra8_c;
1046  c->vsse[0] = vsse16_c;
1047  c->vsse[1] = vsse8_c;
1048  c->vsse[4] = vsse_intra16_c;
1049  c->vsse[5] = vsse_intra8_c;
1050  c->nsse[0] = nsse16_c;
1051  c->nsse[1] = nsse8_c;
1052 #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
1054 #endif
1055 
1056  c->median_sad[0] = pix_median_abs16_c;
1057  c->median_sad[1] = pix_median_abs8_c;
1058 
1059 #if ARCH_AARCH64
1060  ff_me_cmp_init_aarch64(c, avctx);
1061 #elif ARCH_ALPHA
1062  ff_me_cmp_init_alpha(c, avctx);
1063 #elif ARCH_ARM
1064  ff_me_cmp_init_arm(c, avctx);
1065 #elif ARCH_PPC
1066  ff_me_cmp_init_ppc(c, avctx);
1067 #elif ARCH_RISCV
1068  ff_me_cmp_init_riscv(c, avctx);
1069 #elif ARCH_X86
1070  ff_me_cmp_init_x86(c, avctx);
1071 #elif ARCH_MIPS
1072  ff_me_cmp_init_mips(c, avctx);
1073 #endif
1074 
1075 }
sum_abs_dctelem_c
static int sum_abs_dctelem_c(const int16_t *block)
Definition: me_cmp.c:138
level
uint8_t level
Definition: svq3.c:204
pix_abs8_y2_c
static int pix_abs8_y2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:376
pix_median_abs8_c
static int pix_median_abs8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:325
nsse16_c
static int nsse16_c(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
Definition: me_cmp.c:420
mem_internal.h
sse8_c
static int sse8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:87
pix_abs8_x2_c
static int pix_abs8_x2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:356
src1
const pixel * src1
Definition: h264pred_template.c:421
mpegvideoenc.h
VSSE_INTRA
#define VSSE_INTRA(size)
Definition: me_cmp.c:940
b
#define b
Definition: input.c:41
avg2
#define avg2(a, b)
Definition: me_cmp.c:147
copy_block8
static void copy_block8(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)
Definition: copy_block.h:47
ff_me_cmp_init_x86
void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
BUTTERFLYA
#define BUTTERFLYA(x, y)
Definition: me_cmp.c:559
pix_abs16_x2_c
static int pix_abs16_x2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:217
bit
#define bit(string, value)
Definition: cbs_mpeg2.c:56
FF_CMP_VSSE
#define FF_CMP_VSSE
Definition: avcodec.h:904
pix_abs8_xy2_c
static int pix_abs8_xy2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:398
dummy
int dummy
Definition: motion.c:66
sse4_c
static int sse4_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:70
ff_me_cmp_init
av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp.c:1008
pix_abs16_c
static int pix_abs16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:150
type
it's the only field you need to keep assuming you have a context. There is some magic you don't need to care about around this, just let it vf type
Definition: writing_filters.txt:86
FF_CMP_SSE
#define FF_CMP_SSE
Definition: avcodec.h:896
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
av_cold
#define av_cold
Definition: attributes.h:90
FF_CMP_BIT
#define FF_CMP_BIT
Definition: avcodec.h:900
pix_abs16_y2_c
static int pix_abs16_y2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:245
s
#define s(width, name)
Definition: cbs_vp9.c:198
s1
#define s1
Definition: regdef.h:38
bits
uint8_t bits
Definition: vp3data.h:128
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:150
pix_median_abs16_c
static int pix_median_abs16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:178
simple_idct.h
zero_cmp
static int zero_cmp(MpegEncContext *s, const uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h)
Definition: me_cmp.c:470
pix_abs16_xy2_c
static int pix_abs16_xy2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:275
VSAD_INTRA
#define VSAD_INTRA(size)
Definition: me_cmp.c:898
cmp
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby, const int size, const int h, int ref_index, int src_index, me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
compares a block (either a full macroblock or a partition thereof) against a proposed motion-compensa...
Definition: motion_est.c:262
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
MECmpContext
Definition: me_cmp.h:55
NULL
#define NULL
Definition: coverity.c:32
run
uint8_t run
Definition: svq3.c:203
VSSE
#define VSSE(size)
Definition: me_cmp.c:962
abs
#define abs(x)
Definition: cuda_runtime.h:35
FF_CMP_MEDIAN_SAD
#define FF_CMP_MEDIAN_SAD
Definition: avcodec.h:910
hadamard8_intra8x8_c
static int hadamard8_intra8x8_c(MpegEncContext *s, const uint8_t *src, const uint8_t *dummy, ptrdiff_t stride, int h)
Definition: me_cmp.c:611
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_me_cmp_init_mips
void ff_me_cmp_init_mips(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp_init_mips.c:25
s2
#define s2
Definition: regdef.h:39
FF_CMP_PSNR
#define FF_CMP_PSNR
Definition: avcodec.h:899
FF_CMP_W53
#define FF_CMP_W53
Definition: avcodec.h:906
pix_abs8_c
static int pix_abs8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:305
quant_psnr8x8_c
static int quant_psnr8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: me_cmp.c:740
ff_me_cmp_init_alpha
av_cold void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp_alpha.c:267
FF_CMP_SATD
#define FF_CMP_SATD
Definition: avcodec.h:897
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
FF_CMP_ZERO
#define FF_CMP_ZERO
Definition: avcodec.h:902
attributes.h
FF_CMP_SAD
#define FF_CMP_SAD
Definition: avcodec.h:895
BUTTERFLY1
#define BUTTERFLY1(x, y)
Definition: me_cmp.c:550
SRC
#define SRC(x, y)
Definition: h264pred_template.c:825
UNI_AC_ENC_INDEX
#define UNI_AC_ENC_INDEX(run, level)
Definition: mpegvideoenc.h:36
hadamard8_diff8x8_c
static int hadamard8_diff8x8_c(MpegEncContext *s, const uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h)
Definition: me_cmp.c:561
av_assert2
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:67
ff_me_cmp_init_aarch64
av_cold void ff_me_cmp_init_aarch64(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp_init_aarch64.c:80
ff_simple_idct_int16_8bit
void ff_simple_idct_int16_8bit(int16_t *block)
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
copy_block.h
FF_CMP_RD
#define FF_CMP_RD
Definition: avcodec.h:901
internal.h
ff_square_tab
const uint32_t ff_square_tab[512]
Definition: me_cmp.c:35
src2
const pixel * src2
Definition: h264pred_template.c:422
FF_CMP_NSSE
#define FF_CMP_NSSE
Definition: avcodec.h:905
dct
static void dct(AudioRNNContext *s, float *out, const float *in)
Definition: af_arnndn.c:1011
dct_max8x8_c
static int dct_max8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: me_cmp.c:725
avcodec.h
stride
#define stride
Definition: h264pred_template.c:537
mid_pred
#define mid_pred
Definition: mathops.h:98
ret
ret
Definition: filter_design.txt:187
me_cmp_func
int(* me_cmp_func)(struct MpegEncContext *c, const uint8_t *blk1, const uint8_t *blk2, ptrdiff_t stride, int h)
Definition: me_cmp.h:50
me_cmp.h
AVCodecContext
main external API structure.
Definition: avcodec.h:445
WRAPPER8_16_SQ
#define WRAPPER8_16_SQ(name8, name16)
Definition: me_cmp.c:980
sse16_c
static int sse16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:108
ff_me_cmp_init_riscv
void ff_me_cmp_init_riscv(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp_init.c:80
FF_CMP_DCT
#define FF_CMP_DCT
Definition: avcodec.h:898
avg4
#define avg4(a, b, c, d)
Definition: me_cmp.c:148
temp
else temp
Definition: vf_mcdeint.c:263
dct_sad8x8_c
static int dct_sad8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: me_cmp.c:661
DCT8_1D
#define DCT8_1D(src, srcstride, dst, dststride)
Definition: h264dsp.c:93
ff_me_cmp_init_arm
av_cold void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp_init_arm.c:41
DST
#define DST(x, y)
Definition: vp9dsp_template.c:781
ff_set_cmp
int ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type)
Definition: me_cmp.c:476
rd8x8_c
static int rd8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: me_cmp.c:764
ff_me_cmp_init_ppc
av_cold void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp.c:726
ff_dsputil_init_dwt
void ff_dsputil_init_dwt(MECmpContext *c)
Definition: snow_dwt.c:842
FF_CMP_DCTMAX
#define FF_CMP_DCTMAX
Definition: avcodec.h:908
FF_CMP_DCT264
#define FF_CMP_DCT264
Definition: avcodec.h:909
FF_CMP_VSAD
#define FF_CMP_VSAD
Definition: avcodec.h:903
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
SET_CMP_FUNC
#define SET_CMP_FUNC(name)
nsse8_c
static int nsse8_c(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
Definition: me_cmp.c:445
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
V
#define V(x)
h
h
Definition: vp9dsp_template.c:2038
VSAD
#define VSAD(size)
Definition: me_cmp.c:920
bit8x8_c
static int bit8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: me_cmp.c:839
BUTTERFLY2
#define BUTTERFLY2(o1, o2, i1, i2)
Definition: me_cmp.c:546
MpegEncContext
MpegEncContext.
Definition: mpegvideo.h:67
FF_CMP_W97
#define FF_CMP_W97
Definition: avcodec.h:907