Go to the documentation of this file.
/*
 * PIXOP2(OPNAME, OP) - template that generates a family of 8-bit half-pel
 * helper functions for one store operation.
 *
 * OPNAME is pasted onto the front of every generated function name.
 * OP is invoked as OP(destination lvalue, value) wherever a 32-bit result
 * is stored.
 *
 * NOTE(review): this listing carries embedded line numbers and skips some of
 * them (e.g. 42-48, 50, 59-61), so the remaining parameters, the local
 * declarations and the braces are not visible here. Confirm every detail
 * below against the complete file.
 *
 * What the visible lines show:
 *
 *  - _no_rnd_pixels8_l2_8: for each row i < h, reads the 32-bit words at
 *    byte offsets 0 and 4 of src1 and src2 (AV_RN32), combines each pair
 *    with no_rnd_avg32() and hands the result to OP together with the
 *    matching 32-bit word of dst. Each buffer is indexed with its own stride.
 *
 *  - _x2 wrappers (no_rnd_pixels8, pixels8, pixels4, pixels2): forward to the
 *    matching _l2_8 helper with the two sources pixels and pixels + 1.
 *  - _y2 wrappers (same four): forward with the two sources pixels and
 *    pixels + line_size.
 *    Every wrapper passes line_size for all three stride arguments. The
 *    _pixels8_l2_8, _pixels4_l2_8 and _pixels2_l2_8 helpers are not defined
 *    in the visible part of this macro.
 *
 *  - _pixels2_xy2_8_c: seeds a0/b0 from the first row (b0 includes a +2
 *    term), then handles two rows per loop iteration, writing
 *    (a1 + a0) >> 2 and (b1 + b0) >> 2 straight into block[0] and block[1].
 *    The visible stores do not go through OP; the a1/b1 updates are on
 *    omitted lines.
 *
 *  - _pixels4_xy2_8_c: works on one 32-bit word per row. Each word is read
 *    at pixels and at pixels + 1; the low two bits of every byte (mask
 *    0x03030303) are accumulated in l0/l1 and the upper six bits (mask
 *    0xFCFCFCFC, shifted right by 2) in h0/h1. The value handed to OP is
 *    h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0F). Two rows are produced per
 *    iteration, with the l0/h0 and l1/h1 pairs alternating as "previous row"
 *    and "current row". l0 receives a further addend on an omitted line.
 *    Presumably the split keeps each per-byte sum inside its own byte lane.
 *
 *  - _pixels8_xy2_8_c and _no_rnd_pixels8_xy2_8_c: the same per-word scheme
 *    run twice (j = 0, 1). After each pass pixels is rewound by
 *    line_size * (h + 1) and block by line_size * h, and both move 4 bytes
 *    to the right. NOTE(review): the inner loop steps i by 2, so the rewind
 *    matches the rows actually visited only when h is even. The visible
 *    lines of the two functions are identical; any difference between them
 *    must be on the omitted lines.
 *
 *  - CALL_2X_PIXELS(...): pairs each 16-wide function name with an 8-wide
 *    one (third argument not visible). CALL_2X_PIXELS is defined elsewhere;
 *    presumably it builds the 16-wide function from the 8-wide one. Note
 *    that _no_rnd_pixels16_8_c is paired with _pixels8_8_c.
 */
38 #define PIXOP2(OPNAME, OP) \
39 static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst, \
40 const uint8_t *src1, \
41 const uint8_t *src2, \
49 for (i = 0; i < h; i++) { \
51 a = AV_RN32(&src1[i * src_stride1]); \
52 b = AV_RN32(&src2[i * src_stride2]); \
53 OP(*((uint32_t *) &dst[i * dst_stride]), \
54 no_rnd_avg32(a, b)); \
55 a = AV_RN32(&src1[i * src_stride1 + 4]); \
56 b = AV_RN32(&src2[i * src_stride2 + 4]); \
57 OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
58 no_rnd_avg32(a, b)); \
62 static inline void OPNAME ## _no_rnd_pixels8_x2_8_c(uint8_t *block, \
63 const uint8_t *pixels, \
64 ptrdiff_t line_size, \
67 OPNAME ## _no_rnd_pixels8_l2_8(block, pixels, pixels + 1, \
68 line_size, line_size, line_size, h); \
71 static inline void OPNAME ## _pixels8_x2_8_c(uint8_t *block, \
72 const uint8_t *pixels, \
73 ptrdiff_t line_size, \
76 OPNAME ## _pixels8_l2_8(block, pixels, pixels + 1, \
77 line_size, line_size, line_size, h); \
80 static inline void OPNAME ## _no_rnd_pixels8_y2_8_c(uint8_t *block, \
81 const uint8_t *pixels, \
82 ptrdiff_t line_size, \
85 OPNAME ## _no_rnd_pixels8_l2_8(block, pixels, pixels + line_size, \
86 line_size, line_size, line_size, h); \
89 static inline void OPNAME ## _pixels8_y2_8_c(uint8_t *block, \
90 const uint8_t *pixels, \
91 ptrdiff_t line_size, \
94 OPNAME ## _pixels8_l2_8(block, pixels, pixels + line_size, \
95 line_size, line_size, line_size, h); \
98 static inline void OPNAME ## _pixels4_x2_8_c(uint8_t *block, \
99 const uint8_t *pixels, \
100 ptrdiff_t line_size, \
103 OPNAME ## _pixels4_l2_8(block, pixels, pixels + 1, \
104 line_size, line_size, line_size, h); \
107 static inline void OPNAME ## _pixels4_y2_8_c(uint8_t *block, \
108 const uint8_t *pixels, \
109 ptrdiff_t line_size, \
112 OPNAME ## _pixels4_l2_8(block, pixels, pixels + line_size, \
113 line_size, line_size, line_size, h); \
116 static inline void OPNAME ## _pixels2_x2_8_c(uint8_t *block, \
117 const uint8_t *pixels, \
118 ptrdiff_t line_size, \
121 OPNAME ## _pixels2_l2_8(block, pixels, pixels + 1, \
122 line_size, line_size, line_size, h); \
125 static inline void OPNAME ## _pixels2_y2_8_c(uint8_t *block, \
126 const uint8_t *pixels, \
127 ptrdiff_t line_size, \
130 OPNAME ## _pixels2_l2_8(block, pixels, pixels + line_size, \
131 line_size, line_size, line_size, h); \
134 static inline void OPNAME ## _pixels2_xy2_8_c(uint8_t *block, \
135 const uint8_t *pixels, \
136 ptrdiff_t line_size, \
140 int a0 = pixels[0]; \
141 int b0 = pixels[1] + 2; \
145 pixels += line_size; \
146 for (i = 0; i < h; i += 2) { \
152 block[0] = (a1 + a0) >> 2; \
153 block[1] = (b1 + b0) >> 2; \
155 pixels += line_size; \
156 block += line_size; \
159 b0 = pixels[1] + 2; \
163 block[0] = (a1 + a0) >> 2; \
164 block[1] = (b1 + b0) >> 2; \
165 pixels += line_size; \
166 block += line_size; \
170 static inline void OPNAME ## _pixels4_xy2_8_c(uint8_t *block, \
171 const uint8_t *pixels, \
172 ptrdiff_t line_size, \
177 const uint32_t a = AV_RN32(pixels); \
178 const uint32_t b = AV_RN32(pixels + 1); \
179 uint32_t l0 = (a & 0x03030303UL) + \
180 (b & 0x03030303UL) + \
182 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
183 ((b & 0xFCFCFCFCUL) >> 2); \
186 pixels += line_size; \
187 for (i = 0; i < h; i += 2) { \
188 uint32_t a = AV_RN32(pixels); \
189 uint32_t b = AV_RN32(pixels + 1); \
190 l1 = (a & 0x03030303UL) + \
191 (b & 0x03030303UL); \
192 h1 = ((a & 0xFCFCFCFCUL) >> 2) + \
193 ((b & 0xFCFCFCFCUL) >> 2); \
194 OP(*((uint32_t *) block), h0 + h1 + \
195 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
196 pixels += line_size; \
197 block += line_size; \
198 a = AV_RN32(pixels); \
199 b = AV_RN32(pixels + 1); \
200 l0 = (a & 0x03030303UL) + \
201 (b & 0x03030303UL) + \
203 h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
204 ((b & 0xFCFCFCFCUL) >> 2); \
205 OP(*((uint32_t *) block), h0 + h1 + \
206 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
207 pixels += line_size; \
208 block += line_size; \
212 static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block, \
213 const uint8_t *pixels, \
214 ptrdiff_t line_size, \
220 for (j = 0; j < 2; j++) { \
222 const uint32_t a = AV_RN32(pixels); \
223 const uint32_t b = AV_RN32(pixels + 1); \
224 uint32_t l0 = (a & 0x03030303UL) + \
225 (b & 0x03030303UL) + \
227 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
228 ((b & 0xFCFCFCFCUL) >> 2); \
231 pixels += line_size; \
232 for (i = 0; i < h; i += 2) { \
233 uint32_t a = AV_RN32(pixels); \
234 uint32_t b = AV_RN32(pixels + 1); \
235 l1 = (a & 0x03030303UL) + \
236 (b & 0x03030303UL); \
237 h1 = ((a & 0xFCFCFCFCUL) >> 2) + \
238 ((b & 0xFCFCFCFCUL) >> 2); \
239 OP(*((uint32_t *) block), h0 + h1 + \
240 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
241 pixels += line_size; \
242 block += line_size; \
243 a = AV_RN32(pixels); \
244 b = AV_RN32(pixels + 1); \
245 l0 = (a & 0x03030303UL) + \
246 (b & 0x03030303UL) + \
248 h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
249 ((b & 0xFCFCFCFCUL) >> 2); \
250 OP(*((uint32_t *) block), h0 + h1 + \
251 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
252 pixels += line_size; \
253 block += line_size; \
255 pixels += 4 - line_size * (h + 1); \
256 block += 4 - line_size * h; \
260 static inline void OPNAME ## _no_rnd_pixels8_xy2_8_c(uint8_t *block, \
261 const uint8_t *pixels, \
262 ptrdiff_t line_size, \
268 for (j = 0; j < 2; j++) { \
270 const uint32_t a = AV_RN32(pixels); \
271 const uint32_t b = AV_RN32(pixels + 1); \
272 uint32_t l0 = (a & 0x03030303UL) + \
273 (b & 0x03030303UL) + \
275 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
276 ((b & 0xFCFCFCFCUL) >> 2); \
279 pixels += line_size; \
280 for (i = 0; i < h; i += 2) { \
281 uint32_t a = AV_RN32(pixels); \
282 uint32_t b = AV_RN32(pixels + 1); \
283 l1 = (a & 0x03030303UL) + \
284 (b & 0x03030303UL); \
285 h1 = ((a & 0xFCFCFCFCUL) >> 2) + \
286 ((b & 0xFCFCFCFCUL) >> 2); \
287 OP(*((uint32_t *) block), h0 + h1 + \
288 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
289 pixels += line_size; \
290 block += line_size; \
291 a = AV_RN32(pixels); \
292 b = AV_RN32(pixels + 1); \
293 l0 = (a & 0x03030303UL) + \
294 (b & 0x03030303UL) + \
296 h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
297 ((b & 0xFCFCFCFCUL) >> 2); \
298 OP(*((uint32_t *) block), h0 + h1 + \
299 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
300 pixels += line_size; \
301 block += line_size; \
303 pixels += 4 - line_size * (h + 1); \
304 block += 4 - line_size * h; \
308 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_8_c, \
309 OPNAME ## _pixels8_x2_8_c, \
311 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_8_c, \
312 OPNAME ## _pixels8_y2_8_c, \
314 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c, \
315 OPNAME ## _pixels8_xy2_8_c, \
317 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_8_c, \
318 OPNAME ## _pixels8_8_c, \
320 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_8_c, \
321 OPNAME ## _no_rnd_pixels8_x2_8_c, \
323 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_8_c, \
324 OPNAME ## _no_rnd_pixels8_y2_8_c, \
326 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_8_c, \
327 OPNAME ## _no_rnd_pixels8_xy2_8_c, \
/*
 * Store operation that overwrites a with rnd_avg32(a, b), i.e. the old
 * destination value is combined with the new one. Expands to a bare
 * assignment, so a must be an lvalue and is evaluated twice.
 * Presumably used as the OP argument of PIXOP2 - confirm at the expansion
 * sites, which are not visible here.
 */
330 #define op_avg(a, b) a = rnd_avg32(a, b)
/*
 * Store operation that simply assigns b to a. Expands to a bare assignment,
 * so a must be an lvalue.
 */
331 #define op_put(a, b) a = b
/*
 * Name alias: every use of put_no_rnd_pixels8_8_c resolves to
 * put_pixels8_8_c.
 */
332 #define put_no_rnd_pixels8_8_c put_pixels8_8_c
/*
 * hpel_funcs(prefix, idx, num) - fill four consecutive slots of one
 * function table.
 *
 * prefix  pasted in front of "_pixels_tab" to pick the table member of c,
 *         and in front of "_pixels" to pick the function family.
 * idx     inserted textually between the table name and the slot index;
 *         presumably a bracketed row index such as [0], or empty - confirm
 *         at the call sites.
 * num     pasted after "_pixels" in the function names (the block width in
 *         the naming scheme used by this file).
 *
 * Slots 0..3 receive the plain, _x2, _y2 and _xy2 functions respectively.
 * The macro refers to a variable named c that must be in scope where it is
 * expanded. The last statement has no trailing semicolon; the caller
 * supplies it.
 */
340 #define hpel_funcs(prefix, idx, num) \
341 c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \
342 c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \
343 c->prefix ## _pixels_tab idx [2] = prefix ## _pixels ## num ## _y2_8_c; \
344 c->prefix ## _pixels_tab idx [3] = prefix ## _pixels ## num ## _xy2_8_c
370 #elif ARCH_LOONGARCH64
void ff_hpeldsp_init_loongarch(HpelDSPContext *c, int flags)
av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, and accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem like nitpicking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c[... text truncated in source]
void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
av_cold void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags)
void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags)
#define hpel_funcs(prefix, idx, num)
void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags)
av_cold void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags)
#define PIXOP2(OPNAME, OP)
#define flags(name, subs,...)