/*
 * REAL_MOVNTQ(a,b) stringizes both arguments and produces one assembler
 * line of the form "movntq a, b" terminated by "\n\t", ready to be pasted
 * into an inline-asm string.
 *
 * MOVNTQ(a,b) only forwards to REAL_MOVNTQ.  The extra level is the usual
 * preprocessor idiom: arguments that are themselves macros get expanded
 * before the `#` operator in REAL_MOVNTQ turns them into text.
 */
30 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
31 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
/*
 * Chroma stage of the multi-tap vertical filter (fast, pmulhw based).
 *
 * Operands, as laid out by YSCALEYUV2PACKEDX_END:
 *   %0  base pointer all *_OFFSET displacements are relative to
 *   %6  byte offset added to the source pointer to reach the second
 *       chroma plane
 *
 * What the visible instructions do:
 *   - FF_REG_a is cleared; it is the offset used to index the source rows.
 *   - FF_REG_d walks the table at CHR_MMX_FILTER_OFFSET(%0).  Each entry is
 *     16 bytes: a source pointer at +0 and a coefficient qword at +8.
 *   - mm3 and mm4 are both seeded with the constant at VROUNDER_OFFSET(%0).
 *   - Per tap: mm2 = samples of the first plane, mm5 = samples of the plane
 *     at +%6; both are multiplied (pmulhw) by the coefficient in mm0 and
 *     accumulated into mm3 and mm4 respectively.
 *   - FF_REG_S is reloaded with the next entry's source pointer and tested
 *     against itself, leaving flags that tell whether it is zero.
 *
 * Result: mm3 / mm4 hold the filtered samples of the two chroma planes
 * (consumed as U / V by YSCALEYUV2RGBX).
 *
 * NOTE(review): the labels and the conditional branch that turn this into
 * a loop are not among the lines shown here; the trailing "test"
 * presumably feeds that branch -- confirm against the full file.
 */
33 #define YSCALEYUV2PACKEDX_UV \
35 "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
39 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
40 "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
41 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
42 "movq %%mm3, %%mm4 \n\t"\
45 "movq 8(%%"FF_REG_d"), %%mm0 \n\t" \
46 "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" \
47 "add %6, %%"FF_REG_S" \n\t" \
48 "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm5 \n\t" \
49 "add $16, %%"FF_REG_d" \n\t"\
50 "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
51 "pmulhw %%mm0, %%mm2 \n\t"\
52 "pmulhw %%mm0, %%mm5 \n\t"\
53 "paddw %%mm2, %%mm3 \n\t"\
54 "paddw %%mm5, %%mm4 \n\t"\
55 "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
/*
 * Luma/alpha stage of the multi-tap vertical filter (fast, pmulhw based).
 *
 * Parameters:
 *   offset      string literal: displacement of the filter table from %0
 *   coeff       MMX register that receives the current tap's coefficient
 *   src1, src2  MMX scratch registers for two consecutive qwords of samples
 *   dst1, dst2  MMX accumulators; on exit they hold the filtered samples
 *
 * The register arguments are stringized with `#`, so they must be passed
 * as raw tokens (e.g. %%mm0); `offset` is pasted as-is and must already be
 * a string.
 *
 * Visible behaviour: FF_REG_d walks 16-byte table entries (source pointer
 * at +0, coefficient qword at +8).  dst1 and dst2 are both seeded from
 * VROUNDER_OFFSET(%0).  For every tap the qwords at (src + FF_REG_a*2) and
 * 8(src + FF_REG_a*2) are multiplied by the coefficient with pmulhw and
 * added to dst1 / dst2.  The next source pointer is then loaded into
 * FF_REG_S and tested against itself.
 *
 * FF_REG_a is read but not initialised here; it must already hold the
 * current offset (YSCALEYUV2PACKEDX_UV clears it).
 *
 * NOTE(review): loop labels and the branch consuming the "test" flags are
 * not among the lines shown here.
 */
58 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
59 "lea "offset"(%0), %%"FF_REG_d" \n\t"\
60 "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
61 "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
62 "movq "#dst1", "#dst2" \n\t"\
65 "movq 8(%%"FF_REG_d"), "#coeff" \n\t" \
66 "movq (%%"FF_REG_S", %%"FF_REG_a", 2), "#src1" \n\t" \
67 "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), "#src2" \n\t" \
68 "add $16, %%"FF_REG_d" \n\t"\
69 "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
70 "pmulhw "#coeff", "#src1" \n\t"\
71 "pmulhw "#coeff", "#src2" \n\t"\
72 "paddw "#src1", "#dst1" \n\t"\
73 "paddw "#src2", "#dst2" \n\t"\
74 "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
/*
 * Complete fast vertical filter: the chroma stage followed by the luma
 * stage run on the table at LUM_MMX_FILTER_OFFSET.
 *
 * Register assignment chosen for the luma stage: coefficient in mm0,
 * sample scratch in mm2/mm5, accumulators mm1 and mm7.  Together with the
 * chroma stage this leaves chroma in mm3/mm4 and luma in mm1/mm7.
 */
77 #define YSCALEYUV2PACKEDX \
78 YSCALEYUV2PACKEDX_UV \
79 YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
/*
 * Operand and clobber lists that close an asm statement built from the
 * YSCALEYUV2PACKEDX* macros.
 *
 * There are no outputs.  Inputs, in operand order:
 *   %0       &c->redDither (register) -- base for the *_OFFSET displacements
 *   %1..%3   dummy (memory) -- placeholders that keep later operand
 *            numbers fixed
 *   %4       dest (register)
 *   %5       dstW_reg (memory)
 *   %6       uv_off (memory)
 * followed by whatever NAMED_CONSTRAINTS_ADD(bF8,bFC) expands to.
 *
 * Clobbers: FF_REG_a, FF_REG_d, FF_REG_S.
 *
 * The expansion site must have c, dummy, dest, dstW_reg and uv_off in
 * scope.
 */
81 #define YSCALEYUV2PACKEDX_END \
82 :: "r" (&c->redDither), \
83 "m" (dummy), "m" (dummy), "m" (dummy),\
84 "r" (dest), "m" (dstW_reg), "m"(uv_off) \
85 NAMED_CONSTRAINTS_ADD(bF8,bFC) \
86 : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_S \
/*
 * Chroma stage of the multi-tap vertical filter, higher-precision variant:
 * products are summed as 32-bit values (pmaddwd) and only narrowed to
 * 16 bits once all taps have been accumulated.
 *
 * Operands: %0 = base for the *_OFFSET displacements, %6 = byte offset
 * from the first to the second chroma plane (see YSCALEYUV2PACKEDX_END).
 *
 * Table layout used here: FF_REG_d points at entries of APCK_SIZE bytes
 * that carry two source pointers (at +0 and +APCK_PTR2) and a coefficient
 * qword at +APCK_COEF, so two taps are consumed per step.
 *
 * Per step:
 *   - mm0 / mm1 = first-plane samples from the two source rows; their
 *     words are interleaved and pmaddwd with the coefficient qword gives
 *     32-bit two-tap sums, accumulated in mm4 (low half) and mm5 (high
 *     half).
 *   - mm2 / mm3 = the same for the plane at +%6, accumulated in mm6/mm7.
 *   - FF_REG_S is loaded with the next entry's first pointer, FF_REG_d is
 *     advanced by APCK_SIZE and FF_REG_S is tested against itself.
 *
 * Tail: the four accumulators are shifted right by 16, packed with signed
 * saturation into mm4 and mm6, the constant at VROUNDER_OFFSET(%0) is
 * added, and the results are parked at U_TEMP(%0) and V_TEMP(%0) so the
 * MMX registers are free for the luma stage.
 *
 * NOTE(review): loop labels and the branch that consumes the "test" flags
 * are not among the lines shown here.
 */
89 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
91 "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
95 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
96 "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
97 "pxor %%mm4, %%mm4 \n\t"\
98 "pxor %%mm5, %%mm5 \n\t"\
99 "pxor %%mm6, %%mm6 \n\t"\
100 "pxor %%mm7, %%mm7 \n\t"\
103 "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm0 \n\t" \
104 "add %6, %%"FF_REG_S" \n\t" \
105 "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" \
106 "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
107 "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm1 \n\t" \
108 "movq %%mm0, %%mm3 \n\t"\
109 "punpcklwd %%mm1, %%mm0 \n\t"\
110 "punpckhwd %%mm1, %%mm3 \n\t"\
111 "movq "STR(APCK_COEF)"(%%"FF_REG_d"),%%mm1 \n\t" \
112 "pmaddwd %%mm1, %%mm0 \n\t"\
113 "pmaddwd %%mm1, %%mm3 \n\t"\
114 "paddd %%mm0, %%mm4 \n\t"\
115 "paddd %%mm3, %%mm5 \n\t"\
116 "add %6, %%"FF_REG_S" \n\t" \
117 "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm3 \n\t" \
118 "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
119 "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
120 "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
121 "movq %%mm2, %%mm0 \n\t"\
122 "punpcklwd %%mm3, %%mm2 \n\t"\
123 "punpckhwd %%mm3, %%mm0 \n\t"\
124 "pmaddwd %%mm1, %%mm2 \n\t"\
125 "pmaddwd %%mm1, %%mm0 \n\t"\
126 "paddd %%mm2, %%mm6 \n\t"\
127 "paddd %%mm0, %%mm7 \n\t"\
129 "psrad $16, %%mm4 \n\t"\
130 "psrad $16, %%mm5 \n\t"\
131 "psrad $16, %%mm6 \n\t"\
132 "psrad $16, %%mm7 \n\t"\
133 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
134 "packssdw %%mm5, %%mm4 \n\t"\
135 "packssdw %%mm7, %%mm6 \n\t"\
136 "paddw %%mm0, %%mm4 \n\t"\
137 "paddw %%mm0, %%mm6 \n\t"\
138 "movq %%mm4, "U_TEMP"(%0) \n\t"\
139 "movq %%mm6, "V_TEMP"(%0) \n\t"\
/*
 * Luma/alpha stage of the higher-precision vertical filter.
 *
 * Parameter:
 *   offset  string literal: displacement (from %0) of the filter table to
 *           walk, e.g. LUM_MMX_FILTER_OFFSET or ALP_MMX_FILTER_OFFSET
 *
 * Uses the same two-taps-per-entry table layout as
 * YSCALEYUV2PACKEDX_ACCURATE_UV (pointers at +0 and +APCK_PTR2, coefficient
 * qword at +APCK_COEF, entry size APCK_SIZE).  Two consecutive qwords of
 * samples are filtered per source row, addressed as (src + FF_REG_a*2) and
 * 8(src + FF_REG_a*2):
 *   - first qword:  32-bit sums accumulate in mm1 (low) / mm5 (high)
 *   - second qword: 32-bit sums accumulate in mm7 (low) / mm6 (high)
 *
 * Tail: sums are shifted right by 16, packed with signed saturation into
 * mm1 and mm7, and the constant at VROUNDER_OFFSET(%0) is added.  Finally
 * the chroma results saved at U_TEMP(%0) / V_TEMP(%0) are reloaded into
 * mm3 / mm4.
 *
 * On exit: mm1, mm7 = filtered samples, mm3 = U_TEMP, mm4 = V_TEMP.
 * FF_REG_a must already hold the current offset; it is not set here.
 *
 * NOTE(review): loop labels and the branch that consumes the "test" flags
 * are not among the lines shown here.
 */
141 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
142 "lea "offset"(%0), %%"FF_REG_d" \n\t"\
143 "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
144 "pxor %%mm1, %%mm1 \n\t"\
145 "pxor %%mm5, %%mm5 \n\t"\
146 "pxor %%mm7, %%mm7 \n\t"\
147 "pxor %%mm6, %%mm6 \n\t"\
150 "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm0 \n\t" \
151 "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm2 \n\t" \
152 "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
153 "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm4 \n\t" \
154 "movq %%mm0, %%mm3 \n\t"\
155 "punpcklwd %%mm4, %%mm0 \n\t"\
156 "punpckhwd %%mm4, %%mm3 \n\t"\
157 "movq "STR(APCK_COEF)"(%%"FF_REG_d"), %%mm4 \n\t" \
158 "pmaddwd %%mm4, %%mm0 \n\t"\
159 "pmaddwd %%mm4, %%mm3 \n\t"\
160 "paddd %%mm0, %%mm1 \n\t"\
161 "paddd %%mm3, %%mm5 \n\t"\
162 "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm3 \n\t" \
163 "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
164 "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
165 "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
166 "movq %%mm2, %%mm0 \n\t"\
167 "punpcklwd %%mm3, %%mm2 \n\t"\
168 "punpckhwd %%mm3, %%mm0 \n\t"\
169 "pmaddwd %%mm4, %%mm2 \n\t"\
170 "pmaddwd %%mm4, %%mm0 \n\t"\
171 "paddd %%mm2, %%mm7 \n\t"\
172 "paddd %%mm0, %%mm6 \n\t"\
174 "psrad $16, %%mm1 \n\t"\
175 "psrad $16, %%mm5 \n\t"\
176 "psrad $16, %%mm7 \n\t"\
177 "psrad $16, %%mm6 \n\t"\
178 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
179 "packssdw %%mm5, %%mm1 \n\t"\
180 "packssdw %%mm6, %%mm7 \n\t"\
181 "paddw %%mm0, %%mm1 \n\t"\
182 "paddw %%mm0, %%mm7 \n\t"\
183 "movq "U_TEMP"(%0), %%mm3 \n\t"\
184 "movq "V_TEMP"(%0), %%mm4 \n\t"\
/*
 * Complete higher-precision vertical filter: the chroma stage (results
 * parked in U_TEMP / V_TEMP) followed by the luma stage on the table at
 * LUM_MMX_FILTER_OFFSET, which reloads the chroma into mm3 / mm4 at its
 * end.  Leaves chroma in mm3/mm4 and luma in mm1/mm7, the same register
 * contract as YSCALEYUV2PACKEDX.
 */
186 #define YSCALEYUV2PACKEDX_ACCURATE \
187 YSCALEYUV2PACKEDX_ACCURATE_UV \
188 YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
/*
 * YUV -> RGB colour conversion on the registers left by the vertical
 * filter.  All constants are read relative to operand %0.
 *
 * Input:
 *   mm3       4 U samples (16-bit)
 *   mm4       4 V samples (16-bit)
 *   mm1, mm7  8 luma samples (16-bit), first and second group of four
 *
 * Steps:
 *   - U_OFFSET / V_OFFSET are subtracted from the chroma, Y_OFFSET from the
 *     luma.
 *   - pmulhw applies the coefficients: UG_COEFF and VG_COEFF (summed into
 *     mm4, the green contribution), UB_COEFF (mm2, blue), VR_COEFF (mm5,
 *     red) and Y_COEFF (luma).
 *   - punpcklwd/punpckhwd of a register with itself duplicates each chroma
 *     word, so one chroma sample is shared by two neighbouring luma
 *     samples.
 *   - luma is added and the results are packed to unsigned bytes with
 *     saturation.
 *
 * Output (8 bytes each): mm2 = blue, mm4 = green, mm5 = red.
 * Clobbers mm0, mm1, mm3, mm6, mm7.
 */
190 #define YSCALEYUV2RGBX \
191 "psubw "U_OFFSET"(%0), %%mm3 \n\t" \
192 "psubw "V_OFFSET"(%0), %%mm4 \n\t" \
193 "movq %%mm3, %%mm2 \n\t" \
194 "movq %%mm4, %%mm5 \n\t" \
195 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
196 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
198 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
199 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
200 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" \
201 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" \
202 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
203 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
205 "paddw %%mm3, %%mm4 \n\t"\
206 "movq %%mm2, %%mm0 \n\t"\
207 "movq %%mm5, %%mm6 \n\t"\
208 "movq %%mm4, %%mm3 \n\t"\
209 "punpcklwd %%mm2, %%mm2 \n\t"\
210 "punpcklwd %%mm5, %%mm5 \n\t"\
211 "punpcklwd %%mm4, %%mm4 \n\t"\
212 "paddw %%mm1, %%mm2 \n\t"\
213 "paddw %%mm1, %%mm5 \n\t"\
214 "paddw %%mm1, %%mm4 \n\t"\
215 "punpckhwd %%mm0, %%mm0 \n\t"\
216 "punpckhwd %%mm6, %%mm6 \n\t"\
217 "punpckhwd %%mm3, %%mm3 \n\t"\
218 "paddw %%mm7, %%mm0 \n\t"\
219 "paddw %%mm7, %%mm6 \n\t"\
220 "paddw %%mm7, %%mm3 \n\t"\
222 "packuswb %%mm0, %%mm2 \n\t"\
223 "packuswb %%mm6, %%mm5 \n\t"\
224 "packuswb %%mm3, %%mm4 \n\t"\
/*
 * Interleave four byte planes into eight 32-bit pixels and store them.
 *
 * Parameters (register arguments are stringized, pass raw tokens):
 *   dst             register holding the destination base address
 *   dstw            string literal naming the width operand; it is pasted
 *                   unquoted into the "cmp", so the caller supplies the
 *                   quotes (e.g. "%5")
 *   index           register holding the current pixel index
 *   b, g, r, a      MMX registers with 8 bytes each; in memory every pixel
 *                   ends up as the byte sequence b, g, r, a
 *   q0, q2, q3, t   MMX scratch registers
 *
 * b and r are overwritten while interleaving; g and a are only read.
 * The four result qwords are written with MOVNTQ to
 * dst + index*4 + {0, 8, 16, 24}.  index is then advanced by 8 and compared
 * with dstw, leaving the flags for the caller's loop branch.
 *
 * WRITEBGR32 forwards to REAL_WRITEBGR32 so that macro arguments are
 * expanded before being stringized.
 */
226 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
227 "movq "#b", "#q2" \n\t" \
228 "movq "#r", "#t" \n\t" \
229 "punpcklbw "#g", "#b" \n\t" \
230 "punpcklbw "#a", "#r" \n\t" \
231 "punpckhbw "#g", "#q2" \n\t" \
232 "punpckhbw "#a", "#t" \n\t" \
233 "movq "#b", "#q0" \n\t" \
234 "movq "#q2", "#q3" \n\t" \
235 "punpcklwd "#r", "#q0" \n\t" \
236 "punpckhwd "#r", "#b" \n\t" \
237 "punpcklwd "#t", "#q2" \n\t" \
238 "punpckhwd "#t", "#q3" \n\t" \
240 MOVNTQ( q0, (dst, index, 4))\
241 MOVNTQ( b, 8(dst, index, 4))\
242 MOVNTQ( q2, 16(dst, index, 4))\
243 MOVNTQ( q3, 24(dst, index, 4))\
245 "add $8, "#index" \n\t"\
246 "cmp "dstw", "#index" \n\t"\
248 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
251 const int16_t **lumSrc,
int lumFilterSize,
252 const int16_t *chrFilter,
const int16_t **chrUSrc,
253 const int16_t **chrVSrc,
254 int chrFilterSize,
const int16_t **alpSrc,
255 uint8_t *dest,
int dstW,
int dstY)
261 if (CONFIG_SWSCALE_ALPHA &&
c->needAlpha) {
264 "movq %%mm2, "U_TEMP"(%0) \n\t"
265 "movq %%mm4, "V_TEMP"(%0) \n\t"
266 "movq %%mm5, "Y_TEMP"(%0) \n\t"
268 "movq "Y_TEMP"(%0), %%mm5 \n\t"
269 "psraw $3, %%mm1 \n\t"
270 "psraw $3, %%mm7 \n\t"
271 "packuswb %%mm7, %%mm1 \n\t"
272 WRITEBGR32(%4,
"%5", %%FF_REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
277 "pcmpeqd %%mm7, %%mm7 \n\t"
278 WRITEBGR32(%4,
"%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
284 const int16_t **lumSrc,
int lumFilterSize,
285 const int16_t *chrFilter,
const int16_t **chrUSrc,
286 const int16_t **chrVSrc,
287 int chrFilterSize,
const int16_t **alpSrc,
288 uint8_t *dest,
int dstW,
int dstY)
294 if (CONFIG_SWSCALE_ALPHA &&
c->needAlpha) {
298 "psraw $3, %%mm1 \n\t"
299 "psraw $3, %%mm7 \n\t"
300 "packuswb %%mm7, %%mm1 \n\t"
301 WRITEBGR32(%4,
"%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
306 "pcmpeqd %%mm7, %%mm7 \n\t"
307 WRITEBGR32(%4,
"%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
313 const int16_t **lumSrc,
int lumFilterSize,
314 const int16_t *chrFilter,
const int16_t **chrUSrc,
315 const int16_t **chrVSrc,
316 int chrFilterSize,
const int16_t **alpSrc,
317 uint8_t *dest,
int dstW,
int dstY)
323 if (CONFIG_SWSCALE_ALPHA &&
c->needAlpha) {
327 "psraw $3, %%mm1 \n\t"
328 "psraw $3, %%mm7 \n\t"
329 "packuswb %%mm7, %%mm1 \n\t"
330 WRITEBGR32(%4,
"%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
335 "pcmpeqd %%mm7, %%mm7 \n\t"
336 WRITEBGR32(%4,
"%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
/*
 * Pack eight pixels from three byte planes into 16-bit pixels and store
 * them.
 *
 * Parameters:
 *   dst    register holding the destination base address
 *   dstw   string literal naming the width operand (pasted unquoted)
 *   index  register holding the current pixel index
 *
 * Fixed register contract:
 *   mm2, mm5  byte planes masked with bF8; mm4 byte plane masked with bFC.
 *             With the output of YSCALEYUV2RGBX these are blue, red and
 *             green respectively.
 *   mm7       must be zero on entry; it supplies the zero bytes when mm4 is
 *             widened to words.
 *
 * mm2 is shifted right by 3 and interleaved bytewise with mm5; the widened
 * mm4 is shifted left by 3 and OR-ed in.  The two result qwords (8 pixels,
 * 2 bytes each) go to dst + index*2 and dst + index*2 + 8 via MOVNTQ.
 * index is advanced by 8 and compared with dstw.
 *
 * NOTE(review): bF8 / bFC are presumably per-byte 0xF8 / 0xFC masks (5- and
 * 6-bit channels); their definitions are not visible here.
 *
 * Clobbers mm1..mm5.  WRITERGB16 forwards to REAL_WRITERGB16 so arguments
 * are macro-expanded before being stringized.
 */
341 #define REAL_WRITERGB16(dst, dstw, index) \
342 "pand "MANGLE(bF8)", %%mm2 \n\t" \
343 "pand "MANGLE(bFC)", %%mm4 \n\t" \
344 "pand "MANGLE(bF8)", %%mm5 \n\t" \
345 "psrlq $3, %%mm2 \n\t"\
347 "movq %%mm2, %%mm1 \n\t"\
348 "movq %%mm4, %%mm3 \n\t"\
350 "punpcklbw %%mm7, %%mm3 \n\t"\
351 "punpcklbw %%mm5, %%mm2 \n\t"\
352 "punpckhbw %%mm7, %%mm4 \n\t"\
353 "punpckhbw %%mm5, %%mm1 \n\t"\
355 "psllq $3, %%mm3 \n\t"\
356 "psllq $3, %%mm4 \n\t"\
358 "por %%mm3, %%mm2 \n\t"\
359 "por %%mm4, %%mm1 \n\t"\
361 MOVNTQ(%%mm2, (dst, index, 2))\
362 MOVNTQ(%%mm1, 8(dst, index, 2))\
364 "add $8, "#index" \n\t"\
365 "cmp "dstw", "#index" \n\t"\
367 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
370 const int16_t **lumSrc,
int lumFilterSize,
371 const int16_t *chrFilter,
const int16_t **chrUSrc,
372 const int16_t **chrVSrc,
373 int chrFilterSize,
const int16_t **alpSrc,
374 uint8_t *dest,
int dstW,
int dstY)
382 "pxor %%mm7, %%mm7 \n\t"
392 const int16_t **lumSrc,
int lumFilterSize,
393 const int16_t *chrFilter,
const int16_t **chrUSrc,
394 const int16_t **chrVSrc,
395 int chrFilterSize,
const int16_t **alpSrc,
396 uint8_t *dest,
int dstW,
int dstY)
404 "pxor %%mm7, %%mm7 \n\t"
/*
 * Pack eight pixels from three byte planes into 16-bit pixels and store
 * them; same structure as REAL_WRITERGB16 with different masks and shifts.
 *
 * Parameters:
 *   dst    register holding the destination base address
 *   dstw   string literal naming the width operand (pasted unquoted)
 *   index  register holding the current pixel index
 *
 * Fixed register contract:
 *   mm2, mm4, mm5  byte planes, all three masked with bF8.  With the output
 *                  of YSCALEYUV2RGBX these are blue, green and red.
 *   mm7            must be zero on entry (used to widen mm4 to words).
 *
 * Differences from the 16-bit writer that are visible here: mm4 is masked
 * with bF8 instead of bFC, mm5 is additionally shifted right by 1, and the
 * widened mm4 is shifted left by 2 instead of 3.
 *
 * The two result qwords go to dst + index*2 and dst + index*2 + 8 via
 * MOVNTQ; index is advanced by 8 and compared with dstw.
 *
 * Clobbers mm1..mm5.  WRITERGB15 forwards to REAL_WRITERGB15 so arguments
 * are macro-expanded before being stringized.
 */
413 #define REAL_WRITERGB15(dst, dstw, index) \
414 "pand "MANGLE(bF8)", %%mm2 \n\t" \
415 "pand "MANGLE(bF8)", %%mm4 \n\t" \
416 "pand "MANGLE(bF8)", %%mm5 \n\t" \
417 "psrlq $3, %%mm2 \n\t"\
418 "psrlq $1, %%mm5 \n\t"\
420 "movq %%mm2, %%mm1 \n\t"\
421 "movq %%mm4, %%mm3 \n\t"\
423 "punpcklbw %%mm7, %%mm3 \n\t"\
424 "punpcklbw %%mm5, %%mm2 \n\t"\
425 "punpckhbw %%mm7, %%mm4 \n\t"\
426 "punpckhbw %%mm5, %%mm1 \n\t"\
428 "psllq $2, %%mm3 \n\t"\
429 "psllq $2, %%mm4 \n\t"\
431 "por %%mm3, %%mm2 \n\t"\
432 "por %%mm4, %%mm1 \n\t"\
434 MOVNTQ(%%mm2, (dst, index, 2))\
435 MOVNTQ(%%mm1, 8(dst, index, 2))\
437 "add $8, "#index" \n\t"\
438 "cmp "dstw", "#index" \n\t"\
440 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
443 const int16_t **lumSrc,
int lumFilterSize,
444 const int16_t *chrFilter,
const int16_t **chrUSrc,
445 const int16_t **chrVSrc,
446 int chrFilterSize,
const int16_t **alpSrc,
447 uint8_t *dest,
int dstW,
int dstY)
455 "pxor %%mm7, %%mm7 \n\t"
465 const int16_t **lumSrc,
int lumFilterSize,
466 const int16_t *chrFilter,
const int16_t **chrUSrc,
467 const int16_t **chrVSrc,
468 int chrFilterSize,
const int16_t **alpSrc,
469 uint8_t *dest,
int dstW,
int dstY)
477 "pxor %%mm7, %%mm7 \n\t"
/*
 * Pack eight pixels from three byte planes into 24 bytes and store them,
 * using the pshufw word shuffle.
 *
 * Parameters:
 *   dst    register holding the current destination ADDRESS.  Unlike the
 *          other writers it is not combined with index; it is advanced by
 *          24 in place after the stores.
 *   dstw   string literal naming the width operand (pasted unquoted)
 *   index  register holding the current pixel index
 *
 * Fixed register contract: the byte planes are taken from mm2, mm4 and mm5
 * (blue, green, red when fed by YSCALEYUV2RGBX).
 *
 * Each of the three output qwords is assembled the same way: pshufw picks
 * the words needed from every plane, the masks M24A / M24B / M24C keep the
 * wanted bytes, and the pieces are OR-ed together.  The qwords are written
 * with MOVNTQ to (dst), 8(dst) and 16(dst).
 *
 * Side effects: mm0 and mm7 are overwritten with M24A and M24C, mm1, mm3
 * and mm6 are scratch, and mm4 is shifted right by 8 bits in place after
 * the first qword has been built.  dst += 24, index += 8, then index is
 * compared with dstw.
 *
 * WRITEBGR24 is an alias for this macro.
 */
486 #define WRITEBGR24MMXEXT(dst, dstw, index) \
488 "movq "MANGLE(M24A)", %%mm0 \n\t"\
489 "movq "MANGLE(M24C)", %%mm7 \n\t"\
490 "pshufw $0x50, %%mm2, %%mm1 \n\t" \
491 "pshufw $0x50, %%mm4, %%mm3 \n\t" \
492 "pshufw $0x00, %%mm5, %%mm6 \n\t" \
494 "pand %%mm0, %%mm1 \n\t" \
495 "pand %%mm0, %%mm3 \n\t" \
496 "pand %%mm7, %%mm6 \n\t" \
498 "psllq $8, %%mm3 \n\t" \
499 "por %%mm1, %%mm6 \n\t"\
500 "por %%mm3, %%mm6 \n\t"\
501 MOVNTQ(%%mm6, (dst))\
503 "psrlq $8, %%mm4 \n\t" \
504 "pshufw $0xA5, %%mm2, %%mm1 \n\t" \
505 "pshufw $0x55, %%mm4, %%mm3 \n\t" \
506 "pshufw $0xA5, %%mm5, %%mm6 \n\t" \
508 "pand "MANGLE(M24B)", %%mm1 \n\t" \
509 "pand %%mm7, %%mm3 \n\t" \
510 "pand %%mm0, %%mm6 \n\t" \
512 "por %%mm1, %%mm3 \n\t" \
513 "por %%mm3, %%mm6 \n\t"\
514 MOVNTQ(%%mm6, 8(dst))\
516 "pshufw $0xFF, %%mm2, %%mm1 \n\t" \
517 "pshufw $0xFA, %%mm4, %%mm3 \n\t" \
518 "pshufw $0xFA, %%mm5, %%mm6 \n\t" \
520 "pand %%mm7, %%mm1 \n\t" \
521 "pand %%mm0, %%mm3 \n\t" \
522 "pand "MANGLE(M24B)", %%mm6 \n\t" \
524 "por %%mm1, %%mm3 \n\t"\
525 "por %%mm3, %%mm6 \n\t"\
526 MOVNTQ(%%mm6, 16(dst))\
528 "add $24, "#dst" \n\t"\
530 "add $8, "#index" \n\t"\
531 "cmp "dstw", "#index" \n\t"\
535 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index)
539 const int16_t **lumSrc,
int lumFilterSize,
540 const int16_t *chrFilter,
const int16_t **chrUSrc,
541 const int16_t **chrVSrc,
542 int chrFilterSize,
const int16_t **alpSrc,
543 uint8_t *dest,
int dstW,
int dstY)
551 "pxor %%mm7, %%mm7 \n\t"
552 "lea (%%"FF_REG_a
", %%"FF_REG_a
", 2), %%"FF_REG_c
"\n\t"
553 "add %4, %%"FF_REG_c
" \n\t"
555 ::
"r" (&
c->redDither),
557 "r" (dest),
"m" (dstW_reg),
"m"(uv_off)
559 :
"%"FF_REG_a,
"%"FF_REG_c,
"%"FF_REG_d,
"%"FF_REG_S
564 const int16_t **lumSrc,
int lumFilterSize,
565 const int16_t *chrFilter,
const int16_t **chrUSrc,
566 const int16_t **chrVSrc,
567 int chrFilterSize,
const int16_t **alpSrc,
568 uint8_t *dest,
int dstW,
int dstY)
576 "pxor %%mm7, %%mm7 \n\t"
577 "lea (%%"FF_REG_a
", %%"FF_REG_a
", 2), %%"FF_REG_c
" \n\t"
578 "add %4, %%"FF_REG_c
" \n\t"
580 ::
"r" (&
c->redDither),
582 "r" (dest),
"m" (dstW_reg),
"m"(uv_off)
584 :
"%"FF_REG_a,
"%"FF_REG_c,
"%"FF_REG_d,
"%"FF_REG_S
/*
 * Interleave luma and chroma into packed 4:2:2 bytes and store 8 pixels.
 *
 * Parameters:
 *   dst    register holding the destination base address
 *   dstw   string literal naming the width operand (pasted unquoted)
 *   index  register holding the current pixel index
 *
 * Fixed register contract (16-bit samples already scaled to byte range):
 *   mm3       4 samples of the first chroma plane
 *   mm4       4 samples of the second chroma plane
 *   mm1, mm7  8 luma samples (first / second group of four)
 *
 * Everything is packed to unsigned bytes with saturation, the two chroma
 * planes are interleaved bytewise (mm3, then mm4), and that stream is
 * interleaved with the luma bytes, giving the byte order
 * Y0 C1_0 Y1 C2_0 Y2 C1_1 Y3 C2_1 ... (C1 from mm3, C2 from mm4).
 * The two result qwords go to dst + index*2 and dst + index*2 + 8 via
 * MOVNTQ; index is advanced by 8 and compared with dstw.
 *
 * Clobbers mm1, mm3, mm4, mm7.  WRITEYUY2 forwards to REAL_WRITEYUY2 so
 * arguments are macro-expanded before being stringized.
 */
589 #define REAL_WRITEYUY2(dst, dstw, index) \
590 "packuswb %%mm3, %%mm3 \n\t"\
591 "packuswb %%mm4, %%mm4 \n\t"\
592 "packuswb %%mm7, %%mm1 \n\t"\
593 "punpcklbw %%mm4, %%mm3 \n\t"\
594 "movq %%mm1, %%mm7 \n\t"\
595 "punpcklbw %%mm3, %%mm1 \n\t"\
596 "punpckhbw %%mm3, %%mm7 \n\t"\
598 MOVNTQ(%%mm1, (dst, index, 2))\
599 MOVNTQ(%%mm7, 8(dst, index, 2))\
601 "add $8, "#index" \n\t"\
602 "cmp "dstw", "#index" \n\t"\
604 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
607 const int16_t **lumSrc,
int lumFilterSize,
608 const int16_t *chrFilter,
const int16_t **chrUSrc,
609 const int16_t **chrVSrc,
610 int chrFilterSize,
const int16_t **alpSrc,
611 uint8_t *dest,
int dstW,
int dstY)
619 "psraw $3, %%mm3 \n\t"
620 "psraw $3, %%mm4 \n\t"
621 "psraw $3, %%mm1 \n\t"
622 "psraw $3, %%mm7 \n\t"
628 const int16_t **lumSrc,
int lumFilterSize,
629 const int16_t *chrFilter,
const int16_t **chrUSrc,
630 const int16_t **chrVSrc,
631 int chrFilterSize,
const int16_t **alpSrc,
632 uint8_t *dest,
int dstW,
int dstY)
640 "psraw $3, %%mm3 \n\t"
641 "psraw $3, %%mm4 \n\t"
642 "psraw $3, %%mm1 \n\t"
643 "psraw $3, %%mm7 \n\t"
/*
 * Chroma part of the two-row blend + YUV->RGB path.
 *
 * Parameters (stringized, pass raw tokens):
 *   index  register used as the running offset; cleared here
 *   c      register holding the base for the *_OFFSET displacements
 *
 * Asm operands used: %2 and %3 are the two chroma source rows.
 *
 * For the first chroma plane (read at index) and the second (read at
 * index + the value stored at UV_OFF_BYTE(c); index is restored
 * afterwards) it computes, per 16-bit sample,
 *     (row3 >> 4) + pmulhw(row2 - row3, coeff)
 * where coeff is the qword at CHR_MMX_FILTER_OFFSET+8(c).
 *
 * U_OFFSET / V_OFFSET are then subtracted, the values are copied to
 * mm2 / mm5 for the later UB / VR multiplications, and mm3 / mm4 are
 * multiplied by UG_COEFF / VG_COEFF.
 *
 * On exit: mm2 = U, mm5 = V (offset removed), mm3 = U*UG, mm4 = V*VG.
 *
 * NOTE(review): the loop label that normally follows the "xor" is not
 * among the lines shown here.
 */
648 #define REAL_YSCALEYUV2RGB_UV(index, c) \
649 "xor "#index", "#index" \n\t"\
652 "movq (%2, "#index"), %%mm2 \n\t" \
653 "movq (%3, "#index"), %%mm3 \n\t" \
654 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
655 "movq (%2, "#index"), %%mm5 \n\t" \
656 "movq (%3, "#index"), %%mm4 \n\t" \
657 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
658 "psubw %%mm3, %%mm2 \n\t" \
659 "psubw %%mm4, %%mm5 \n\t" \
660 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
661 "pmulhw %%mm0, %%mm2 \n\t" \
662 "pmulhw %%mm0, %%mm5 \n\t" \
663 "psraw $4, %%mm3 \n\t" \
664 "psraw $4, %%mm4 \n\t" \
665 "paddw %%mm2, %%mm3 \n\t" \
666 "paddw %%mm5, %%mm4 \n\t" \
667 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
668 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
669 "movq %%mm3, %%mm2 \n\t" \
670 "movq %%mm4, %%mm5 \n\t" \
671 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
672 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
/*
 * Blend eight 16-bit samples from two source rows (luma or alpha).
 *
 * Parameters (stringized, pass raw tokens):
 *   index   register holding the running offset; samples are addressed as
 *           (row + index*2) and 8(row + index*2)
 *   c       register holding the base for the *_OFFSET displacements
 *   b1, b2  registers (or asm operands such as %0 / %1) pointing at the
 *           two rows
 *
 * Per sample:  (b2 >> 4) + pmulhw(b1 - b2, coeff), with coeff read from
 * LUM_MMX_FILTER_OFFSET+8(c).
 *
 * On exit: mm1 = samples 0..3, mm7 = samples 4..7.  Clobbers mm0, mm6.
 */
675 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
676 "movq ("#b1", "#index", 2), %%mm0 \n\t" \
677 "movq ("#b2", "#index", 2), %%mm1 \n\t" \
678 "movq 8("#b1", "#index", 2), %%mm6 \n\t" \
679 "movq 8("#b2", "#index", 2), %%mm7 \n\t" \
680 "psubw %%mm1, %%mm0 \n\t" \
681 "psubw %%mm7, %%mm6 \n\t" \
682 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
683 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
684 "psraw $4, %%mm1 \n\t" \
685 "psraw $4, %%mm7 \n\t" \
686 "paddw %%mm0, %%mm1 \n\t" \
687 "paddw %%mm6, %%mm7 \n\t" \
/*
 * Final colour-matrix step of the two-row YUV->RGB path.
 *
 * Parameter:
 *   c  register holding the base for the *_OFFSET displacements
 *
 * Expected on entry (as left by REAL_YSCALEYUV2RGB_UV and
 * REAL_YSCALEYUV2RGB_YA): mm2 = U, mm5 = V, mm3 = U*UG, mm4 = V*VG,
 * mm1 / mm7 = eight luma samples.
 *
 * Steps: mm2 and mm5 are multiplied by UB_COEFF / VR_COEFF; Y_OFFSET is
 * subtracted from the luma, which is then multiplied by Y_COEFF; the two
 * green products are summed; every chroma word is duplicated (punpck?wd of
 * a register with itself) so that one chroma sample covers two luma
 * samples; luma is added and the results are packed to unsigned bytes with
 * saturation.
 *
 * Output (8 bytes each): mm2 = blue, mm4 = green, mm5 = red.
 * Clobbers mm0, mm1, mm3, mm6, mm7.
 */
689 #define REAL_YSCALEYUV2RGB_COEFF(c) \
690 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
691 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
692 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
693 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
694 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
695 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
697 "paddw %%mm3, %%mm4 \n\t"\
698 "movq %%mm2, %%mm0 \n\t"\
699 "movq %%mm5, %%mm6 \n\t"\
700 "movq %%mm4, %%mm3 \n\t"\
701 "punpcklwd %%mm2, %%mm2 \n\t"\
702 "punpcklwd %%mm5, %%mm5 \n\t"\
703 "punpcklwd %%mm4, %%mm4 \n\t"\
704 "paddw %%mm1, %%mm2 \n\t"\
705 "paddw %%mm1, %%mm5 \n\t"\
706 "paddw %%mm1, %%mm4 \n\t"\
707 "punpckhwd %%mm0, %%mm0 \n\t"\
708 "punpckhwd %%mm6, %%mm6 \n\t"\
709 "punpckhwd %%mm3, %%mm3 \n\t"\
710 "paddw %%mm7, %%mm0 \n\t"\
711 "paddw %%mm7, %%mm6 \n\t"\
712 "paddw %%mm7, %%mm3 \n\t"\
714 "packuswb %%mm0, %%mm2 \n\t"\
715 "packuswb %%mm6, %%mm5 \n\t"\
716 "packuswb %%mm3, %%mm4 \n\t"\
/*
 * YSCALEYUV2RGB_YA forwards to REAL_YSCALEYUV2RGB_YA so that its arguments
 * are macro-expanded before being stringized.
 *
 * YSCALEYUV2RGB(index, c) is the complete two-row blend + colour
 * conversion: chroma blend, luma blend of the rows in asm operands %0 and
 * %1, then the coefficient/packing step.  It leaves blue, green and red
 * bytes in mm2, mm4 and mm5.
 */
718 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
720 #define YSCALEYUV2RGB(index, c) \
721 REAL_YSCALEYUV2RGB_UV(index, c) \
722 REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
723 REAL_YSCALEYUV2RGB_COEFF(c)
729 const int16_t *ubuf[2],
const int16_t *vbuf[2],
730 const int16_t *abuf[2], uint8_t *dest,
731 int dstW,
int yalpha,
int uvalpha,
int y)
733 const int16_t *buf0 = buf[0], *buf1 = buf[1],
734 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
736 if (CONFIG_SWSCALE_ALPHA &&
c->needAlpha) {
737 const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
742 "psraw $3, %%mm1 \n\t"
743 "psraw $3, %%mm7 \n\t"
744 "packuswb %%mm7, %%mm1 \n\t"
745 WRITEBGR32(%4,
DSTW_OFFSET"(%5)", %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
746 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"r" (dest),
748 "r" (abuf0),
"r" (abuf1)
752 c->u_temp=(intptr_t)abuf0;
753 c->v_temp=(intptr_t)abuf1;
756 "mov %4, %%"FF_REG_b
" \n\t"
757 "push %%"FF_REG_BP
" \n\t"
761 "mov "U_TEMP"(%5), %0 \n\t"
762 "mov "V_TEMP"(%5), %1 \n\t"
764 "psraw $3, %%mm1 \n\t"
765 "psraw $3, %%mm7 \n\t"
766 "packuswb %%mm7, %%mm1 \n\t"
769 WRITEBGR32(%%FF_REGb,
DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
770 "pop %%"FF_REG_BP
" \n\t"
772 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
779 "mov %4, %%"FF_REG_b
" \n\t"
780 "push %%"FF_REG_BP
" \n\t"
782 "pcmpeqd %%mm7, %%mm7 \n\t"
783 WRITEBGR32(%%FF_REGb,
DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
784 "pop %%"FF_REG_BP
" \n\t"
786 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
793 const int16_t *ubuf[2],
const int16_t *vbuf[2],
794 const int16_t *abuf[2], uint8_t *dest,
795 int dstW,
int yalpha,
int uvalpha,
int y)
797 const int16_t *buf0 = buf[0], *buf1 = buf[1],
798 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
802 "mov %4, %%"FF_REG_b
" \n\t"
803 "push %%"FF_REG_BP
" \n\t"
805 "pxor %%mm7, %%mm7 \n\t"
807 "pop %%"FF_REG_BP
" \n\t"
809 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
816 const int16_t *ubuf[2],
const int16_t *vbuf[2],
817 const int16_t *abuf[2], uint8_t *dest,
818 int dstW,
int yalpha,
int uvalpha,
int y)
820 const int16_t *buf0 = buf[0], *buf1 = buf[1],
821 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
825 "mov %4, %%"FF_REG_b
" \n\t"
826 "push %%"FF_REG_BP
" \n\t"
828 "pxor %%mm7, %%mm7 \n\t"
834 "pop %%"FF_REG_BP
" \n\t"
836 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
843 const int16_t *ubuf[2],
const int16_t *vbuf[2],
844 const int16_t *abuf[2], uint8_t *dest,
845 int dstW,
int yalpha,
int uvalpha,
int y)
847 const int16_t *buf0 = buf[0], *buf1 = buf[1],
848 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
852 "mov %4, %%"FF_REG_b
" \n\t"
853 "push %%"FF_REG_BP
" \n\t"
855 "pxor %%mm7, %%mm7 \n\t"
861 "pop %%"FF_REG_BP
" \n\t"
863 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
/*
 * Two-row blend for packed YUV output (no colour conversion).
 *
 * Parameters (stringized, pass raw tokens):
 *   index  register used as the running offset; cleared here
 *   c      register holding the base for the *_OFFSET displacements
 *
 * Asm operands used: %0 / %1 are the two luma rows, %2 / %3 the two chroma
 * rows.
 *
 * IMPORTANT side effect: before the loop the coefficient qwords at
 * CHR_MMX_FILTER_OFFSET+8(c) and LUM_MMX_FILTER_OFFSET+8(c) are shifted
 * right by 3 and WRITTEN BACK to memory, so the stored coefficients are
 * modified in place by this macro.
 *
 * Per sample the blend is  (rowB >> 7) + pmulhw(rowA - rowB, coeff),
 * where rowA is %2 (chroma) or %0 (luma) and rowB is %3 or %1.  The second
 * chroma plane is read at index + the value stored at UV_OFF_BYTE(c), and
 * index is restored afterwards.
 *
 * On exit: mm3 / mm4 = the two chroma planes (4 samples each),
 * mm1 / mm7 = eight luma samples.  Clobbers mm0, mm2, mm5, mm6.
 *
 * YSCALEYUV2PACKED forwards to REAL_YSCALEYUV2PACKED so arguments are
 * macro-expanded before being stringized.
 *
 * NOTE(review): the loop label that normally follows the "xor" is not
 * among the lines shown here.
 */
869 #define REAL_YSCALEYUV2PACKED(index, c) \
870 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
871 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
872 "psraw $3, %%mm0 \n\t"\
873 "psraw $3, %%mm1 \n\t"\
874 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
875 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
876 "xor "#index", "#index" \n\t"\
879 "movq (%2, "#index"), %%mm2 \n\t" \
880 "movq (%3, "#index"), %%mm3 \n\t" \
881 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
882 "movq (%2, "#index"), %%mm5 \n\t" \
883 "movq (%3, "#index"), %%mm4 \n\t" \
884 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
885 "psubw %%mm3, %%mm2 \n\t" \
886 "psubw %%mm4, %%mm5 \n\t" \
887 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
888 "pmulhw %%mm0, %%mm2 \n\t" \
889 "pmulhw %%mm0, %%mm5 \n\t" \
890 "psraw $7, %%mm3 \n\t" \
891 "psraw $7, %%mm4 \n\t" \
892 "paddw %%mm2, %%mm3 \n\t" \
893 "paddw %%mm5, %%mm4 \n\t" \
894 "movq (%0, "#index", 2), %%mm0 \n\t" \
895 "movq (%1, "#index", 2), %%mm1 \n\t" \
896 "movq 8(%0, "#index", 2), %%mm6 \n\t" \
897 "movq 8(%1, "#index", 2), %%mm7 \n\t" \
898 "psubw %%mm1, %%mm0 \n\t" \
899 "psubw %%mm7, %%mm6 \n\t" \
900 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
901 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
902 "psraw $7, %%mm1 \n\t" \
903 "psraw $7, %%mm7 \n\t" \
904 "paddw %%mm0, %%mm1 \n\t" \
905 "paddw %%mm6, %%mm7 \n\t" \
907 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
910 const int16_t *ubuf[2],
const int16_t *vbuf[2],
911 const int16_t *abuf[2], uint8_t *dest,
912 int dstW,
int yalpha,
int uvalpha,
int y)
914 const int16_t *buf0 = buf[0], *buf1 = buf[1],
915 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
919 "mov %4, %%"FF_REG_b
" \n\t"
920 "push %%"FF_REG_BP
" \n\t"
923 "pop %%"FF_REG_BP
" \n\t"
925 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
/*
 * Single-row YUV->RGB conversion: no vertical blending, chroma is taken
 * from one row only.
 *
 * Parameters (stringized, pass raw tokens):
 *   index  register used as the running offset; cleared here
 *   c      register holding the base for the *_OFFSET displacements
 *
 * Asm operands used: %0 = luma row, %2 = chroma row.
 *
 * Chroma: the first plane is read at index, the second at index + the
 * value stored at UV_OFF_BYTE(c) (index is restored afterwards); both are
 * shifted right by 4 and have U_OFFSET / V_OFFSET subtracted.
 * Luma: eight samples read at (%0 + index*2), shifted right by 4.
 *
 * The colour math that follows is the same sequence as in
 * REAL_YSCALEYUV2RGB_COEFF: UG/VG/UB/VR/Y coefficients via pmulhw, chroma
 * words duplicated to cover two luma samples each, luma added, results
 * packed to unsigned bytes with saturation.
 *
 * Output (8 bytes each): mm2 = blue, mm4 = green, mm5 = red.
 * Clobbers mm0, mm1, mm3, mm6, mm7.
 *
 * YSCALEYUV2RGB1 forwards to REAL_YSCALEYUV2RGB1 so arguments are
 * macro-expanded before being stringized.
 *
 * NOTE(review): the loop label that normally follows the "xor" is not
 * among the lines shown here.
 */
930 #define REAL_YSCALEYUV2RGB1(index, c) \
931 "xor "#index", "#index" \n\t"\
934 "movq (%2, "#index"), %%mm3 \n\t" \
935 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
936 "movq (%2, "#index"), %%mm4 \n\t" \
937 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
938 "psraw $4, %%mm3 \n\t" \
939 "psraw $4, %%mm4 \n\t" \
940 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
941 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
942 "movq %%mm3, %%mm2 \n\t" \
943 "movq %%mm4, %%mm5 \n\t" \
944 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
945 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
947 "movq (%0, "#index", 2), %%mm1 \n\t" \
948 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
949 "psraw $4, %%mm1 \n\t" \
950 "psraw $4, %%mm7 \n\t" \
951 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
952 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
953 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
954 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
955 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
956 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
958 "paddw %%mm3, %%mm4 \n\t"\
959 "movq %%mm2, %%mm0 \n\t"\
960 "movq %%mm5, %%mm6 \n\t"\
961 "movq %%mm4, %%mm3 \n\t"\
962 "punpcklwd %%mm2, %%mm2 \n\t"\
963 "punpcklwd %%mm5, %%mm5 \n\t"\
964 "punpcklwd %%mm4, %%mm4 \n\t"\
965 "paddw %%mm1, %%mm2 \n\t"\
966 "paddw %%mm1, %%mm5 \n\t"\
967 "paddw %%mm1, %%mm4 \n\t"\
968 "punpckhwd %%mm0, %%mm0 \n\t"\
969 "punpckhwd %%mm6, %%mm6 \n\t"\
970 "punpckhwd %%mm3, %%mm3 \n\t"\
971 "paddw %%mm7, %%mm0 \n\t"\
972 "paddw %%mm7, %%mm6 \n\t"\
973 "paddw %%mm7, %%mm3 \n\t"\
975 "packuswb %%mm0, %%mm2 \n\t"\
976 "packuswb %%mm6, %%mm5 \n\t"\
977 "packuswb %%mm3, %%mm4 \n\t"\
979 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
/*
 * Single-luma-row YUV->RGB conversion with chroma averaged over two rows.
 *
 * Parameters (stringized, pass raw tokens):
 *   index  register used as the running offset; cleared here
 *   c      register holding the base for the *_OFFSET displacements
 *
 * Asm operands used: %0 = luma row, %2 and %3 = the two chroma rows.
 *
 * Chroma: samples of both rows are added and the sum is shifted right by 5
 * with psrlw (a LOGICAL shift), i.e. the two rows are averaged and scaled
 * by the same >>4 the single-row variant applies.  The second chroma plane
 * is read at index + the value stored at UV_OFF_BYTE(c); index is restored
 * afterwards.  U_OFFSET / V_OFFSET are then subtracted.
 * Luma: eight samples read at (%0 + index*2), shifted right by 4.
 *
 * The colour math that follows is the same sequence as in
 * REAL_YSCALEYUV2RGB_COEFF.
 *
 * Output (8 bytes each): mm2 = blue, mm4 = green, mm5 = red.
 * Clobbers mm0, mm1, mm3, mm6, mm7.
 *
 * YSCALEYUV2RGB1b forwards to REAL_YSCALEYUV2RGB1b so arguments are
 * macro-expanded before being stringized.
 *
 * NOTE(review): the loop label that normally follows the "xor" is not
 * among the lines shown here.
 */
982 #define REAL_YSCALEYUV2RGB1b(index, c) \
983 "xor "#index", "#index" \n\t"\
986 "movq (%2, "#index"), %%mm2 \n\t" \
987 "movq (%3, "#index"), %%mm3 \n\t" \
988 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
989 "movq (%2, "#index"), %%mm5 \n\t" \
990 "movq (%3, "#index"), %%mm4 \n\t" \
991 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
992 "paddw %%mm2, %%mm3 \n\t" \
993 "paddw %%mm5, %%mm4 \n\t" \
994 "psrlw $5, %%mm3 \n\t" \
995 "psrlw $5, %%mm4 \n\t" \
996 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
997 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
998 "movq %%mm3, %%mm2 \n\t" \
999 "movq %%mm4, %%mm5 \n\t" \
1000 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
1001 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
1003 "movq (%0, "#index", 2), %%mm1 \n\t" \
1004 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
1005 "psraw $4, %%mm1 \n\t" \
1006 "psraw $4, %%mm7 \n\t" \
1007 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
1008 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
1009 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
1010 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
1011 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
1012 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
1014 "paddw %%mm3, %%mm4 \n\t"\
1015 "movq %%mm2, %%mm0 \n\t"\
1016 "movq %%mm5, %%mm6 \n\t"\
1017 "movq %%mm4, %%mm3 \n\t"\
1018 "punpcklwd %%mm2, %%mm2 \n\t"\
1019 "punpcklwd %%mm5, %%mm5 \n\t"\
1020 "punpcklwd %%mm4, %%mm4 \n\t"\
1021 "paddw %%mm1, %%mm2 \n\t"\
1022 "paddw %%mm1, %%mm5 \n\t"\
1023 "paddw %%mm1, %%mm4 \n\t"\
1024 "punpckhwd %%mm0, %%mm0 \n\t"\
1025 "punpckhwd %%mm6, %%mm6 \n\t"\
1026 "punpckhwd %%mm3, %%mm3 \n\t"\
1027 "paddw %%mm7, %%mm0 \n\t"\
1028 "paddw %%mm7, %%mm6 \n\t"\
1029 "paddw %%mm7, %%mm3 \n\t"\
1031 "packuswb %%mm0, %%mm2 \n\t"\
1032 "packuswb %%mm6, %%mm5 \n\t"\
1033 "packuswb %%mm3, %%mm4 \n\t"\
1035 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
/*
 * Load eight alpha samples for the single-row RGB path.
 *
 * Parameter:
 *   index  register holding the running offset (stringized)
 *
 * Reads two qwords of 16-bit samples from asm operand %1 at
 * (%1 + index*2) and 8(%1 + index*2), shifts them right by 7 and packs
 * them to unsigned bytes with saturation.
 *
 * On exit: mm7 = 8 alpha bytes.  Clobbers mm1.
 *
 * YSCALEYUV2RGB1_ALPHA forwards to the REAL_ variant so the argument is
 * macro-expanded before being stringized.
 */
1037 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
1038 "movq (%1, "#index", 2), %%mm7 \n\t" \
1039 "movq 8(%1, "#index", 2), %%mm1 \n\t" \
1040 "psraw $7, %%mm7 \n\t" \
1041 "psraw $7, %%mm1 \n\t" \
1042 "packuswb %%mm1, %%mm7 \n\t"
1043 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
1049 const int16_t *ubuf[2],
const int16_t *vbuf[2],
1050 const int16_t *abuf0, uint8_t *dest,
1051 int dstW,
int uvalpha,
int y)
1053 const int16_t *ubuf0 = ubuf[0];
1054 const int16_t *buf1= buf0;
1056 if (uvalpha < 2048) {
1057 const int16_t *ubuf1 = ubuf[0];
1058 if (CONFIG_SWSCALE_ALPHA &&
c->needAlpha) {
1061 "mov %4, %%"FF_REG_b
" \n\t"
1062 "push %%"FF_REG_BP
" \n\t"
1065 WRITEBGR32(%%FF_REGb,
DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1066 "pop %%"FF_REG_BP
" \n\t"
1068 ::
"c" (buf0),
"d" (abuf0),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
1074 "mov %4, %%"FF_REG_b
" \n\t"
1075 "push %%"FF_REG_BP
" \n\t"
1077 "pcmpeqd %%mm7, %%mm7 \n\t"
1078 WRITEBGR32(%%FF_REGb,
DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1079 "pop %%"FF_REG_BP
" \n\t"
1081 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
1086 const int16_t *ubuf1 = ubuf[1];
1087 if (CONFIG_SWSCALE_ALPHA &&
c->needAlpha) {
1090 "mov %4, %%"FF_REG_b
" \n\t"
1091 "push %%"FF_REG_BP
" \n\t"
1094 WRITEBGR32(%%FF_REGb,
DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1095 "pop %%"FF_REG_BP
" \n\t"
1097 ::
"c" (buf0),
"d" (abuf0),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
1103 "mov %4, %%"FF_REG_b
" \n\t"
1104 "push %%"FF_REG_BP
" \n\t"
1106 "pcmpeqd %%mm7, %%mm7 \n\t"
1107 WRITEBGR32(%%FF_REGb,
DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1108 "pop %%"FF_REG_BP
" \n\t"
1110 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
1118 const int16_t *ubuf[2],
const int16_t *vbuf[2],
1119 const int16_t *abuf0, uint8_t *dest,
1120 int dstW,
int uvalpha,
int y)
1122 const int16_t *ubuf0 = ubuf[0];
1123 const int16_t *buf1= buf0;
1125 if (uvalpha < 2048) {
1126 const int16_t *ubuf1 = ubuf[0];
1129 "mov %4, %%"FF_REG_b
" \n\t"
1130 "push %%"FF_REG_BP
" \n\t"
1132 "pxor %%mm7, %%mm7 \n\t"
1134 "pop %%"FF_REG_BP
" \n\t"
1136 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
1141 const int16_t *ubuf1 = ubuf[1];
1144 "mov %4, %%"FF_REG_b
" \n\t"
1145 "push %%"FF_REG_BP
" \n\t"
1147 "pxor %%mm7, %%mm7 \n\t"
1149 "pop %%"FF_REG_BP
" \n\t"
1151 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
1159 const int16_t *ubuf[2],
const int16_t *vbuf[2],
1160 const int16_t *abuf0, uint8_t *dest,
1161 int dstW,
int uvalpha,
int y)
1163 const int16_t *ubuf0 = ubuf[0];
1164 const int16_t *buf1= buf0;
1166 if (uvalpha < 2048) {
1167 const int16_t *ubuf1 = ubuf[0];
1170 "mov %4, %%"FF_REG_b
" \n\t"
1171 "push %%"FF_REG_BP
" \n\t"
1173 "pxor %%mm7, %%mm7 \n\t"
1179 "pop %%"FF_REG_BP
" \n\t"
1181 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
1186 const int16_t *ubuf1 = ubuf[1];
1189 "mov %4, %%"FF_REG_b
" \n\t"
1190 "push %%"FF_REG_BP
" \n\t"
1192 "pxor %%mm7, %%mm7 \n\t"
1198 "pop %%"FF_REG_BP
" \n\t"
1200 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
1208 const int16_t *ubuf[2],
const int16_t *vbuf[2],
1209 const int16_t *abuf0, uint8_t *dest,
1210 int dstW,
int uvalpha,
int y)
1212 const int16_t *ubuf0 = ubuf[0];
1213 const int16_t *buf1= buf0;
1215 if (uvalpha < 2048) {
1216 const int16_t *ubuf1 = ubuf[0];
1219 "mov %4, %%"FF_REG_b
" \n\t"
1220 "push %%"FF_REG_BP
" \n\t"
1222 "pxor %%mm7, %%mm7 \n\t"
1228 "pop %%"FF_REG_BP
" \n\t"
1230 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
1235 const int16_t *ubuf1 = ubuf[1];
1238 "mov %4, %%"FF_REG_b
" \n\t"
1239 "push %%"FF_REG_BP
" \n\t"
1241 "pxor %%mm7, %%mm7 \n\t"
1247 "pop %%"FF_REG_BP
" \n\t"
1249 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
/*
 * Single-row load for packed YUV output: no blending, no colour
 * conversion.
 *
 * Parameters (stringized, pass raw tokens):
 *   index  register used as the running offset; cleared here
 *   c      register holding the base for the *_OFFSET displacements
 *
 * Asm operands used: %0 = luma row, %2 = chroma row.
 *
 * The first chroma plane is read at index, the second at index + the value
 * stored at UV_OFF_BYTE(c) (index is restored afterwards); eight luma
 * samples are read at (%0 + index*2).  Everything is shifted right
 * arithmetically by 7.
 *
 * On exit: mm3 / mm4 = the two chroma planes, mm1 / mm7 = luma.
 *
 * YSCALEYUV2PACKED1 forwards to REAL_YSCALEYUV2PACKED1 so arguments are
 * macro-expanded before being stringized.
 *
 * NOTE(review): the loop label that normally follows the "xor" is not
 * among the lines shown here.
 */
1256 #define REAL_YSCALEYUV2PACKED1(index, c) \
1257 "xor "#index", "#index" \n\t"\
1260 "movq (%2, "#index"), %%mm3 \n\t" \
1261 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1262 "movq (%2, "#index"), %%mm4 \n\t" \
1263 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1264 "psraw $7, %%mm3 \n\t" \
1265 "psraw $7, %%mm4 \n\t" \
1266 "movq (%0, "#index", 2), %%mm1 \n\t" \
1267 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
1268 "psraw $7, %%mm1 \n\t" \
1269 "psraw $7, %%mm7 \n\t" \
1271 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
/*
 * Single-luma-row load for packed YUV output with chroma averaged over two
 * rows.
 *
 * Parameters (stringized, pass raw tokens):
 *   index  register used as the running offset; cleared here
 *   c      register holding the base for the *_OFFSET displacements
 *
 * Asm operands used: %0 = luma row, %2 and %3 = the two chroma rows.
 *
 * Chroma samples of both rows are added and the sum is shifted right by 8
 * with psrlw (a LOGICAL shift), i.e. averaged and scaled by the same >>7
 * the single-row variant applies.  The second chroma plane is read at
 * index + the value stored at UV_OFF_BYTE(c); index is restored
 * afterwards.  Luma is read at (%0 + index*2) and shifted right
 * arithmetically by 7.
 *
 * On exit: mm3 / mm4 = the two chroma planes, mm1 / mm7 = luma.
 * Clobbers mm2, mm5.
 *
 * YSCALEYUV2PACKED1b forwards to REAL_YSCALEYUV2PACKED1b so arguments are
 * macro-expanded before being stringized.
 *
 * NOTE(review): the loop label that normally follows the "xor" is not
 * among the lines shown here.
 */
1273 #define REAL_YSCALEYUV2PACKED1b(index, c) \
1274 "xor "#index", "#index" \n\t"\
1277 "movq (%2, "#index"), %%mm2 \n\t" \
1278 "movq (%3, "#index"), %%mm3 \n\t" \
1279 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1280 "movq (%2, "#index"), %%mm5 \n\t" \
1281 "movq (%3, "#index"), %%mm4 \n\t" \
1282 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1283 "paddw %%mm2, %%mm3 \n\t" \
1284 "paddw %%mm5, %%mm4 \n\t" \
1285 "psrlw $8, %%mm3 \n\t" \
1286 "psrlw $8, %%mm4 \n\t" \
1287 "movq (%0, "#index", 2), %%mm1 \n\t" \
1288 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
1289 "psraw $7, %%mm1 \n\t" \
1290 "psraw $7, %%mm7 \n\t"
1291 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
1294 const int16_t *ubuf[2],
const int16_t *vbuf[2],
1295 const int16_t *abuf0, uint8_t *dest,
1296 int dstW,
int uvalpha,
int y)
1298 const int16_t *ubuf0 = ubuf[0];
1299 const int16_t *buf1= buf0;
1301 if (uvalpha < 2048) {
1302 const int16_t *ubuf1 = ubuf[0];
1305 "mov %4, %%"FF_REG_b
" \n\t"
1306 "push %%"FF_REG_BP
" \n\t"
1309 "pop %%"FF_REG_BP
" \n\t"
1311 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
1315 const int16_t *ubuf1 = ubuf[1];
1318 "mov %4, %%"FF_REG_b
" \n\t"
1319 "push %%"FF_REG_BP
" \n\t"
1322 "pop %%"FF_REG_BP
" \n\t"
1324 ::
"c" (buf0),
"d" (buf1),
"S" (ubuf0),
"D" (ubuf1),
"m" (dest),
1333 c->use_mmx_vfilter= 0;
1339 switch (
c->opts.dst_format) {
1351 c->use_mmx_vfilter= 1;
1353 switch (
c->opts.dst_format) {
1367 switch (
c->opts.dst_format) {
1369 c->yuv2packed1 =
RENAME(yuv2rgb32_1);
1370 c->yuv2packed2 =
RENAME(yuv2rgb32_2);
1373 c->yuv2packed1 =
RENAME(yuv2bgr24_1);
1374 c->yuv2packed2 =
RENAME(yuv2bgr24_2);
1377 c->yuv2packed1 =
RENAME(yuv2rgb555_1);
1378 c->yuv2packed2 =
RENAME(yuv2rgb555_2);
1381 c->yuv2packed1 =
RENAME(yuv2rgb565_1);
1382 c->yuv2packed2 =
RENAME(yuv2rgb565_2);
1385 c->yuv2packed1 =
RENAME(yuv2yuyv422_1);
1386 c->yuv2packed2 =
RENAME(yuv2yuyv422_2);
1394 if (
c->srcBpc == 8 &&
c->dstBpc <= 14) {
1400 c->hyscale_fast =
NULL;
1401 c->hcscale_fast =
NULL;