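/*
 * RGB <-> RGB and planar/packed YUV conversion templates. The including file
 * is expected to define RENAME() and the HAVE_* feature macros, so this body
 * can be compiled once per CPU flavour (C, MMX, MMX2, 3DNow!, SSE2).
 */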
#include <stddef.h>

#undef PREFETCH
#undef MOVNTQ
#undef EMMS
#undef SFENCE
#undef MMREG_SIZE
#undef PREFETCHW
#undef PAVGB

#if HAVE_SSE2
#define MMREG_SIZE 16
#else
#define MMREG_SIZE 8
#endif

#if HAVE_AMD3DNOW
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB     "pavgusb"
#elif HAVE_MMX2
#define PREFETCH  "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB     "pavgb"
#else
#define PREFETCH  " # nop"
#define PREFETCHW " # nop"
#endif

#if HAVE_AMD3DNOW
/* femms is faster than emms on 3DNow!-capable AMD CPUs */
#define EMMS     "femms"
#else
#define EMMS     "emms"
#endif

#if HAVE_MMX2
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
#define MOVNTQ "movq"
#define SFENCE " # nop"
#endif
00073
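/* Expand packed 24-bit pixels to 32 bits by appending an opaque alpha byte (255);
   the MMX loop converts 8 pixels (24 source bytes) per iteration, the C loop the rest. */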
00074 static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
00075 {
00076 uint8_t *dest = dst;
00077 const uint8_t *s = src;
00078 const uint8_t *end;
00079 #if HAVE_MMX
00080 const uint8_t *mm_end;
00081 #endif
00082 end = s + src_size;
00083 #if HAVE_MMX
00084 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
00085 mm_end = end - 23;
00086 __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
00087 while (s < mm_end)
00088 {
00089 __asm__ volatile(
00090 PREFETCH" 32%1 \n\t"
00091 "movd %1, %%mm0 \n\t"
00092 "punpckldq 3%1, %%mm0 \n\t"
00093 "movd 6%1, %%mm1 \n\t"
00094 "punpckldq 9%1, %%mm1 \n\t"
00095 "movd 12%1, %%mm2 \n\t"
00096 "punpckldq 15%1, %%mm2 \n\t"
00097 "movd 18%1, %%mm3 \n\t"
00098 "punpckldq 21%1, %%mm3 \n\t"
00099 "por %%mm7, %%mm0 \n\t"
00100 "por %%mm7, %%mm1 \n\t"
00101 "por %%mm7, %%mm2 \n\t"
00102 "por %%mm7, %%mm3 \n\t"
00103 MOVNTQ" %%mm0, %0 \n\t"
00104 MOVNTQ" %%mm1, 8%0 \n\t"
00105 MOVNTQ" %%mm2, 16%0 \n\t"
00106 MOVNTQ" %%mm3, 24%0"
00107 :"=m"(*dest)
00108 :"m"(*s)
00109 :"memory");
00110 dest += 32;
00111 s += 24;
00112 }
00113 __asm__ volatile(SFENCE:::"memory");
00114 __asm__ volatile(EMMS:::"memory");
00115 #endif
00116 while (s < end)
00117 {
00118 #ifdef WORDS_BIGENDIAN
00119
00120 *dest++ = 255;
00121 *dest++ = s[2];
00122 *dest++ = s[1];
00123 *dest++ = s[0];
00124 s+=3;
00125 #else
00126 *dest++ = *s++;
00127 *dest++ = *s++;
00128 *dest++ = *s++;
00129 *dest++ = 255;
00130 #endif
00131 }
00132 }
00133
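/* Drop the alpha byte of each 32-bit pixel and repack to 24 bits;
   the MMX loop handles 8 pixels (32 source / 24 destination bytes) per iteration. */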
00134 static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
00135 {
00136 uint8_t *dest = dst;
00137 const uint8_t *s = src;
00138 const uint8_t *end;
00139 #if HAVE_MMX
00140 const uint8_t *mm_end;
00141 #endif
00142 end = s + src_size;
00143 #if HAVE_MMX
00144 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
00145 mm_end = end - 31;
00146 while (s < mm_end)
00147 {
00148 __asm__ volatile(
00149 PREFETCH" 32%1 \n\t"
00150 "movq %1, %%mm0 \n\t"
00151 "movq 8%1, %%mm1 \n\t"
00152 "movq 16%1, %%mm4 \n\t"
00153 "movq 24%1, %%mm5 \n\t"
00154 "movq %%mm0, %%mm2 \n\t"
00155 "movq %%mm1, %%mm3 \n\t"
00156 "movq %%mm4, %%mm6 \n\t"
00157 "movq %%mm5, %%mm7 \n\t"
00158 "psrlq $8, %%mm2 \n\t"
00159 "psrlq $8, %%mm3 \n\t"
00160 "psrlq $8, %%mm6 \n\t"
00161 "psrlq $8, %%mm7 \n\t"
00162 "pand %2, %%mm0 \n\t"
00163 "pand %2, %%mm1 \n\t"
00164 "pand %2, %%mm4 \n\t"
00165 "pand %2, %%mm5 \n\t"
00166 "pand %3, %%mm2 \n\t"
00167 "pand %3, %%mm3 \n\t"
00168 "pand %3, %%mm6 \n\t"
00169 "pand %3, %%mm7 \n\t"
00170 "por %%mm2, %%mm0 \n\t"
00171 "por %%mm3, %%mm1 \n\t"
00172 "por %%mm6, %%mm4 \n\t"
00173 "por %%mm7, %%mm5 \n\t"
00174
00175 "movq %%mm1, %%mm2 \n\t"
00176 "movq %%mm4, %%mm3 \n\t"
00177 "psllq $48, %%mm2 \n\t"
00178 "psllq $32, %%mm3 \n\t"
00179 "pand %4, %%mm2 \n\t"
00180 "pand %5, %%mm3 \n\t"
00181 "por %%mm2, %%mm0 \n\t"
00182 "psrlq $16, %%mm1 \n\t"
00183 "psrlq $32, %%mm4 \n\t"
00184 "psllq $16, %%mm5 \n\t"
00185 "por %%mm3, %%mm1 \n\t"
00186 "pand %6, %%mm5 \n\t"
00187 "por %%mm5, %%mm4 \n\t"
00188
00189 MOVNTQ" %%mm0, %0 \n\t"
00190 MOVNTQ" %%mm1, 8%0 \n\t"
00191 MOVNTQ" %%mm4, 16%0"
00192 :"=m"(*dest)
00193 :"m"(*s),"m"(mask24l),
00194 "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
00195 :"memory");
00196 dest += 24;
00197 s += 32;
00198 }
00199 __asm__ volatile(SFENCE:::"memory");
00200 __asm__ volatile(EMMS:::"memory");
00201 #endif
00202 while (s < end)
00203 {
00204 #ifdef WORDS_BIGENDIAN
00205
00206 s++;
00207 dest[2] = *s++;
00208 dest[1] = *s++;
00209 dest[0] = *s++;
00210 dest += 3;
00211 #else
00212 *dest++ = *s++;
00213 *dest++ = *s++;
00214 *dest++ = *s++;
00215 s++;
00216 #endif
00217 }
00218 }
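/* RGB555 -> RGB565: the add-and-mask trick (x&0x7FFF) + (x&0x7FE0) shifts the red
   and green fields up one bit, so the 5-bit green becomes a 6-bit green with a zero LSB. */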
00226 static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
00227 {
00228 register const uint8_t* s=src;
00229 register uint8_t* d=dst;
00230 register const uint8_t *end;
00231 const uint8_t *mm_end;
00232 end = s + src_size;
00233 #if HAVE_MMX
00234 __asm__ volatile(PREFETCH" %0"::"m"(*s));
00235 __asm__ volatile("movq %0, %%mm4"::"m"(mask15s));
00236 mm_end = end - 15;
00237 while (s<mm_end)
00238 {
00239 __asm__ volatile(
00240 PREFETCH" 32%1 \n\t"
00241 "movq %1, %%mm0 \n\t"
00242 "movq 8%1, %%mm2 \n\t"
00243 "movq %%mm0, %%mm1 \n\t"
00244 "movq %%mm2, %%mm3 \n\t"
00245 "pand %%mm4, %%mm0 \n\t"
00246 "pand %%mm4, %%mm2 \n\t"
00247 "paddw %%mm1, %%mm0 \n\t"
00248 "paddw %%mm3, %%mm2 \n\t"
00249 MOVNTQ" %%mm0, %0 \n\t"
00250 MOVNTQ" %%mm2, 8%0"
00251 :"=m"(*d)
00252 :"m"(*s)
00253 );
00254 d+=16;
00255 s+=16;
00256 }
00257 __asm__ volatile(SFENCE:::"memory");
00258 __asm__ volatile(EMMS:::"memory");
00259 #endif
00260 mm_end = end - 3;
00261 while (s < mm_end)
00262 {
00263 register unsigned x= *((const uint32_t *)s);
00264 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
00265 d+=4;
00266 s+=4;
00267 }
00268 if (s < end)
00269 {
00270 register unsigned short x= *((const uint16_t *)s);
00271 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
00272 }
00273 }
00274
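/* RGB565 -> RGB555: red and green are shifted down one bit ((x>>1)&0x7FE0),
   blue (x&0x001F) is kept; the least significant green bit is discarded. */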
00275 static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
00276 {
00277 register const uint8_t* s=src;
00278 register uint8_t* d=dst;
00279 register const uint8_t *end;
00280 const uint8_t *mm_end;
00281 end = s + src_size;
00282 #if HAVE_MMX
00283 __asm__ volatile(PREFETCH" %0"::"m"(*s));
00284 __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg));
00285 __asm__ volatile("movq %0, %%mm6"::"m"(mask15b));
00286 mm_end = end - 15;
00287 while (s<mm_end)
00288 {
00289 __asm__ volatile(
00290 PREFETCH" 32%1 \n\t"
00291 "movq %1, %%mm0 \n\t"
00292 "movq 8%1, %%mm2 \n\t"
00293 "movq %%mm0, %%mm1 \n\t"
00294 "movq %%mm2, %%mm3 \n\t"
00295 "psrlq $1, %%mm0 \n\t"
00296 "psrlq $1, %%mm2 \n\t"
00297 "pand %%mm7, %%mm0 \n\t"
00298 "pand %%mm7, %%mm2 \n\t"
00299 "pand %%mm6, %%mm1 \n\t"
00300 "pand %%mm6, %%mm3 \n\t"
00301 "por %%mm1, %%mm0 \n\t"
00302 "por %%mm3, %%mm2 \n\t"
00303 MOVNTQ" %%mm0, %0 \n\t"
00304 MOVNTQ" %%mm2, 8%0"
00305 :"=m"(*d)
00306 :"m"(*s)
00307 );
00308 d+=16;
00309 s+=16;
00310 }
00311 __asm__ volatile(SFENCE:::"memory");
00312 __asm__ volatile(EMMS:::"memory");
00313 #endif
00314 mm_end = end - 3;
00315 while (s < mm_end)
00316 {
00317 register uint32_t x= *((const uint32_t*)s);
00318 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
00319 s+=4;
00320 d+=4;
00321 }
00322 if (s < end)
00323 {
00324 register uint16_t x= *((const uint16_t*)s);
00325 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
00326 s+=2;
00327 d+=2;
00328 }
00329 }
00330
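/* 32-bit RGB -> RGB565, 4 pixels per MMX iteration. The enabled path keeps green with a
   mask and uses pmaddwd with packed multiplier constants to move red and blue into their
   target positions in one step; the disabled alternative uses plain shift/mask operations. */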
00331 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
00332 {
00333 const uint8_t *s = src;
00334 const uint8_t *end;
00335 #if HAVE_MMX
00336 const uint8_t *mm_end;
00337 #endif
00338 uint16_t *d = (uint16_t *)dst;
00339 end = s + src_size;
00340 #if HAVE_MMX
00341 mm_end = end - 15;
#if 1 // faster only if multiplies are reasonably fast (FIXME: figure out on which CPUs this is faster; on Athlon it is slightly faster)
00343 __asm__ volatile(
00344 "movq %3, %%mm5 \n\t"
00345 "movq %4, %%mm6 \n\t"
00346 "movq %5, %%mm7 \n\t"
00347 "jmp 2f \n\t"
00348 ASMALIGN(4)
00349 "1: \n\t"
00350 PREFETCH" 32(%1) \n\t"
00351 "movd (%1), %%mm0 \n\t"
00352 "movd 4(%1), %%mm3 \n\t"
00353 "punpckldq 8(%1), %%mm0 \n\t"
00354 "punpckldq 12(%1), %%mm3 \n\t"
00355 "movq %%mm0, %%mm1 \n\t"
00356 "movq %%mm3, %%mm4 \n\t"
00357 "pand %%mm6, %%mm0 \n\t"
00358 "pand %%mm6, %%mm3 \n\t"
00359 "pmaddwd %%mm7, %%mm0 \n\t"
00360 "pmaddwd %%mm7, %%mm3 \n\t"
00361 "pand %%mm5, %%mm1 \n\t"
00362 "pand %%mm5, %%mm4 \n\t"
00363 "por %%mm1, %%mm0 \n\t"
00364 "por %%mm4, %%mm3 \n\t"
00365 "psrld $5, %%mm0 \n\t"
00366 "pslld $11, %%mm3 \n\t"
00367 "por %%mm3, %%mm0 \n\t"
00368 MOVNTQ" %%mm0, (%0) \n\t"
00369 "add $16, %1 \n\t"
00370 "add $8, %0 \n\t"
00371 "2: \n\t"
00372 "cmp %2, %1 \n\t"
00373 " jb 1b \n\t"
00374 : "+r" (d), "+r"(s)
00375 : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
00376 );
00377 #else
00378 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00379 __asm__ volatile(
00380 "movq %0, %%mm7 \n\t"
00381 "movq %1, %%mm6 \n\t"
00382 ::"m"(red_16mask),"m"(green_16mask));
00383 while (s < mm_end)
00384 {
00385 __asm__ volatile(
00386 PREFETCH" 32%1 \n\t"
00387 "movd %1, %%mm0 \n\t"
00388 "movd 4%1, %%mm3 \n\t"
00389 "punpckldq 8%1, %%mm0 \n\t"
00390 "punpckldq 12%1, %%mm3 \n\t"
00391 "movq %%mm0, %%mm1 \n\t"
00392 "movq %%mm0, %%mm2 \n\t"
00393 "movq %%mm3, %%mm4 \n\t"
00394 "movq %%mm3, %%mm5 \n\t"
00395 "psrlq $3, %%mm0 \n\t"
00396 "psrlq $3, %%mm3 \n\t"
00397 "pand %2, %%mm0 \n\t"
00398 "pand %2, %%mm3 \n\t"
00399 "psrlq $5, %%mm1 \n\t"
00400 "psrlq $5, %%mm4 \n\t"
00401 "pand %%mm6, %%mm1 \n\t"
00402 "pand %%mm6, %%mm4 \n\t"
00403 "psrlq $8, %%mm2 \n\t"
00404 "psrlq $8, %%mm5 \n\t"
00405 "pand %%mm7, %%mm2 \n\t"
00406 "pand %%mm7, %%mm5 \n\t"
00407 "por %%mm1, %%mm0 \n\t"
00408 "por %%mm4, %%mm3 \n\t"
00409 "por %%mm2, %%mm0 \n\t"
00410 "por %%mm5, %%mm3 \n\t"
00411 "psllq $16, %%mm3 \n\t"
00412 "por %%mm3, %%mm0 \n\t"
00413 MOVNTQ" %%mm0, %0 \n\t"
00414 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00415 d += 4;
00416 s += 16;
00417 }
00418 #endif
00419 __asm__ volatile(SFENCE:::"memory");
00420 __asm__ volatile(EMMS:::"memory");
00421 #endif
00422 while (s < end)
00423 {
00424 register int rgb = *(const uint32_t*)s; s += 4;
00425 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
00426 }
00427 }
00428
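/* 32-bit RGB -> 16-bit with red and blue swapped (BGR565); 4 pixels per MMX iteration. */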
00429 static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
00430 {
00431 const uint8_t *s = src;
00432 const uint8_t *end;
00433 #if HAVE_MMX
00434 const uint8_t *mm_end;
00435 #endif
00436 uint16_t *d = (uint16_t *)dst;
00437 end = s + src_size;
00438 #if HAVE_MMX
00439 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00440 __asm__ volatile(
00441 "movq %0, %%mm7 \n\t"
00442 "movq %1, %%mm6 \n\t"
00443 ::"m"(red_16mask),"m"(green_16mask));
00444 mm_end = end - 15;
00445 while (s < mm_end)
00446 {
00447 __asm__ volatile(
00448 PREFETCH" 32%1 \n\t"
00449 "movd %1, %%mm0 \n\t"
00450 "movd 4%1, %%mm3 \n\t"
00451 "punpckldq 8%1, %%mm0 \n\t"
00452 "punpckldq 12%1, %%mm3 \n\t"
00453 "movq %%mm0, %%mm1 \n\t"
00454 "movq %%mm0, %%mm2 \n\t"
00455 "movq %%mm3, %%mm4 \n\t"
00456 "movq %%mm3, %%mm5 \n\t"
00457 "psllq $8, %%mm0 \n\t"
00458 "psllq $8, %%mm3 \n\t"
00459 "pand %%mm7, %%mm0 \n\t"
00460 "pand %%mm7, %%mm3 \n\t"
00461 "psrlq $5, %%mm1 \n\t"
00462 "psrlq $5, %%mm4 \n\t"
00463 "pand %%mm6, %%mm1 \n\t"
00464 "pand %%mm6, %%mm4 \n\t"
00465 "psrlq $19, %%mm2 \n\t"
00466 "psrlq $19, %%mm5 \n\t"
00467 "pand %2, %%mm2 \n\t"
00468 "pand %2, %%mm5 \n\t"
00469 "por %%mm1, %%mm0 \n\t"
00470 "por %%mm4, %%mm3 \n\t"
00471 "por %%mm2, %%mm0 \n\t"
00472 "por %%mm5, %%mm3 \n\t"
00473 "psllq $16, %%mm3 \n\t"
00474 "por %%mm3, %%mm0 \n\t"
00475 MOVNTQ" %%mm0, %0 \n\t"
00476 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00477 d += 4;
00478 s += 16;
00479 }
00480 __asm__ volatile(SFENCE:::"memory");
00481 __asm__ volatile(EMMS:::"memory");
00482 #endif
00483 while (s < end)
00484 {
00485 register int rgb = *(const uint32_t*)s; s += 4;
00486 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
00487 }
00488 }
00489
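/* 32-bit RGB -> RGB555; same structure as rgb32to16 but with a 5-bit green field. */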
00490 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
00491 {
00492 const uint8_t *s = src;
00493 const uint8_t *end;
00494 #if HAVE_MMX
00495 const uint8_t *mm_end;
00496 #endif
00497 uint16_t *d = (uint16_t *)dst;
00498 end = s + src_size;
00499 #if HAVE_MMX
00500 mm_end = end - 15;
#if 1 // faster only if multiplies are reasonably fast (FIXME: figure out on which CPUs this is faster; on Athlon it is slightly faster)
00502 __asm__ volatile(
00503 "movq %3, %%mm5 \n\t"
00504 "movq %4, %%mm6 \n\t"
00505 "movq %5, %%mm7 \n\t"
00506 "jmp 2f \n\t"
00507 ASMALIGN(4)
00508 "1: \n\t"
00509 PREFETCH" 32(%1) \n\t"
00510 "movd (%1), %%mm0 \n\t"
00511 "movd 4(%1), %%mm3 \n\t"
00512 "punpckldq 8(%1), %%mm0 \n\t"
00513 "punpckldq 12(%1), %%mm3 \n\t"
00514 "movq %%mm0, %%mm1 \n\t"
00515 "movq %%mm3, %%mm4 \n\t"
00516 "pand %%mm6, %%mm0 \n\t"
00517 "pand %%mm6, %%mm3 \n\t"
00518 "pmaddwd %%mm7, %%mm0 \n\t"
00519 "pmaddwd %%mm7, %%mm3 \n\t"
00520 "pand %%mm5, %%mm1 \n\t"
00521 "pand %%mm5, %%mm4 \n\t"
00522 "por %%mm1, %%mm0 \n\t"
00523 "por %%mm4, %%mm3 \n\t"
00524 "psrld $6, %%mm0 \n\t"
00525 "pslld $10, %%mm3 \n\t"
00526 "por %%mm3, %%mm0 \n\t"
00527 MOVNTQ" %%mm0, (%0) \n\t"
00528 "add $16, %1 \n\t"
00529 "add $8, %0 \n\t"
00530 "2: \n\t"
00531 "cmp %2, %1 \n\t"
00532 " jb 1b \n\t"
00533 : "+r" (d), "+r"(s)
00534 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
00535 );
00536 #else
00537 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00538 __asm__ volatile(
00539 "movq %0, %%mm7 \n\t"
00540 "movq %1, %%mm6 \n\t"
00541 ::"m"(red_15mask),"m"(green_15mask));
00542 while (s < mm_end)
00543 {
00544 __asm__ volatile(
00545 PREFETCH" 32%1 \n\t"
00546 "movd %1, %%mm0 \n\t"
00547 "movd 4%1, %%mm3 \n\t"
00548 "punpckldq 8%1, %%mm0 \n\t"
00549 "punpckldq 12%1, %%mm3 \n\t"
00550 "movq %%mm0, %%mm1 \n\t"
00551 "movq %%mm0, %%mm2 \n\t"
00552 "movq %%mm3, %%mm4 \n\t"
00553 "movq %%mm3, %%mm5 \n\t"
00554 "psrlq $3, %%mm0 \n\t"
00555 "psrlq $3, %%mm3 \n\t"
00556 "pand %2, %%mm0 \n\t"
00557 "pand %2, %%mm3 \n\t"
00558 "psrlq $6, %%mm1 \n\t"
00559 "psrlq $6, %%mm4 \n\t"
00560 "pand %%mm6, %%mm1 \n\t"
00561 "pand %%mm6, %%mm4 \n\t"
00562 "psrlq $9, %%mm2 \n\t"
00563 "psrlq $9, %%mm5 \n\t"
00564 "pand %%mm7, %%mm2 \n\t"
00565 "pand %%mm7, %%mm5 \n\t"
00566 "por %%mm1, %%mm0 \n\t"
00567 "por %%mm4, %%mm3 \n\t"
00568 "por %%mm2, %%mm0 \n\t"
00569 "por %%mm5, %%mm3 \n\t"
00570 "psllq $16, %%mm3 \n\t"
00571 "por %%mm3, %%mm0 \n\t"
00572 MOVNTQ" %%mm0, %0 \n\t"
00573 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00574 d += 4;
00575 s += 16;
00576 }
00577 #endif
00578 __asm__ volatile(SFENCE:::"memory");
00579 __asm__ volatile(EMMS:::"memory");
00580 #endif
00581 while (s < end)
00582 {
00583 register int rgb = *(const uint32_t*)s; s += 4;
00584 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
00585 }
00586 }
00587
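/* 32-bit RGB -> 15-bit with red and blue swapped (BGR555); 4 pixels per MMX iteration. */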
00588 static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
00589 {
00590 const uint8_t *s = src;
00591 const uint8_t *end;
00592 #if HAVE_MMX
00593 const uint8_t *mm_end;
00594 #endif
00595 uint16_t *d = (uint16_t *)dst;
00596 end = s + src_size;
00597 #if HAVE_MMX
00598 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00599 __asm__ volatile(
00600 "movq %0, %%mm7 \n\t"
00601 "movq %1, %%mm6 \n\t"
00602 ::"m"(red_15mask),"m"(green_15mask));
00603 mm_end = end - 15;
00604 while (s < mm_end)
00605 {
00606 __asm__ volatile(
00607 PREFETCH" 32%1 \n\t"
00608 "movd %1, %%mm0 \n\t"
00609 "movd 4%1, %%mm3 \n\t"
00610 "punpckldq 8%1, %%mm0 \n\t"
00611 "punpckldq 12%1, %%mm3 \n\t"
00612 "movq %%mm0, %%mm1 \n\t"
00613 "movq %%mm0, %%mm2 \n\t"
00614 "movq %%mm3, %%mm4 \n\t"
00615 "movq %%mm3, %%mm5 \n\t"
00616 "psllq $7, %%mm0 \n\t"
00617 "psllq $7, %%mm3 \n\t"
00618 "pand %%mm7, %%mm0 \n\t"
00619 "pand %%mm7, %%mm3 \n\t"
00620 "psrlq $6, %%mm1 \n\t"
00621 "psrlq $6, %%mm4 \n\t"
00622 "pand %%mm6, %%mm1 \n\t"
00623 "pand %%mm6, %%mm4 \n\t"
00624 "psrlq $19, %%mm2 \n\t"
00625 "psrlq $19, %%mm5 \n\t"
00626 "pand %2, %%mm2 \n\t"
00627 "pand %2, %%mm5 \n\t"
00628 "por %%mm1, %%mm0 \n\t"
00629 "por %%mm4, %%mm3 \n\t"
00630 "por %%mm2, %%mm0 \n\t"
00631 "por %%mm5, %%mm3 \n\t"
00632 "psllq $16, %%mm3 \n\t"
00633 "por %%mm3, %%mm0 \n\t"
00634 MOVNTQ" %%mm0, %0 \n\t"
00635 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00636 d += 4;
00637 s += 16;
00638 }
00639 __asm__ volatile(SFENCE:::"memory");
00640 __asm__ volatile(EMMS:::"memory");
00641 #endif
00642 while (s < end)
00643 {
00644 register int rgb = *(const uint32_t*)s; s += 4;
00645 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
00646 }
00647 }
00648
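/* Packed 24-bit -> 5:6:5; the MMX loop reads 12 source bytes (4 pixels) per iteration
   using unaligned movd/punpckldq loads, and the C loop treats the first source byte as blue. */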
00649 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
00650 {
00651 const uint8_t *s = src;
00652 const uint8_t *end;
00653 #if HAVE_MMX
00654 const uint8_t *mm_end;
00655 #endif
00656 uint16_t *d = (uint16_t *)dst;
00657 end = s + src_size;
00658 #if HAVE_MMX
00659 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00660 __asm__ volatile(
00661 "movq %0, %%mm7 \n\t"
00662 "movq %1, %%mm6 \n\t"
00663 ::"m"(red_16mask),"m"(green_16mask));
00664 mm_end = end - 11;
00665 while (s < mm_end)
00666 {
00667 __asm__ volatile(
00668 PREFETCH" 32%1 \n\t"
00669 "movd %1, %%mm0 \n\t"
00670 "movd 3%1, %%mm3 \n\t"
00671 "punpckldq 6%1, %%mm0 \n\t"
00672 "punpckldq 9%1, %%mm3 \n\t"
00673 "movq %%mm0, %%mm1 \n\t"
00674 "movq %%mm0, %%mm2 \n\t"
00675 "movq %%mm3, %%mm4 \n\t"
00676 "movq %%mm3, %%mm5 \n\t"
00677 "psrlq $3, %%mm0 \n\t"
00678 "psrlq $3, %%mm3 \n\t"
00679 "pand %2, %%mm0 \n\t"
00680 "pand %2, %%mm3 \n\t"
00681 "psrlq $5, %%mm1 \n\t"
00682 "psrlq $5, %%mm4 \n\t"
00683 "pand %%mm6, %%mm1 \n\t"
00684 "pand %%mm6, %%mm4 \n\t"
00685 "psrlq $8, %%mm2 \n\t"
00686 "psrlq $8, %%mm5 \n\t"
00687 "pand %%mm7, %%mm2 \n\t"
00688 "pand %%mm7, %%mm5 \n\t"
00689 "por %%mm1, %%mm0 \n\t"
00690 "por %%mm4, %%mm3 \n\t"
00691 "por %%mm2, %%mm0 \n\t"
00692 "por %%mm5, %%mm3 \n\t"
00693 "psllq $16, %%mm3 \n\t"
00694 "por %%mm3, %%mm0 \n\t"
00695 MOVNTQ" %%mm0, %0 \n\t"
00696 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00697 d += 4;
00698 s += 12;
00699 }
00700 __asm__ volatile(SFENCE:::"memory");
00701 __asm__ volatile(EMMS:::"memory");
00702 #endif
00703 while (s < end)
00704 {
00705 const int b = *s++;
00706 const int g = *s++;
00707 const int r = *s++;
00708 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
00709 }
00710 }
00711
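/* Same conversion as rgb24tobgr16 with the opposite channel order:
   the first source byte is treated as red. */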
00712 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
00713 {
00714 const uint8_t *s = src;
00715 const uint8_t *end;
00716 #if HAVE_MMX
00717 const uint8_t *mm_end;
00718 #endif
00719 uint16_t *d = (uint16_t *)dst;
00720 end = s + src_size;
00721 #if HAVE_MMX
00722 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00723 __asm__ volatile(
00724 "movq %0, %%mm7 \n\t"
00725 "movq %1, %%mm6 \n\t"
00726 ::"m"(red_16mask),"m"(green_16mask));
00727 mm_end = end - 15;
00728 while (s < mm_end)
00729 {
00730 __asm__ volatile(
00731 PREFETCH" 32%1 \n\t"
00732 "movd %1, %%mm0 \n\t"
00733 "movd 3%1, %%mm3 \n\t"
00734 "punpckldq 6%1, %%mm0 \n\t"
00735 "punpckldq 9%1, %%mm3 \n\t"
00736 "movq %%mm0, %%mm1 \n\t"
00737 "movq %%mm0, %%mm2 \n\t"
00738 "movq %%mm3, %%mm4 \n\t"
00739 "movq %%mm3, %%mm5 \n\t"
00740 "psllq $8, %%mm0 \n\t"
00741 "psllq $8, %%mm3 \n\t"
00742 "pand %%mm7, %%mm0 \n\t"
00743 "pand %%mm7, %%mm3 \n\t"
00744 "psrlq $5, %%mm1 \n\t"
00745 "psrlq $5, %%mm4 \n\t"
00746 "pand %%mm6, %%mm1 \n\t"
00747 "pand %%mm6, %%mm4 \n\t"
00748 "psrlq $19, %%mm2 \n\t"
00749 "psrlq $19, %%mm5 \n\t"
00750 "pand %2, %%mm2 \n\t"
00751 "pand %2, %%mm5 \n\t"
00752 "por %%mm1, %%mm0 \n\t"
00753 "por %%mm4, %%mm3 \n\t"
00754 "por %%mm2, %%mm0 \n\t"
00755 "por %%mm5, %%mm3 \n\t"
00756 "psllq $16, %%mm3 \n\t"
00757 "por %%mm3, %%mm0 \n\t"
00758 MOVNTQ" %%mm0, %0 \n\t"
00759 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00760 d += 4;
00761 s += 12;
00762 }
00763 __asm__ volatile(SFENCE:::"memory");
00764 __asm__ volatile(EMMS:::"memory");
00765 #endif
00766 while (s < end)
00767 {
00768 const int r = *s++;
00769 const int g = *s++;
00770 const int b = *s++;
00771 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
00772 }
00773 }
00774
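/* Packed 24-bit -> 5:5:5, first source byte taken as blue. */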
00775 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
00776 {
00777 const uint8_t *s = src;
00778 const uint8_t *end;
00779 #if HAVE_MMX
00780 const uint8_t *mm_end;
00781 #endif
00782 uint16_t *d = (uint16_t *)dst;
00783 end = s + src_size;
00784 #if HAVE_MMX
00785 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00786 __asm__ volatile(
00787 "movq %0, %%mm7 \n\t"
00788 "movq %1, %%mm6 \n\t"
00789 ::"m"(red_15mask),"m"(green_15mask));
00790 mm_end = end - 11;
00791 while (s < mm_end)
00792 {
00793 __asm__ volatile(
00794 PREFETCH" 32%1 \n\t"
00795 "movd %1, %%mm0 \n\t"
00796 "movd 3%1, %%mm3 \n\t"
00797 "punpckldq 6%1, %%mm0 \n\t"
00798 "punpckldq 9%1, %%mm3 \n\t"
00799 "movq %%mm0, %%mm1 \n\t"
00800 "movq %%mm0, %%mm2 \n\t"
00801 "movq %%mm3, %%mm4 \n\t"
00802 "movq %%mm3, %%mm5 \n\t"
00803 "psrlq $3, %%mm0 \n\t"
00804 "psrlq $3, %%mm3 \n\t"
00805 "pand %2, %%mm0 \n\t"
00806 "pand %2, %%mm3 \n\t"
00807 "psrlq $6, %%mm1 \n\t"
00808 "psrlq $6, %%mm4 \n\t"
00809 "pand %%mm6, %%mm1 \n\t"
00810 "pand %%mm6, %%mm4 \n\t"
00811 "psrlq $9, %%mm2 \n\t"
00812 "psrlq $9, %%mm5 \n\t"
00813 "pand %%mm7, %%mm2 \n\t"
00814 "pand %%mm7, %%mm5 \n\t"
00815 "por %%mm1, %%mm0 \n\t"
00816 "por %%mm4, %%mm3 \n\t"
00817 "por %%mm2, %%mm0 \n\t"
00818 "por %%mm5, %%mm3 \n\t"
00819 "psllq $16, %%mm3 \n\t"
00820 "por %%mm3, %%mm0 \n\t"
00821 MOVNTQ" %%mm0, %0 \n\t"
00822 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00823 d += 4;
00824 s += 12;
00825 }
00826 __asm__ volatile(SFENCE:::"memory");
00827 __asm__ volatile(EMMS:::"memory");
00828 #endif
00829 while (s < end)
00830 {
00831 const int b = *s++;
00832 const int g = *s++;
00833 const int r = *s++;
00834 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
00835 }
00836 }
00837
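/* Packed 24-bit -> 5:5:5, first source byte taken as red. */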
00838 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
00839 {
00840 const uint8_t *s = src;
00841 const uint8_t *end;
00842 #if HAVE_MMX
00843 const uint8_t *mm_end;
00844 #endif
00845 uint16_t *d = (uint16_t *)dst;
00846 end = s + src_size;
00847 #if HAVE_MMX
00848 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00849 __asm__ volatile(
00850 "movq %0, %%mm7 \n\t"
00851 "movq %1, %%mm6 \n\t"
00852 ::"m"(red_15mask),"m"(green_15mask));
00853 mm_end = end - 15;
00854 while (s < mm_end)
00855 {
00856 __asm__ volatile(
00857 PREFETCH" 32%1 \n\t"
00858 "movd %1, %%mm0 \n\t"
00859 "movd 3%1, %%mm3 \n\t"
00860 "punpckldq 6%1, %%mm0 \n\t"
00861 "punpckldq 9%1, %%mm3 \n\t"
00862 "movq %%mm0, %%mm1 \n\t"
00863 "movq %%mm0, %%mm2 \n\t"
00864 "movq %%mm3, %%mm4 \n\t"
00865 "movq %%mm3, %%mm5 \n\t"
00866 "psllq $7, %%mm0 \n\t"
00867 "psllq $7, %%mm3 \n\t"
00868 "pand %%mm7, %%mm0 \n\t"
00869 "pand %%mm7, %%mm3 \n\t"
00870 "psrlq $6, %%mm1 \n\t"
00871 "psrlq $6, %%mm4 \n\t"
00872 "pand %%mm6, %%mm1 \n\t"
00873 "pand %%mm6, %%mm4 \n\t"
00874 "psrlq $19, %%mm2 \n\t"
00875 "psrlq $19, %%mm5 \n\t"
00876 "pand %2, %%mm2 \n\t"
00877 "pand %2, %%mm5 \n\t"
00878 "por %%mm1, %%mm0 \n\t"
00879 "por %%mm4, %%mm3 \n\t"
00880 "por %%mm2, %%mm0 \n\t"
00881 "por %%mm5, %%mm3 \n\t"
00882 "psllq $16, %%mm3 \n\t"
00883 "por %%mm3, %%mm0 \n\t"
00884 MOVNTQ" %%mm0, %0 \n\t"
00885 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00886 d += 4;
00887 s += 12;
00888 }
00889 __asm__ volatile(SFENCE:::"memory");
00890 __asm__ volatile(EMMS:::"memory");
00891 #endif
00892 while (s < end)
00893 {
00894 const int r = *s++;
00895 const int g = *s++;
00896 const int b = *s++;
00897 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
00898 }
00899 }
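/* RGB555 -> packed 24-bit: each 16-bit pixel is split into its three fields, widened to
   8 bits (<<3, >>2, >>7), and the byte triples are then repacked with the same mask24*
   shuffle used in rgb32tobgr24; 8 pixels per MMX iteration. */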
00922 static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
00923 {
00924 const uint16_t *end;
00925 #if HAVE_MMX
00926 const uint16_t *mm_end;
00927 #endif
00928 uint8_t *d = dst;
00929 const uint16_t *s = (const uint16_t*)src;
00930 end = s + src_size/2;
00931 #if HAVE_MMX
00932 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
00933 mm_end = end - 7;
00934 while (s < mm_end)
00935 {
00936 __asm__ volatile(
00937 PREFETCH" 32%1 \n\t"
00938 "movq %1, %%mm0 \n\t"
00939 "movq %1, %%mm1 \n\t"
00940 "movq %1, %%mm2 \n\t"
00941 "pand %2, %%mm0 \n\t"
00942 "pand %3, %%mm1 \n\t"
00943 "pand %4, %%mm2 \n\t"
00944 "psllq $3, %%mm0 \n\t"
00945 "psrlq $2, %%mm1 \n\t"
00946 "psrlq $7, %%mm2 \n\t"
00947 "movq %%mm0, %%mm3 \n\t"
00948 "movq %%mm1, %%mm4 \n\t"
00949 "movq %%mm2, %%mm5 \n\t"
00950 "punpcklwd %5, %%mm0 \n\t"
00951 "punpcklwd %5, %%mm1 \n\t"
00952 "punpcklwd %5, %%mm2 \n\t"
00953 "punpckhwd %5, %%mm3 \n\t"
00954 "punpckhwd %5, %%mm4 \n\t"
00955 "punpckhwd %5, %%mm5 \n\t"
00956 "psllq $8, %%mm1 \n\t"
00957 "psllq $16, %%mm2 \n\t"
00958 "por %%mm1, %%mm0 \n\t"
00959 "por %%mm2, %%mm0 \n\t"
00960 "psllq $8, %%mm4 \n\t"
00961 "psllq $16, %%mm5 \n\t"
00962 "por %%mm4, %%mm3 \n\t"
00963 "por %%mm5, %%mm3 \n\t"
00964
00965 "movq %%mm0, %%mm6 \n\t"
00966 "movq %%mm3, %%mm7 \n\t"
00967
00968 "movq 8%1, %%mm0 \n\t"
00969 "movq 8%1, %%mm1 \n\t"
00970 "movq 8%1, %%mm2 \n\t"
00971 "pand %2, %%mm0 \n\t"
00972 "pand %3, %%mm1 \n\t"
00973 "pand %4, %%mm2 \n\t"
00974 "psllq $3, %%mm0 \n\t"
00975 "psrlq $2, %%mm1 \n\t"
00976 "psrlq $7, %%mm2 \n\t"
00977 "movq %%mm0, %%mm3 \n\t"
00978 "movq %%mm1, %%mm4 \n\t"
00979 "movq %%mm2, %%mm5 \n\t"
00980 "punpcklwd %5, %%mm0 \n\t"
00981 "punpcklwd %5, %%mm1 \n\t"
00982 "punpcklwd %5, %%mm2 \n\t"
00983 "punpckhwd %5, %%mm3 \n\t"
00984 "punpckhwd %5, %%mm4 \n\t"
00985 "punpckhwd %5, %%mm5 \n\t"
00986 "psllq $8, %%mm1 \n\t"
00987 "psllq $16, %%mm2 \n\t"
00988 "por %%mm1, %%mm0 \n\t"
00989 "por %%mm2, %%mm0 \n\t"
00990 "psllq $8, %%mm4 \n\t"
00991 "psllq $16, %%mm5 \n\t"
00992 "por %%mm4, %%mm3 \n\t"
00993 "por %%mm5, %%mm3 \n\t"
00994
00995 :"=m"(*d)
00996 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
00997 :"memory");
00998
00999 __asm__ volatile(
01000 "movq %%mm0, %%mm4 \n\t"
01001 "movq %%mm3, %%mm5 \n\t"
01002 "movq %%mm6, %%mm0 \n\t"
01003 "movq %%mm7, %%mm1 \n\t"
01004
01005 "movq %%mm4, %%mm6 \n\t"
01006 "movq %%mm5, %%mm7 \n\t"
01007 "movq %%mm0, %%mm2 \n\t"
01008 "movq %%mm1, %%mm3 \n\t"
01009
01010 "psrlq $8, %%mm2 \n\t"
01011 "psrlq $8, %%mm3 \n\t"
01012 "psrlq $8, %%mm6 \n\t"
01013 "psrlq $8, %%mm7 \n\t"
01014 "pand %2, %%mm0 \n\t"
01015 "pand %2, %%mm1 \n\t"
01016 "pand %2, %%mm4 \n\t"
01017 "pand %2, %%mm5 \n\t"
01018 "pand %3, %%mm2 \n\t"
01019 "pand %3, %%mm3 \n\t"
01020 "pand %3, %%mm6 \n\t"
01021 "pand %3, %%mm7 \n\t"
01022 "por %%mm2, %%mm0 \n\t"
01023 "por %%mm3, %%mm1 \n\t"
01024 "por %%mm6, %%mm4 \n\t"
01025 "por %%mm7, %%mm5 \n\t"
01026
01027 "movq %%mm1, %%mm2 \n\t"
01028 "movq %%mm4, %%mm3 \n\t"
01029 "psllq $48, %%mm2 \n\t"
01030 "psllq $32, %%mm3 \n\t"
01031 "pand %4, %%mm2 \n\t"
01032 "pand %5, %%mm3 \n\t"
01033 "por %%mm2, %%mm0 \n\t"
01034 "psrlq $16, %%mm1 \n\t"
01035 "psrlq $32, %%mm4 \n\t"
01036 "psllq $16, %%mm5 \n\t"
01037 "por %%mm3, %%mm1 \n\t"
01038 "pand %6, %%mm5 \n\t"
01039 "por %%mm5, %%mm4 \n\t"
01040
01041 MOVNTQ" %%mm0, %0 \n\t"
01042 MOVNTQ" %%mm1, 8%0 \n\t"
01043 MOVNTQ" %%mm4, 16%0"
01044
01045 :"=m"(*d)
01046 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
01047 :"memory");
01048 d += 24;
01049 s += 8;
01050 }
01051 __asm__ volatile(SFENCE:::"memory");
01052 __asm__ volatile(EMMS:::"memory");
01053 #endif
01054 while (s < end)
01055 {
01056 register uint16_t bgr;
01057 bgr = *s++;
01058 *d++ = (bgr&0x1F)<<3;
01059 *d++ = (bgr&0x3E0)>>2;
01060 *d++ = (bgr&0x7C00)>>7;
01061 }
01062 }
01063
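/* RGB565 -> packed 24-bit; identical scheme with 5:6:5 field widths (<<3, >>3, >>8). */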
01064 static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
01065 {
01066 const uint16_t *end;
01067 #if HAVE_MMX
01068 const uint16_t *mm_end;
01069 #endif
01070 uint8_t *d = (uint8_t *)dst;
01071 const uint16_t *s = (const uint16_t *)src;
01072 end = s + src_size/2;
01073 #if HAVE_MMX
01074 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
01075 mm_end = end - 7;
01076 while (s < mm_end)
01077 {
01078 __asm__ volatile(
01079 PREFETCH" 32%1 \n\t"
01080 "movq %1, %%mm0 \n\t"
01081 "movq %1, %%mm1 \n\t"
01082 "movq %1, %%mm2 \n\t"
01083 "pand %2, %%mm0 \n\t"
01084 "pand %3, %%mm1 \n\t"
01085 "pand %4, %%mm2 \n\t"
01086 "psllq $3, %%mm0 \n\t"
01087 "psrlq $3, %%mm1 \n\t"
01088 "psrlq $8, %%mm2 \n\t"
01089 "movq %%mm0, %%mm3 \n\t"
01090 "movq %%mm1, %%mm4 \n\t"
01091 "movq %%mm2, %%mm5 \n\t"
01092 "punpcklwd %5, %%mm0 \n\t"
01093 "punpcklwd %5, %%mm1 \n\t"
01094 "punpcklwd %5, %%mm2 \n\t"
01095 "punpckhwd %5, %%mm3 \n\t"
01096 "punpckhwd %5, %%mm4 \n\t"
01097 "punpckhwd %5, %%mm5 \n\t"
01098 "psllq $8, %%mm1 \n\t"
01099 "psllq $16, %%mm2 \n\t"
01100 "por %%mm1, %%mm0 \n\t"
01101 "por %%mm2, %%mm0 \n\t"
01102 "psllq $8, %%mm4 \n\t"
01103 "psllq $16, %%mm5 \n\t"
01104 "por %%mm4, %%mm3 \n\t"
01105 "por %%mm5, %%mm3 \n\t"
01106
01107 "movq %%mm0, %%mm6 \n\t"
01108 "movq %%mm3, %%mm7 \n\t"
01109
01110 "movq 8%1, %%mm0 \n\t"
01111 "movq 8%1, %%mm1 \n\t"
01112 "movq 8%1, %%mm2 \n\t"
01113 "pand %2, %%mm0 \n\t"
01114 "pand %3, %%mm1 \n\t"
01115 "pand %4, %%mm2 \n\t"
01116 "psllq $3, %%mm0 \n\t"
01117 "psrlq $3, %%mm1 \n\t"
01118 "psrlq $8, %%mm2 \n\t"
01119 "movq %%mm0, %%mm3 \n\t"
01120 "movq %%mm1, %%mm4 \n\t"
01121 "movq %%mm2, %%mm5 \n\t"
01122 "punpcklwd %5, %%mm0 \n\t"
01123 "punpcklwd %5, %%mm1 \n\t"
01124 "punpcklwd %5, %%mm2 \n\t"
01125 "punpckhwd %5, %%mm3 \n\t"
01126 "punpckhwd %5, %%mm4 \n\t"
01127 "punpckhwd %5, %%mm5 \n\t"
01128 "psllq $8, %%mm1 \n\t"
01129 "psllq $16, %%mm2 \n\t"
01130 "por %%mm1, %%mm0 \n\t"
01131 "por %%mm2, %%mm0 \n\t"
01132 "psllq $8, %%mm4 \n\t"
01133 "psllq $16, %%mm5 \n\t"
01134 "por %%mm4, %%mm3 \n\t"
01135 "por %%mm5, %%mm3 \n\t"
01136 :"=m"(*d)
01137 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
01138 :"memory");
01139
01140 __asm__ volatile(
01141 "movq %%mm0, %%mm4 \n\t"
01142 "movq %%mm3, %%mm5 \n\t"
01143 "movq %%mm6, %%mm0 \n\t"
01144 "movq %%mm7, %%mm1 \n\t"
01145
01146 "movq %%mm4, %%mm6 \n\t"
01147 "movq %%mm5, %%mm7 \n\t"
01148 "movq %%mm0, %%mm2 \n\t"
01149 "movq %%mm1, %%mm3 \n\t"
01150
01151 "psrlq $8, %%mm2 \n\t"
01152 "psrlq $8, %%mm3 \n\t"
01153 "psrlq $8, %%mm6 \n\t"
01154 "psrlq $8, %%mm7 \n\t"
01155 "pand %2, %%mm0 \n\t"
01156 "pand %2, %%mm1 \n\t"
01157 "pand %2, %%mm4 \n\t"
01158 "pand %2, %%mm5 \n\t"
01159 "pand %3, %%mm2 \n\t"
01160 "pand %3, %%mm3 \n\t"
01161 "pand %3, %%mm6 \n\t"
01162 "pand %3, %%mm7 \n\t"
01163 "por %%mm2, %%mm0 \n\t"
01164 "por %%mm3, %%mm1 \n\t"
01165 "por %%mm6, %%mm4 \n\t"
01166 "por %%mm7, %%mm5 \n\t"
01167
01168 "movq %%mm1, %%mm2 \n\t"
01169 "movq %%mm4, %%mm3 \n\t"
01170 "psllq $48, %%mm2 \n\t"
01171 "psllq $32, %%mm3 \n\t"
01172 "pand %4, %%mm2 \n\t"
01173 "pand %5, %%mm3 \n\t"
01174 "por %%mm2, %%mm0 \n\t"
01175 "psrlq $16, %%mm1 \n\t"
01176 "psrlq $32, %%mm4 \n\t"
01177 "psllq $16, %%mm5 \n\t"
01178 "por %%mm3, %%mm1 \n\t"
01179 "pand %6, %%mm5 \n\t"
01180 "por %%mm5, %%mm4 \n\t"
01181
01182 MOVNTQ" %%mm0, %0 \n\t"
01183 MOVNTQ" %%mm1, 8%0 \n\t"
01184 MOVNTQ" %%mm4, 16%0"
01185
01186 :"=m"(*d)
01187 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
01188 :"memory");
01189 d += 24;
01190 s += 8;
01191 }
01192 __asm__ volatile(SFENCE:::"memory");
01193 __asm__ volatile(EMMS:::"memory");
01194 #endif
01195 while (s < end)
01196 {
01197 register uint16_t bgr;
01198 bgr = *s++;
01199 *d++ = (bgr&0x1F)<<3;
01200 *d++ = (bgr&0x7E0)>>3;
01201 *d++ = (bgr&0xF800)>>8;
01202 }
01203 }
01204
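/* RGB555 -> 32-bit: fields are widened as in rgb15tobgr24 and interleaved with zero bytes
   via punpck; note that the MMX path leaves the alpha byte 0 while the C tail writes 255. */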
01205 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
01206 {
01207 const uint16_t *end;
01208 #if HAVE_MMX
01209 const uint16_t *mm_end;
01210 #endif
01211 uint8_t *d = dst;
01212 const uint16_t *s = (const uint16_t *)src;
01213 end = s + src_size/2;
01214 #if HAVE_MMX
01215 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
01216 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
01217 mm_end = end - 3;
01218 while (s < mm_end)
01219 {
01220 __asm__ volatile(
01221 PREFETCH" 32%1 \n\t"
01222 "movq %1, %%mm0 \n\t"
01223 "movq %1, %%mm1 \n\t"
01224 "movq %1, %%mm2 \n\t"
01225 "pand %2, %%mm0 \n\t"
01226 "pand %3, %%mm1 \n\t"
01227 "pand %4, %%mm2 \n\t"
01228 "psllq $3, %%mm0 \n\t"
01229 "psrlq $2, %%mm1 \n\t"
01230 "psrlq $7, %%mm2 \n\t"
01231 "movq %%mm0, %%mm3 \n\t"
01232 "movq %%mm1, %%mm4 \n\t"
01233 "movq %%mm2, %%mm5 \n\t"
01234 "punpcklwd %%mm7, %%mm0 \n\t"
01235 "punpcklwd %%mm7, %%mm1 \n\t"
01236 "punpcklwd %%mm7, %%mm2 \n\t"
01237 "punpckhwd %%mm7, %%mm3 \n\t"
01238 "punpckhwd %%mm7, %%mm4 \n\t"
01239 "punpckhwd %%mm7, %%mm5 \n\t"
01240 "psllq $8, %%mm1 \n\t"
01241 "psllq $16, %%mm2 \n\t"
01242 "por %%mm1, %%mm0 \n\t"
01243 "por %%mm2, %%mm0 \n\t"
01244 "psllq $8, %%mm4 \n\t"
01245 "psllq $16, %%mm5 \n\t"
01246 "por %%mm4, %%mm3 \n\t"
01247 "por %%mm5, %%mm3 \n\t"
01248 MOVNTQ" %%mm0, %0 \n\t"
01249 MOVNTQ" %%mm3, 8%0 \n\t"
01250 :"=m"(*d)
01251 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
01252 :"memory");
01253 d += 16;
01254 s += 4;
01255 }
01256 __asm__ volatile(SFENCE:::"memory");
01257 __asm__ volatile(EMMS:::"memory");
01258 #endif
01259 while (s < end)
01260 {
01261 #if 0 //slightly slower on Athlon
01262 int bgr= *s++;
01263 *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9);
01264 #else
01265 register uint16_t bgr;
01266 bgr = *s++;
01267 #ifdef WORDS_BIGENDIAN
01268 *d++ = 255;
01269 *d++ = (bgr&0x7C00)>>7;
01270 *d++ = (bgr&0x3E0)>>2;
01271 *d++ = (bgr&0x1F)<<3;
01272 #else
01273 *d++ = (bgr&0x1F)<<3;
01274 *d++ = (bgr&0x3E0)>>2;
01275 *d++ = (bgr&0x7C00)>>7;
01276 *d++ = 255;
01277 #endif
01278
01279 #endif
01280 }
01281 }
01282
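/* RGB565 -> 32-bit; as above, the MMX path stores a zero alpha byte, the C tail 255. */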
01283 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
01284 {
01285 const uint16_t *end;
01286 #if HAVE_MMX
01287 const uint16_t *mm_end;
01288 #endif
01289 uint8_t *d = dst;
01290 const uint16_t *s = (const uint16_t*)src;
01291 end = s + src_size/2;
01292 #if HAVE_MMX
01293 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
01294 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
01295 mm_end = end - 3;
01296 while (s < mm_end)
01297 {
01298 __asm__ volatile(
01299 PREFETCH" 32%1 \n\t"
01300 "movq %1, %%mm0 \n\t"
01301 "movq %1, %%mm1 \n\t"
01302 "movq %1, %%mm2 \n\t"
01303 "pand %2, %%mm0 \n\t"
01304 "pand %3, %%mm1 \n\t"
01305 "pand %4, %%mm2 \n\t"
01306 "psllq $3, %%mm0 \n\t"
01307 "psrlq $3, %%mm1 \n\t"
01308 "psrlq $8, %%mm2 \n\t"
01309 "movq %%mm0, %%mm3 \n\t"
01310 "movq %%mm1, %%mm4 \n\t"
01311 "movq %%mm2, %%mm5 \n\t"
01312 "punpcklwd %%mm7, %%mm0 \n\t"
01313 "punpcklwd %%mm7, %%mm1 \n\t"
01314 "punpcklwd %%mm7, %%mm2 \n\t"
01315 "punpckhwd %%mm7, %%mm3 \n\t"
01316 "punpckhwd %%mm7, %%mm4 \n\t"
01317 "punpckhwd %%mm7, %%mm5 \n\t"
01318 "psllq $8, %%mm1 \n\t"
01319 "psllq $16, %%mm2 \n\t"
01320 "por %%mm1, %%mm0 \n\t"
01321 "por %%mm2, %%mm0 \n\t"
01322 "psllq $8, %%mm4 \n\t"
01323 "psllq $16, %%mm5 \n\t"
01324 "por %%mm4, %%mm3 \n\t"
01325 "por %%mm5, %%mm3 \n\t"
01326 MOVNTQ" %%mm0, %0 \n\t"
01327 MOVNTQ" %%mm3, 8%0 \n\t"
01328 :"=m"(*d)
01329 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
01330 :"memory");
01331 d += 16;
01332 s += 4;
01333 }
01334 __asm__ volatile(SFENCE:::"memory");
01335 __asm__ volatile(EMMS:::"memory");
01336 #endif
01337 while (s < end)
01338 {
01339 register uint16_t bgr;
01340 bgr = *s++;
01341 #ifdef WORDS_BIGENDIAN
01342 *d++ = 255;
01343 *d++ = (bgr&0xF800)>>8;
01344 *d++ = (bgr&0x7E0)>>3;
01345 *d++ = (bgr&0x1F)<<3;
01346 #else
01347 *d++ = (bgr&0x1F)<<3;
01348 *d++ = (bgr&0x7E0)>>3;
01349 *d++ = (bgr&0xF800)>>8;
01350 *d++ = 255;
01351 #endif
01352 }
01353 }
01354
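/* Swap R and B within each 32-bit pixel. The index starts at 15 - src_size and counts up,
   so one register serves as both loop counter and offset; MMX2 uses pshufw, plain MMX a
   mask/shift/or sequence, and the C loop finishes the remainder. */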
01355 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
01356 {
01357 long idx = 15 - src_size;
01358 const uint8_t *s = src-idx;
01359 uint8_t *d = dst-idx;
01360 #if HAVE_MMX
01361 __asm__ volatile(
01362 "test %0, %0 \n\t"
01363 "jns 2f \n\t"
01364 PREFETCH" (%1, %0) \n\t"
01365 "movq %3, %%mm7 \n\t"
01366 "pxor %4, %%mm7 \n\t"
01367 "movq %%mm7, %%mm6 \n\t"
01368 "pxor %5, %%mm7 \n\t"
01369 ASMALIGN(4)
01370 "1: \n\t"
01371 PREFETCH" 32(%1, %0) \n\t"
01372 "movq (%1, %0), %%mm0 \n\t"
01373 "movq 8(%1, %0), %%mm1 \n\t"
01374 # if HAVE_MMX2
01375 "pshufw $177, %%mm0, %%mm3 \n\t"
01376 "pshufw $177, %%mm1, %%mm5 \n\t"
01377 "pand %%mm7, %%mm0 \n\t"
01378 "pand %%mm6, %%mm3 \n\t"
01379 "pand %%mm7, %%mm1 \n\t"
01380 "pand %%mm6, %%mm5 \n\t"
01381 "por %%mm3, %%mm0 \n\t"
01382 "por %%mm5, %%mm1 \n\t"
01383 # else
01384 "movq %%mm0, %%mm2 \n\t"
01385 "movq %%mm1, %%mm4 \n\t"
01386 "pand %%mm7, %%mm0 \n\t"
01387 "pand %%mm6, %%mm2 \n\t"
01388 "pand %%mm7, %%mm1 \n\t"
01389 "pand %%mm6, %%mm4 \n\t"
01390 "movq %%mm2, %%mm3 \n\t"
01391 "movq %%mm4, %%mm5 \n\t"
01392 "pslld $16, %%mm2 \n\t"
01393 "psrld $16, %%mm3 \n\t"
01394 "pslld $16, %%mm4 \n\t"
01395 "psrld $16, %%mm5 \n\t"
01396 "por %%mm2, %%mm0 \n\t"
01397 "por %%mm4, %%mm1 \n\t"
01398 "por %%mm3, %%mm0 \n\t"
01399 "por %%mm5, %%mm1 \n\t"
01400 # endif
01401 MOVNTQ" %%mm0, (%2, %0) \n\t"
01402 MOVNTQ" %%mm1, 8(%2, %0) \n\t"
01403 "add $16, %0 \n\t"
01404 "js 1b \n\t"
01405 SFENCE" \n\t"
01406 EMMS" \n\t"
01407 "2: \n\t"
01408 : "+&r"(idx)
01409 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
01410 : "memory");
01411 #endif
01412 for (; idx<15; idx+=4) {
01413 register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
01414 v &= 0xff00ff;
01415 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
01416 }
01417 }
01418
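/* Swap R and B in packed 24-bit data; the MMX loop reassembles 8 pixels (24 bytes) per
   iteration from overlapping unaligned loads using the mask24r/g/b constants, again
   driving the loop with a negative counter. */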
01419 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
01420 {
01421 unsigned i;
01422 #if HAVE_MMX
01423 long mmx_size= 23 - src_size;
01424 __asm__ volatile (
01425 "test %%"REG_a", %%"REG_a" \n\t"
01426 "jns 2f \n\t"
01427 "movq "MANGLE(mask24r)", %%mm5 \n\t"
01428 "movq "MANGLE(mask24g)", %%mm6 \n\t"
01429 "movq "MANGLE(mask24b)", %%mm7 \n\t"
01430 ASMALIGN(4)
01431 "1: \n\t"
01432 PREFETCH" 32(%1, %%"REG_a") \n\t"
01433 "movq (%1, %%"REG_a"), %%mm0 \n\t"
01434 "movq (%1, %%"REG_a"), %%mm1 \n\t"
01435 "movq 2(%1, %%"REG_a"), %%mm2 \n\t"
01436 "psllq $16, %%mm0 \n\t"
01437 "pand %%mm5, %%mm0 \n\t"
01438 "pand %%mm6, %%mm1 \n\t"
01439 "pand %%mm7, %%mm2 \n\t"
01440 "por %%mm0, %%mm1 \n\t"
01441 "por %%mm2, %%mm1 \n\t"
01442 "movq 6(%1, %%"REG_a"), %%mm0 \n\t"
01443 MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t"
01444 "movq 8(%1, %%"REG_a"), %%mm1 \n\t"
01445 "movq 10(%1, %%"REG_a"), %%mm2 \n\t"
01446 "pand %%mm7, %%mm0 \n\t"
01447 "pand %%mm5, %%mm1 \n\t"
01448 "pand %%mm6, %%mm2 \n\t"
01449 "por %%mm0, %%mm1 \n\t"
01450 "por %%mm2, %%mm1 \n\t"
01451 "movq 14(%1, %%"REG_a"), %%mm0 \n\t"
01452 MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t"
01453 "movq 16(%1, %%"REG_a"), %%mm1 \n\t"
01454 "movq 18(%1, %%"REG_a"), %%mm2 \n\t"
01455 "pand %%mm6, %%mm0 \n\t"
01456 "pand %%mm7, %%mm1 \n\t"
01457 "pand %%mm5, %%mm2 \n\t"
01458 "por %%mm0, %%mm1 \n\t"
01459 "por %%mm2, %%mm1 \n\t"
01460 MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t"
01461 "add $24, %%"REG_a" \n\t"
01462 " js 1b \n\t"
01463 "2: \n\t"
01464 : "+a" (mmx_size)
01465 : "r" (src-mmx_size), "r"(dst-mmx_size)
01466 );
01467
01468 __asm__ volatile(SFENCE:::"memory");
01469 __asm__ volatile(EMMS:::"memory");
01470
01471 if (mmx_size==23) return;
01472
01473 src+= src_size;
01474 dst+= src_size;
01475 src_size= 23-mmx_size;
01476 src-= src_size;
01477 dst-= src_size;
01478 #endif
01479 for (i=0; i<src_size; i+=3)
01480 {
01481 register uint8_t x;
01482 x = src[i + 2];
01483 dst[i + 1] = src[i + 1];
01484 dst[i + 2] = src[i + 0];
01485 dst[i + 0] = x;
01486 }
01487 }
01488
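/* Interleave planar YUV into packed YUYV. vertLumPerChroma selects how many luma lines
   share one chroma line (2 for 4:2:0, 1 for 4:2:2); the MMX loop emits 16 pixels per
   iteration and therefore expects width to be a multiple of 16. */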
01489 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01490 long width, long height,
01491 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
01492 {
01493 long y;
01494 const long chromWidth= width>>1;
01495 for (y=0; y<height; y++)
01496 {
01497 #if HAVE_MMX
01498
01499 __asm__ volatile(
01500 "xor %%"REG_a", %%"REG_a" \n\t"
01501 ASMALIGN(4)
01502 "1: \n\t"
01503 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
01504 PREFETCH" 32(%2, %%"REG_a") \n\t"
01505 PREFETCH" 32(%3, %%"REG_a") \n\t"
01506 "movq (%2, %%"REG_a"), %%mm0 \n\t"
01507 "movq %%mm0, %%mm2 \n\t"
01508 "movq (%3, %%"REG_a"), %%mm1 \n\t"
01509 "punpcklbw %%mm1, %%mm0 \n\t"
01510 "punpckhbw %%mm1, %%mm2 \n\t"
01511
01512 "movq (%1, %%"REG_a",2), %%mm3 \n\t"
01513 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
01514 "movq %%mm3, %%mm4 \n\t"
01515 "movq %%mm5, %%mm6 \n\t"
01516 "punpcklbw %%mm0, %%mm3 \n\t"
01517 "punpckhbw %%mm0, %%mm4 \n\t"
01518 "punpcklbw %%mm2, %%mm5 \n\t"
01519 "punpckhbw %%mm2, %%mm6 \n\t"
01520
01521 MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t"
01522 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
01523 MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t"
01524 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
01525
01526 "add $8, %%"REG_a" \n\t"
01527 "cmp %4, %%"REG_a" \n\t"
01528 " jb 1b \n\t"
01529 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
01530 : "%"REG_a
01531 );
01532 #else
01533
01534 #if ARCH_ALPHA && HAVE_MVI
01535 #define pl2yuy2(n) \
01536 y1 = yc[n]; \
01537 y2 = yc2[n]; \
01538 u = uc[n]; \
01539 v = vc[n]; \
01540 __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \
01541 __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \
01542 __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u)); \
01543 __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v)); \
01544 yuv1 = (u << 8) + (v << 24); \
01545 yuv2 = yuv1 + y2; \
01546 yuv1 += y1; \
01547 qdst[n] = yuv1; \
01548 qdst2[n] = yuv2;
01549
01550 int i;
01551 uint64_t *qdst = (uint64_t *) dst;
01552 uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
01553 const uint32_t *yc = (uint32_t *) ysrc;
01554 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
01555 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
01556 for (i = 0; i < chromWidth; i += 8){
01557 uint64_t y1, y2, yuv1, yuv2;
01558 uint64_t u, v;
01559
01560 __asm__("ldq $31,64(%0)" :: "r"(yc));
01561 __asm__("ldq $31,64(%0)" :: "r"(yc2));
01562 __asm__("ldq $31,64(%0)" :: "r"(uc));
01563 __asm__("ldq $31,64(%0)" :: "r"(vc));
01564
01565 pl2yuy2(0);
01566 pl2yuy2(1);
01567 pl2yuy2(2);
01568 pl2yuy2(3);
01569
01570 yc += 4;
01571 yc2 += 4;
01572 uc += 4;
01573 vc += 4;
01574 qdst += 4;
01575 qdst2 += 4;
01576 }
01577 y++;
01578 ysrc += lumStride;
01579 dst += dstStride;
01580
01581 #elif HAVE_FAST_64BIT
01582 int i;
01583 uint64_t *ldst = (uint64_t *) dst;
01584 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01585 for (i = 0; i < chromWidth; i += 2){
01586 uint64_t k, l;
01587 k = yc[0] + (uc[0] << 8) +
01588 (yc[1] << 16) + (vc[0] << 24);
01589 l = yc[2] + (uc[1] << 8) +
01590 (yc[3] << 16) + (vc[1] << 24);
01591 *ldst++ = k + (l << 32);
01592 yc += 4;
01593 uc += 2;
01594 vc += 2;
01595 }
01596
01597 #else
int i; int32_t *idst = (int32_t *) dst;
01599 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01600 for (i = 0; i < chromWidth; i++){
01601 #ifdef WORDS_BIGENDIAN
01602 *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
01603 (yc[1] << 8) + (vc[0] << 0);
01604 #else
01605 *idst++ = yc[0] + (uc[0] << 8) +
01606 (yc[1] << 16) + (vc[0] << 24);
01607 #endif
01608 yc += 2;
01609 uc++;
01610 vc++;
01611 }
01612 #endif
01613 #endif
01614 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
01615 {
01616 usrc += chromStride;
01617 vsrc += chromStride;
01618 }
01619 ysrc += lumStride;
01620 dst += dstStride;
01621 }
01622 #if HAVE_MMX
01623 __asm__( EMMS" \n\t"
01624 SFENCE" \n\t"
01625 :::"memory");
01626 #endif
01627 }
01628
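/* YV12 (4:2:0 planar) -> YUY2 wrapper; width is expected to be a multiple of 16
   and height a multiple of 2. */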
01633 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01634 long width, long height,
01635 long lumStride, long chromStride, long dstStride)
01636 {
01637
01638 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01639 }
01640
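/* Same as yuvPlanartoyuy2 but emits UYVY byte order (chroma before luma). */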
01641 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01642 long width, long height,
01643 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
01644 {
01645 long y;
01646 const long chromWidth= width>>1;
01647 for (y=0; y<height; y++)
01648 {
01649 #if HAVE_MMX
01650
01651 __asm__ volatile(
01652 "xor %%"REG_a", %%"REG_a" \n\t"
01653 ASMALIGN(4)
01654 "1: \n\t"
01655 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
01656 PREFETCH" 32(%2, %%"REG_a") \n\t"
01657 PREFETCH" 32(%3, %%"REG_a") \n\t"
01658 "movq (%2, %%"REG_a"), %%mm0 \n\t"
01659 "movq %%mm0, %%mm2 \n\t"
01660 "movq (%3, %%"REG_a"), %%mm1 \n\t"
01661 "punpcklbw %%mm1, %%mm0 \n\t"
01662 "punpckhbw %%mm1, %%mm2 \n\t"
01663
01664 "movq (%1, %%"REG_a",2), %%mm3 \n\t"
01665 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
01666 "movq %%mm0, %%mm4 \n\t"
01667 "movq %%mm2, %%mm6 \n\t"
01668 "punpcklbw %%mm3, %%mm0 \n\t"
01669 "punpckhbw %%mm3, %%mm4 \n\t"
01670 "punpcklbw %%mm5, %%mm2 \n\t"
01671 "punpckhbw %%mm5, %%mm6 \n\t"
01672
01673 MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t"
01674 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
01675 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t"
01676 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
01677
01678 "add $8, %%"REG_a" \n\t"
01679 "cmp %4, %%"REG_a" \n\t"
01680 " jb 1b \n\t"
01681 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
01682 : "%"REG_a
01683 );
01684 #else
01685
01686
01687 #if HAVE_FAST_64BIT
01688 int i;
01689 uint64_t *ldst = (uint64_t *) dst;
01690 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01691 for (i = 0; i < chromWidth; i += 2){
01692 uint64_t k, l;
01693 k = uc[0] + (yc[0] << 8) +
01694 (vc[0] << 16) + (yc[1] << 24);
01695 l = uc[1] + (yc[2] << 8) +
01696 (vc[1] << 16) + (yc[3] << 24);
01697 *ldst++ = k + (l << 32);
01698 yc += 4;
01699 uc += 2;
01700 vc += 2;
01701 }
01702
01703 #else
int i; int32_t *idst = (int32_t *) dst;
01705 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01706 for (i = 0; i < chromWidth; i++){
01707 #ifdef WORDS_BIGENDIAN
01708 *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
01709 (vc[0] << 8) + (yc[1] << 0);
01710 #else
01711 *idst++ = uc[0] + (yc[0] << 8) +
01712 (vc[0] << 16) + (yc[1] << 24);
01713 #endif
01714 yc += 2;
01715 uc++;
01716 vc++;
01717 }
01718 #endif
01719 #endif
01720 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
01721 {
01722 usrc += chromStride;
01723 vsrc += chromStride;
01724 }
01725 ysrc += lumStride;
01726 dst += dstStride;
01727 }
01728 #if HAVE_MMX
01729 __asm__( EMMS" \n\t"
01730 SFENCE" \n\t"
01731 :::"memory");
01732 #endif
01733 }
01734
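/* YV12 (4:2:0 planar) -> UYVY wrapper (vertLumPerChroma = 2). */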
01739 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01740 long width, long height,
01741 long lumStride, long chromStride, long dstStride)
01742 {
01743
01744 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01745 }
01746
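/* 4:2:2 planar -> UYVY wrapper (vertLumPerChroma = 1). */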
01750 static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01751 long width, long height,
01752 long lumStride, long chromStride, long dstStride)
01753 {
01754 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
01755 }
01756
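/* 4:2:2 planar -> YUY2 wrapper (vertLumPerChroma = 1). */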
01760 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01761 long width, long height,
01762 long lumStride, long chromStride, long dstStride)
01763 {
01764 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
01765 }
01766
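/* De-interleave packed YUY2 into YV12 planes. The first pass of each line pair extracts
   Y, U and V, the second pass only Y, so chroma is taken from every second input line;
   width is expected to be a multiple of 16, height a multiple of 2. */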
01771 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01772 long width, long height,
01773 long lumStride, long chromStride, long srcStride)
01774 {
01775 long y;
01776 const long chromWidth= width>>1;
01777 for (y=0; y<height; y+=2)
01778 {
01779 #if HAVE_MMX
01780 __asm__ volatile(
01781 "xor %%"REG_a", %%"REG_a" \n\t"
01782 "pcmpeqw %%mm7, %%mm7 \n\t"
01783 "psrlw $8, %%mm7 \n\t"
01784 ASMALIGN(4)
01785 "1: \n\t"
01786 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01787 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01788 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01789 "movq %%mm0, %%mm2 \n\t"
01790 "movq %%mm1, %%mm3 \n\t"
01791 "psrlw $8, %%mm0 \n\t"
01792 "psrlw $8, %%mm1 \n\t"
01793 "pand %%mm7, %%mm2 \n\t"
01794 "pand %%mm7, %%mm3 \n\t"
01795 "packuswb %%mm1, %%mm0 \n\t"
01796 "packuswb %%mm3, %%mm2 \n\t"
01797
01798 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
01799
01800 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t"
01801 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t"
01802 "movq %%mm1, %%mm3 \n\t"
01803 "movq %%mm2, %%mm4 \n\t"
01804 "psrlw $8, %%mm1 \n\t"
01805 "psrlw $8, %%mm2 \n\t"
01806 "pand %%mm7, %%mm3 \n\t"
01807 "pand %%mm7, %%mm4 \n\t"
01808 "packuswb %%mm2, %%mm1 \n\t"
01809 "packuswb %%mm4, %%mm3 \n\t"
01810
01811 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
01812
01813 "movq %%mm0, %%mm2 \n\t"
01814 "movq %%mm1, %%mm3 \n\t"
01815 "psrlw $8, %%mm0 \n\t"
01816 "psrlw $8, %%mm1 \n\t"
01817 "pand %%mm7, %%mm2 \n\t"
01818 "pand %%mm7, %%mm3 \n\t"
01819 "packuswb %%mm1, %%mm0 \n\t"
01820 "packuswb %%mm3, %%mm2 \n\t"
01821
01822 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
01823 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
01824
01825 "add $8, %%"REG_a" \n\t"
01826 "cmp %4, %%"REG_a" \n\t"
01827 " jb 1b \n\t"
01828 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01829 : "memory", "%"REG_a
01830 );
01831
01832 ydst += lumStride;
01833 src += srcStride;
01834
01835 __asm__ volatile(
01836 "xor %%"REG_a", %%"REG_a" \n\t"
01837 ASMALIGN(4)
01838 "1: \n\t"
01839 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01840 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01841 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01842 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t"
01843 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t"
01844 "pand %%mm7, %%mm0 \n\t"
01845 "pand %%mm7, %%mm1 \n\t"
01846 "pand %%mm7, %%mm2 \n\t"
01847 "pand %%mm7, %%mm3 \n\t"
01848 "packuswb %%mm1, %%mm0 \n\t"
01849 "packuswb %%mm3, %%mm2 \n\t"
01850
01851 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
01852 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"
01853
01854 "add $8, %%"REG_a" \n\t"
01855 "cmp %4, %%"REG_a" \n\t"
01856 " jb 1b \n\t"
01857
01858 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01859 : "memory", "%"REG_a
01860 );
01861 #else
01862 long i;
01863 for (i=0; i<chromWidth; i++)
01864 {
01865 ydst[2*i+0] = src[4*i+0];
01866 udst[i] = src[4*i+1];
01867 ydst[2*i+1] = src[4*i+2];
01868 vdst[i] = src[4*i+3];
01869 }
01870 ydst += lumStride;
01871 src += srcStride;
01872
01873 for (i=0; i<chromWidth; i++)
01874 {
01875 ydst[2*i+0] = src[4*i+0];
01876 ydst[2*i+1] = src[4*i+2];
01877 }
01878 #endif
01879 udst += chromStride;
01880 vdst += chromStride;
01881 ydst += lumStride;
01882 src += srcStride;
01883 }
01884 #if HAVE_MMX
01885 __asm__ volatile( EMMS" \n\t"
01886 SFENCE" \n\t"
01887 :::"memory");
01888 #endif
01889 }
01890
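/* YVU9 -> YV12: only the luma plane is copied here; the U and V destination planes
   are left untouched (chroma upscaling is not implemented in this template). */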
01891 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
01892 uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01893 long width, long height, long lumStride, long chromStride)
01894 {
01895
01896 memcpy(ydst, ysrc, width*height);
01897
01898
01899 }
01900
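/* Upscale one plane to twice the width and height using 3:1/1:3 linear interpolation
   (edges replicated); the MMX2/3DNow! path approximates the weights with two PAVGB steps. */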
01901 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
01902 {
01903 long x,y;
01904
01905 dst[0]= src[0];
01906
01907
01908 for (x=0; x<srcWidth-1; x++){
01909 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
01910 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
01911 }
01912 dst[2*srcWidth-1]= src[srcWidth-1];
01913
01914 dst+= dstStride;
01915
01916 for (y=1; y<srcHeight; y++){
01917 #if HAVE_MMX2 || HAVE_AMD3DNOW
01918 const long mmxSize= srcWidth&~15;
01919 __asm__ volatile(
01920 "mov %4, %%"REG_a" \n\t"
01921 "1: \n\t"
01922 "movq (%0, %%"REG_a"), %%mm0 \n\t"
01923 "movq (%1, %%"REG_a"), %%mm1 \n\t"
01924 "movq 1(%0, %%"REG_a"), %%mm2 \n\t"
01925 "movq 1(%1, %%"REG_a"), %%mm3 \n\t"
01926 "movq -1(%0, %%"REG_a"), %%mm4 \n\t"
01927 "movq -1(%1, %%"REG_a"), %%mm5 \n\t"
01928 PAVGB" %%mm0, %%mm5 \n\t"
01929 PAVGB" %%mm0, %%mm3 \n\t"
01930 PAVGB" %%mm0, %%mm5 \n\t"
01931 PAVGB" %%mm0, %%mm3 \n\t"
01932 PAVGB" %%mm1, %%mm4 \n\t"
01933 PAVGB" %%mm1, %%mm2 \n\t"
01934 PAVGB" %%mm1, %%mm4 \n\t"
01935 PAVGB" %%mm1, %%mm2 \n\t"
01936 "movq %%mm5, %%mm7 \n\t"
01937 "movq %%mm4, %%mm6 \n\t"
01938 "punpcklbw %%mm3, %%mm5 \n\t"
01939 "punpckhbw %%mm3, %%mm7 \n\t"
01940 "punpcklbw %%mm2, %%mm4 \n\t"
01941 "punpckhbw %%mm2, %%mm6 \n\t"
01942 #if 1
01943 MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t"
01944 MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t"
01945 MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t"
01946 MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t"
01947 #else
01948 "movq %%mm5, (%2, %%"REG_a", 2) \n\t"
01949 "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t"
01950 "movq %%mm4, (%3, %%"REG_a", 2) \n\t"
01951 "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t"
01952 #endif
01953 "add $8, %%"REG_a" \n\t"
01954 " js 1b \n\t"
01955 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
01956 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
01957 "g" (-mmxSize)
01958 : "%"REG_a
01959
01960 );
01961 #else
01962 const long mmxSize=1;
01963 #endif
01964 dst[0 ]= (3*src[0] + src[srcStride])>>2;
01965 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
01966
01967 for (x=mmxSize-1; x<srcWidth-1; x++){
01968 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
01969 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
01970 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
01971 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
01972 }
01973 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2;
01974 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
01975
01976 dst+=dstStride*2;
01977 src+=srcStride;
01978 }
01979
01980
01981 #if 1
01982 dst[0]= src[0];
01983
01984 for (x=0; x<srcWidth-1; x++){
01985 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
01986 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
01987 }
01988 dst[2*srcWidth-1]= src[srcWidth-1];
01989 #else
01990 for (x=0; x<srcWidth; x++){
01991 dst[2*x+0]=
01992 dst[2*x+1]= src[x];
01993 }
01994 #endif
01995
01996 #if HAVE_MMX
01997 __asm__ volatile( EMMS" \n\t"
01998 SFENCE" \n\t"
01999 :::"memory");
02000 #endif
02001 }
02002
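/* De-interleave packed UYVY into YV12 planes; as with yuy2toyv12, chroma comes from every
   second input line, width is expected to be a multiple of 16 and height a multiple of 2. */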
02009 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
02010 long width, long height,
02011 long lumStride, long chromStride, long srcStride)
02012 {
02013 long y;
02014 const long chromWidth= width>>1;
02015 for (y=0; y<height; y+=2)
02016 {
02017 #if HAVE_MMX
02018 __asm__ volatile(
02019 "xor %%"REG_a", %%"REG_a" \n\t"
02020 "pcmpeqw %%mm7, %%mm7 \n\t"
02021 "psrlw $8, %%mm7 \n\t"
02022 ASMALIGN(4)
02023 "1: \n\t"
02024 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
02025 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
02026 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
02027 "movq %%mm0, %%mm2 \n\t"
02028 "movq %%mm1, %%mm3 \n\t"
02029 "pand %%mm7, %%mm0 \n\t"
02030 "pand %%mm7, %%mm1 \n\t"
02031 "psrlw $8, %%mm2 \n\t"
02032 "psrlw $8, %%mm3 \n\t"
02033 "packuswb %%mm1, %%mm0 \n\t"
02034 "packuswb %%mm3, %%mm2 \n\t"
02035
02036 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
02037
02038 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t"
02039 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t"
02040 "movq %%mm1, %%mm3 \n\t"
02041 "movq %%mm2, %%mm4 \n\t"
02042 "pand %%mm7, %%mm1 \n\t"
02043 "pand %%mm7, %%mm2 \n\t"
02044 "psrlw $8, %%mm3 \n\t"
02045 "psrlw $8, %%mm4 \n\t"
02046 "packuswb %%mm2, %%mm1 \n\t"
02047 "packuswb %%mm4, %%mm3 \n\t"
02048
02049 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
02050
02051 "movq %%mm0, %%mm2 \n\t"
02052 "movq %%mm1, %%mm3 \n\t"
02053 "psrlw $8, %%mm0 \n\t"
02054 "psrlw $8, %%mm1 \n\t"
02055 "pand %%mm7, %%mm2 \n\t"
02056 "pand %%mm7, %%mm3 \n\t"
02057 "packuswb %%mm1, %%mm0 \n\t"
02058 "packuswb %%mm3, %%mm2 \n\t"
02059
02060 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
02061 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
02062
02063 "add $8, %%"REG_a" \n\t"
02064 "cmp %4, %%"REG_a" \n\t"
02065 " jb 1b \n\t"
02066 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
02067 : "memory", "%"REG_a
02068 );
02069
02070 ydst += lumStride;
02071 src += srcStride;
02072
02073 __asm__ volatile(
02074 "xor %%"REG_a", %%"REG_a" \n\t"
02075 ASMALIGN(4)
02076 "1: \n\t"
02077 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
02078 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
02079 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
02080 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t"
02081 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t"
02082 "psrlw $8, %%mm0 \n\t"
02083 "psrlw $8, %%mm1 \n\t"
02084 "psrlw $8, %%mm2 \n\t"
02085 "psrlw $8, %%mm3 \n\t"
02086 "packuswb %%mm1, %%mm0 \n\t"
02087 "packuswb %%mm3, %%mm2 \n\t"
02088
02089 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
02090 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"
02091
02092 "add $8, %%"REG_a" \n\t"
02093 "cmp %4, %%"REG_a" \n\t"
02094 " jb 1b \n\t"
02095
02096 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
02097 : "memory", "%"REG_a
02098 );
02099 #else
02100 long i;
02101 for (i=0; i<chromWidth; i++)
02102 {
02103 udst[i] = src[4*i+0];
02104 ydst[2*i+0] = src[4*i+1];
02105 vdst[i] = src[4*i+2];
02106 ydst[2*i+1] = src[4*i+3];
02107 }
02108 ydst += lumStride;
02109 src += srcStride;
02110
02111 for (i=0; i<chromWidth; i++)
02112 {
02113 ydst[2*i+0] = src[4*i+1];
02114 ydst[2*i+1] = src[4*i+3];
02115 }
02116 #endif
02117 udst += chromStride;
02118 vdst += chromStride;
02119 ydst += lumStride;
02120 src += srcStride;
02121 }
02122 #if HAVE_MMX
02123 __asm__ volatile( EMMS" \n\t"
02124 SFENCE" \n\t"
02125 :::"memory");
02126 #endif
02127 }
02128
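02129 /**
02130  * Convert packed 24 bpp RGB data (read in B,G,R byte order, as in the
02131  * scalar fallback below) to planar YV12. Height should be a multiple of 2
02132  * and width a multiple of 2. The MMX path averages chrominance over each
02133  * 2x2 pixel block; the scalar fallback takes chrominance from the even
02134  * lines only.
02135  */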
02136 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
02137 long width, long height,
02138 long lumStride, long chromStride, long srcStride)
02139 {
02140 long y;
02141 const long chromWidth= width>>1;
02142 #if HAVE_MMX
02143 for (y=0; y<height-2; y+=2)
02144 {
02145 long i;
02146 for (i=0; i<2; i++)
02147 {
02148 __asm__ volatile(
02149 "mov %2, %%"REG_a" \n\t"
02150 "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
02151 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
02152 "pxor %%mm7, %%mm7 \n\t"
02153 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
02154 ASMALIGN(4)
02155 "1: \n\t"
02156 PREFETCH" 64(%0, %%"REG_d") \n\t"
02157 "movd (%0, %%"REG_d"), %%mm0 \n\t"
02158 "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
02159 "punpcklbw %%mm7, %%mm0 \n\t"
02160 "punpcklbw %%mm7, %%mm1 \n\t"
02161 "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
02162 "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
02163 "punpcklbw %%mm7, %%mm2 \n\t"
02164 "punpcklbw %%mm7, %%mm3 \n\t"
02165 "pmaddwd %%mm6, %%mm0 \n\t"
02166 "pmaddwd %%mm6, %%mm1 \n\t"
02167 "pmaddwd %%mm6, %%mm2 \n\t"
02168 "pmaddwd %%mm6, %%mm3 \n\t"
02169 #ifndef FAST_BGR2YV12
02170 "psrad $8, %%mm0 \n\t"
02171 "psrad $8, %%mm1 \n\t"
02172 "psrad $8, %%mm2 \n\t"
02173 "psrad $8, %%mm3 \n\t"
02174 #endif
02175 "packssdw %%mm1, %%mm0 \n\t"
02176 "packssdw %%mm3, %%mm2 \n\t"
02177 "pmaddwd %%mm5, %%mm0 \n\t"
02178 "pmaddwd %%mm5, %%mm2 \n\t"
02179 "packssdw %%mm2, %%mm0 \n\t"
02180 "psraw $7, %%mm0 \n\t"
02181         /* mm0 = Y of the first 4 pixels of this iteration, as 16-bit words */
02182 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
02183 "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
02184 "punpcklbw %%mm7, %%mm4 \n\t"
02185 "punpcklbw %%mm7, %%mm1 \n\t"
02186 "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
02187 "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
02188 "punpcklbw %%mm7, %%mm2 \n\t"
02189 "punpcklbw %%mm7, %%mm3 \n\t"
02190 "pmaddwd %%mm6, %%mm4 \n\t"
02191 "pmaddwd %%mm6, %%mm1 \n\t"
02192 "pmaddwd %%mm6, %%mm2 \n\t"
02193 "pmaddwd %%mm6, %%mm3 \n\t"
02194 #ifndef FAST_BGR2YV12
02195 "psrad $8, %%mm4 \n\t"
02196 "psrad $8, %%mm1 \n\t"
02197 "psrad $8, %%mm2 \n\t"
02198 "psrad $8, %%mm3 \n\t"
02199 #endif
02200 "packssdw %%mm1, %%mm4 \n\t"
02201 "packssdw %%mm3, %%mm2 \n\t"
02202 "pmaddwd %%mm5, %%mm4 \n\t"
02203 "pmaddwd %%mm5, %%mm2 \n\t"
02204 "add $24, %%"REG_d" \n\t"
02205 "packssdw %%mm2, %%mm4 \n\t"
02206 "psraw $7, %%mm4 \n\t"
02207         /* mm4 = Y of the next 4 pixels, as 16-bit words */
02208 "packuswb %%mm4, %%mm0 \n\t"
02209 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"
02210
02211 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
02212 "add $8, %%"REG_a" \n\t"
02213 " js 1b \n\t"
02214 : : "r" (src+width*3), "r" (ydst+width), "g" (-width)
02215 : "%"REG_a, "%"REG_d
02216 );
02217 ydst += lumStride;
02218 src += srcStride;
02219 }
02220 src -= srcStride*2;
02221 __asm__ volatile(
02222 "mov %4, %%"REG_a" \n\t"
02223 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
02224 "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
02225 "pxor %%mm7, %%mm7 \n\t"
02226 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
02227 "add %%"REG_d", %%"REG_d" \n\t"
02228 ASMALIGN(4)
02229 "1: \n\t"
02230 PREFETCH" 64(%0, %%"REG_d") \n\t"
02231 PREFETCH" 64(%1, %%"REG_d") \n\t"
02232 #if HAVE_MMX2 || HAVE_AMD3DNOW
02233 "movq (%0, %%"REG_d"), %%mm0 \n\t"
02234 "movq (%1, %%"REG_d"), %%mm1 \n\t"
02235 "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
02236 "movq 6(%1, %%"REG_d"), %%mm3 \n\t"
02237 PAVGB" %%mm1, %%mm0 \n\t"
02238 PAVGB" %%mm3, %%mm2 \n\t"
02239 "movq %%mm0, %%mm1 \n\t"
02240 "movq %%mm2, %%mm3 \n\t"
02241 "psrlq $24, %%mm0 \n\t"
02242 "psrlq $24, %%mm2 \n\t"
02243 PAVGB" %%mm1, %%mm0 \n\t"
02244 PAVGB" %%mm3, %%mm2 \n\t"
02245 "punpcklbw %%mm7, %%mm0 \n\t"
02246 "punpcklbw %%mm7, %%mm2 \n\t"
02247 #else
02248 "movd (%0, %%"REG_d"), %%mm0 \n\t"
02249 "movd (%1, %%"REG_d"), %%mm1 \n\t"
02250 "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
02251 "movd 3(%1, %%"REG_d"), %%mm3 \n\t"
02252 "punpcklbw %%mm7, %%mm0 \n\t"
02253 "punpcklbw %%mm7, %%mm1 \n\t"
02254 "punpcklbw %%mm7, %%mm2 \n\t"
02255 "punpcklbw %%mm7, %%mm3 \n\t"
02256 "paddw %%mm1, %%mm0 \n\t"
02257 "paddw %%mm3, %%mm2 \n\t"
02258 "paddw %%mm2, %%mm0 \n\t"
02259 "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
02260 "movd 6(%1, %%"REG_d"), %%mm1 \n\t"
02261 "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
02262 "movd 9(%1, %%"REG_d"), %%mm3 \n\t"
02263 "punpcklbw %%mm7, %%mm4 \n\t"
02264 "punpcklbw %%mm7, %%mm1 \n\t"
02265 "punpcklbw %%mm7, %%mm2 \n\t"
02266 "punpcklbw %%mm7, %%mm3 \n\t"
02267 "paddw %%mm1, %%mm4 \n\t"
02268 "paddw %%mm3, %%mm2 \n\t"
02269 "paddw %%mm4, %%mm2 \n\t"
02270 "psrlw $2, %%mm0 \n\t"
02271 "psrlw $2, %%mm2 \n\t"
02272 #endif
02273 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
02274 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
02275         /* mm0/mm2 = averaged B,G,R terms; mm6 = U coefficients, mm1/mm3 = V coefficients */
02276 "pmaddwd %%mm0, %%mm1 \n\t"
02277 "pmaddwd %%mm2, %%mm3 \n\t"
02278 "pmaddwd %%mm6, %%mm0 \n\t"
02279 "pmaddwd %%mm6, %%mm2 \n\t"
02280 #ifndef FAST_BGR2YV12
02281 "psrad $8, %%mm0 \n\t"
02282 "psrad $8, %%mm1 \n\t"
02283 "psrad $8, %%mm2 \n\t"
02284 "psrad $8, %%mm3 \n\t"
02285 #endif
02286 "packssdw %%mm2, %%mm0 \n\t"
02287 "packssdw %%mm3, %%mm1 \n\t"
02288 "pmaddwd %%mm5, %%mm0 \n\t"
02289 "pmaddwd %%mm5, %%mm1 \n\t"
02290 "packssdw %%mm1, %%mm0 \n\t"
02291 "psraw $7, %%mm0 \n\t"
02292
02293 #if HAVE_MMX2 || HAVE_AMD3DNOW
02294 "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
02295 "movq 12(%1, %%"REG_d"), %%mm1 \n\t"
02296 "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
02297 "movq 18(%1, %%"REG_d"), %%mm3 \n\t"
02298 PAVGB" %%mm1, %%mm4 \n\t"
02299 PAVGB" %%mm3, %%mm2 \n\t"
02300 "movq %%mm4, %%mm1 \n\t"
02301 "movq %%mm2, %%mm3 \n\t"
02302 "psrlq $24, %%mm4 \n\t"
02303 "psrlq $24, %%mm2 \n\t"
02304 PAVGB" %%mm1, %%mm4 \n\t"
02305 PAVGB" %%mm3, %%mm2 \n\t"
02306 "punpcklbw %%mm7, %%mm4 \n\t"
02307 "punpcklbw %%mm7, %%mm2 \n\t"
02308 #else
02309 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
02310 "movd 12(%1, %%"REG_d"), %%mm1 \n\t"
02311 "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
02312 "movd 15(%1, %%"REG_d"), %%mm3 \n\t"
02313 "punpcklbw %%mm7, %%mm4 \n\t"
02314 "punpcklbw %%mm7, %%mm1 \n\t"
02315 "punpcklbw %%mm7, %%mm2 \n\t"
02316 "punpcklbw %%mm7, %%mm3 \n\t"
02317 "paddw %%mm1, %%mm4 \n\t"
02318 "paddw %%mm3, %%mm2 \n\t"
02319 "paddw %%mm2, %%mm4 \n\t"
02320 "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
02321 "movd 18(%1, %%"REG_d"), %%mm1 \n\t"
02322 "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
02323 "movd 21(%1, %%"REG_d"), %%mm3 \n\t"
02324 "punpcklbw %%mm7, %%mm5 \n\t"
02325 "punpcklbw %%mm7, %%mm1 \n\t"
02326 "punpcklbw %%mm7, %%mm2 \n\t"
02327 "punpcklbw %%mm7, %%mm3 \n\t"
02328 "paddw %%mm1, %%mm5 \n\t"
02329 "paddw %%mm3, %%mm2 \n\t"
02330 "paddw %%mm5, %%mm2 \n\t"
02331 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
02332 "psrlw $2, %%mm4 \n\t"
02333 "psrlw $2, %%mm2 \n\t"
02334 #endif
02335 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
02336 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
02337
02338 "pmaddwd %%mm4, %%mm1 \n\t"
02339 "pmaddwd %%mm2, %%mm3 \n\t"
02340 "pmaddwd %%mm6, %%mm4 \n\t"
02341 "pmaddwd %%mm6, %%mm2 \n\t"
02342 #ifndef FAST_BGR2YV12
02343 "psrad $8, %%mm4 \n\t"
02344 "psrad $8, %%mm1 \n\t"
02345 "psrad $8, %%mm2 \n\t"
02346 "psrad $8, %%mm3 \n\t"
02347 #endif
02348 "packssdw %%mm2, %%mm4 \n\t"
02349 "packssdw %%mm3, %%mm1 \n\t"
02350 "pmaddwd %%mm5, %%mm4 \n\t"
02351 "pmaddwd %%mm5, %%mm1 \n\t"
02352 "add $24, %%"REG_d" \n\t"
02353 "packssdw %%mm1, %%mm4 \n\t"
02354 "psraw $7, %%mm4 \n\t"
02355
02356 "movq %%mm0, %%mm1 \n\t"
02357 "punpckldq %%mm4, %%mm0 \n\t"
02358 "punpckhdq %%mm4, %%mm1 \n\t"
02359 "packsswb %%mm1, %%mm0 \n\t"
02360 "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
02361 "movd %%mm0, (%2, %%"REG_a") \n\t"
02362 "punpckhdq %%mm0, %%mm0 \n\t"
02363 "movd %%mm0, (%3, %%"REG_a") \n\t"
02364 "add $4, %%"REG_a" \n\t"
02365 " js 1b \n\t"
02366 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
02367 : "%"REG_a, "%"REG_d
02368 );
02369
02370 udst += chromStride;
02371 vdst += chromStride;
02372 src += srcStride*2;
02373 }
02374
02375 __asm__ volatile( EMMS" \n\t"
02376 SFENCE" \n\t"
02377 :::"memory");
02378 #else
02379 y=0;
02380 #endif
02381     for (; y<height; y+=2) /* chroma is taken from the even line of each pair only */
02382 {
02383 long i;
02384 for (i=0; i<chromWidth; i++)
02385 {
02386 unsigned int b = src[6*i+0];
02387 unsigned int g = src[6*i+1];
02388 unsigned int r = src[6*i+2];
02389
02390 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02391 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
02392 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
02393
02394 udst[i] = U;
02395 vdst[i] = V;
02396 ydst[2*i] = Y;
02397
02398 b = src[6*i+3];
02399 g = src[6*i+4];
02400 r = src[6*i+5];
02401
02402 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02403 ydst[2*i+1] = Y;
02404 }
02405 ydst += lumStride;
02406 src += srcStride;
02407
02408 for (i=0; i<chromWidth; i++)
02409 {
02410 unsigned int b = src[6*i+0];
02411 unsigned int g = src[6*i+1];
02412 unsigned int r = src[6*i+2];
02413
02414 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02415
02416 ydst[2*i] = Y;
02417
02418 b = src[6*i+3];
02419 g = src[6*i+4];
02420 r = src[6*i+5];
02421
02422 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02423 ydst[2*i+1] = Y;
02424 }
02425 udst += chromStride;
02426 vdst += chromStride;
02427 ydst += lumStride;
02428 src += srcStride;
02429 }
02430 }
02431 /* Interleave two byte planes line by line: dest[2*i] = src1[i], dest[2*i+1] = src2[i]. */
02432 static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
02433 long width, long height, long src1Stride,
02434 long src2Stride, long dstStride){
02435 long h;
02436
02437 for (h=0; h < height; h++)
02438 {
02439 long w;
02440
02441 #if HAVE_MMX
02442 #if HAVE_SSE2
02443 __asm__(
02444 "xor %%"REG_a", %%"REG_a" \n\t"
02445 "1: \n\t"
02446 PREFETCH" 64(%1, %%"REG_a") \n\t"
02447 PREFETCH" 64(%2, %%"REG_a") \n\t"
02448 "movdqa (%1, %%"REG_a"), %%xmm0 \n\t"
02449         "movdqa (%1, %%"REG_a"), %%xmm1 \n\t" /* same 16 src1 bytes again: the low half is unpacked into %%xmm0, the high half into %%xmm1 */
02450 "movdqa (%2, %%"REG_a"), %%xmm2 \n\t"
02451 "punpcklbw %%xmm2, %%xmm0 \n\t"
02452 "punpckhbw %%xmm2, %%xmm1 \n\t"
02453 "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t"
02454 "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t"
02455 "add $16, %%"REG_a" \n\t"
02456 "cmp %3, %%"REG_a" \n\t"
02457 " jb 1b \n\t"
02458 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
02459         : "memory", "%"REG_a
02460 );
02461 #else
02462 __asm__(
02463 "xor %%"REG_a", %%"REG_a" \n\t"
02464 "1: \n\t"
02465 PREFETCH" 64(%1, %%"REG_a") \n\t"
02466 PREFETCH" 64(%2, %%"REG_a") \n\t"
02467 "movq (%1, %%"REG_a"), %%mm0 \n\t"
02468 "movq 8(%1, %%"REG_a"), %%mm2 \n\t"
02469 "movq %%mm0, %%mm1 \n\t"
02470 "movq %%mm2, %%mm3 \n\t"
02471 "movq (%2, %%"REG_a"), %%mm4 \n\t"
02472 "movq 8(%2, %%"REG_a"), %%mm5 \n\t"
02473 "punpcklbw %%mm4, %%mm0 \n\t"
02474 "punpckhbw %%mm4, %%mm1 \n\t"
02475 "punpcklbw %%mm5, %%mm2 \n\t"
02476 "punpckhbw %%mm5, %%mm3 \n\t"
02477 MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t"
02478 MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t"
02479 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t"
02480 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t"
02481 "add $16, %%"REG_a" \n\t"
02482 "cmp %3, %%"REG_a" \n\t"
02483 " jb 1b \n\t"
02484 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
02485 : "memory", "%"REG_a
02486 );
02487 #endif
02488 for (w= (width&(~15)); w < width; w++)
02489 {
02490 dest[2*w+0] = src1[w];
02491 dest[2*w+1] = src2[w];
02492 }
02493 #else
02494 for (w=0; w < width; w++)
02495 {
02496 dest[2*w+0] = src1[w];
02497 dest[2*w+1] = src2[w];
02498 }
02499 #endif
02500 dest += dstStride;
02501 src1 += src1Stride;
02502 src2 += src2Stride;
02503 }
02504 #if HAVE_MMX
02505 __asm__(
02506 EMMS" \n\t"
02507 SFENCE" \n\t"
02508 ::: "memory"
02509 );
02510 #endif
02511 }
02512 /* Double two chroma planes in both directions: each source sample is written twice per line and each source line is used for two destination lines. */
02513 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
02514 uint8_t *dst1, uint8_t *dst2,
02515 long width, long height,
02516 long srcStride1, long srcStride2,
02517 long dstStride1, long dstStride2)
02518 {
02519 long y,x,w,h;
02520 w=width/2; h=height/2;
02521 #if HAVE_MMX
02522 __asm__ volatile(
02523 PREFETCH" %0 \n\t"
02524 PREFETCH" %1 \n\t"
02525 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
02526 #endif
02527 for (y=0;y<h;y++){
02528 const uint8_t* s1=src1+srcStride1*(y>>1);
02529 uint8_t* d=dst1+dstStride1*y;
02530 x=0;
02531 #if HAVE_MMX
02532 for (;x<w-31;x+=32)
02533 {
02534 __asm__ volatile(
02535 PREFETCH" 32%1 \n\t"
02536 "movq %1, %%mm0 \n\t"
02537 "movq 8%1, %%mm2 \n\t"
02538 "movq 16%1, %%mm4 \n\t"
02539 "movq 24%1, %%mm6 \n\t"
02540 "movq %%mm0, %%mm1 \n\t"
02541 "movq %%mm2, %%mm3 \n\t"
02542 "movq %%mm4, %%mm5 \n\t"
02543 "movq %%mm6, %%mm7 \n\t"
02544 "punpcklbw %%mm0, %%mm0 \n\t"
02545 "punpckhbw %%mm1, %%mm1 \n\t"
02546 "punpcklbw %%mm2, %%mm2 \n\t"
02547 "punpckhbw %%mm3, %%mm3 \n\t"
02548 "punpcklbw %%mm4, %%mm4 \n\t"
02549 "punpckhbw %%mm5, %%mm5 \n\t"
02550 "punpcklbw %%mm6, %%mm6 \n\t"
02551 "punpckhbw %%mm7, %%mm7 \n\t"
02552 MOVNTQ" %%mm0, %0 \n\t"
02553 MOVNTQ" %%mm1, 8%0 \n\t"
02554 MOVNTQ" %%mm2, 16%0 \n\t"
02555 MOVNTQ" %%mm3, 24%0 \n\t"
02556 MOVNTQ" %%mm4, 32%0 \n\t"
02557 MOVNTQ" %%mm5, 40%0 \n\t"
02558 MOVNTQ" %%mm6, 48%0 \n\t"
02559 MOVNTQ" %%mm7, 56%0"
02560 :"=m"(d[2*x])
02561 :"m"(s1[x])
02562 :"memory");
02563 }
02564 #endif
02565 for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
02566 }
02567 for (y=0;y<h;y++){
02568 const uint8_t* s2=src2+srcStride2*(y>>1);
02569 uint8_t* d=dst2+dstStride2*y;
02570 x=0;
02571 #if HAVE_MMX
02572 for (;x<w-31;x+=32)
02573 {
02574 __asm__ volatile(
02575 PREFETCH" 32%1 \n\t"
02576 "movq %1, %%mm0 \n\t"
02577 "movq 8%1, %%mm2 \n\t"
02578 "movq 16%1, %%mm4 \n\t"
02579 "movq 24%1, %%mm6 \n\t"
02580 "movq %%mm0, %%mm1 \n\t"
02581 "movq %%mm2, %%mm3 \n\t"
02582 "movq %%mm4, %%mm5 \n\t"
02583 "movq %%mm6, %%mm7 \n\t"
02584 "punpcklbw %%mm0, %%mm0 \n\t"
02585 "punpckhbw %%mm1, %%mm1 \n\t"
02586 "punpcklbw %%mm2, %%mm2 \n\t"
02587 "punpckhbw %%mm3, %%mm3 \n\t"
02588 "punpcklbw %%mm4, %%mm4 \n\t"
02589 "punpckhbw %%mm5, %%mm5 \n\t"
02590 "punpcklbw %%mm6, %%mm6 \n\t"
02591 "punpckhbw %%mm7, %%mm7 \n\t"
02592 MOVNTQ" %%mm0, %0 \n\t"
02593 MOVNTQ" %%mm1, 8%0 \n\t"
02594 MOVNTQ" %%mm2, 16%0 \n\t"
02595 MOVNTQ" %%mm3, 24%0 \n\t"
02596 MOVNTQ" %%mm4, 32%0 \n\t"
02597 MOVNTQ" %%mm5, 40%0 \n\t"
02598 MOVNTQ" %%mm6, 48%0 \n\t"
02599 MOVNTQ" %%mm7, 56%0"
02600 :"=m"(d[2*x])
02601 :"m"(s2[x])
02602 :"memory");
02603 }
02604 #endif
02605 for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
02606 }
02607 #if HAVE_MMX
02608 __asm__(
02609 EMMS" \n\t"
02610 SFENCE" \n\t"
02611 ::: "memory"
02612 );
02613 #endif
02614 }
02615 /* Pack planar YVU9 (4:1:0) into packed YUY2: each chroma sample serves four luma samples horizontally and each chroma line serves four picture lines. */
02616 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
02617 uint8_t *dst,
02618 long width, long height,
02619 long srcStride1, long srcStride2,
02620 long srcStride3, long dstStride)
02621 {
02622 long y,x,w,h;
02623 w=width/2; h=height;
02624 for (y=0;y<h;y++){
02625 const uint8_t* yp=src1+srcStride1*y;
02626 const uint8_t* up=src2+srcStride2*(y>>2);
02627 const uint8_t* vp=src3+srcStride3*(y>>2);
02628 uint8_t* d=dst+dstStride*y;
02629 x=0;
02630 #if HAVE_MMX
02631 for (;x<w-7;x+=8)
02632 {
02633 __asm__ volatile(
02634 PREFETCH" 32(%1, %0) \n\t"
02635 PREFETCH" 32(%2, %0) \n\t"
02636 PREFETCH" 32(%3, %0) \n\t"
02637 "movq (%1, %0, 4), %%mm0 \n\t"
02638 "movq (%2, %0), %%mm1 \n\t"
02639 "movq (%3, %0), %%mm2 \n\t"
02640 "movq %%mm0, %%mm3 \n\t"
02641 "movq %%mm1, %%mm4 \n\t"
02642 "movq %%mm2, %%mm5 \n\t"
02643 "punpcklbw %%mm1, %%mm1 \n\t"
02644 "punpcklbw %%mm2, %%mm2 \n\t"
02645 "punpckhbw %%mm4, %%mm4 \n\t"
02646 "punpckhbw %%mm5, %%mm5 \n\t"
02647
02648 "movq %%mm1, %%mm6 \n\t"
02649 "punpcklbw %%mm2, %%mm1 \n\t"
02650 "punpcklbw %%mm1, %%mm0 \n\t"
02651 "punpckhbw %%mm1, %%mm3 \n\t"
02652 MOVNTQ" %%mm0, (%4, %0, 8) \n\t"
02653 MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t"
02654
02655 "punpckhbw %%mm2, %%mm6 \n\t"
02656 "movq 8(%1, %0, 4), %%mm0 \n\t"
02657 "movq %%mm0, %%mm3 \n\t"
02658 "punpcklbw %%mm6, %%mm0 \n\t"
02659 "punpckhbw %%mm6, %%mm3 \n\t"
02660 MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t"
02661 MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t"
02662
02663 "movq %%mm4, %%mm6 \n\t"
02664 "movq 16(%1, %0, 4), %%mm0 \n\t"
02665 "movq %%mm0, %%mm3 \n\t"
02666 "punpcklbw %%mm5, %%mm4 \n\t"
02667 "punpcklbw %%mm4, %%mm0 \n\t"
02668 "punpckhbw %%mm4, %%mm3 \n\t"
02669 MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t"
02670 MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t"
02671
02672 "punpckhbw %%mm5, %%mm6 \n\t"
02673 "movq 24(%1, %0, 4), %%mm0 \n\t"
02674 "movq %%mm0, %%mm3 \n\t"
02675 "punpcklbw %%mm6, %%mm0 \n\t"
02676 "punpckhbw %%mm6, %%mm3 \n\t"
02677 MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t"
02678 MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t"
02679
02680 : "+r" (x)
02681 : "r"(yp), "r" (up), "r"(vp), "r"(d)
02682 :"memory");
02683 }
02684 #endif
02685 for (; x<w; x++)
02686 {
02687 const long x2 = x<<2;
02688 d[8*x+0] = yp[x2];
02689 d[8*x+1] = up[x];
02690 d[8*x+2] = yp[x2+1];
02691 d[8*x+3] = vp[x];
02692 d[8*x+4] = yp[x2+2];
02693 d[8*x+5] = up[x];
02694 d[8*x+6] = yp[x2+3];
02695 d[8*x+7] = vp[x];
02696 }
02697 }
02698 #if HAVE_MMX
02699 __asm__(
02700 EMMS" \n\t"
02701 SFENCE" \n\t"
02702 ::: "memory"
02703 );
02704 #endif
02705 }
02706 /* Install the CPU-specific implementations above into the exported rgb2rgb function pointers. */
02707 static inline void RENAME(rgb2rgb_init)(void){
02708 rgb15to16 = RENAME(rgb15to16);
02709 rgb15tobgr24 = RENAME(rgb15tobgr24);
02710 rgb15to32 = RENAME(rgb15to32);
02711 rgb16tobgr24 = RENAME(rgb16tobgr24);
02712 rgb16to32 = RENAME(rgb16to32);
02713 rgb16to15 = RENAME(rgb16to15);
02714 rgb24tobgr16 = RENAME(rgb24tobgr16);
02715 rgb24tobgr15 = RENAME(rgb24tobgr15);
02716 rgb24tobgr32 = RENAME(rgb24tobgr32);
02717 rgb32to16 = RENAME(rgb32to16);
02718 rgb32to15 = RENAME(rgb32to15);
02719 rgb32tobgr24 = RENAME(rgb32tobgr24);
02720 rgb24to15 = RENAME(rgb24to15);
02721 rgb24to16 = RENAME(rgb24to16);
02722 rgb24tobgr24 = RENAME(rgb24tobgr24);
02723 rgb32tobgr32 = RENAME(rgb32tobgr32);
02724 rgb32tobgr16 = RENAME(rgb32tobgr16);
02725 rgb32tobgr15 = RENAME(rgb32tobgr15);
02726 yv12toyuy2 = RENAME(yv12toyuy2);
02727 yv12touyvy = RENAME(yv12touyvy);
02728 yuv422ptoyuy2 = RENAME(yuv422ptoyuy2);
02729 yuv422ptouyvy = RENAME(yuv422ptouyvy);
02730 yuy2toyv12 = RENAME(yuy2toyv12);
02731
02732
02733 planar2x = RENAME(planar2x);
02734 rgb24toyv12 = RENAME(rgb24toyv12);
02735 interleaveBytes = RENAME(interleaveBytes);
02736 vu9_to_vu12 = RENAME(vu9_to_vu12);
02737 yvu9_to_yuy2 = RENAME(yvu9_to_yuy2);
02738 }