27 ptrdiff_t line_size,
int h);
29 static inline uint64_t
avg2(uint64_t
a, uint64_t
b)
34 static inline uint64_t
avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
36 uint64_t r1 = ((l1 & ~
BYTE_VEC(0x03)) >> 2)
40 uint64_t r2 = (( (l1 &
BYTE_VEC(0x03))
49 ptrdiff_t line_size,
int h)
53 if ((
size_t) pix2 & 0x7) {
82 ptrdiff_t line_size,
int h)
85 uint64_t disalign = (size_t) pix2 & 0x7;
90 uint64_t p1_l, p1_r, p2_l, p2_r;
97 p2_l =
avg2(l, (l >> 8) | ((uint64_t)
r << 56));
98 p2_r =
avg2(
r, (
r >> 8) | ((uint64_t) pix2[16] << 56));
112 uint64_t p1_l, p1_r, p2_l, p2_r;
116 p1_r =
ldq(pix1 + 8);
131 uint64_t disalign1 = disalign + 1;
132 uint64_t p1_l, p1_r, p2_l, p2_r;
136 p1_r =
ldq(pix1 + 8);
156 ptrdiff_t line_size,
int h)
160 if ((
size_t) pix2 & 0x7) {
161 uint64_t t, p2_l, p2_r;
167 uint64_t p1_l, p1_r, np2_l, np2_r;
171 p1_r =
ldq(pix1 + 8);
188 p2_r =
ldq(pix2 + 8);
190 uint64_t p1_l, p1_r, np2_l, np2_r;
193 p1_r =
ldq(pix1 + 8);
196 np2_r =
ldq(pix2 + 8);
210 ptrdiff_t line_size,
int h)
215 uint64_t p2_l, p2_r, p2_x;
218 p1_r =
ldq(pix1 + 8);
220 if ((
size_t) pix2 & 0x7) {
222 p2_r =
uldq(pix2 + 8);
223 p2_x = (uint64_t) pix2[16] << 56;
226 p2_r =
ldq(pix2 + 8);
227 p2_x =
ldq(pix2 + 16) << 56;
231 uint64_t np1_l, np1_r;
232 uint64_t np2_l, np2_r, np2_x;
238 np1_r =
ldq(pix1 + 8);
240 if ((
size_t) pix2 & 0x7) {
242 np2_r =
uldq(pix2 + 8);
243 np2_x = (uint64_t) pix2[16] << 56;
246 np2_r =
ldq(pix2 + 8);
247 np2_x =
ldq(pix2 + 16) << 56;
251 avg4( p2_l, ( p2_l >> 8) | ((uint64_t) p2_r << 56),
252 np2_l, (np2_l >> 8) | ((uint64_t) np2_r << 56)))
254 avg4( p2_r, ( p2_r >> 8) | ((uint64_t) p2_x),
255 np2_r, (np2_r >> 8) | ((uint64_t) np2_x)));