static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               ptrdiff_t stride)
{
    int i;
    vector unsigned char perm =
        (vector unsigned char) {0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
                                0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17};
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);

    for (i = 0; i < 8; i++) {
        /* Read potentially unaligned pixels: 16 bytes are loaded, but only
         * the first 8 are actually wanted. */
        vector unsigned char bytes = vec_vsx_ld(0, pixels);

        /* Interleave the pixel bytes with zero bytes to widen them into
         * signed shorts. */
        vector signed short shorts =
            (vector signed short) vec_perm(bytes, zero, perm);

        /* Store one row of the block; the block is assumed to be
         * 16-byte aligned. */
        vec_vsx_st(shorts, i * 16, (vector signed short *) block);

        pixels += stride;
    }
}
#else
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               ptrdiff_t stride)
{
    int i;
    vec_u8 perm = vec_lvsl(0, pixels);
    const vec_u8 zero = (const vec_u8) vec_splat_u8(0);
    for (i = 0; i < 8; i++) {
        /* Read potentially unaligned pixels; only the first 8 are wanted. */
        vec_u8 pixl  = vec_ld(0, pixels);
        vec_u8 pixr  = vec_ld(7, pixels);
        vec_u8 bytes = vec_perm(pixl, pixr, perm);
        /* Widen the bytes to shorts and store to the 16-byte-aligned block. */
        vec_st((vec_s16) vec_mergeh(zero, bytes), i * 16, (vec_s16 *) block);
        pixels += stride;
    }
}
#endif /* HAVE_VSX */
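/*
 * Aside (illustrative sketch, not part of the original file; the helper name
 * below is made up). The pre-VSX variant above cannot load from an arbitrary
 * address, because plain AltiVec vec_ld only fetches 16-byte-aligned data.
 * It therefore uses the classic vec_lvsl + two vec_ld + vec_perm idiom,
 * isolated here, whereas the VSX variant can rely on vec_vsx_ld accepting
 * misaligned addresses directly.
 */
static inline vector unsigned char load16_unaligned(const uint8_t *p)
{
    vector unsigned char perm  = vec_lvsl(0, p);  /* shift pattern derived from p's misalignment */
    vector unsigned char left  = vec_ld(0, p);    /* aligned load covering the first bytes */
    vector unsigned char right = vec_ld(15, p);   /* aligned load covering the last bytes */
    return vec_perm(left, right, perm);           /* splice out the 16 bytes starting at p */
}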
#if HAVE_VSX
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, ptrdiff_t stride)
{
    int i;
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);
    vector signed short shorts1, shorts2;

    for (i = 0; i < 4; i++) {
        /* Read potentially unaligned pixels; only the first 8 are wanted. */
        vector unsigned char bytes = vec_vsx_ld(0, s1);

        /* Convert the bytes into shorts. */
        shorts1 = (vector signed short) vec_mergeh(bytes, zero);

        /* Do the same for the second block of pixels. */
        bytes   = vec_vsx_ld(0, s2);
        shorts2 = (vector signed short) vec_mergeh(bytes, zero);

        /* Do the subtraction. */
        shorts1 = vec_sub(shorts1, shorts2);

        /* Store one row; the block is assumed to be 16-byte aligned. */
        vec_vsx_st(shorts1, 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;

        /* Manual unroll: repeat the steps above for the next row. */
        bytes   = vec_vsx_ld(0, s1);
        shorts1 = (vector signed short) vec_mergeh(bytes, zero);

        bytes   = vec_vsx_ld(0, s2);
        shorts2 = (vector signed short) vec_mergeh(bytes, zero);

        shorts1 = vec_sub(shorts1, shorts2);

        vec_vsx_st(shorts1, 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
#else
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, ptrdiff_t stride)
{
    int i;
    vec_u8 perm;
    const vec_u8 zero = (const vec_u8) vec_splat_u8(0);
    vec_s16 shorts1, shorts2;

    for (i = 0; i < 4; i++) {
        /* Read potentially unaligned pixels; only the first 8 are wanted. */
        perm = vec_lvsl(0, s1);
        vec_u8 pixl  = vec_ld(0, s1);
        vec_u8 pixr  = vec_ld(15, s1);
        vec_u8 bytes = vec_perm(pixl, pixr, perm);

        /* Convert the bytes into shorts. */
        shorts1 = (vec_s16) vec_mergeh(zero, bytes);

        /* Do the same for the second block of pixels. */
        perm  = vec_lvsl(0, s2);
        pixl  = vec_ld(0, s2);
        pixr  = vec_ld(15, s2);
        bytes = vec_perm(pixl, pixr, perm);
        shorts2 = (vec_s16) vec_mergeh(zero, bytes);

        /* Subtract and store one row; the block is 16-byte aligned. */
        shorts1 = vec_sub(shorts1, shorts2);
        vec_st(shorts1, 0, (vec_s16 *) block);

        s1    += stride;
        s2    += stride;
        block += 8;

        /* Manual unroll: repeat the steps above for the next row. */
        perm  = vec_lvsl(0, s1);
        pixl  = vec_ld(0, s1);
        pixr  = vec_ld(15, s1);
        bytes = vec_perm(pixl, pixr, perm);
        shorts1 = (vec_s16) vec_mergeh(zero, bytes);

        perm  = vec_lvsl(0, s2);
        pixl  = vec_ld(0, s2);
        pixr  = vec_ld(15, s2);
        bytes = vec_perm(pixl, pixr, perm);
        shorts2 = (vec_s16) vec_mergeh(zero, bytes);

        shorts1 = vec_sub(shorts1, shorts2);
        vec_st(shorts1, 0, (vec_s16 *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
#endif /* HAVE_VSX */
#endif /* HAVE_ALTIVEC */
#if HAVE_VSX
static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
                           ptrdiff_t stride)
{
    int i;
    for (i = 0; i < 8; i++) {
        /* Load 8 pixels and widen them to signed shorts in one helper. */
        vec_s16 shorts = vsx_ld_u8_s16(0, pixels);

        vec_vsx_st(shorts, i * 16, block);

        pixels += stride;
    }
}
static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
                            const uint8_t *s2, ptrdiff_t stride)
{
    int i;
    vec_s16 shorts1, shorts2;
    for (i = 0; i < 8; i++) {
        shorts1 = vsx_ld_u8_s16(0, s1);
        shorts2 = vsx_ld_u8_s16(0, s2);

        shorts1 = vec_sub(shorts1, shorts2);

        vec_vsx_st(shorts1, 0, block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
#endif /* HAVE_VSX */
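/*
 * Scalar reference sketch (illustrative only, not part of the original file;
 * the *_ref names are made up for this aside). This is the arithmetic the
 * AltiVec and VSX paths above vectorize: get_pixels widens an 8x8 block of
 * unsigned 8-bit pixels into 16-bit coefficients, and diff_pixels stores the
 * element-wise difference of two 8x8 pixel blocks, one row per iteration.
 */
static void get_pixels_ref(int16_t *restrict block, const uint8_t *pixels,
                           ptrdiff_t stride)
{
    for (int i = 0; i < 8; i++) {
        for (int j = 0; j < 8; j++)
            block[i * 8 + j] = pixels[j];   /* zero-extend each pixel to 16 bits */
        pixels += stride;                   /* advance to the next source row */
    }
}

static void diff_pixels_ref(int16_t *restrict block, const uint8_t *s1,
                            const uint8_t *s2, ptrdiff_t stride)
{
    for (int i = 0; i < 8; i++) {
        for (int j = 0; j < 8; j++)
            block[i * 8 + j] = s1[j] - s2[j];   /* difference always fits in int16_t */
        s1 += stride;
        s2 += stride;
    }
}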
av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
                                     unsigned high_bit_depth)
{
#if HAVE_ALTIVEC
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

    c->diff_pixels = diff_pixels_altivec;
    if (!high_bit_depth)
        c->get_pixels = get_pixels_altivec;
#endif /* HAVE_ALTIVEC */

#if HAVE_VSX
    c->diff_pixels = diff_pixels_vsx;
    if (!high_bit_depth)
        c->get_pixels = get_pixels_vsx;
#endif /* HAVE_VSX */
}