27 const float *src1,
int len)
30 vector
float d,
s0,
s1, h0, l0,
31 s2,
s3,
zero = (vector float)vec_splat_u32(0);
33 for(i=0; i<len-7; i+=8) {
34 s1 = vec_ld(0, src1-i);
35 s0 = vec_ld(0, src0+i);
36 l0 = vec_mergel(s1, s1);
37 s3 = vec_ld(-16, src1-i);
38 h0 = vec_mergeh(s1, s1);
39 s2 = vec_ld(16, src0+i);
40 s1 = vec_mergeh(vec_mergel(l0,h0),
43 l0 = vec_mergel(s3, s3);
44 d = vec_madd(s0, s1, zero);
45 h0 = vec_mergeh(s3, s3);
47 s3 = vec_mergeh(vec_mergel(l0,h0),
49 d = vec_madd(s2, s3, zero);
55 const float *src1,
const float *src2,
60 vector
unsigned char align = vec_lvsr(0,dst),
61 mask = vec_lvsl(0, dst);
63 for (i=0; i<len-3; i+=4) {
64 t0 = vec_ld(0, dst+i);
65 t1 = vec_ld(15, dst+i);
66 s0 = vec_ld(0, src0+i);
67 s1 = vec_ld(0, src1+i);
68 s2 = vec_ld(0, src2+i);
69 edges = vec_perm(t1 ,t0,
mask);
70 d = vec_madd(s0,s1,s2);
71 t1 = vec_perm(d, edges, align);
72 t0 = vec_perm(edges, d, align);
73 vec_st(t1, 15, dst+i);
81 const vector
unsigned char reverse =
vcprm(3,2,1,0);
88 zero = (vector float)vec_splat_u32(0);
90 for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
96 s1 = vec_perm(s1, s1, reverse);
97 wj = vec_perm(wj, wj, reverse);
99 t0 = vec_madd(s0, wj, zero);
100 t0 = vec_nmsub(s1, wi, t0);
101 t1 = vec_madd(s0, wi, zero);
102 t1 = vec_madd(s1, wj, t1);
103 t1 = vec_perm(t1, t1, reverse);