26 uint8_t *restrict pixels,
31 ptrdiff_t stride_2x =
stride << 1;
32 ptrdiff_t stride_4x =
stride << 2;
33 ptrdiff_t stride_3x = stride_2x +
stride;
39 __lasx_xvstelm_d(temp0, pixels, 0, 0);
40 __lasx_xvstelm_d(temp0, pixels +
stride, 0, 2);
41 __lasx_xvstelm_d(temp0, pixels + stride_2x, 0, 1);
42 __lasx_xvstelm_d(temp0, pixels + stride_3x, 0, 3);
44 __lasx_xvstelm_d(temp1, pixels, 0, 0);
45 __lasx_xvstelm_d(temp1, pixels +
stride, 0, 2);
46 __lasx_xvstelm_d(temp1, pixels + stride_2x, 0, 1);
47 __lasx_xvstelm_d(temp1, pixels + stride_3x, 0, 3);
51 uint8_t *restrict pixels,
56 __m256i const_128 = {0x0080008000800080, 0x0080008000800080,
57 0x0080008000800080, 0x0080008000800080};
58 ptrdiff_t stride_2x =
stride << 1;
59 ptrdiff_t stride_4x =
stride << 2;
60 ptrdiff_t stride_3x = stride_2x +
stride;
68 __lasx_xvstelm_d(temp0, pixels, 0, 0);
69 __lasx_xvstelm_d(temp0, pixels +
stride, 0, 2);
70 __lasx_xvstelm_d(temp0, pixels + stride_2x, 0, 1);
71 __lasx_xvstelm_d(temp0, pixels + stride_3x, 0, 3);
73 __lasx_xvstelm_d(temp1, pixels, 0, 0);
74 __lasx_xvstelm_d(temp1, pixels +
stride, 0, 2);
75 __lasx_xvstelm_d(temp1, pixels + stride_2x, 0, 1);
76 __lasx_xvstelm_d(temp1, pixels + stride_3x, 0, 3);
80 uint8_t *restrict pixels,
84 __m256i p0, p1, p2, p3, p4, p5, p6, p7;
85 __m256i temp0, temp1, temp2, temp3;
86 uint8_t *pix = pixels;
87 ptrdiff_t stride_2x =
stride << 1;
88 ptrdiff_t stride_4x =
stride << 2;
89 ptrdiff_t stride_3x = stride_2x +
stride;
93 p0 = __lasx_xvldrepl_d(pix, 0);
95 p1 = __lasx_xvldrepl_d(pix, 0);
97 p2 = __lasx_xvldrepl_d(pix, 0);
99 p3 = __lasx_xvldrepl_d(pix, 0);
101 p4 = __lasx_xvldrepl_d(pix, 0);
103 p5 = __lasx_xvldrepl_d(pix, 0);
105 p6 = __lasx_xvldrepl_d(pix, 0);
107 p7 = __lasx_xvldrepl_d(pix, 0);
108 DUP4_ARG3(__lasx_xvpermi_q, p1, p0, 0x20, p3, p2, 0x20, p5, p4, 0x20,
109 p7, p6, 0x20, temp0, temp1, temp2, temp3);
111 temp0, temp1, temp2, temp3);
112 DUP4_ARG1(__lasx_xvclip255_h, temp0, temp1, temp2, temp3,
113 temp0, temp1, temp2, temp3);
114 DUP2_ARG2(__lasx_xvpickev_b, temp1, temp0, temp3, temp2, temp0, temp1);
115 __lasx_xvstelm_d(temp0, pixels, 0, 0);
116 __lasx_xvstelm_d(temp0, pixels +
stride, 0, 2);
117 __lasx_xvstelm_d(temp0, pixels + stride_2x, 0, 1);
118 __lasx_xvstelm_d(temp0, pixels + stride_3x, 0, 3);
120 __lasx_xvstelm_d(temp1, pixels, 0, 0);
121 __lasx_xvstelm_d(temp1, pixels +
stride, 0, 2);
122 __lasx_xvstelm_d(temp1, pixels + stride_2x, 0, 1);
123 __lasx_xvstelm_d(temp1, pixels + stride_3x, 0, 3);