/*
 * Term gates used inside the hand-written sum-of-products expressions of
 * this file.
 *
 * Gn(m) yields (m) when `nz` is strictly greater than n, and 0 otherwise.
 *
 * `nz` is NOT a macro parameter: it is taken from whatever scope the macro
 * is expanded in, so these macros may only be used where a variable named
 * `nz` is visible.
 *
 * (m) is evaluated at most once, and is not evaluated at all when the gate
 * is closed, because it sits in the unselected arm of the conditional.
 */
#define G2(m)  ((nz >  2) ? (m) : 0)
#define G4(m)  ((nz >  4) ? (m) : 0)
#define G8(m)  ((nz >  8) ? (m) : 0)
#define G16(m) ((nz > 16) ? (m) : 0)
   78     const int x0 = coeffs[0 * 
stride], x1 = coeffs[1 * 
stride];
 
   80     coeffs[0 * 
stride] = 
a * (x0 + x1);
 
   81     coeffs[1 * 
stride] = 
a * (x0 - x1);
 
   94     const int a = 64, 
b = 83, 
c = 36;
 
   95     const int x0 = coeffs[0 * 
stride], x1 = coeffs[1 * 
stride];
 
   96     const int x2 = coeffs[2 * 
stride], x3 = coeffs[3 * 
stride];
 
  102         b * x1 + 
G2(+
c * x3),
 
  103         c * x1 + 
G2(-
b * x3),
 
  106     coeffs[0 * 
stride] = 
E[0] + O[0];
 
  107     coeffs[1 * 
stride] = 
E[1] + O[1];
 
  108     coeffs[2 * 
stride] = 
E[1] - O[1];
 
  109     coeffs[3 * 
stride] = 
E[0] - O[0];
 
  126     const int a = 64, 
b = 83, 
c = 36, d = 89, e = 75, 
f = 50, 
g = 18;
 
  127     const int x0 = coeffs[0 * 
stride], x1 = coeffs[1 * 
stride];
 
  128     const int x2 = coeffs[2 * 
stride], x3 = coeffs[3 * 
stride];
 
  129     const int x4 = coeffs[4 * 
stride], x5 = coeffs[5 * 
stride];
 
  130     const int x6 = coeffs[6 * 
stride], x7 = coeffs[7 * 
stride];
 
  140         EE[0] + EO[0], EE[1] + EO[1],
 
  141         EE[1] - EO[1], EE[0] - EO[0],
 
  144         d * x1 + 
G2(+e * x3) + 
G4(+
f * x5 + 
g * x7),
 
  145         e * x1 + 
G2(-
g * x3) + 
G4(-d * x5 - 
f * x7),
 
  146         f * x1 + 
G2(-d * x3) + 
G4(+
g * x5 + e * x7),
 
  147         g * x1 + 
G2(-
f * x3) + 
G4(+e * x5 - d * x7),
 
  150     coeffs[0 * 
stride] = 
E[0] + O[0];
 
  151     coeffs[1 * 
stride] = 
E[1] + O[1];
 
  152     coeffs[2 * 
stride] = 
E[2] + O[2];
 
  153     coeffs[3 * 
stride] = 
E[3] + O[3];
 
  154     coeffs[4 * 
stride] = 
E[3] - O[3];
 
  155     coeffs[5 * 
stride] = 
E[2] - O[2];
 
  156     coeffs[6 * 
stride] = 
E[1] - O[1];
 
  157     coeffs[7 * 
stride] = 
E[0] - O[0];
 
  182     const int a = 64, 
b = 83, 
c = 36, d = 89, e = 75, 
f = 50, 
g = 18, 
h = 90;
 
  183     const int i = 87, j = 80, k = 70, l = 57, m = 43, n = 25, o =  9;
 
  184     const int x0  = coeffs[0  * 
stride], x1  = coeffs[1  * 
stride];
 
  185     const int x2  = coeffs[2  * 
stride], x3  = coeffs[3  * 
stride];
 
  186     const int x4  = coeffs[4  * 
stride], x5  = coeffs[5  * 
stride];
 
  187     const int x6  = coeffs[6  * 
stride], x7  = coeffs[7  * 
stride];
 
  188     const int x8  = coeffs[8  * 
stride], x9  = coeffs[9  * 
stride];
 
  189     const int x10 = coeffs[10 * 
stride], x11 = coeffs[11 * 
stride];
 
  190     const int x12 = coeffs[12 * 
stride], x13 = coeffs[13 * 
stride];
 
  191     const int x14 = coeffs[14 * 
stride], x15 = coeffs[15 * 
stride];
 
  197         G4(
b * x4) + 
G8(+
c * x12),
 
  198         G4(
c * x4) + 
G8(-
b * x12),
 
  201         EEE[0] + EEO[0], EEE[1] + EEO[1],
 
  202         EEE[1] - EEO[1], EEE[0] - EEO[0],
 
  205         G2(d * x2)  + 
G4(+e * x6) + 
G8(+
f * x10 + 
g * x14),
 
  206         G2(e * x2)  + 
G4(-
g * x6) + 
G8(-d * x10 - 
f * x14),
 
  207         G2(
f * x2)  + 
G4(-d * x6) + 
G8(+
g * x10 + e * x14),
 
  208         G2(
g * x2)  + 
G4(-
f * x6) + 
G8(+e * x10 - d * x14),
 
  211         EE[0] + EO[0], EE[1] + EO[1], EE[2] + EO[2], EE[3] + EO[3],
 
  212         EE[3] - EO[3], EE[2] - EO[2], EE[1] - EO[1], EE[0] - EO[0],
 
  215         h * x1 + 
G2(+
i * x3) + 
G4(+j * x5 + k * x7) + 
G8(+l * x9 + m * x11 + n * x13 + o * x15),
 
  216         i * x1 + 
G2(+l * x3) + 
G4(+o * x5 - m * x7) + 
G8(-j * x9 - 
h * x11 - k * x13 - n * x15),
 
  217         j * x1 + 
G2(+o * x3) + 
G4(-k * x5 - 
i * x7) + 
G8(-n * x9 + l * x11 + 
h * x13 + m * x15),
 
  218         k * x1 + 
G2(-m * x3) + 
G4(-
i * x5 + o * x7) + 
G8(+
h * x9 + n * x11 - j * x13 - l * x15),
 
  219         l * x1 + 
G2(-j * x3) + 
G4(-n * x5 + 
h * x7) + 
G8(-o * x9 - 
i * x11 + m * x13 + k * x15),
 
  220         m * x1 + 
G2(-
h * x3) + 
G4(+l * x5 + n * x7) + 
G8(-
i * x9 + k * x11 + o * x13 - j * x15),
 
  221         n * x1 + 
G2(-k * x3) + 
G4(+
h * x5 - j * x7) + 
G8(+m * x9 + o * x11 - l * x13 + 
i * x15),
 
  222         o * x1 + 
G2(-n * x3) + 
G4(+m * x5 - l * x7) + 
G8(+k * x9 - j * x11 + 
i * x13 - 
h * x15),
 
  225     coeffs[0  * 
stride] = 
E[0] + O[0];
 
  226     coeffs[1  * 
stride] = 
E[1] + O[1];
 
  227     coeffs[2  * 
stride] = 
E[2] + O[2];
 
  228     coeffs[3  * 
stride] = 
E[3] + O[3];
 
  229     coeffs[4  * 
stride] = 
E[4] + O[4];
 
  230     coeffs[5  * 
stride] = 
E[5] + O[5];
 
  231     coeffs[6  * 
stride] = 
E[6] + O[6];
 
  232     coeffs[7  * 
stride] = 
E[7] + O[7];
 
  233     coeffs[8  * 
stride] = 
E[7] - O[7];
 
  234     coeffs[9  * 
stride] = 
E[6] - O[6];
 
  235     coeffs[10 * 
stride] = 
E[5] - O[5];
 
  236     coeffs[11 * 
stride] = 
E[4] - O[4];
 
  237     coeffs[12 * 
stride] = 
E[3] - O[3];
 
  238     coeffs[13 * 
stride] = 
E[2] - O[2];
 
  239     coeffs[14 * 
stride] = 
E[1] - O[1];
 
  240     coeffs[15 * 
stride] = 
E[0] - O[0];
 
  281     const int a = 64, 
b = 83, 
c = 36, d = 89, e = 75, 
f = 50, 
g = 18, 
h = 90;
 
  282     const int i = 87, j = 80, k = 70, l = 57, m = 43, n = 25, o =  9, 
p = 90;
 
  283     const int q = 90, 
r = 88, 
s = 85, t = 82, 
u = 78, v = 73, 
w = 67, x = 61;
 
  284     const int y = 54, z = 46, 
A = 38, 
B = 31, 
C = 22, 
D = 13, E_=  4;
 
  285     const int x0  = coeffs[0  * 
stride], x1  = coeffs[1  * 
stride];
 
  286     const int x2  = coeffs[2  * 
stride], x3  = coeffs[3  * 
stride];
 
  287     const int x4  = coeffs[4  * 
stride], x5  = coeffs[5  * 
stride];
 
  288     const int x6  = coeffs[6  * 
stride], x7  = coeffs[7  * 
stride];
 
  289     const int x8  = coeffs[8  * 
stride], x9  = coeffs[9  * 
stride];
 
  290     const int x10 = coeffs[10 * 
stride], x11 = coeffs[11 * 
stride];
 
  291     const int x12 = coeffs[12 * 
stride], x13 = coeffs[13 * 
stride];
 
  292     const int x14 = coeffs[14 * 
stride], x15 = coeffs[15 * 
stride];
 
  293     const int x16 = coeffs[16 * 
stride], x17 = coeffs[17 * 
stride];
 
  294     const int x18 = coeffs[18 * 
stride], x19 = coeffs[19 * 
stride];
 
  295     const int x20 = coeffs[20 * 
stride], x21 = coeffs[21 * 
stride];
 
  296     const int x22 = coeffs[22 * 
stride], x23 = coeffs[23 * 
stride];
 
  297     const int x24 = coeffs[24 * 
stride], x25 = coeffs[25 * 
stride];
 
  298     const int x26 = coeffs[26 * 
stride], x27 = coeffs[27 * 
stride];
 
  299     const int x28 = coeffs[28 * 
stride], x29 = coeffs[29 * 
stride];
 
  300     const int x30 = coeffs[30 * 
stride], x31 = coeffs[31 * 
stride];
 
  301     const int EEEE[2] = {
 
  302         a * (x0 + 
G16(+x16)),
 
  303         a * (x0 + 
G16(-x16)),
 
  305     const int EEEO[2] = {
 
  310         EEEE[0] + EEEO[0], EEEE[1] + EEEO[1],
 
  311         EEEE[1] - EEEO[1], EEEE[0] - EEEO[0],
 
  314         G4(d * x4) + 
G8(+e * x12) + 
G16(+
f * x20 + 
g * x28),
 
  315         G4(e * x4) + 
G8(-
g * x12) + 
G16(-d * x20 - 
f * x28),
 
  316         G4(
f * x4) + 
G8(-d * x12) + 
G16(+
g * x20 + e * x28),
 
  317         G4(
g * x4) + 
G8(-
f * x12) + 
G16(+e * x20 - d * x28),
 
  320         EEE[0] + EEO[0], EEE[1] + EEO[1], EEE[2] + EEO[2], EEE[3] + EEO[3],
 
  321         EEE[3] - EEO[3], EEE[2] - EEO[2], EEE[1] - EEO[1], EEE[0] - EEO[0],
 
  324         G2(
h * x2) + 
G4(+
i * x6) + 
G8(+ j * x10 + k * x14) + 
G16(+l * x18 + m * x22 + n * x26 + o * x30),
 
  325         G2(
i * x2) + 
G4(+l * x6) + 
G8(+ o * x10 - m * x14) + 
G16(-j * x18 - 
h * x22 - k * x26 - n * x30),
 
  326         G2(j * x2) + 
G4(+o * x6) + 
G8(- k * x10 - 
i * x14) + 
G16(-n * x18 + l * x22 + 
h * x26 + m * x30),
 
  327         G2(k * x2) + 
G4(-m * x6) + 
G8(- 
i * x10 + o * x14) + 
G16(+
h * x18 + n * x22 - j * x26 - l * x30),
 
  328         G2(l * x2) + 
G4(-j * x6) + 
G8(- n * x10 + 
h * x14) + 
G16(-o * x18 - 
i * x22 + m * x26 + k * x30),
 
  329         G2(m * x2) + 
G4(-
h * x6) + 
G8(+ l * x10 + n * x14) + 
G16(-
i * x18 + k * x22 + o * x26 - j * x30),
 
  330         G2(n * x2) + 
G4(-k * x6) + 
G8(+ 
h * x10 - j * x14) + 
G16(+m * x18 + o * x22 - l * x26 + 
i * x30),
 
  331         G2(o * x2) + 
G4(-n * x6) + 
G8(+ m * x10 - l * x14) + 
G16(+k * x18 - j * x22 + 
i * x26 - 
h * x30),
 
  334         EE[0] + EO[0], EE[1] + EO[1], EE[2] + EO[2], EE[3] + EO[3], EE[4] + EO[4], EE[5] + EO[5], EE[6] + EO[6], EE[7] + EO[7],
 
  335         EE[7] - EO[7], EE[6] - EO[6], EE[5] - EO[5], EE[4] - EO[4], EE[3] - EO[3], EE[2] - EO[2], EE[1] - EO[1], EE[0] - EO[0],
 
  338         p * x1 + 
G2(+
q * x3) + 
G4(+
r * x5 + 
s * x7) + 
G8(+t * x9 + 
u * x11 + v * x13 + 
w * x15) + 
G16(+x * x17 + y * x19 + z * x21 + 
A * x23 + 
B * x25 + 
C * x27 + 
D * x29 + E_* x31),
 
  339         q * x1 + 
G2(+t * x3) + 
G4(+
w * x5 + z * x7) + 
G8(+
C * x9 - E_* x11 - 
B * x13 - y * x15) + 
G16(-v * x17 - 
s * x19 - 
p * x21 - 
r * x23 - 
u * x25 - x * x27 - 
A * x29 - 
D * x31),
 
  340         r * x1 + 
G2(+
w * x3) + 
G4(+
B * x5 - 
D * x7) + 
G8(-y * x9 - t * x11 - 
p * x13 - 
u * x15) + 
G16(-z * x17 - E_* x19 + 
A * x21 + v * x23 + 
q * x25 + 
s * x27 + x * x29 + 
C * x31),
 
  341         s * x1 + 
G2(+z * x3) + 
G4(-
D * x5 - 
w * x7) + 
G8(-
p * x9 - v * x11 - 
C * x13 + 
A * x15) + 
G16(+t * x17 + 
r * x19 + y * x21 - E_* x23 - x * x25 - 
q * x27 - 
u * x29 - 
B * x31),
 
  342         t * x1 + 
G2(+
C * x3) + 
G4(-y * x5 - 
p * x7) + 
G8(-x * x9 + 
D * x11 + 
u * x13 + 
s * x15) + 
G16(+
B * x17 - z * x19 - 
q * x21 - 
w * x23 + E_* x25 + v * x27 + 
r * x29 + 
A * x31),
 
  343         u * x1 + 
G2(-E_* x3) + 
G4(-t * x5 - v * x7) + 
G8(+
D * x9 + 
s * x11 + 
w * x13 - 
C * x15) + 
G16(-
r * x17 - x * x19 + 
B * x21 + 
q * x23 + y * x25 - 
A * x27 - 
p * x29 - z * x31),
 
  344         v * x1 + 
G2(-
B * x3) + 
G4(-
p * x5 - 
C * x7) + 
G8(+
u * x9 + 
w * x11 - 
A * x13 - 
q * x15) + 
G16(-
D * x17 + t * x19 + x * x21 - z * x23 - 
r * x25 - E_* x27 + 
s * x29 + y * x31),
 
  345         w * x1 + 
G2(-y * x3) + 
G4(-
u * x5 + 
A * x7) + 
G8(+
s * x9 - 
C * x11 - 
q * x13 + E_* x15) + 
G16(+
p * x17 + 
D * x19 - 
r * x21 - 
B * x23 + t * x25 + z * x27 - v * x29 - x * x31),
 
  346         x * x1 + 
G2(-v * x3) + 
G4(-z * x5 + t * x7) + 
G8(+
B * x9 - 
r * x11 - 
D * x13 + 
p * x15) + 
G16(-E_* x17 - 
q * x19 + 
C * x21 + 
s * x23 - 
A * x25 - 
u * x27 + y * x29 + 
w * x31),
 
  347         y * x1 + 
G2(-
s * x3) + 
G4(-E_* x5 + 
r * x7) + 
G8(-z * x9 - x * x11 + t * x13 + 
D * x15) + 
G16(-
q * x17 + 
A * x19 + 
w * x21 - 
u * x23 - 
C * x25 + 
p * x27 - 
B * x29 - v * x31),
 
  348         z * x1 + 
G2(-
p * x3) + 
G4(+
A * x5 + y * x7) + 
G8(-
q * x9 + 
B * x11 + x * x13 - 
r * x15) + 
G16(+
C * x17 + 
w * x19 - 
s * x21 + 
D * x23 + v * x25 - t * x27 + E_* x29 + 
u * x31),
 
  349         A * x1 + 
G2(-
r * x3) + 
G4(+v * x5 - E_* x7) + 
G8(-
w * x9 + 
q * x11 - z * x13 - 
B * x15) + 
G16(+
s * x17 - 
u * x19 + 
D * x21 + x * x23 - 
p * x25 + y * x27 + 
C * x29 - t * x31),
 
  350         B * x1 + 
G2(-
u * x3) + 
G4(+
q * x5 - x * x7) + 
G8(+E_* x9 + y * x11 - 
r * x13 + t * x15) + 
G16(-
A * x17 - 
C * x19 + v * x21 - 
p * x23 + 
w * x25 - 
D * x27 - z * x29 + 
s * x31),
 
  351         C * x1 + 
G2(-x * x3) + 
G4(+
s * x5 - 
q * x7) + 
G8(+v * x9 - 
A * x11 - E_* x13 + z * x15) + 
G16(-
u * x17 + 
p * x19 - t * x21 + y * x23 - 
D * x25 - 
B * x27 + 
w * x29 - 
r * x31),
 
  352         D * x1 + 
G2(-
A * x3) + 
G4(+x * x5 - 
u * x7) + 
G8(+
r * x9 - 
p * x11 + 
s * x13 - v * x15) + 
G16(+y * x17 - 
B * x19 + E_* x21 + 
C * x23 - z * x25 + 
w * x27 - t * x29 + 
q * x31),
 
  353         E_* x1 + 
G2(-
D * x3) + 
G4(+
C * x5 - 
B * x7) + 
G8(+
A * x9 - z * x11 + y * x13 - x * x15) + 
G16(+
w * x17 - v * x19 + 
u * x21 - t * x23 + 
s * x25 - 
r * x27 + 
q * x29 - 
p * x31),
 
  356     coeffs[0  * 
stride] = 
E[0]  + O[0];
 
  357     coeffs[1  * 
stride] = 
E[1]  + O[1];
 
  358     coeffs[2  * 
stride] = 
E[2]  + O[2];
 
  359     coeffs[3  * 
stride] = 
E[3]  + O[3];
 
  360     coeffs[4  * 
stride] = 
E[4]  + O[4];
 
  361     coeffs[5  * 
stride] = 
E[5]  + O[5];
 
  362     coeffs[6  * 
stride] = 
E[6]  + O[6];
 
  363     coeffs[7  * 
stride] = 
E[7]  + O[7];
 
  364     coeffs[8  * 
stride] = 
E[8]  + O[8];
 
  365     coeffs[9  * 
stride] = 
E[9]  + O[9];
 
  366     coeffs[10 * 
stride] = 
E[10] + O[10];
 
  367     coeffs[11 * 
stride] = 
E[11] + O[11];
 
  368     coeffs[12 * 
stride] = 
E[12] + O[12];
 
  369     coeffs[13 * 
stride] = 
E[13] + O[13];
 
  370     coeffs[14 * 
stride] = 
E[14] + O[14];
 
  371     coeffs[15 * 
stride] = 
E[15] + O[15];
 
  372     coeffs[16 * 
stride] = 
E[15] - O[15];
 
  373     coeffs[17 * 
stride] = 
E[14] - O[14];
 
  374     coeffs[18 * 
stride] = 
E[13] - O[13];
 
  375     coeffs[19 * 
stride] = 
E[12] - O[12];
 
  376     coeffs[20 * 
stride] = 
E[11] - O[11];
 
  377     coeffs[21 * 
stride] = 
E[10] - O[10];
 
  378     coeffs[22 * 
stride] = 
E[9]  - O[9];
 
  379     coeffs[23 * 
stride] = 
E[8]  - O[8];
 
  380     coeffs[24 * 
stride] = 
E[7]  - O[7];
 
  381     coeffs[25 * 
stride] = 
E[6]  - O[6];
 
  382     coeffs[26 * 
stride] = 
E[5]  - O[5];
 
  383     coeffs[27 * 
stride] = 
E[4]  - O[4];
 
  384     coeffs[28 * 
stride] = 
E[3]  - O[3];
 
  385     coeffs[29 * 
stride] = 
E[2]  - O[2];
 
  386     coeffs[30 * 
stride] = 
E[1]  - O[1];
 
  387     coeffs[31 * 
stride] = 
E[0]  - O[0];
 
  461     const int aa = 64, ab = 83, ac = 36, ad = 89, ae = 75, af = 50, ag = 18, ah = 90;
 
  462     const int ai = 87, aj = 80, ak = 70, al = 57, am = 43, an = 25, ao =  9, ap = 90;
 
  463     const int aq = 90, ar = 88, as = 85, at = 82, au = 78, av = 73, aw = 67, ax = 61;
 
  464     const int ay = 54, az = 46, ba = 38, bb = 31, bc = 22, bd = 13, 
be =  4, 
bf = 91;
 
  465     const int bg = 90, bh = 90, bi = 90, bj = 88, bk = 87, bl = 86, bm = 84, bn = 83;
 
  466     const int bo = 81, bp = 79, bq = 77, br = 73, bs = 71, bt = 69, bu = 65, bv = 62;
 
  467     const int bw = 59, bx = 56, by = 52, bz = 48, ca = 44, 
cb = 41, cc = 37, cd = 33;
 
  468     const int ce = 28, cf = 24, cg = 20, ch = 15, ci = 11, cj =  7, ck =  2;
 
  469     const int x0  = coeffs[0  * 
stride], x1  = coeffs[1  * 
stride];
 
  470     const int x2  = coeffs[2  * 
stride], x3  = coeffs[3  * 
stride];
 
  471     const int x4  = coeffs[4  * 
stride], x5  = coeffs[5  * 
stride];
 
  472     const int x6  = coeffs[6  * 
stride], x7  = coeffs[7  * 
stride];
 
  473     const int x8  = coeffs[8  * 
stride], x9  = coeffs[9  * 
stride];
 
  474     const int x10 = coeffs[10 * 
stride], x11 = coeffs[11 * 
stride];
 
  475     const int x12 = coeffs[12 * 
stride], x13 = coeffs[13 * 
stride];
 
  476     const int x14 = coeffs[14 * 
stride], x15 = coeffs[15 * 
stride];
 
  477     const int x16 = coeffs[16 * 
stride], x17 = coeffs[17 * 
stride];
 
  478     const int x18 = coeffs[18 * 
stride], x19 = coeffs[19 * 
stride];
 
  479     const int x20 = coeffs[20 * 
stride], x21 = coeffs[21 * 
stride];
 
  480     const int x22 = coeffs[22 * 
stride], x23 = coeffs[23 * 
stride];
 
  481     const int x24 = coeffs[24 * 
stride], x25 = coeffs[25 * 
stride];
 
  482     const int x26 = coeffs[26 * 
stride], x27 = coeffs[27 * 
stride];
 
  483     const int x28 = coeffs[28 * 
stride], x29 = coeffs[29 * 
stride];
 
  484     const int x30 = coeffs[30 * 
stride], x31 = coeffs[31 * 
stride];
 
  486     const int EEEEE[2] = {
 
  490     const int EEEEO[2] = {
 
  494     const int EEEE[4] = {
 
  495         EEEEE[0] + EEEEO[0], EEEEE[1] + EEEEO[1],
 
  496         EEEEE[1] - EEEEO[1], EEEEE[0] - EEEEO[0],
 
  498     const int EEEO[4] = {
 
  499         G8(ad * x8)  + 
G16(+ae * x24),
 
  500         G8(ae * x8)  + 
G16(-ag * x24),
 
  501         G8(af * x8)  + 
G16(-ad * x24),
 
  502         G8(ag * x8)  + 
G16(-af * x24),
 
  505         EEEE[0] + EEEO[0], EEEE[1] + EEEO[1], EEEE[2] + EEEO[2], EEEE[3] + EEEO[3],
 
  506         EEEE[3] - EEEO[3], EEEE[2] - EEEO[2], EEEE[1] - EEEO[1], EEEE[0] - EEEO[0],
 
  509         G4(ah * x4) + 
G8(+ai * x12) + 
G16(+aj * x20 + ak * x28),
 
  510         G4(ai * x4) + 
G8(+al * x12) + 
G16(+ao * x20 - am * x28),
 
  511         G4(aj * x4) + 
G8(+ao * x12) + 
G16(-ak * x20 - ai * x28),
 
  512         G4(ak * x4) + 
G8(-am * x12) + 
G16(-ai * x20 + ao * x28),
 
  513         G4(al * x4) + 
G8(-aj * x12) + 
G16(-an * x20 + ah * x28),
 
  514         G4(am * x4) + 
G8(-ah * x12) + 
G16(+al * x20 + an * x28),
 
  515         G4(an * x4) + 
G8(-ak * x12) + 
G16(+ah * x20 - aj * x28),
 
  516         G4(ao * x4) + 
G8(-an * x12) + 
G16(+am * x20 - al * x28),
 
  519         EEE[0] + EEO[0], EEE[1] + EEO[1], EEE[2] + EEO[2], EEE[3] + EEO[3], EEE[4] + EEO[4], EEE[5] + EEO[5], EEE[6] + EEO[6], EEE[7] + EEO[7],
 
  520         EEE[7] - EEO[7], EEE[6] - EEO[6], EEE[5] - EEO[5], EEE[4] - EEO[4], EEE[3] - EEO[3], EEE[2] - EEO[2], EEE[1] - EEO[1], EEE[0] - EEO[0],
 
  523         G2(ap * x2) + 
G4(+aq * x6) + 
G8(+ar * x10 + as * x14) + 
G16(+at * x18 + au * x22 + av * x26 + aw * x30),
 
  524         G2(aq * x2) + 
G4(+at * x6) + 
G8(+aw * x10 + az * x14) + 
G16(+bc * x18 - 
be * x22 - bb * x26 - ay * x30),
 
  525         G2(ar * x2) + 
G4(+aw * x6) + 
G8(+bb * x10 - bd * x14) + 
G16(-ay * x18 - at * x22 - ap * x26 - au * x30),
 
  526         G2(as * x2) + 
G4(+az * x6) + 
G8(-bd * x10 - aw * x14) + 
G16(-ap * x18 - av * x22 - bc * x26 + ba * x30),
 
  527         G2(at * x2) + 
G4(+bc * x6) + 
G8(-ay * x10 - ap * x14) + 
G16(-ax * x18 + bd * x22 + au * x26 + as * x30),
 
  528         G2(au * x2) + 
G4(-
be * x6) + 
G8(-at * x10 - av * x14) + 
G16(+bd * x18 + as * x22 + aw * x26 - bc * x30),
 
  529         G2(av * x2) + 
G4(-bb * x6) + 
G8(-ap * x10 - bc * x14) + 
G16(+au * x18 + aw * x22 - ba * x26 - aq * x30),
 
  530         G2(aw * x2) + 
G4(-ay * x6) + 
G8(-au * x10 + ba * x14) + 
G16(+as * x18 - bc * x22 - aq * x26 + 
be * x30),
 
  531         G2(ax * x2) + 
G4(-av * x6) + 
G8(-az * x10 + at * x14) + 
G16(+bb * x18 - ar * x22 - bd * x26 + ap * x30),
 
  532         G2(ay * x2) + 
G4(-as * x6) + 
G8(-
be * x10 + ar * x14) + 
G16(-az * x18 - ax * x22 + at * x26 + bd * x30),
 
  533         G2(az * x2) + 
G4(-ap * x6) + 
G8(+ba * x10 + ay * x14) + 
G16(-aq * x18 + bb * x22 + ax * x26 - ar * x30),
 
  534         G2(ba * x2) + 
G4(-ar * x6) + 
G8(+av * x10 - 
be * x14) + 
G16(-aw * x18 + aq * x22 - az * x26 - bb * x30),
 
  535         G2(bb * x2) + 
G4(-au * x6) + 
G8(+aq * x10 - ax * x14) + 
G16(+
be * x18 + ay * x22 - ar * x26 + at * x30),
 
  536         G2(bc * x2) + 
G4(-ax * x6) + 
G8(+as * x10 - aq * x14) + 
G16(+av * x18 - ba * x22 - 
be * x26 + az * x30),
 
  537         G2(bd * x2) + 
G4(-ba * x6) + 
G8(+ax * x10 - au * x14) + 
G16(+ar * x18 - ap * x22 + as * x26 - av * x30),
 
  538         G2(
be * x2) + 
G4(-bd * x6) + 
G8(+bc * x10 - bb * x14) + 
G16(+ba * x18 - az * x22 + ay * x26 - ax * x30),
 
  541         EE[0]  + EO[0],  EE[1]  + EO[1],  EE[2]  + EO[2],  EE[3]  + EO[3],  EE[4]  + EO[4],  EE[5]  + EO[5],  EE[6] + EO[6], EE[7] + EO[7], EE[8] + EO[8], EE[9] + EO[9], EE[10] + EO[10], EE[11] + EO[11], EE[12] + EO[12], EE[13] + EO[13], EE[14] + EO[14], EE[15] + EO[15],
 
  542         EE[15] - EO[15], EE[14] - EO[14], EE[13] - EO[13], EE[12] - EO[12], EE[11] - EO[11], EE[10] - EO[10], EE[9] - EO[9], EE[8] - EO[8], EE[7] - EO[7], EE[6] - EO[6], EE[5]  - EO[5],  EE[4]  - EO[4],  EE[3]  - EO[3],  EE[2]  - EO[2],  EE[1]  - EO[1],  EE[0]  - EO[0],
 
  545         bf * x1 + 
G2(+bg * x3) + 
G4(+bh * x5 + bi * x7) + 
G8(+bj * x9 + bk * x11 + bl * x13 + bm * x15) + 
G16(+bn * x17 + bo * x19 + bp * x21 + bq * x23 +  br * x25 + bs * x27 + bt * x29 + bu * x31),
 
  546         bg * x1 + 
G2(+bj * x3) + 
G4(+bm * x5 + bp * x7) + 
G8(+bs * x9 + bv * x11 + by * x13 + 
cb * x15) + 
G16(+ce * x17 + ch * x19 + ck * x21 - ci * x23 + -cf * x25 - cc * x27 - bz * x29 - bw * x31),
 
  547         bh * x1 + 
G2(+bm * x3) + 
G4(+br * x5 + bw * x7) + 
G8(+
cb * x9 + cg * x11 - ck * x13 - cf * x15) + 
G16(-ca * x17 - bv * x19 - bq * x21 - bl * x23 + -bg * x25 - bi * x27 - bn * x29 - bs * x31),
 
  548         bi * x1 + 
G2(+bp * x3) + 
G4(+bw * x5 + cd * x7) + 
G8(+ck * x9 - ce * x11 - bx * x13 - bq * x15) + 
G16(-bj * x17 - bh * x19 - bo * x21 - bv * x23 + -cc * x25 - cj * x27 + cf * x29 + by * x31),
 
  549         bj * x1 + 
G2(+bs * x3) + 
G4(+
cb * x5 + ck * x7) + 
G8(-cc * x9 - bt * x11 - bk * x13 - bi * x15) + 
G16(-br * x17 - ca * x19 - cj * x21 + cd * x23 +  bu * x25 + bl * x27 + bh * x29 + bq * x31),
 
  550         bk * x1 + 
G2(+bv * x3) + 
G4(+cg * x5 - ce * x7) + 
G8(-bt * x9 - bi * x11 - bm * x13 - bx * x15) + 
G16(-ci * x17 + cc * x19 + br * x21 + bg * x23 +  bo * x25 + bz * x27 + ck * x29 - ca * x31),
 
  551         bl * x1 + 
G2(+by * x3) + 
G4(-ck * x5 - bx * x7) + 
G8(-bk * x9 - bm * x11 - bz * x13 + cj * x15) + 
G16(+bw * x17 + bj * x19 + bn * x21 + ca * x23 + -ci * x25 - bv * x27 - bi * x29 - bo * x31),
 
  552         bm * x1 + 
G2(+
cb * x3) + 
G4(-cf * x5 - bq * x7) + 
G8(-bi * x9 - bx * x11 + cj * x13 + bu * x15) + 
G16(+
bf * x17 + bt * x19 + ci * x21 - by * x23 + -bj * x25 - bp * x27 - ce * x29 + cc * x31),
 
  553         bn * x1 + 
G2(+ce * x3) + 
G4(-ca * x5 - bj * x7) + 
G8(-br * x9 - ci * x11 + bw * x13 + 
bf * x15) + 
G16(+bv * x17 - cj * x19 - bs * x21 - bi * x23 + -bz * x25 + cf * x27 + bo * x29 + bm * x31),
 
  554         bo * x1 + 
G2(+ch * x3) + 
G4(-bv * x5 - bh * x7) + 
G8(-ca * x9 + cc * x11 + bj * x13 + bt * x15) + 
G16(-cj * x17 - bq * x19 - bm * x21 - cf * x23 +  bx * x25 + 
bf * x27 + by * x29 - ce * x31),
 
  555         bp * x1 + 
G2(+ck * x3) + 
G4(-bq * x5 - bo * x7) + 
G8(-cj * x9 + br * x11 + bn * x13 + ci * x15) + 
G16(-bs * x17 - bm * x19 - ch * x21 + bt * x23 +  bl * x25 + cg * x27 - bu * x29 - bk * x31),
 
  556         bq * x1 + 
G2(-ci * x3) + 
G4(-bl * x5 - bv * x7) + 
G8(+cd * x9 + bg * x11 + ca * x13 - by * x15) + 
G16(-bi * x17 - cf * x19 + bt * x21 + bn * x23 +  ck * x25 - bo * x27 - bs * x29 + cg * x31),
 
  557         br * x1 + 
G2(-cf * x3) + 
G4(-bg * x5 - cc * x7) + 
G8(+bu * x9 + bo * x11 - ci * x13 - bj * x15) + 
G16(-bz * x17 + bx * x19 + bl * x21 + ck * x23 + -bm * x25 - bw * x27 + ca * x29 + bi * x31),
 
  558         bs * x1 + 
G2(-cc * x3) + 
G4(-bi * x5 - cj * x7) + 
G8(+bl * x9 + bz * x11 - bv * x13 - bp * x15) + 
G16(+cf * x17 + 
bf * x19 + cg * x21 - bo * x23 + -bw * x25 + by * x27 + bm * x29 - ci * x31),
 
  559         bt * x1 + 
G2(-bz * x3) + 
G4(-bn * x5 + cf * x7) + 
G8(+bh * x9 + ck * x11 - bi * x13 - ce * x15) + 
G16(+bo * x17 + by * x19 - bu * x21 - bs * x23 +  ca * x25 + bm * x27 - cg * x29 - bg * x31),
 
  560         bu * x1 + 
G2(-bw * x3) + 
G4(-bs * x5 + by * x7) + 
G8(+bq * x9 - ca * x11 - bo * x13 + cc * x15) + 
G16(+bm * x17 - ce * x19 - bk * x21 + cg * x23 +  bi * x25 - ci * x27 - bg * x29 + ck * x31),
 
  561         bv * x1 + 
G2(-bt * x3) + 
G4(-bx * x5 + br * x7) + 
G8(+bz * x9 - bp * x11 - 
cb * x13 + bn * x15) + 
G16(+cd * x17 - bl * x19 - cf * x21 + bj * x23 +  ch * x25 - bh * x27 - cj * x29 + 
bf * x31),
 
  562         bw * x1 + 
G2(-bq * x3) + 
G4(-cc * x5 + bk * x7) + 
G8(+ci * x9 - 
bf * x11 + ch * x13 + bl * x15) + 
G16(-
cb * x17 - br * x19 + bv * x21 + bx * x23 + -bp * x25 - cd * x27 + bj * x29 + cj * x31),
 
  563         bx * x1 + 
G2(-bn * x3) + 
G4(-ch * x5 + bg * x7) + 
G8(-ce * x9 - bq * x11 + bu * x13 + ca * x15) + 
G16(-bk * x17 - ck * x19 + bj * x21 - 
cb * x23 + -bt * x25 + br * x27 + cd * x29 - bh * x31),
 
  564         by * x1 + 
G2(-bk * x3) + 
G4(+cj * x5 + bn * x7) + 
G8(-bv * x9 - 
cb * x11 + bh * x13 - cg * x15) + 
G16(-bq * x17 + bs * x19 + ce * x21 - 
bf * x23 +  cd * x25 + bt * x27 - bp * x29 - ch * x31),
 
  565         bz * x1 + 
G2(-bh * x3) + 
G4(+ce * x5 + bu * x7) + 
G8(-bm * x9 + cj * x11 + bp * x13 - br * x15) + 
G16(-ch * x17 + bk * x19 - bw * x21 - cc * x23 +  
bf * x25 - 
cb * x27 - bx * x29 + bj * x31),
 
  566         ca * x1 + 
G2(-
bf * x3) + 
G4(+bz * x5 + 
cb * x7) + 
G8(-bg * x9 + by * x11 + cc * x13 - bh * x15) + 
G16(+bx * x17 + cd * x19 - bi * x21 + bw * x23 +  ce * x25 - bj * x27 + bv * x29 + cf * x31),
 
  567         cb * x1 + 
G2(-bi * x3) + 
G4(+bu * x5 + ci * x7) + 
G8(-bp * x9 + bn * x11 - cg * x13 - bw * x15) + 
G16(+bg * x17 - bz * x19 - cd * x21 + bk * x23 + -bs * x25 - ck * x27 + br * x29 - bl * x31),
 
  568         cc * x1 + 
G2(-bl * x3) + 
G4(+bp * x5 - cg * x7) + 
G8(-by * x9 + bh * x11 - bt * x13 + ck * x15) + 
G16(+bu * x17 - bg * x19 + bx * x21 + ch * x23 + -bq * x25 + bk * x27 - 
cb * x29 - cd * x31),
 
  569         cd * x1 + 
G2(-bo * x3) + 
G4(+bk * x5 - bz * x7) + 
G8(-ch * x9 + bs * x11 - bg * x13 + bv * x15) + 
G16(-ck * x17 - bw * x19 + bh * x21 - br * x23 +  cg * x25 + ca * x27 - bl * x29 + bn * x31),
 
  570         ce * x1 + 
G2(-br * x3) + 
G4(+
bf * x5 - bs * x7) + 
G8(+cf * x9 + cd * x11 - bq * x13 + bg * x15) + 
G16(-bt * x17 + cg * x19 + cc * x21 - bp * x23 +  bh * x25 - bu * x27 + ch * x29 + 
cb * x31),
 
  571         cf * x1 + 
G2(-bu * x3) + 
G4(+bj * x5 - bl * x7) + 
G8(+bw * x9 - ch * x11 - cd * x13 + bs * x15) + 
G16(-bh * x17 + bn * x19 - by * x21 + cj * x23 +  
cb * x25 - bq * x27 + 
bf * x29 - bp * x31),
 
  572         cg * x1 + 
G2(-bx * x3) + 
G4(+bo * x5 - 
bf * x7) + 
G8(+bn * x9 - bw * x11 + cf * x13 + ch * x15) + 
G16(-by * x17 + bp * x19 - bg * x21 + bm * x23 + -bv * x25 + ce * x27 + ci * x29 - bz * x31),
 
  573         ch * x1 + 
G2(-ca * x3) + 
G4(+bt * x5 - bm * x7) + 
G8(+
bf * x9 - bl * x11 + bs * x13 - bz * x15) + 
G16(+cg * x17 + ci * x19 - 
cb * x21 + bu * x23 + -bn * x25 + bg * x27 - bk * x29 + br * x31),
 
  574         ci * x1 + 
G2(-cd * x3) + 
G4(+by * x5 - bt * x7) + 
G8(+bo * x9 - bj * x11 + 
bf * x13 - bk * x15) + 
G16(+bp * x17 - bu * x19 + bz * x21 - ce * x23 +  cj * x25 + ch * x27 - cc * x29 + bx * x31),
 
  575         cj * x1 + 
G2(-cg * x3) + 
G4(+cd * x5 - ca * x7) + 
G8(+bx * x9 - bu * x11 + br * x13 - bo * x15) + 
G16(+bl * x17 - bi * x19 + 
bf * x21 - bh * x23 +  bk * x25 - bn * x27 + bq * x29 - bt * x31),
 
  576         ck * x1 + 
G2(-cj * x3) + 
G4(+ci * x5 - ch * x7) + 
G8(+cg * x9 - cf * x11 + ce * x13 - cd * x15) + 
G16(+cc * x17 - 
cb * x19 + ca * x21 - bz * x23 +  by * x25 - bx * x27 + bw * x29 - bv * x31),
 
  578     coeffs[0  * 
stride] = 
E[0 ] + O[0 ];
 
  579     coeffs[1  * 
stride] = 
E[1 ] + O[1 ];
 
  580     coeffs[2  * 
stride] = 
E[2 ] + O[2 ];
 
  581     coeffs[3  * 
stride] = 
E[3 ] + O[3 ];
 
  582     coeffs[4  * 
stride] = 
E[4 ] + O[4 ];
 
  583     coeffs[5  * 
stride] = 
E[5 ] + O[5 ];
 
  584     coeffs[6  * 
stride] = 
E[6 ] + O[6 ];
 
  585     coeffs[7  * 
stride] = 
E[7 ] + O[7 ];
 
  586     coeffs[8  * 
stride] = 
E[8 ] + O[8 ];
 
  587     coeffs[9  * 
stride] = 
E[9 ] + O[9 ];
 
  588     coeffs[10 * 
stride] = 
E[10] + O[10];
 
  589     coeffs[11 * 
stride] = 
E[11] + O[11];
 
  590     coeffs[12 * 
stride] = 
E[12] + O[12];
 
  591     coeffs[13 * 
stride] = 
E[13] + O[13];
 
  592     coeffs[14 * 
stride] = 
E[14] + O[14];
 
  593     coeffs[15 * 
stride] = 
E[15] + O[15];
 
  594     coeffs[16 * 
stride] = 
E[16] + O[16];
 
  595     coeffs[17 * 
stride] = 
E[17] + O[17];
 
  596     coeffs[18 * 
stride] = 
E[18] + O[18];
 
  597     coeffs[19 * 
stride] = 
E[19] + O[19];
 
  598     coeffs[20 * 
stride] = 
E[20] + O[20];
 
  599     coeffs[21 * 
stride] = 
E[21] + O[21];
 
  600     coeffs[22 * 
stride] = 
E[22] + O[22];
 
  601     coeffs[23 * 
stride] = 
E[23] + O[23];
 
  602     coeffs[24 * 
stride] = 
E[24] + O[24];
 
  603     coeffs[25 * 
stride] = 
E[25] + O[25];
 
  604     coeffs[26 * 
stride] = 
E[26] + O[26];
 
  605     coeffs[27 * 
stride] = 
E[27] + O[27];
 
  606     coeffs[28 * 
stride] = 
E[28] + O[28];
 
  607     coeffs[29 * 
stride] = 
E[29] + O[29];
 
  608     coeffs[30 * 
stride] = 
E[30] + O[30];
 
  609     coeffs[31 * 
stride] = 
E[31] + O[31];
 
  610     coeffs[32 * 
stride] = 
E[31] - O[31];
 
  611     coeffs[33 * 
stride] = 
E[30] - O[30];
 
  612     coeffs[34 * 
stride] = 
E[29] - O[29];
 
  613     coeffs[35 * 
stride] = 
E[28] - O[28];
 
  614     coeffs[36 * 
stride] = 
E[27] - O[27];
 
  615     coeffs[37 * 
stride] = 
E[26] - O[26];
 
  616     coeffs[38 * 
stride] = 
E[25] - O[25];
 
  617     coeffs[39 * 
stride] = 
E[24] - O[24];
 
  618     coeffs[40 * 
stride] = 
E[23] - O[23];
 
  619     coeffs[41 * 
stride] = 
E[22] - O[22];
 
  620     coeffs[42 * 
stride] = 
E[21] - O[21];
 
  621     coeffs[43 * 
stride] = 
E[20] - O[20];
 
  622     coeffs[44 * 
stride] = 
E[19] - O[19];
 
  623     coeffs[45 * 
stride] = 
E[18] - O[18];
 
  624     coeffs[46 * 
stride] = 
E[17] - O[17];
 
  625     coeffs[47 * 
stride] = 
E[16] - O[16];
 
  626     coeffs[48 * 
stride] = 
E[15] - O[15];
 
  627     coeffs[49 * 
stride] = 
E[14] - O[14];
 
  628     coeffs[50 * 
stride] = 
E[13] - O[13];
 
  629     coeffs[51 * 
stride] = 
E[12] - O[12];
 
  630     coeffs[52 * 
stride] = 
E[11] - O[11];
 
  631     coeffs[53 * 
stride] = 
E[10] - O[10];
 
  632     coeffs[54 * 
stride] = 
E[9]  - O[9];
 
  633     coeffs[55 * 
stride] = 
E[8]  - O[8];
 
  634     coeffs[56 * 
stride] = 
E[7]  - O[7];
 
  635     coeffs[57 * 
stride] = 
E[6]  - O[6];
 
  636     coeffs[58 * 
stride] = 
E[5]  - O[5];
 
  637     coeffs[59 * 
stride] = 
E[4]  - O[4];
 
  638     coeffs[60 * 
stride] = 
E[3]  - O[3];
 
  639     coeffs[61 * 
stride] = 
E[2]  - O[2];
 
  640     coeffs[62 * 
stride] = 
E[1]  - O[1];
 
  641     coeffs[63 * 
stride] = 
E[0]  - O[0];
 
  649     for (
int i = 0; 
i < nz; 
i++)
 
  652     for (
int i = 0; 
i < 
size; 
i++) {
 
  655          for (
int j = 0; j < nz; j++)
 
  668 #define DEFINE_INV_DCT8_1D(S)                                                   \ 
  669 void ff_vvc_inv_dct8_ ## S(int *coeffs, const ptrdiff_t stride, const size_t nz) \ 
  671     inv_dct8(coeffs, stride, &ff_vvc_dct8_##S##x##S[0][0], S, nz);               \ 
  684 #define DEFINE_INV_DST7_1D(S)                                                   \ 
  685 void ff_vvc_inv_dst7_ ## S(int *coeffs, const ptrdiff_t stride, const size_t nz) \ 
  687     inv_dst7(coeffs, stride, &ff_vvc_dst7_##S##x##S[0][0], S, nz);               \ 
  696     int pred_mode_intra, 
int lfnst_idx, 
int log2_transform_range)
 
  701      for (
int j = 0; j < n_tr_s; j++, tr_mat++) {
 
  704         for (
int i = 0; 
i < no_zero_size; 
i++)
 
  705             t += 
u[
i] * tr_mat[
i * n_tr_s];