69 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
/*
 * Lookup table with 81 (3^4) entries: entry i holds the number of non-zero
 * digits of i when i is written as four base-3 digits. The UQUAD bit-count
 * paths add uquad_sign_bits[curidx] on top of p_bits[curidx].
 */
76 static const uint8_t uquad_sign_bits[81] = {
77 0, 1, 1, 1, 2, 2, 1, 2, 2,
78 1, 2, 2, 2, 3, 3, 2, 3, 3,
79 1, 2, 2, 2, 3, 3, 2, 3, 3,
80 1, 2, 2, 2, 3, 3, 2, 3, 3,
81 2, 3, 3, 3, 4, 4, 3, 4, 4,
82 2, 3, 3, 3, 4, 4, 3, 4, 4,
83 1, 2, 2, 2, 3, 3, 2, 3, 3,
84 2, 3, 3, 3, 4, 4, 3, 4, 4,
85 2, 3, 3, 3, 4, 4, 3, 4, 4
/*
 * Lookup table with 64 (8 x 8) entries: entry i is the count of non-zero
 * values among (i / 8) and (i % 8). get_band_numbits_UPAIR7_mips() adds
 * upair7_sign_bits[curidx] and upair7_sign_bits[curidx2] to its bit total.
 */
88 static const uint8_t upair7_sign_bits[64] = {
89 0, 1, 1, 1, 1, 1, 1, 1,
90 1, 2, 2, 2, 2, 2, 2, 2,
91 1, 2, 2, 2, 2, 2, 2, 2,
92 1, 2, 2, 2, 2, 2, 2, 2,
93 1, 2, 2, 2, 2, 2, 2, 2,
94 1, 2, 2, 2, 2, 2, 2, 2,
95 1, 2, 2, 2, 2, 2, 2, 2,
96 1, 2, 2, 2, 2, 2, 2, 2,
/*
 * Lookup table with 169 (13 x 13) entries: entry i is the count of non-zero
 * values among (i / 13) and (i % 13). get_band_numbits_UPAIR12_mips() adds
 * upair12_sign_bits[curidx] and upair12_sign_bits[curidx2] to its bit total.
 */
99 static const uint8_t upair12_sign_bits[169] = {
100 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
103 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
104 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
111 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
112 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Lookup table with 289 (17 x 17) entries: entry i is the count of non-zero
 * values among (i / 17) and (i % 17). get_band_numbits_ESC_mips() adds
 * esc_sign_bits[curidx] and esc_sign_bits[curidx2] to its bit total.
 */
115 static const uint8_t esc_sign_bits[289] = {
116 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
125 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
126 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
127 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
128 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * SQUAD variant of the quantize-and-encode routines. It occupies slots 1 and 2
 * of the cb-indexed dispatch table used by quantize_and_encode_band_cost().
 * Input is consumed four values per iteration (the loop advances i by 4), and
 * qenergy accumulates the squared reconstructed values e1..e4.
 */
138 static void quantize_and_encode_band_cost_SQUAD_mips(
struct AACEncContext *
s,
140 const float *scaled,
int size,
int scale_idx,
141 int cb,
const float lambda,
const float uplim,
142 int *bits,
float *energy,
const float ROUNDING)
147 int qc1, qc2, qc3, qc4;
148 float qenergy = 0.0f;
156 for (i = 0; i <
size; i += 4) {
/* view the storage of in[i..i+3] as ints so the asm can test the top bit */
158 int *in_int = (
int *)&in[i];
169 ".set noreorder \n\t"
/* qcN = (0 < qcN): each quantized value collapses to 0 or 1 */
171 "slt %[qc1], $zero, %[qc1] \n\t"
172 "slt %[qc2], $zero, %[qc2] \n\t"
173 "slt %[qc3], $zero, %[qc3] \n\t"
174 "slt %[qc4], $zero, %[qc4] \n\t"
/* load the raw words and keep only bit 31 of each (0 or 1) */
175 "lw %[t0], 0(%[in_int]) \n\t"
176 "lw %[t1], 4(%[in_int]) \n\t"
177 "lw %[t2], 8(%[in_int]) \n\t"
178 "lw %[t3], 12(%[in_int]) \n\t"
179 "srl %[t0], %[t0], 31 \n\t"
180 "srl %[t1], %[t1], 31 \n\t"
181 "srl %[t2], %[t2], 31 \n\t"
182 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qcN; movn installs the negated value only where bit 31 was set */
183 "subu %[t4], $zero, %[qc1] \n\t"
184 "subu %[t5], $zero, %[qc2] \n\t"
185 "subu %[t6], $zero, %[qc3] \n\t"
186 "subu %[t7], $zero, %[qc4] \n\t"
187 "movn %[qc1], %[t4], %[t0] \n\t"
188 "movn %[qc2], %[t5], %[t1] \n\t"
189 "movn %[qc3], %[t6], %[t2] \n\t"
190 "movn %[qc4], %[t7], %[t3] \n\t"
194 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
195 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
196 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
197 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
198 : [in_int]
"r"(in_int)
211 put_bits(pb, p_bits[curidx], p_codes[curidx]);
/* four consecutive table entries belong to one index */
215 vec = &p_vec[curidx*4];
227 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * UQUAD variant of the quantize-and-encode routines. It occupies slots 3 and 4
 * of the cb-indexed dispatch table used by quantize_and_encode_band_cost().
 * Four values are handled per iteration: the quantized magnitudes are clamped
 * to 2 and one sign bit per non-zero value is appended below the codeword.
 */
234 static void quantize_and_encode_band_cost_UQUAD_mips(
struct AACEncContext *s,
236 const float *scaled,
int size,
int scale_idx,
237 int cb,
const float lambda,
const float uplim,
238 int *bits,
float *energy,
const float ROUNDING)
243 int qc1, qc2, qc3, qc4;
244 float qenergy = 0.0f;
252 for (i = 0; i <
size; i += 4) {
253 int curidx, sign,
count;
/* view the storage of in[i..i+3] as ints so the asm can test the top bit */
254 int *in_int = (
int *)&in[i];
256 unsigned int v_codes;
267 ".set noreorder \n\t"
/* t4 = 2 is the clamp limit; sign starts out as 0 */
269 "ori %[t4], $zero, 2 \n\t"
270 "ori %[sign], $zero, 0 \n\t"
/* tN = (2 < qcN); movn then replaces any qcN above 2 with 2 */
271 "slt %[t0], %[t4], %[qc1] \n\t"
272 "slt %[t1], %[t4], %[qc2] \n\t"
273 "slt %[t2], %[t4], %[qc3] \n\t"
274 "slt %[t3], %[t4], %[qc4] \n\t"
275 "movn %[qc1], %[t4], %[t0] \n\t"
276 "movn %[qc2], %[t4], %[t1] \n\t"
277 "movn %[qc3], %[t4], %[t2] \n\t"
278 "movn %[qc4], %[t4], %[t3] \n\t"
279 "lw %[t0], 0(%[in_int]) \n\t"
280 "lw %[t1], 4(%[in_int]) \n\t"
281 "lw %[t2], 8(%[in_int]) \n\t"
282 "lw %[t3], 12(%[in_int]) \n\t"
/*
 * From here two computations are interleaved:
 *  - (word < 0) turns each loaded word into its top bit; for every
 *    non-zero qcN, in order, sign becomes (sign << 1) | that bit;
 *  - count ends up as (0 < qc1) + (0 < qc2) + (0 < qc3) + (0 < qc4).
 */
283 "slt %[t0], %[t0], $zero \n\t"
284 "movn %[sign], %[t0], %[qc1] \n\t"
285 "slt %[t1], %[t1], $zero \n\t"
286 "slt %[t2], %[t2], $zero \n\t"
287 "slt %[t3], %[t3], $zero \n\t"
288 "sll %[t0], %[sign], 1 \n\t"
289 "or %[t0], %[t0], %[t1] \n\t"
290 "movn %[sign], %[t0], %[qc2] \n\t"
291 "slt %[t4], $zero, %[qc1] \n\t"
292 "slt %[t1], $zero, %[qc2] \n\t"
293 "slt %[count], $zero, %[qc3] \n\t"
294 "sll %[t0], %[sign], 1 \n\t"
295 "or %[t0], %[t0], %[t2] \n\t"
296 "movn %[sign], %[t0], %[qc3] \n\t"
297 "slt %[t2], $zero, %[qc4] \n\t"
298 "addu %[count], %[count], %[t4] \n\t"
299 "addu %[count], %[count], %[t1] \n\t"
300 "sll %[t0], %[sign], 1 \n\t"
301 "or %[t0], %[t0], %[t3] \n\t"
302 "movn %[sign], %[t0], %[qc4] \n\t"
303 "addu %[count], %[count], %[t2] \n\t"
307 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
308 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
309 [sign]
"=&r"(sign), [count]
"=&r"(count),
310 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
312 : [in_int]
"r"(in_int)
/* codeword shifted up by count, with the low count bits of sign beneath it */
324 v_codes = (p_codes[curidx] <<
count) | (sign & ((1 << count) - 1));
325 v_bits = p_bits[curidx] +
count;
330 vec = &p_vec[curidx*4];
/* reconstructed magnitude vec[k] * IQ takes the sign of the matching input */
331 e1 = copysignf(vec[0] * IQ, in[i+0]);
332 e2 = copysignf(vec[1] * IQ, in[i+1]);
333 e3 = copysignf(vec[2] * IQ, in[i+2]);
334 e4 = copysignf(vec[3] * IQ, in[i+3]);
342 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * SPAIR variant of the quantize-and-encode routines. It occupies slots 5 and 6
 * of the cb-indexed dispatch table used by quantize_and_encode_band_cost().
 * Four values are handled per iteration as two pairs (curidx and curidx2);
 * the quantized values are clamped to 4 and then given the input's sign.
 */
349 static void quantize_and_encode_band_cost_SPAIR_mips(
struct AACEncContext *s,
351 const float *scaled,
int size,
int scale_idx,
352 int cb,
const float lambda,
const float uplim,
353 int *bits,
float *energy,
const float ROUNDING)
358 int qc1, qc2, qc3, qc4;
359 float qenergy = 0.0f;
367 for (i = 0; i <
size; i += 4) {
/* view the storage of in[i..i+3] as ints so the asm can test the top bit */
369 int *in_int = (
int *)&in[i];
371 unsigned int v_codes;
373 const float *vec1, *vec2;
382 ".set noreorder \n\t"
/* t4 = 4 is the clamp limit: tN = (4 < qcN), movn replaces such qcN with 4 */
384 "ori %[t4], $zero, 4 \n\t"
385 "slt %[t0], %[t4], %[qc1] \n\t"
386 "slt %[t1], %[t4], %[qc2] \n\t"
387 "slt %[t2], %[t4], %[qc3] \n\t"
388 "slt %[t3], %[t4], %[qc4] \n\t"
389 "movn %[qc1], %[t4], %[t0] \n\t"
390 "movn %[qc2], %[t4], %[t1] \n\t"
391 "movn %[qc3], %[t4], %[t2] \n\t"
392 "movn %[qc4], %[t4], %[t3] \n\t"
/* load the raw words and keep only bit 31 of each (0 or 1) */
393 "lw %[t0], 0(%[in_int]) \n\t"
394 "lw %[t1], 4(%[in_int]) \n\t"
395 "lw %[t2], 8(%[in_int]) \n\t"
396 "lw %[t3], 12(%[in_int]) \n\t"
397 "srl %[t0], %[t0], 31 \n\t"
398 "srl %[t1], %[t1], 31 \n\t"
399 "srl %[t2], %[t2], 31 \n\t"
400 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qcN; movn installs the negated value only where bit 31 was set */
401 "subu %[t4], $zero, %[qc1] \n\t"
402 "subu %[t5], $zero, %[qc2] \n\t"
403 "subu %[t6], $zero, %[qc3] \n\t"
404 "subu %[t7], $zero, %[qc4] \n\t"
405 "movn %[qc1], %[t4], %[t0] \n\t"
406 "movn %[qc2], %[t5], %[t1] \n\t"
407 "movn %[qc3], %[t6], %[t2] \n\t"
408 "movn %[qc4], %[t7], %[t3] \n\t"
412 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
413 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
414 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
415 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
416 : [in_int]
"r"(in_int)
/* both pair codewords are concatenated: first pair above, second pair below */
426 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
427 v_bits = p_bits[curidx] + p_bits[curidx2];
/* two consecutive table entries belong to one pair index */
432 vec1 = &p_vec[curidx*2 ];
433 vec2 = &p_vec[curidx2*2];
445 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * UPAIR7 variant of the quantize-and-encode routines. It occupies slots 7 and 8
 * of the cb-indexed dispatch table used by quantize_and_encode_band_cost().
 * Four values are handled per iteration as two pairs; magnitudes are clamped
 * to 7 and each pair's sign bits are appended below its codeword.
 */
452 static void quantize_and_encode_band_cost_UPAIR7_mips(
struct AACEncContext *s,
454 const float *scaled,
int size,
int scale_idx,
455 int cb,
const float lambda,
const float uplim,
456 int *bits,
float *energy,
const float ROUNDING)
461 int qc1, qc2, qc3, qc4;
462 float qenergy = 0.0f;
470 for (i = 0; i <
size; i += 4) {
471 int curidx1, curidx2, sign1, count1, sign2, count2;
/* view the storage of in[i..i+3] as ints so the asm can test the top bit */
472 int *in_int = (
int *)&in[i];
474 unsigned int v_codes;
476 const float *vec1, *vec2;
485 ".set noreorder \n\t"
/* t4 = 7 is the clamp limit; both sign accumulators start at 0 */
487 "ori %[t4], $zero, 7 \n\t"
488 "ori %[sign1], $zero, 0 \n\t"
489 "ori %[sign2], $zero, 0 \n\t"
/* tN = (7 < qcN); movn then replaces any qcN above 7 with 7 */
490 "slt %[t0], %[t4], %[qc1] \n\t"
491 "slt %[t1], %[t4], %[qc2] \n\t"
492 "slt %[t2], %[t4], %[qc3] \n\t"
493 "slt %[t3], %[t4], %[qc4] \n\t"
494 "movn %[qc1], %[t4], %[t0] \n\t"
495 "movn %[qc2], %[t4], %[t1] \n\t"
496 "movn %[qc3], %[t4], %[t2] \n\t"
497 "movn %[qc4], %[t4], %[t3] \n\t"
498 "lw %[t0], 0(%[in_int]) \n\t"
499 "lw %[t1], 4(%[in_int]) \n\t"
500 "lw %[t2], 8(%[in_int]) \n\t"
501 "lw %[t3], 12(%[in_int]) \n\t"
/*
 * (word < 0) turns each loaded word into its top bit. sign1 collects the
 * bits of values 1 and 2, sign2 those of values 3 and 4: for each
 * non-zero qcN the accumulator becomes (accumulator << 1) | bit.
 */
502 "slt %[t0], %[t0], $zero \n\t"
503 "movn %[sign1], %[t0], %[qc1] \n\t"
504 "slt %[t2], %[t2], $zero \n\t"
505 "movn %[sign2], %[t2], %[qc3] \n\t"
506 "slt %[t1], %[t1], $zero \n\t"
507 "sll %[t0], %[sign1], 1 \n\t"
508 "or %[t0], %[t0], %[t1] \n\t"
509 "movn %[sign1], %[t0], %[qc2] \n\t"
510 "slt %[t3], %[t3], $zero \n\t"
511 "sll %[t0], %[sign2], 1 \n\t"
512 "or %[t0], %[t0], %[t3] \n\t"
513 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2), count2 = (0 < qc3) + (0 < qc4) */
514 "slt %[count1], $zero, %[qc1] \n\t"
515 "slt %[t1], $zero, %[qc2] \n\t"
516 "slt %[count2], $zero, %[qc3] \n\t"
517 "slt %[t2], $zero, %[qc4] \n\t"
518 "addu %[count1], %[count1], %[t1] \n\t"
519 "addu %[count2], %[count2], %[t2] \n\t"
523 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
524 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
525 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
526 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
527 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
529 : [in_int]
"r"(in_int)
/*
 * NOTE(review): the template lines shown above only reference %[...] named
 * operands, yet the hard registers t0-t4 are listed as clobbers here.
 * Confirm whether this clobber list is still needed.
 */
530 :
"t0",
"t1",
"t2",
"t3",
"t4",
/* first pair: codeword shifted up by count1 with sign1 beneath it */
537 v_codes = (p_codes[curidx1] << count1) | sign1;
538 v_bits = p_bits[curidx1] + count1;
/* second pair: same layout with count2 / sign2 */
544 v_codes = (p_codes[curidx2] << count2) | sign2;
545 v_bits = p_bits[curidx2] + count2;
550 vec1 = &p_vec[curidx1*2];
551 vec2 = &p_vec[curidx2*2];
/* reconstructed magnitude * IQ takes the sign of the matching input */
552 e1 = copysignf(vec1[0] * IQ, in[i+0]);
553 e2 = copysignf(vec1[1] * IQ, in[i+1]);
554 e3 = copysignf(vec2[0] * IQ, in[i+2]);
555 e4 = copysignf(vec2[1] * IQ, in[i+3]);
563 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * UPAIR12 variant of the quantize-and-encode routines. It occupies slots 9 and
 * 10 of the cb-indexed dispatch table used by quantize_and_encode_band_cost().
 * Four values are handled per iteration as two pairs; magnitudes are clamped
 * to 12 and each pair's sign bits are appended below its codeword.
 */
570 static void quantize_and_encode_band_cost_UPAIR12_mips(
struct AACEncContext *s,
572 const float *scaled,
int size,
int scale_idx,
573 int cb,
const float lambda,
const float uplim,
574 int *bits,
float *energy,
const float ROUNDING)
579 int qc1, qc2, qc3, qc4;
580 float qenergy = 0.0f;
588 for (i = 0; i <
size; i += 4) {
589 int curidx1, curidx2, sign1, count1, sign2, count2;
/* view the storage of in[i..i+3] as ints so the asm can test the top bit */
590 int *in_int = (
int *)&in[i];
592 unsigned int v_codes;
594 const float *vec1, *vec2;
603 ".set noreorder \n\t"
/* t4 = 12 is the clamp limit; both sign accumulators start at 0 */
605 "ori %[t4], $zero, 12 \n\t"
606 "ori %[sign1], $zero, 0 \n\t"
607 "ori %[sign2], $zero, 0 \n\t"
/* tN = (12 < qcN); movn then replaces any qcN above 12 with 12 */
608 "slt %[t0], %[t4], %[qc1] \n\t"
609 "slt %[t1], %[t4], %[qc2] \n\t"
610 "slt %[t2], %[t4], %[qc3] \n\t"
611 "slt %[t3], %[t4], %[qc4] \n\t"
612 "movn %[qc1], %[t4], %[t0] \n\t"
613 "movn %[qc2], %[t4], %[t1] \n\t"
614 "movn %[qc3], %[t4], %[t2] \n\t"
615 "movn %[qc4], %[t4], %[t3] \n\t"
616 "lw %[t0], 0(%[in_int]) \n\t"
617 "lw %[t1], 4(%[in_int]) \n\t"
618 "lw %[t2], 8(%[in_int]) \n\t"
619 "lw %[t3], 12(%[in_int]) \n\t"
/*
 * (word < 0) turns each loaded word into its top bit. sign1 collects the
 * bits of values 1 and 2, sign2 those of values 3 and 4: for each
 * non-zero qcN the accumulator becomes (accumulator << 1) | bit.
 */
620 "slt %[t0], %[t0], $zero \n\t"
621 "movn %[sign1], %[t0], %[qc1] \n\t"
622 "slt %[t2], %[t2], $zero \n\t"
623 "movn %[sign2], %[t2], %[qc3] \n\t"
624 "slt %[t1], %[t1], $zero \n\t"
625 "sll %[t0], %[sign1], 1 \n\t"
626 "or %[t0], %[t0], %[t1] \n\t"
627 "movn %[sign1], %[t0], %[qc2] \n\t"
628 "slt %[t3], %[t3], $zero \n\t"
629 "sll %[t0], %[sign2], 1 \n\t"
630 "or %[t0], %[t0], %[t3] \n\t"
631 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2), count2 = (0 < qc3) + (0 < qc4) */
632 "slt %[count1], $zero, %[qc1] \n\t"
633 "slt %[t1], $zero, %[qc2] \n\t"
634 "slt %[count2], $zero, %[qc3] \n\t"
635 "slt %[t2], $zero, %[qc4] \n\t"
636 "addu %[count1], %[count1], %[t1] \n\t"
637 "addu %[count2], %[count2], %[t2] \n\t"
641 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
642 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
643 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
644 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
645 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
647 : [in_int]
"r"(in_int)
/* first pair: codeword shifted up by count1 with sign1 beneath it */
654 v_codes = (p_codes[curidx1] << count1) | sign1;
655 v_bits = p_bits[curidx1] + count1;
/* second pair: same layout with count2 / sign2 */
661 v_codes = (p_codes[curidx2] << count2) | sign2;
662 v_bits = p_bits[curidx2] + count2;
667 vec1 = &p_vec[curidx1*2];
668 vec2 = &p_vec[curidx2*2];
/* reconstructed magnitude * IQ takes the sign of the matching input */
669 e1 = copysignf(vec1[0] * IQ, in[i+0]);
670 e2 = copysignf(vec1[1] * IQ, in[i+1]);
671 e3 = copysignf(vec2[0] * IQ, in[i+2]);
672 e4 = copysignf(vec2[1] * IQ, in[i+3]);
680 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * ESC variant of the quantize-and-encode routines. It occupies slot 11 of the
 * cb-indexed dispatch table used by quantize_and_encode_band_cost().
 * The body contains two loops over the input, four values per iteration.
 * Both clamp the quantized values to 16 and build per-pair sign bits; the
 * second one additionally keeps range-limited copies c1..c4 of the quantized
 * values, which feed the extra code built when a table entry equals 64.0f
 * and the reconstructed values e1..e4.
 */
687 static void quantize_and_encode_band_cost_ESC_mips(
struct AACEncContext *s,
689 const float *scaled,
int size,
int scale_idx,
690 int cb,
const float lambda,
const float uplim,
691 int *bits,
float *energy,
const float ROUNDING)
696 int qc1, qc2, qc3, qc4;
697 float qenergy = 0.0f;
707 for (i = 0; i <
size; i += 4) {
708 int curidx, curidx2, sign1, count1, sign2, count2;
/* view the storage of in[i..i+3] as ints so the asm can test the top bit */
709 int *in_int = (
int *)&in[i];
711 unsigned int v_codes;
713 const float *vec1, *vec2;
/* float-to-int conversion of scaled * Q34 + ROUNDING truncates toward zero */
715 qc1 = scaled[i ] * Q34 + ROUNDING;
716 qc2 = scaled[i+1] * Q34 + ROUNDING;
717 qc3 = scaled[i+2] * Q34 + ROUNDING;
718 qc4 = scaled[i+3] * Q34 + ROUNDING;
722 ".set noreorder \n\t"
/* t4 = 16 is the clamp limit; both sign accumulators start at 0 */
724 "ori %[t4], $zero, 16 \n\t"
725 "ori %[sign1], $zero, 0 \n\t"
726 "ori %[sign2], $zero, 0 \n\t"
/* tN = (16 < qcN); movn then replaces any qcN above 16 with 16 */
727 "slt %[t0], %[t4], %[qc1] \n\t"
728 "slt %[t1], %[t4], %[qc2] \n\t"
729 "slt %[t2], %[t4], %[qc3] \n\t"
730 "slt %[t3], %[t4], %[qc4] \n\t"
731 "movn %[qc1], %[t4], %[t0] \n\t"
732 "movn %[qc2], %[t4], %[t1] \n\t"
733 "movn %[qc3], %[t4], %[t2] \n\t"
734 "movn %[qc4], %[t4], %[t3] \n\t"
735 "lw %[t0], 0(%[in_int]) \n\t"
736 "lw %[t1], 4(%[in_int]) \n\t"
737 "lw %[t2], 8(%[in_int]) \n\t"
738 "lw %[t3], 12(%[in_int]) \n\t"
/*
 * (word < 0) turns each loaded word into its top bit. sign1 collects the
 * bits of values 1 and 2, sign2 those of values 3 and 4: for each
 * non-zero qcN the accumulator becomes (accumulator << 1) | bit.
 */
739 "slt %[t0], %[t0], $zero \n\t"
740 "movn %[sign1], %[t0], %[qc1] \n\t"
741 "slt %[t2], %[t2], $zero \n\t"
742 "movn %[sign2], %[t2], %[qc3] \n\t"
743 "slt %[t1], %[t1], $zero \n\t"
744 "sll %[t0], %[sign1], 1 \n\t"
745 "or %[t0], %[t0], %[t1] \n\t"
746 "movn %[sign1], %[t0], %[qc2] \n\t"
747 "slt %[t3], %[t3], $zero \n\t"
748 "sll %[t0], %[sign2], 1 \n\t"
749 "or %[t0], %[t0], %[t3] \n\t"
750 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2), count2 = (0 < qc3) + (0 < qc4) */
751 "slt %[count1], $zero, %[qc1] \n\t"
752 "slt %[t1], $zero, %[qc2] \n\t"
753 "slt %[count2], $zero, %[qc3] \n\t"
754 "slt %[t2], $zero, %[qc4] \n\t"
755 "addu %[count1], %[count1], %[t1] \n\t"
756 "addu %[count2], %[count2], %[t2] \n\t"
760 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
761 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
762 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
763 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
764 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
766 : [in_int]
"r"(in_int)
/* first pair: codeword shifted up by count1 with sign1 beneath it */
775 v_codes = (p_codes[curidx] << count1) | sign1;
776 v_bits = p_bits[curidx] + count1;
/* second pair: same layout with count2 / sign2 */
779 v_codes = (p_codes[curidx2] << count2) | sign2;
780 v_bits = p_bits[curidx2] + count2;
785 vec1 = &p_vectors[curidx*2 ];
786 vec2 = &p_vectors[curidx2*2];
/* reconstructed magnitude * IQ takes the sign of the matching input */
787 e1 = copysignf(vec1[0] * IQ, in[i+0]);
788 e2 = copysignf(vec1[1] * IQ, in[i+1]);
789 e3 = copysignf(vec2[0] * IQ, in[i+2]);
790 e4 = copysignf(vec2[1] * IQ, in[i+3]);
798 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/* second loop: same quantization, plus c1..c4 for the 64.0f table entries */
802 for (i = 0; i <
size; i += 4) {
803 int curidx, curidx2, sign1, count1, sign2, count2;
804 int *in_int = (
int *)&in[i];
806 unsigned int v_codes;
810 qc1 = scaled[i ] * Q34 + ROUNDING;
811 qc2 = scaled[i+1] * Q34 + ROUNDING;
812 qc3 = scaled[i+2] * Q34 + ROUNDING;
813 qc4 = scaled[i+3] * Q34 + ROUNDING;
817 ".set noreorder \n\t"
819 "ori %[t4], $zero, 16 \n\t"
820 "ori %[sign1], $zero, 0 \n\t"
821 "ori %[sign2], $zero, 0 \n\t"
/*
 * cN = (qcN shifted left by 18 with saturation) shifted back right by 18
 * (logical); this is taken before qcN is clamped to 16 below.
 */
822 "shll_s.w %[c1], %[qc1], 18 \n\t"
823 "shll_s.w %[c2], %[qc2], 18 \n\t"
824 "shll_s.w %[c3], %[qc3], 18 \n\t"
825 "shll_s.w %[c4], %[qc4], 18 \n\t"
826 "srl %[c1], %[c1], 18 \n\t"
827 "srl %[c2], %[c2], 18 \n\t"
828 "srl %[c3], %[c3], 18 \n\t"
829 "srl %[c4], %[c4], 18 \n\t"
/* tN = (16 < qcN); movn then replaces any qcN above 16 with 16 */
830 "slt %[t0], %[t4], %[qc1] \n\t"
831 "slt %[t1], %[t4], %[qc2] \n\t"
832 "slt %[t2], %[t4], %[qc3] \n\t"
833 "slt %[t3], %[t4], %[qc4] \n\t"
834 "movn %[qc1], %[t4], %[t0] \n\t"
835 "movn %[qc2], %[t4], %[t1] \n\t"
836 "movn %[qc3], %[t4], %[t2] \n\t"
837 "movn %[qc4], %[t4], %[t3] \n\t"
838 "lw %[t0], 0(%[in_int]) \n\t"
839 "lw %[t1], 4(%[in_int]) \n\t"
840 "lw %[t2], 8(%[in_int]) \n\t"
841 "lw %[t3], 12(%[in_int]) \n\t"
/* sign1 / sign2 accumulation, identical to the first loop */
842 "slt %[t0], %[t0], $zero \n\t"
843 "movn %[sign1], %[t0], %[qc1] \n\t"
844 "slt %[t2], %[t2], $zero \n\t"
845 "movn %[sign2], %[t2], %[qc3] \n\t"
846 "slt %[t1], %[t1], $zero \n\t"
847 "sll %[t0], %[sign1], 1 \n\t"
848 "or %[t0], %[t0], %[t1] \n\t"
849 "movn %[sign1], %[t0], %[qc2] \n\t"
850 "slt %[t3], %[t3], $zero \n\t"
851 "sll %[t0], %[sign2], 1 \n\t"
852 "or %[t0], %[t0], %[t3] \n\t"
853 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2), count2 = (0 < qc3) + (0 < qc4) */
854 "slt %[count1], $zero, %[qc1] \n\t"
855 "slt %[t1], $zero, %[qc2] \n\t"
856 "slt %[count2], $zero, %[qc3] \n\t"
857 "slt %[t2], $zero, %[qc4] \n\t"
858 "addu %[count1], %[count1], %[t1] \n\t"
859 "addu %[count2], %[count2], %[t2] \n\t"
863 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
864 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
865 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
866 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
867 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
868 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
869 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
871 : [in_int]
"r"(in_int)
881 v_codes = (p_codes[curidx] << count1) | sign1;
882 v_bits = p_bits[curidx] + count1;
/*
 * A table entry equal to 64.0f triggers an extra code for that value:
 * the pattern ((1 << (len - 3)) - 2) placed above the low len bits of cN.
 */
885 if (p_vectors[curidx*2 ] == 64.0f) {
887 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 <<
len) - 1));
890 if (p_vectors[curidx*2+1] == 64.0f) {
892 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 <<
len) - 1));
896 v_codes = (p_codes[curidx2] << count2) | sign2;
897 v_bits = p_bits[curidx2] + count2;
900 if (p_vectors[curidx2*2 ] == 64.0f) {
902 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 <<
len) - 1));
905 if (p_vectors[curidx2*2+1] == 64.0f) {
907 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 <<
len) - 1));
912 float e1, e2, e3, e4;
/* reconstruction uses cN * cbrtf(cN) (i.e. cN^(4/3)) scaled by IQ, signed like the input */
913 e1 = copysignf(c1 *
cbrtf(c1) * IQ, in[i+0]);
914 e2 = copysignf(c2 *
cbrtf(c2) * IQ, in[i+1]);
915 e3 = copysignf(c3 *
cbrtf(c3) * IQ, in[i+2]);
916 e4 = copysignf(c4 *
cbrtf(c4) * IQ, in[i+3]);
924 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * NONE entry of the quantize-and-encode family; it fills slot 12 of the
 * cb-indexed dispatch table used by quantize_and_encode_band_cost() and
 * shares the parameter list of the other variants.
 */
932 static void quantize_and_encode_band_cost_NONE_mips(
struct AACEncContext *s,
934 const float *scaled,
int size,
int scale_idx,
935 int cb,
const float lambda,
const float uplim,
936 int *bits,
float *energy,
const float ROUNDING) {
/*
 * ZERO entry of the quantize-and-encode family; it fills slots 0, 13, 14 and
 * 15 of the cb-indexed dispatch table used by quantize_and_encode_band_cost().
 * Its loop walks the band four values at a time.
 */
940 static void quantize_and_encode_band_cost_ZERO_mips(
struct AACEncContext *s,
942 const float *scaled,
int size,
int scale_idx,
943 int cb,
const float lambda,
const float uplim,
944 int *bits,
float *energy,
const float ROUNDING) {
949 for (i = 0; i <
size; i += 4) {
962 const float *scaled,
int size,
int scale_idx,
963 int cb,
const float lambda,
const float uplim,
964 int *bits,
float *energy,
const float ROUNDING) = {
/*
 * Function-pointer table, one entry per index 0..15 in the order listed.
 * NOTE(review): presumably this is quantize_and_encode_band_cost_arr, which
 * the quantize_and_encode_band_cost() macro indexes with cb — confirm against
 * the declarator.
 */
965 quantize_and_encode_band_cost_ZERO_mips,
966 quantize_and_encode_band_cost_SQUAD_mips,
967 quantize_and_encode_band_cost_SQUAD_mips,
968 quantize_and_encode_band_cost_UQUAD_mips,
969 quantize_and_encode_band_cost_UQUAD_mips,
970 quantize_and_encode_band_cost_SPAIR_mips,
971 quantize_and_encode_band_cost_SPAIR_mips,
972 quantize_and_encode_band_cost_UPAIR7_mips,
973 quantize_and_encode_band_cost_UPAIR7_mips,
974 quantize_and_encode_band_cost_UPAIR12_mips,
975 quantize_and_encode_band_cost_UPAIR12_mips,
976 quantize_and_encode_band_cost_ESC_mips,
977 quantize_and_encode_band_cost_NONE_mips,
978 quantize_and_encode_band_cost_ZERO_mips,
979 quantize_and_encode_band_cost_ZERO_mips,
980 quantize_and_encode_band_cost_ZERO_mips,
/*
 * Dispatch helper: expands to a call through
 * quantize_and_encode_band_cost_arr[cb] with every argument forwarded
 * unchanged. cb appears twice in the expansion (as the table index and as an
 * argument), so an argument expression with side effects is evaluated twice.
 * The index is not range-checked here.
 */
983 #define quantize_and_encode_band_cost( \
984 s, pb, in, out, scaled, size, scale_idx, cb, \
985 lambda, uplim, bits, energy, ROUNDING) \
986 quantize_and_encode_band_cost_arr[cb]( \
987 s, pb, in, out, scaled, size, scale_idx, cb, \
988 lambda, uplim, bits, energy, ROUNDING)
991 const float *in,
float *out,
int size,
int scale_idx,
992 int cb,
const float lambda,
int rtz)
/*
 * ZERO entry of the bit-count family; it fills slots 0, 13, 14 and 15 of
 * get_band_numbits_arr. Returns a float like the other variants.
 */
1001 static float get_band_numbits_ZERO_mips(
struct AACEncContext *s,
1003 const float *scaled,
int size,
int scale_idx,
1004 int cb,
const float lambda,
const float uplim,
/*
 * NONE entry of the bit-count family; it fills slot 12 of
 * get_band_numbits_arr. Returns a float like the other variants.
 */
1010 static float get_band_numbits_NONE_mips(
struct AACEncContext *s,
1012 const float *scaled,
int size,
int scale_idx,
1013 int cb,
const float lambda,
const float uplim,
/*
 * SQUAD entry of the bit-count family; it fills slots 1 and 2 of
 * get_band_numbits_arr. For each group of four values it derives the table
 * index from the quantized values and adds p_bits[curidx] to curbits.
 */
1020 static float get_band_numbits_SQUAD_mips(
struct AACEncContext *s,
1022 const float *scaled,
int size,
int scale_idx,
1023 int cb,
const float lambda,
const float uplim,
1028 int qc1, qc2, qc3, qc4;
1033 for (i = 0; i <
size; i += 4) {
/* view the storage of in[i..i+3] as ints so the asm can test the top bit */
1035 int *in_int = (
int *)&in[i];
1045 ".set noreorder \n\t"
/* qcN = (0 < qcN): each quantized value collapses to 0 or 1 */
1047 "slt %[qc1], $zero, %[qc1] \n\t"
1048 "slt %[qc2], $zero, %[qc2] \n\t"
1049 "slt %[qc3], $zero, %[qc3] \n\t"
1050 "slt %[qc4], $zero, %[qc4] \n\t"
/* load the raw words and keep only bit 31 of each (0 or 1) */
1051 "lw %[t0], 0(%[in_int]) \n\t"
1052 "lw %[t1], 4(%[in_int]) \n\t"
1053 "lw %[t2], 8(%[in_int]) \n\t"
1054 "lw %[t3], 12(%[in_int]) \n\t"
1055 "srl %[t0], %[t0], 31 \n\t"
1056 "srl %[t1], %[t1], 31 \n\t"
1057 "srl %[t2], %[t2], 31 \n\t"
1058 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qcN; movn installs the negated value only where bit 31 was set */
1059 "subu %[t4], $zero, %[qc1] \n\t"
1060 "subu %[t5], $zero, %[qc2] \n\t"
1061 "subu %[t6], $zero, %[qc3] \n\t"
1062 "subu %[t7], $zero, %[qc4] \n\t"
1063 "movn %[qc1], %[t4], %[t0] \n\t"
1064 "movn %[qc2], %[t5], %[t1] \n\t"
1065 "movn %[qc3], %[t6], %[t2] \n\t"
1066 "movn %[qc4], %[t7], %[t3] \n\t"
1070 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1071 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1072 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1073 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1074 : [in_int]
"r"(in_int)
1087 curbits += p_bits[curidx];
/*
 * UQUAD entry of the bit-count family; it fills slots 3 and 4 of
 * get_band_numbits_arr. Quantized values are clamped to 2, and each group of
 * four contributes p_bits[curidx] + uquad_sign_bits[curidx] to curbits.
 */
1092 static float get_band_numbits_UQUAD_mips(
struct AACEncContext *s,
1094 const float *scaled,
int size,
int scale_idx,
1095 int cb,
const float lambda,
const float uplim,
1101 int qc1, qc2, qc3, qc4;
1105 for (i = 0; i <
size; i += 4) {
1116 ".set noreorder \n\t"
/* t4 = 2 is the clamp limit: tN = (2 < qcN), movn replaces such qcN with 2 */
1118 "ori %[t4], $zero, 2 \n\t"
1119 "slt %[t0], %[t4], %[qc1] \n\t"
1120 "slt %[t1], %[t4], %[qc2] \n\t"
1121 "slt %[t2], %[t4], %[qc3] \n\t"
1122 "slt %[t3], %[t4], %[qc4] \n\t"
1123 "movn %[qc1], %[t4], %[t0] \n\t"
1124 "movn %[qc2], %[t4], %[t1] \n\t"
1125 "movn %[qc3], %[t4], %[t2] \n\t"
1126 "movn %[qc4], %[t4], %[t3] \n\t"
1130 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1131 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1132 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
/* codeword length plus the table-provided extra bits for this index */
1144 curbits += p_bits[curidx];
1145 curbits += uquad_sign_bits[curidx];
/*
 * SPAIR entry of the bit-count family; it fills slots 5 and 6 of
 * get_band_numbits_arr. Quantized values are clamped to 4 and given the
 * input's sign; each group of four contributes the code lengths of its two
 * pair indices (curidx, curidx2) to curbits.
 */
1150 static float get_band_numbits_SPAIR_mips(
struct AACEncContext *s,
1152 const float *scaled,
int size,
int scale_idx,
1153 int cb,
const float lambda,
const float uplim,
1158 int qc1, qc2, qc3, qc4;
1163 for (i = 0; i <
size; i += 4) {
1164 int curidx, curidx2;
/* view the storage of in[i..i+3] as ints so the asm can test the top bit */
1165 int *in_int = (
int *)&in[i];
1175 ".set noreorder \n\t"
/* t4 = 4 is the clamp limit: tN = (4 < qcN), movn replaces such qcN with 4 */
1177 "ori %[t4], $zero, 4 \n\t"
1178 "slt %[t0], %[t4], %[qc1] \n\t"
1179 "slt %[t1], %[t4], %[qc2] \n\t"
1180 "slt %[t2], %[t4], %[qc3] \n\t"
1181 "slt %[t3], %[t4], %[qc4] \n\t"
1182 "movn %[qc1], %[t4], %[t0] \n\t"
1183 "movn %[qc2], %[t4], %[t1] \n\t"
1184 "movn %[qc3], %[t4], %[t2] \n\t"
1185 "movn %[qc4], %[t4], %[t3] \n\t"
/* load the raw words and keep only bit 31 of each (0 or 1) */
1186 "lw %[t0], 0(%[in_int]) \n\t"
1187 "lw %[t1], 4(%[in_int]) \n\t"
1188 "lw %[t2], 8(%[in_int]) \n\t"
1189 "lw %[t3], 12(%[in_int]) \n\t"
1190 "srl %[t0], %[t0], 31 \n\t"
1191 "srl %[t1], %[t1], 31 \n\t"
1192 "srl %[t2], %[t2], 31 \n\t"
1193 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qcN; movn installs the negated value only where bit 31 was set */
1194 "subu %[t4], $zero, %[qc1] \n\t"
1195 "subu %[t5], $zero, %[qc2] \n\t"
1196 "subu %[t6], $zero, %[qc3] \n\t"
1197 "subu %[t7], $zero, %[qc4] \n\t"
1198 "movn %[qc1], %[t4], %[t0] \n\t"
1199 "movn %[qc2], %[t5], %[t1] \n\t"
1200 "movn %[qc3], %[t6], %[t2] \n\t"
1201 "movn %[qc4], %[t7], %[t3] \n\t"
1205 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1206 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1207 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1208 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1209 : [in_int]
"r"(in_int)
/* NOTE(review): 40 looks like the bias that maps signed values in [-4, 4] onto a 9 x 9 table (4*9 + 4) — confirm */
1217 curidx2 += qc4 + 40;
1219 curbits += p_bits[curidx] + p_bits[curidx2];
/*
 * UPAIR7 entry of the bit-count family; it fills slots 7 and 8 of
 * get_band_numbits_arr. Quantized values are clamped to 7; the per-group bit
 * total includes p_bits[] and upair7_sign_bits[] lookups for the pair indices.
 */
1224 static float get_band_numbits_UPAIR7_mips(
struct AACEncContext *s,
1226 const float *scaled,
int size,
int scale_idx,
1227 int cb,
const float lambda,
const float uplim,
1232 int qc1, qc2, qc3, qc4;
1237 for (i = 0; i <
size; i += 4) {
1238 int curidx, curidx2;
1248 ".set noreorder \n\t"
/* t4 = 7 is the clamp limit: tN = (7 < qcN), movn replaces such qcN with 7 */
1250 "ori %[t4], $zero, 7 \n\t"
1251 "slt %[t0], %[t4], %[qc1] \n\t"
1252 "slt %[t1], %[t4], %[qc2] \n\t"
1253 "slt %[t2], %[t4], %[qc3] \n\t"
1254 "slt %[t3], %[t4], %[qc4] \n\t"
1255 "movn %[qc1], %[t4], %[t0] \n\t"
1256 "movn %[qc2], %[t4], %[t1] \n\t"
1257 "movn %[qc3], %[t4], %[t2] \n\t"
1258 "movn %[qc4], %[t4], %[t3] \n\t"
1262 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1263 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1264 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1274 curbits += p_bits[curidx] +
1275 upair7_sign_bits[curidx] +
1277 upair7_sign_bits[curidx2];
/*
 * UPAIR12 entry of the bit-count family; it fills slots 9 and 10 of
 * get_band_numbits_arr. Quantized values are clamped to 12; the per-group bit
 * total includes p_bits[] and upair12_sign_bits[] lookups for the pair indices.
 */
1282 static float get_band_numbits_UPAIR12_mips(
struct AACEncContext *s,
1284 const float *scaled,
int size,
int scale_idx,
1285 int cb,
const float lambda,
const float uplim,
1290 int qc1, qc2, qc3, qc4;
1295 for (i = 0; i <
size; i += 4) {
1296 int curidx, curidx2;
1306 ".set noreorder \n\t"
/* t4 = 12 is the clamp limit: tN = (12 < qcN), movn replaces such qcN with 12 */
1308 "ori %[t4], $zero, 12 \n\t"
1309 "slt %[t0], %[t4], %[qc1] \n\t"
1310 "slt %[t1], %[t4], %[qc2] \n\t"
1311 "slt %[t2], %[t4], %[qc3] \n\t"
1312 "slt %[t3], %[t4], %[qc4] \n\t"
1313 "movn %[qc1], %[t4], %[t0] \n\t"
1314 "movn %[qc2], %[t4], %[t1] \n\t"
1315 "movn %[qc3], %[t4], %[t2] \n\t"
1316 "movn %[qc4], %[t4], %[t3] \n\t"
1320 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1321 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1322 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1332 curbits += p_bits[curidx] +
1334 upair12_sign_bits[curidx] +
1335 upair12_sign_bits[curidx2];
/*
 * ESC entry of the bit-count family; it fills slot 11 of get_band_numbits_arr.
 * Values above 15 are replaced by 16 for the table lookup, and for those
 * values c1..c4 receive 2 * floor(log2(value)) - 3 (0 otherwise). Each group
 * of four adds p_bits[] and esc_sign_bits[] for both pair indices to curbits.
 */
1340 static float get_band_numbits_ESC_mips(
struct AACEncContext *s,
1342 const float *scaled,
int size,
int scale_idx,
1343 int cb,
const float lambda,
const float uplim,
1348 int qc1, qc2, qc3, qc4;
1353 for (i = 0; i <
size; i += 4) {
1354 int curidx, curidx2;
1355 int cond0, cond1, cond2, cond3;
1366 ".set noreorder \n\t"
/* t4 = 15 is the comparison limit, t5 = 16 the replacement value */
1368 "ori %[t4], $zero, 15 \n\t"
1369 "ori %[t5], $zero, 16 \n\t"
/* cN = (qcN shifted left by 18 with saturation) shifted back right by 18 (logical) */
1370 "shll_s.w %[c1], %[qc1], 18 \n\t"
1371 "shll_s.w %[c2], %[qc2], 18 \n\t"
1372 "shll_s.w %[c3], %[qc3], 18 \n\t"
1373 "shll_s.w %[c4], %[qc4], 18 \n\t"
1374 "srl %[c1], %[c1], 18 \n\t"
1375 "srl %[c2], %[c2], 18 \n\t"
1376 "srl %[c3], %[c3], 18 \n\t"
1377 "srl %[c4], %[c4], 18 \n\t"
/* condN = (15 < qcN); where it is set, qcN is replaced by 16 */
1378 "slt %[cond0], %[t4], %[qc1] \n\t"
1379 "slt %[cond1], %[t4], %[qc2] \n\t"
1380 "slt %[cond2], %[t4], %[qc3] \n\t"
1381 "slt %[cond3], %[t4], %[qc4] \n\t"
1382 "movn %[qc1], %[t5], %[cond0] \n\t"
1383 "movn %[qc2], %[t5], %[cond1] \n\t"
1384 "movn %[qc3], %[t5], %[cond2] \n\t"
1385 "movn %[qc4], %[t5], %[cond3] \n\t"
/* cN = 2 * (31 - clz(cN)) - 3 */
1386 "ori %[t5], $zero, 31 \n\t"
1387 "clz %[c1], %[c1] \n\t"
1388 "clz %[c2], %[c2] \n\t"
1389 "clz %[c3], %[c3] \n\t"
1390 "clz %[c4], %[c4] \n\t"
1391 "subu %[c1], %[t5], %[c1] \n\t"
1392 "subu %[c2], %[t5], %[c2] \n\t"
1393 "subu %[c3], %[t5], %[c3] \n\t"
1394 "subu %[c4], %[t5], %[c4] \n\t"
1395 "sll %[c1], %[c1], 1 \n\t"
1396 "sll %[c2], %[c2], 1 \n\t"
1397 "sll %[c3], %[c3], 1 \n\t"
1398 "sll %[c4], %[c4], 1 \n\t"
1399 "addiu %[c1], %[c1], -3 \n\t"
1400 "addiu %[c2], %[c2], -3 \n\t"
1401 "addiu %[c3], %[c3], -3 \n\t"
1402 "addiu %[c4], %[c4], -3 \n\t"
/* condN = -condN turns 1 into an all-ones mask, so cN survives only where condN was set */
1403 "subu %[cond0], $zero, %[cond0] \n\t"
1404 "subu %[cond1], $zero, %[cond1] \n\t"
1405 "subu %[cond2], $zero, %[cond2] \n\t"
1406 "subu %[cond3], $zero, %[cond3] \n\t"
1407 "and %[c1], %[c1], %[cond0] \n\t"
1408 "and %[c2], %[c2], %[cond1] \n\t"
1409 "and %[c3], %[c3], %[cond2] \n\t"
1410 "and %[c4], %[c4], %[cond3] \n\t"
1414 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1415 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1416 [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
1417 [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
1418 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
1419 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
1420 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5)
/* codeword length plus the table-provided extra bits, for each of the two pairs */
1429 curbits += p_bits[curidx];
1430 curbits += esc_sign_bits[curidx];
1431 curbits += p_bits[curidx2];
1432 curbits += esc_sign_bits[curidx2];
/*
 * Table of bit-count routines, one entry per index 0..15 in the order listed;
 * the get_band_numbits() macro indexes it with cb.
 */
1442 static float (*
const get_band_numbits_arr[])(
struct AACEncContext *
s,
1444 const float *scaled,
int size,
int scale_idx,
1445 int cb,
const float lambda,
const float uplim,
1447 get_band_numbits_ZERO_mips,
1448 get_band_numbits_SQUAD_mips,
1449 get_band_numbits_SQUAD_mips,
1450 get_band_numbits_UQUAD_mips,
1451 get_band_numbits_UQUAD_mips,
1452 get_band_numbits_SPAIR_mips,
1453 get_band_numbits_SPAIR_mips,
1454 get_band_numbits_UPAIR7_mips,
1455 get_band_numbits_UPAIR7_mips,
1456 get_band_numbits_UPAIR12_mips,
1457 get_band_numbits_UPAIR12_mips,
1458 get_band_numbits_ESC_mips,
1459 get_band_numbits_NONE_mips,
1460 get_band_numbits_ZERO_mips,
1461 get_band_numbits_ZERO_mips,
1462 get_band_numbits_ZERO_mips,
/*
 * Dispatch helper: expands to a call through get_band_numbits_arr[cb] with
 * every argument forwarded unchanged. cb appears twice in the expansion (as
 * the table index and as an argument), so an argument expression with side
 * effects is evaluated twice. The index is not range-checked here.
 */
1465 #define get_band_numbits( \
1466 s, pb, in, scaled, size, scale_idx, cb, \
1467 lambda, uplim, bits) \
1468 get_band_numbits_arr[cb]( \
1469 s, pb, in, scaled, size, scale_idx, cb, \
1470 lambda, uplim, bits)
1473 const float *scaled,
int size,
int scale_idx,
1474 int cb,
const float lambda,
const float uplim,
1475 int *bits,
float *energy,
int rtz)
1477 return get_band_numbits(s,
NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
/*
 * ZERO entry of the band-cost family: accumulates the sum of squares of the
 * input values into cost, four values per loop iteration.
 */
1484 static float get_band_cost_ZERO_mips(
struct AACEncContext *s,
1486 const float *scaled,
int size,
int scale_idx,
1487 int cb,
const float lambda,
const float uplim,
1488 int *bits,
float *energy)
1493 for (i = 0; i <
size; i += 4) {
/* manually unrolled by four to match the i += 4 stride */
1494 cost += in[i ] * in[i ];
1495 cost += in[i+1] * in[i+1];
1496 cost += in[i+2] * in[i+2];
1497 cost += in[i+3] * in[i+3];
/*
 * NONE entry of the band-cost family; shares the parameter list and float
 * return type of the other get_band_cost_*_mips variants.
 */
1506 static float get_band_cost_NONE_mips(
struct AACEncContext *s,
1508 const float *scaled,
int size,
int scale_idx,
1509 int cb,
const float lambda,
const float uplim,
1510 int *bits,
float *energy)
/*
 * SQUAD entry of the band-cost family. For each group of four values it
 * quantizes to {-1, 0, 1}, adds p_bits[curidx] to curbits, and accumulates
 *   cost    += sum over k of (in[k] - vec[k] * IQ)^2
 *   qenergy += sum over k of vec[k]^2
 * It stores qenergy * IQ^2 through energy and returns cost * lambda + curbits.
 */
1516 static float get_band_cost_SQUAD_mips(
struct AACEncContext *s,
1518 const float *scaled,
int size,
int scale_idx,
1519 int cb,
const float lambda,
const float uplim,
1520 int *bits,
float *energy)
1526 float qenergy = 0.0f;
1527 int qc1, qc2, qc3, qc4;
1533 for (i = 0; i <
size; i += 4) {
/* the same four inputs are viewed as ints (sign-bit tests) and as floats (distortion) */
1536 int *in_int = (
int *)&in[i];
1537 float *in_pos = (
float *)&in[i];
1538 float di0, di1, di2, di3;
1548 ".set noreorder \n\t"
/* qcN = (0 < qcN): each quantized value collapses to 0 or 1 */
1550 "slt %[qc1], $zero, %[qc1] \n\t"
1551 "slt %[qc2], $zero, %[qc2] \n\t"
1552 "slt %[qc3], $zero, %[qc3] \n\t"
1553 "slt %[qc4], $zero, %[qc4] \n\t"
/* load the raw words and keep only bit 31 of each (0 or 1) */
1554 "lw %[t0], 0(%[in_int]) \n\t"
1555 "lw %[t1], 4(%[in_int]) \n\t"
1556 "lw %[t2], 8(%[in_int]) \n\t"
1557 "lw %[t3], 12(%[in_int]) \n\t"
1558 "srl %[t0], %[t0], 31 \n\t"
1559 "srl %[t1], %[t1], 31 \n\t"
1560 "srl %[t2], %[t2], 31 \n\t"
1561 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qcN; movn installs the negated value only where bit 31 was set */
1562 "subu %[t4], $zero, %[qc1] \n\t"
1563 "subu %[t5], $zero, %[qc2] \n\t"
1564 "subu %[t6], $zero, %[qc3] \n\t"
1565 "subu %[t7], $zero, %[qc4] \n\t"
1566 "movn %[qc1], %[t4], %[t0] \n\t"
1567 "movn %[qc2], %[t5], %[t1] \n\t"
1568 "movn %[qc3], %[t6], %[t2] \n\t"
1569 "movn %[qc4], %[t7], %[t3] \n\t"
1573 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1574 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1575 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1576 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1577 : [in_int]
"r"(in_int)
1590 curbits += p_bits[curidx];
/*
 * NOTE(review): vec is taken from p_codes here and then read as floats,
 * whereas the encode routines take it from p_vec — presumably p_codes names
 * the vector table in this function; confirm against its declaration.
 */
1591 vec = &p_codes[curidx*4];
1593 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1594 + vec[2]*vec[2] + vec[3]*vec[3];
1598 ".set noreorder \n\t"
/* $f0/$f2/$f4/$f6 = in_pos[0..3], $f1/$f3/$f5/$f7 = vec[0..3] */
1600 "lwc1 $f0, 0(%[in_pos]) \n\t"
1601 "lwc1 $f1, 0(%[vec]) \n\t"
1602 "lwc1 $f2, 4(%[in_pos]) \n\t"
1603 "lwc1 $f3, 4(%[vec]) \n\t"
1604 "lwc1 $f4, 8(%[in_pos]) \n\t"
1605 "lwc1 $f5, 8(%[vec]) \n\t"
1606 "lwc1 $f6, 12(%[in_pos]) \n\t"
1607 "lwc1 $f7, 12(%[vec]) \n\t"
/* nmsub.s: diK = in_pos[K] - vec[K] * IQ */
1608 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1609 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1610 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1611 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1615 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1616 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1617 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
/* the template names $f0-$f7 directly, so they are declared as clobbered */
1619 :
"$f0",
"$f1",
"$f2",
"$f3",
1620 "$f4",
"$f5",
"$f6",
"$f7",
1624 cost += di0 * di0 + di1 * di1
1625 + di2 * di2 + di3 * di3;
/* qenergy was summed on unscaled table values, hence the IQ*IQ factor */
1631 *energy = qenergy * (IQ*IQ);
1632 return cost * lambda + curbits;
/*
 * UQUAD entry of the band-cost family. For each group of four values it clamps
 * the quantized values to 2, adds p_bits[curidx] + uquad_sign_bits[curidx] to
 * curbits, and accumulates
 *   cost    += sum over k of (|in[k]| - vec[k] * IQ)^2
 *   qenergy += sum over k of vec[k]^2
 * It stores qenergy * IQ^2 through energy and returns cost * lambda + curbits.
 */
1635 static float get_band_cost_UQUAD_mips(
struct AACEncContext *s,
1637 const float *scaled,
int size,
int scale_idx,
1638 int cb,
const float lambda,
const float uplim,
1639 int *bits,
float *energy)
1645 float qenergy = 0.0f;
1647 int qc1, qc2, qc3, qc4;
1652 for (i = 0; i <
size; i += 4) {
1655 float *in_pos = (
float *)&in[i];
1656 float di0, di1, di2, di3;
1666 ".set noreorder \n\t"
/* t4 = 2 is the clamp limit: tN = (2 < qcN), movn replaces such qcN with 2 */
1668 "ori %[t4], $zero, 2 \n\t"
1669 "slt %[t0], %[t4], %[qc1] \n\t"
1670 "slt %[t1], %[t4], %[qc2] \n\t"
1671 "slt %[t2], %[t4], %[qc3] \n\t"
1672 "slt %[t3], %[t4], %[qc4] \n\t"
1673 "movn %[qc1], %[t4], %[t0] \n\t"
1674 "movn %[qc2], %[t4], %[t1] \n\t"
1675 "movn %[qc3], %[t4], %[t2] \n\t"
1676 "movn %[qc4], %[t4], %[t3] \n\t"
1680 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1681 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1682 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1694 curbits += p_bits[curidx];
1695 curbits += uquad_sign_bits[curidx];
/*
 * NOTE(review): vec is taken from p_codes here and then read as floats,
 * whereas the encode routines take it from p_vec — presumably p_codes names
 * the vector table in this function; confirm against its declaration.
 */
1696 vec = &p_codes[curidx*4];
1698 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1699 + vec[2]*vec[2] + vec[3]*vec[3];
1703 ".set noreorder \n\t"
/* diK = |in_pos[K]| */
1705 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1706 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1707 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1708 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1709 "abs.s %[di0], %[di0] \n\t"
1710 "abs.s %[di1], %[di1] \n\t"
1711 "abs.s %[di2], %[di2] \n\t"
1712 "abs.s %[di3], %[di3] \n\t"
/* $f0..$f3 = vec[0..3]; nmsub.s then gives diK = diK - vec[K] * IQ */
1713 "lwc1 $f0, 0(%[vec]) \n\t"
1714 "lwc1 $f1, 4(%[vec]) \n\t"
1715 "lwc1 $f2, 8(%[vec]) \n\t"
1716 "lwc1 $f3, 12(%[vec]) \n\t"
1717 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1718 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1719 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1720 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1724 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1725 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1726 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
/* the template names $f0-$f3 directly, so they are declared as clobbered */
1728 :
"$f0",
"$f1",
"$f2",
"$f3",
1732 cost += di0 * di0 + di1 * di1
1733 + di2 * di2 + di3 * di3;
/* qenergy was summed on unscaled table values, hence the IQ*IQ factor */
1739 *energy = qenergy * (IQ*IQ);
1740 return cost * lambda + curbits;
/*
 * Rate/distortion cost of one band using signed pairs: the quantized values
 * receive the sign of the corresponding input, and each loop iteration
 * handles four coefficients as two 2-entry codebook vectors (vec, vec2).
 *
 * Visible contract: stores qenergy * IQ * IQ in *energy and returns
 * cost * lambda + curbits.
 *
 * NOTE(review): this listing omits several lines of the definition (part of
 * the parameter list, the opening brace, the declarations of IQ, p_bits,
 * p_codes, curbits and cost, the quantization that produces qc1..qc4 and most
 * of the curidx/curidx2 computation). Consult the complete file before
 * changing anything.
 */
1743 static float get_band_cost_SPAIR_mips(
struct AACEncContext *s,
1745 const float *scaled,
int size,
int scale_idx,
1746 int cb,
const float lambda,
const float uplim,
1747 int *bits,
float *energy)
1753 float qenergy = 0.0f;
1754 int qc1, qc2, qc3, qc4;
/* Four input coefficients (two pairs) are consumed per iteration. */
1760 for (i = 0; i <
size; i += 4) {
1761 const float *vec, *vec2;
1762 int curidx, curidx2;
/* Same four inputs viewed as raw 32-bit words (in_int) and as floats (in_pos). */
1763 int *in_int = (
int *)&in[i];
1764 float *in_pos = (
float *)&in[i];
1765 float di0, di1, di2, di3;
/*
 * Step 1: clamp qc1..qc4 to at most 4 (slt flags 4 < qcN, movn writes the
 * limit held in t4).
 */
1775 ".set noreorder \n\t"
1777 "ori %[t4], $zero, 4 \n\t"
1778 "slt %[t0], %[t4], %[qc1] \n\t"
1779 "slt %[t1], %[t4], %[qc2] \n\t"
1780 "slt %[t2], %[t4], %[qc3] \n\t"
1781 "slt %[t3], %[t4], %[qc4] \n\t"
1782 "movn %[qc1], %[t4], %[t0] \n\t"
1783 "movn %[qc2], %[t4], %[t1] \n\t"
1784 "movn %[qc3], %[t4], %[t2] \n\t"
1785 "movn %[qc4], %[t4], %[t3] \n\t"
/*
 * Step 2: give each qcN the sign of its input. The raw input words are
 * loaded, srl by 31 leaves only the sign bit, subu forms -qcN (t4 is
 * reused as scratch here), and movn selects the negated value when the
 * sign bit is set.
 */
1786 "lw %[t0], 0(%[in_int]) \n\t"
1787 "lw %[t1], 4(%[in_int]) \n\t"
1788 "lw %[t2], 8(%[in_int]) \n\t"
1789 "lw %[t3], 12(%[in_int]) \n\t"
1790 "srl %[t0], %[t0], 31 \n\t"
1791 "srl %[t1], %[t1], 31 \n\t"
1792 "srl %[t2], %[t2], 31 \n\t"
1793 "srl %[t3], %[t3], 31 \n\t"
1794 "subu %[t4], $zero, %[qc1] \n\t"
1795 "subu %[t5], $zero, %[qc2] \n\t"
1796 "subu %[t6], $zero, %[qc3] \n\t"
1797 "subu %[t7], $zero, %[qc4] \n\t"
1798 "movn %[qc1], %[t4], %[t0] \n\t"
1799 "movn %[qc2], %[t5], %[t1] \n\t"
1800 "movn %[qc3], %[t6], %[t2] \n\t"
1801 "movn %[qc4], %[t7], %[t3] \n\t"
/* qcN are read-write operands; t0..t7 are early-clobber scratch outputs. */
1805 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1806 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1807 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1808 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1809 : [in_int]
"r"(in_int)
/*
 * NOTE(review): the +40 bias presumably maps the signed pair onto a
 * non-negative table index; the lines that initialise curidx and curidx2
 * are missing from this listing - confirm against the full file.
 */
1817 curidx2 += qc4 + 40;
/* Bit cost: one codeword length per pair. */
1819 curbits += p_bits[curidx];
1820 curbits += p_bits[curidx2];
/* Each codebook entry is a vector of two floats. */
1822 vec = &p_codes[curidx*2];
1823 vec2 = &p_codes[curidx2*2];
/* Accumulate the squared norm of both selected vectors. */
1825 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1826 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
/*
 * diN = in_pos[N] - code[N] * IQ, where code is vec[0], vec[1], vec2[0],
 * vec2[1]. No abs.s is applied here: the inputs are used with their sign.
 * nmsub.s computes fr - fs * ft.
 */
1830 ".set noreorder \n\t"
1832 "lwc1 $f0, 0(%[in_pos]) \n\t"
1833 "lwc1 $f1, 0(%[vec]) \n\t"
1834 "lwc1 $f2, 4(%[in_pos]) \n\t"
1835 "lwc1 $f3, 4(%[vec]) \n\t"
1836 "lwc1 $f4, 8(%[in_pos]) \n\t"
1837 "lwc1 $f5, 0(%[vec2]) \n\t"
1838 "lwc1 $f6, 12(%[in_pos]) \n\t"
1839 "lwc1 $f7, 4(%[vec2]) \n\t"
1840 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1841 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1842 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1843 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1847 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1848 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1849 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1850 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
/* Clobber list: the hard-coded scratch registers used for the loads above. */
1851 :
"$f0",
"$f1",
"$f2",
"$f3",
1852 "$f4",
"$f5",
"$f6",
"$f7",
/* Distortion: sum of squared differences for this group of four. */
1856 cost += di0 * di0 + di1 * di1
1857 + di2 * di2 + di3 * di3;
/* Scale the accumulated codebook energy by IQ squared for the caller. */
1863 *energy = qenergy * (IQ*IQ);
/* Final cost: lambda-weighted distortion plus the bit count. */
1864 return cost * lambda + curbits;
/*
 * Rate/distortion cost of one band using magnitude-only pairs with quantized
 * values limited to 7. Each loop iteration handles four coefficients as two
 * 2-entry codebook vectors (vec, vec2); sign bits are costed separately via
 * upair7_sign_bits[].
 *
 * Visible contract: stores qenergy * IQ * IQ in *energy and returns
 * cost * lambda + curbits.
 *
 * NOTE(review): this listing omits several lines of the definition (part of
 * the parameter list, the opening brace, the declarations of IQ, p_bits,
 * p_codes, curbits, cost and t0..t4, the quantization that produces qc1..qc4
 * and the curidx/curidx2 computation). The visible lines do not show how
 * sign1/sign2/count1/count2 are consumed. Consult the complete file before
 * changing anything.
 */
1867 static float get_band_cost_UPAIR7_mips(
struct AACEncContext *s,
1869 const float *scaled,
int size,
int scale_idx,
1870 int cb,
const float lambda,
const float uplim,
1871 int *bits,
float *energy)
1877 float qenergy = 0.0f;
1878 int qc1, qc2, qc3, qc4;
/* Four input coefficients (two pairs) are consumed per iteration. */
1884 for (i = 0; i <
size; i += 4) {
1885 const float *vec, *vec2;
1886 int curidx, curidx2, sign1, count1, sign2, count2;
/* Same four inputs viewed as raw 32-bit words (in_int) and as floats (in_pos). */
1887 int *in_int = (
int *)&in[i];
1888 float *in_pos = (
float *)&in[i];
1889 float di0, di1, di2, di3;
/*
 * Step 1: t4 = 7 is the limit, sign1/sign2 start at 0, and qc1..qc4 are
 * clamped to at most 7 (slt flags 7 < qcN, movn writes the limit).
 */
1899 ".set noreorder \n\t"
1901 "ori %[t4], $zero, 7 \n\t"
1902 "ori %[sign1], $zero, 0 \n\t"
1903 "ori %[sign2], $zero, 0 \n\t"
1904 "slt %[t0], %[t4], %[qc1] \n\t"
1905 "slt %[t1], %[t4], %[qc2] \n\t"
1906 "slt %[t2], %[t4], %[qc3] \n\t"
1907 "slt %[t3], %[t4], %[qc4] \n\t"
1908 "movn %[qc1], %[t4], %[t0] \n\t"
1909 "movn %[qc2], %[t4], %[t1] \n\t"
1910 "movn %[qc3], %[t4], %[t2] \n\t"
1911 "movn %[qc4], %[t4], %[t3] \n\t"
/*
 * Step 2: collect sign bits per pair. Each raw input word is tested with
 * "slt tN, tN, $zero" (1 when the word is negative as an integer). The
 * first coefficient's sign is taken only when its qc is non-zero; the
 * accumulator is then shifted left by one and OR-ed with the second
 * coefficient's sign, again only when that qc is non-zero.
 */
1912 "lw %[t0], 0(%[in_int]) \n\t"
1913 "lw %[t1], 4(%[in_int]) \n\t"
1914 "lw %[t2], 8(%[in_int]) \n\t"
1915 "lw %[t3], 12(%[in_int]) \n\t"
1916 "slt %[t0], %[t0], $zero \n\t"
1917 "movn %[sign1], %[t0], %[qc1] \n\t"
1918 "slt %[t2], %[t2], $zero \n\t"
1919 "movn %[sign2], %[t2], %[qc3] \n\t"
1920 "slt %[t1], %[t1], $zero \n\t"
1921 "sll %[t0], %[sign1], 1 \n\t"
1922 "or %[t0], %[t0], %[t1] \n\t"
1923 "movn %[sign1], %[t0], %[qc2] \n\t"
1924 "slt %[t3], %[t3], $zero \n\t"
1925 "sll %[t0], %[sign2], 1 \n\t"
1926 "or %[t0], %[t0], %[t3] \n\t"
1927 "movn %[sign2], %[t0], %[qc4] \n\t"
/* Step 3: count1/count2 = how many qc values in each pair are > 0. */
1928 "slt %[count1], $zero, %[qc1] \n\t"
1929 "slt %[t1], $zero, %[qc2] \n\t"
1930 "slt %[count2], $zero, %[qc3] \n\t"
1931 "slt %[t2], $zero, %[qc4] \n\t"
1932 "addu %[count1], %[count1], %[t1] \n\t"
1933 "addu %[count2], %[count2], %[t2] \n\t"
/* qcN are read-write operands; the remaining operands are early-clobber outputs. */
1937 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1938 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1939 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
1940 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
1941 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1943 : [in_int]
"r"(in_int)
/* First pair: codeword length plus sign-bit cost; 2-float codebook vector. */
1953 curbits += p_bits[curidx];
1954 curbits += upair7_sign_bits[curidx];
1955 vec = &p_codes[curidx*2];
/* Second pair: same accounting. */
1957 curbits += p_bits[curidx2];
1958 curbits += upair7_sign_bits[curidx2];
1959 vec2 = &p_codes[curidx2*2];
/* Accumulate the squared norm of both selected vectors. */
1961 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1962 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
/*
 * diN = |in_pos[N]| - code[N] * IQ, where code is vec[0], vec[1], vec2[0],
 * vec2[1]. nmsub.s computes fr - fs * ft.
 */
1966 ".set noreorder \n\t"
1968 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1969 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1970 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1971 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1972 "abs.s %[di0], %[di0] \n\t"
1973 "abs.s %[di1], %[di1] \n\t"
1974 "abs.s %[di2], %[di2] \n\t"
1975 "abs.s %[di3], %[di3] \n\t"
1976 "lwc1 $f0, 0(%[vec]) \n\t"
1977 "lwc1 $f1, 4(%[vec]) \n\t"
1978 "lwc1 $f2, 0(%[vec2]) \n\t"
1979 "lwc1 $f3, 4(%[vec2]) \n\t"
1980 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1981 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1982 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1983 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1987 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1988 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1989 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1990 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
/* Clobber list: the hard-coded scratch registers used for the codebook values. */
1991 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Distortion: sum of squared differences for this group of four. */
1995 cost += di0 * di0 + di1 * di1
1996 + di2 * di2 + di3 * di3;
/* Scale the accumulated codebook energy by IQ squared for the caller. */
2002 *energy = qenergy * (IQ*IQ);
/* Final cost: lambda-weighted distortion plus the bit count. */
2003 return cost * lambda + curbits;
/*
 * Rate/distortion cost of one band using magnitude-only pairs with quantized
 * values limited to 12. Each loop iteration handles four coefficients as two
 * 2-entry codebook vectors (vec, vec2); sign bits are costed separately via
 * upair12_sign_bits[].
 *
 * Visible contract: stores qenergy * IQ * IQ in *energy and returns
 * cost * lambda + curbits.
 *
 * NOTE(review): this listing omits several lines of the definition (part of
 * the parameter list, the opening brace, the declarations of IQ, p_bits,
 * p_codes, curbits, cost and t0..t4, the quantization that produces qc1..qc4
 * and the curidx/curidx2 computation). The visible lines do not show how
 * sign1/sign2/count1/count2 are consumed. Consult the complete file before
 * changing anything.
 */
2006 static float get_band_cost_UPAIR12_mips(
struct AACEncContext *s,
2008 const float *scaled,
int size,
int scale_idx,
2009 int cb,
const float lambda,
const float uplim,
2010 int *bits,
float *energy)
2016 float qenergy = 0.0f;
2017 int qc1, qc2, qc3, qc4;
/* Four input coefficients (two pairs) are consumed per iteration. */
2023 for (i = 0; i <
size; i += 4) {
2024 const float *vec, *vec2;
2025 int curidx, curidx2;
2026 int sign1, count1, sign2, count2;
/* Same four inputs viewed as raw 32-bit words (in_int) and as floats (in_pos). */
2027 int *in_int = (
int *)&in[i];
2028 float *in_pos = (
float *)&in[i];
2029 float di0, di1, di2, di3;
/*
 * Step 1: t4 = 12 is the limit, sign1/sign2 start at 0, and qc1..qc4 are
 * clamped to at most 12 (slt flags 12 < qcN, movn writes the limit).
 */
2039 ".set noreorder \n\t"
2041 "ori %[t4], $zero, 12 \n\t"
2042 "ori %[sign1], $zero, 0 \n\t"
2043 "ori %[sign2], $zero, 0 \n\t"
2044 "slt %[t0], %[t4], %[qc1] \n\t"
2045 "slt %[t1], %[t4], %[qc2] \n\t"
2046 "slt %[t2], %[t4], %[qc3] \n\t"
2047 "slt %[t3], %[t4], %[qc4] \n\t"
2048 "movn %[qc1], %[t4], %[t0] \n\t"
2049 "movn %[qc2], %[t4], %[t1] \n\t"
2050 "movn %[qc3], %[t4], %[t2] \n\t"
2051 "movn %[qc4], %[t4], %[t3] \n\t"
/*
 * Step 2: collect sign bits per pair. Each raw input word is tested with
 * "slt tN, tN, $zero" (1 when the word is negative as an integer). The
 * first coefficient's sign is taken only when its qc is non-zero; the
 * accumulator is then shifted left by one and OR-ed with the second
 * coefficient's sign, again only when that qc is non-zero.
 */
2052 "lw %[t0], 0(%[in_int]) \n\t"
2053 "lw %[t1], 4(%[in_int]) \n\t"
2054 "lw %[t2], 8(%[in_int]) \n\t"
2055 "lw %[t3], 12(%[in_int]) \n\t"
2056 "slt %[t0], %[t0], $zero \n\t"
2057 "movn %[sign1], %[t0], %[qc1] \n\t"
2058 "slt %[t2], %[t2], $zero \n\t"
2059 "movn %[sign2], %[t2], %[qc3] \n\t"
2060 "slt %[t1], %[t1], $zero \n\t"
2061 "sll %[t0], %[sign1], 1 \n\t"
2062 "or %[t0], %[t0], %[t1] \n\t"
2063 "movn %[sign1], %[t0], %[qc2] \n\t"
2064 "slt %[t3], %[t3], $zero \n\t"
2065 "sll %[t0], %[sign2], 1 \n\t"
2066 "or %[t0], %[t0], %[t3] \n\t"
2067 "movn %[sign2], %[t0], %[qc4] \n\t"
/* Step 3: count1/count2 = how many qc values in each pair are > 0. */
2068 "slt %[count1], $zero, %[qc1] \n\t"
2069 "slt %[t1], $zero, %[qc2] \n\t"
2070 "slt %[count2], $zero, %[qc3] \n\t"
2071 "slt %[t2], $zero, %[qc4] \n\t"
2072 "addu %[count1], %[count1], %[t1] \n\t"
2073 "addu %[count2], %[count2], %[t2] \n\t"
/* qcN are read-write operands; the remaining operands are early-clobber outputs. */
2077 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
2078 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
2079 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
2080 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
2081 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
2083 : [in_int]
"r"(in_int)
/* Bit cost for both pairs: codeword lengths plus sign-bit costs. */
2093 curbits += p_bits[curidx];
2094 curbits += p_bits[curidx2];
2095 curbits += upair12_sign_bits[curidx];
2096 curbits += upair12_sign_bits[curidx2];
/* Each codebook entry is a vector of two floats. */
2097 vec = &p_codes[curidx*2];
2098 vec2 = &p_codes[curidx2*2];
/* Accumulate the squared norm of both selected vectors. */
2100 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
2101 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
/*
 * diN = |in_pos[N]| - code[N] * IQ, where code is vec[0], vec[1], vec2[0],
 * vec2[1]. nmsub.s computes fr - fs * ft.
 */
2105 ".set noreorder \n\t"
2107 "lwc1 %[di0], 0(%[in_pos]) \n\t"
2108 "lwc1 %[di1], 4(%[in_pos]) \n\t"
2109 "lwc1 %[di2], 8(%[in_pos]) \n\t"
2110 "lwc1 %[di3], 12(%[in_pos]) \n\t"
2111 "abs.s %[di0], %[di0] \n\t"
2112 "abs.s %[di1], %[di1] \n\t"
2113 "abs.s %[di2], %[di2] \n\t"
2114 "abs.s %[di3], %[di3] \n\t"
2115 "lwc1 $f0, 0(%[vec]) \n\t"
2116 "lwc1 $f1, 4(%[vec]) \n\t"
2117 "lwc1 $f2, 0(%[vec2]) \n\t"
2118 "lwc1 $f3, 4(%[vec2]) \n\t"
2119 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
2120 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
2121 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
2122 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
2126 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
2127 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
2128 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
2129 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
/* Clobber list: the hard-coded scratch registers used for the codebook values. */
2130 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Distortion: sum of squared differences for this group of four. */
2134 cost += di0 * di0 + di1 * di1
2135 + di2 * di2 + di3 * di3;
/* Scale the accumulated codebook energy by IQ squared for the caller. */
2141 *energy = qenergy * (IQ*IQ);
/* Final cost: lambda-weighted distortion plus the bit count. */
2142 return cost * lambda + curbits;
/*
 * Rate/distortion cost of one band for the escape case: quantized values
 * above 15 are replaced by the index 16 and charged extra bits derived from
 * av_log2() of the (saturated) original value. Each loop iteration handles
 * four coefficients as two 2-entry codebook vectors (vec, vec2).
 *
 * Visible contract: returns cost * lambda + curbits.
 *
 * NOTE(review): this listing omits many lines of the definition (part of the
 * parameter list, the opening brace, the declarations of IQ, p_bits, p_codes,
 * curbits, cost, V, c1..c4 and t1..t7, the quantization that produces
 * qc1..qc4, the curidx/curidx2 computation, the conditions that choose
 * between the cbrtf() and codebook branches below, and any store to *energy).
 * Consult the complete file before changing anything.
 */
2145 static float get_band_cost_ESC_mips(
struct AACEncContext *s,
2147 const float *scaled,
int size,
int scale_idx,
2148 int cb,
const float lambda,
const float uplim,
2149 int *bits,
float *energy)
/* Magnitude at or above which a coefficient is treated as clipped (see below). */
2153 const float CLIPPED_ESCAPE = 165140.0f * IQ;
2156 float qenergy = 0.0f;
2157 int qc1, qc2, qc3, qc4;
/* Four input coefficients (two pairs) are consumed per iteration. */
2163 for (i = 0; i <
size; i += 4) {
2164 const float *vec, *vec2;
2165 int curidx, curidx2;
2167 float di1, di2, di3, di4;
2168 int cond0, cond1, cond2, cond3;
/*
 * cN    = qcN after a saturating left shift by 18 followed by a logical
 *         right shift by 18, i.e. the value limited to what survives that
 *         round trip;
 * condN = 1 when qcN > 15 (t6 holds 15);
 * qcN   = 16 (t7) when condN is set, otherwise unchanged.
 */
2179 ".set noreorder \n\t"
2181 "ori %[t6], $zero, 15 \n\t"
2182 "ori %[t7], $zero, 16 \n\t"
2183 "shll_s.w %[c1], %[qc1], 18 \n\t"
2184 "shll_s.w %[c2], %[qc2], 18 \n\t"
2185 "shll_s.w %[c3], %[qc3], 18 \n\t"
2186 "shll_s.w %[c4], %[qc4], 18 \n\t"
2187 "srl %[c1], %[c1], 18 \n\t"
2188 "srl %[c2], %[c2], 18 \n\t"
2189 "srl %[c3], %[c3], 18 \n\t"
2190 "srl %[c4], %[c4], 18 \n\t"
2191 "slt %[cond0], %[t6], %[qc1] \n\t"
2192 "slt %[cond1], %[t6], %[qc2] \n\t"
2193 "slt %[cond2], %[t6], %[qc3] \n\t"
2194 "slt %[cond3], %[t6], %[qc4] \n\t"
2195 "movn %[qc1], %[t7], %[cond0] \n\t"
2196 "movn %[qc2], %[t7], %[cond1] \n\t"
2197 "movn %[qc3], %[t7], %[cond2] \n\t"
2198 "movn %[qc4], %[t7], %[cond3] \n\t"
/* qcN are read-write operands; the remaining operands are early-clobber outputs. */
2202 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
2203 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
2204 [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
2205 [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
2206 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
2207 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
2208 [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
/* First pair: codeword length plus sign-bit cost; 2-float codebook vector. */
2217 curbits += p_bits[curidx];
2218 curbits += esc_sign_bits[curidx];
2219 vec = &p_codes[curidx*2];
/* Second pair: same accounting. */
2221 curbits += p_bits[curidx2];
2222 curbits += esc_sign_bits[curidx2];
2223 vec2 = &p_codes[curidx2*2];
/*
 * Extra bits for escaped values only: -condN is all ones when condN == 1
 * and zero otherwise, so the AND acts as a branch-free conditional add.
 */
2225 curbits += (
av_log2(c1) * 2 - 3) & (-cond0);
2226 curbits += (
av_log2(c2) * 2 - 3) & (-cond1);
2227 curbits += (
av_log2(c3) * 2 - 3) & (-cond2);
2228 curbits += (
av_log2(c4) * 2 - 3) & (-cond3);
/* Input magnitudes (the corresponding t1 assignment is not in this listing). */
2231 t2 = fabsf(in[i+1]);
2232 t3 = fabsf(in[i+2]);
2233 t4 = fabsf(in[i+3]);
/*
 * Per-coefficient error diN. Three reconstructions are visible for each
 * coefficient: the clipped value CLIPPED_ESCAPE, cN * cbrtf(cN) * IQ, and
 * the codebook entry times IQ. Only the CLIPPED_ESCAPE test is shown; the
 * conditions separating the other two are missing from this listing.
 */
2236 if (t1 >= CLIPPED_ESCAPE) {
2237 di1 = t1 - CLIPPED_ESCAPE;
2238 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2240 di1 = t1 - (V = c1 *
cbrtf(c1) * IQ);
2244 di1 = t1 - (V = vec[0] * IQ);
2249 if (t2 >= CLIPPED_ESCAPE) {
2250 di2 = t2 - CLIPPED_ESCAPE;
2251 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2253 di2 = t2 - (V = c2 *
cbrtf(c2) * IQ);
2257 di2 = t2 - (V = vec[1] * IQ);
2262 if (t3 >= CLIPPED_ESCAPE) {
2263 di3 = t3 - CLIPPED_ESCAPE;
2264 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2266 di3 = t3 - (V = c3 *
cbrtf(c3) * IQ);
2270 di3 = t3 - (V = vec2[0] * IQ);
2275 if (t4 >= CLIPPED_ESCAPE) {
2276 di4 = t4 - CLIPPED_ESCAPE;
2277 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2279 di4 = t4 - (V = c4 *
cbrtf(c4) * IQ);
2283 di4 = t4 - (V = vec2[1]*IQ);
/* Distortion: sum of squared differences for this group of four. */
2287 cost += di1 * di1 + di2 * di2
2288 + di3 * di3 + di4 * di4;
/* Final cost: lambda-weighted distortion plus the bit count. */
2293 return cost * lambda + curbits;
/*
 * Table of band-cost functions, indexed by `cb` through the get_band_cost()
 * macro. The 16 visible entries, in index order:
 *   0       ZERO
 *   1, 2    SQUAD
 *   3, 4    UQUAD
 *   5, 6    SPAIR
 *   7, 8    UPAIR7
 *   9, 10   UPAIR12
 *   11      ESC
 *   12      NONE
 *   13..15  ZERO
 *
 * NOTE(review): one line of the parameter list and the closing brace of the
 * initializer are missing from this listing.
 */
2296 static float (*
const get_band_cost_arr[])(
struct AACEncContext *
s,
2298 const float *scaled,
int size,
int scale_idx,
2299 int cb,
const float lambda,
const float uplim,
2300 int *bits,
float *energy) = {
2301 get_band_cost_ZERO_mips,
2302 get_band_cost_SQUAD_mips,
2303 get_band_cost_SQUAD_mips,
2304 get_band_cost_UQUAD_mips,
2305 get_band_cost_UQUAD_mips,
2306 get_band_cost_SPAIR_mips,
2307 get_band_cost_SPAIR_mips,
2308 get_band_cost_UPAIR7_mips,
2309 get_band_cost_UPAIR7_mips,
2310 get_band_cost_UPAIR12_mips,
2311 get_band_cost_UPAIR12_mips,
2312 get_band_cost_ESC_mips,
2313 get_band_cost_NONE_mips,
2314 get_band_cost_ZERO_mips,
2315 get_band_cost_ZERO_mips,
2316 get_band_cost_ZERO_mips,
/*
 * Dispatch a band-cost request to the function stored at
 * get_band_cost_arr[cb], forwarding every argument unchanged.
 * `cb` is used as a raw array index; the macro performs no range check.
 */
2319 #define get_band_cost( \
2320 s, pb, in, scaled, size, scale_idx, cb, \
2321 lambda, uplim, bits, energy) \
2322 get_band_cost_arr[cb]( \
2323 s, pb, in, scaled, size, scale_idx, cb, \
2324 lambda, uplim, bits, energy)
/*
 * Thin wrapper around get_band_cost(): forwards its arguments with NULL in
 * the `pb` position and returns the result. `rtz` is accepted but is not
 * passed on by the visible call.
 *
 * NOTE(review): the declaration line (return type, name, leading parameters)
 * and the braces are missing from this listing; this is presumably
 * quantize_band_cost() - confirm against the full file.
 */
2327 const float *scaled,
int size,
int scale_idx,
2328 int cb,
const float lambda,
const float uplim,
2329 int *bits,
float *energy,
int rtz)
2331 return get_band_cost(s,
NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
/*
 * NOTE(review): fragment of a function body whose header is not part of this
 * listing - presumably the mid/side stereo search; confirm against the full
 * file. Most loop headers, several left-hand sides and the closing braces are
 * missing, so the comments below describe only what each visible line does.
 */
2340 int start = 0, i,
w, w2,
g, sid_sf_boost, prev_mid, prev_side;
2341 uint8_t nextband0[128], nextband1[128];
2342 float M[128],
S[128];
2344 const float lambda = s->
lambda;
/* lambda / 120, capped at 1.0. */
2345 const float mslambda =
FFMIN(1.0f, lambda / 120.f);
/* Seed the "previous" scalefactor indices from the first entry of each channel. */
2355 prev_mid = sce0->
sf_idx[0];
2356 prev_side = sce1->
sf_idx[0];
2364 float Mmax = 0.0f, Smax = 0.0f;
/* M[i] = average of the two channels' coefficients at the same position. */
2369 M[i] = (sce0->
coeffs[start+(w+w2)*128+i]
2370 + sce1->
coeffs[start+(w+w2)*128+i]) * 0.5;
/* Tail of an expression subtracting the sce1 coefficient (its start is not shown). */
2372 - sce1->
coeffs[start+(w+w2)*128+i];
/* Running maxima over M34[] and S34[]. */
2377 Mmax =
FFMAX(Mmax, M34[i]);
2378 Smax =
FFMAX(Smax, S34[i]);
/* Four candidate values of sid_sf_boost (0..3) are tried. */
2382 for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
2383 float dist1 = 0.0f, dist2 = 0.0f;
/* Force both codebook numbers to be at least 1. */
2403 midcb =
FFMAX(1,midcb);
2404 sidcb =
FFMAX(1,sidcb);
/* Same M[i] average as above, recomputed inside the candidate loop. */
2412 M[i] = (sce0->
coeffs[start+(w+w2)*128+i]
2413 + sce1->
coeffs[start+(w+w2)*128+i]) * 0.5;
2415 - sce1->
coeffs[start+(w+w2)*128+i];
/* Store the chosen scalefactor indices for this window/band in both channels. */
2454 sce0->
sf_idx[w*16+
g] = mididx;
2455 sce1->
sf_idx[w*16+
g] = sididx;
2463 }
else if (
B1 > B0) {
/* Remember the scalefactor indices of this band for later iterations. */
2470 prev_mid = sce0->
sf_idx[w*16+
g];
2472 prev_side = sce1->
sf_idx[w*16+
g];
2486 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
const AACCoefficientsEncoder * coder
Band types following are encoded differently from others.
AAC encoder trellis codebook selector.
static void abs_pow34_v(float *out, const float *in, const int size)
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
write the bit val n times
FFPsyBand psy_bands[PSY_MAX_BANDS]
channel bands information
#define SCALE_MAX_POS
scalefactor index maximum value
static av_always_inline float bval2bmax(float b)
approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f)))
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
int prev_idx
pointer to the previous path point
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Spectral data are scaled white noise not coded in the bitstream.
#define quantize_and_encode_band_cost(s, pb, in, quant, scaled, size, scale_idx, cb, lambda, uplim, bits, energy, rtz)
static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
#define av_assert0(cond)
assert() equivalent, that is always enabled.
static int ff_sfdelta_can_replace(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int new_sf, int band)
static double cb(void *priv, double x, double y)
AACEncOptions options
encoding options
SingleChannelElement ch[2]
void ff_aac_coder_init_mips(AACEncContext *c)
static float(*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
single band psychoacoustic information
int num_swb
number of scalefactor window bands
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
const float *const ff_aac_codebook_vectors[]
float ff_aac_pow2sf_tab[428]
#define SCALE_DIV_512
scalefactor difference that corresponds to a 512-fold difference in scale
float ff_aac_pow34sf_tab[428]
int cur_channel
current channel for coder context
const uint8_t *const ff_aac_spectral_bits[11]
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
static void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
void(* quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz)
AAC definitions and structures.
AAC encoder twoloop coder.
static int quantize_band_cost_bits(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
Libavcodec external API header.
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe)
static int find_min_book(float maxval, int sf)
IndividualChannelStream ics
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
void(* encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
static av_always_inline float cbrtf(float x)
structure used in optimal codebook search
Replacements for frequently missing libm functions.
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
uint8_t zeroes[128]
band is not coded (used by encoder)
int sf_idx[128]
scalefactor indices (used by encoder)
INTFLOAT coeffs[1024]
coefficients for IMDCT, possibly processed
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda)
two-loop quantizers search taken from ISO 13818-7 Appendix C
Single Channel Element - used for both SCE and LFE elements.
ChannelElement * cpe
channel elements
channel element - generic struct for SCE/CPE/CCE/LFE
const uint16_t *const ff_aac_spectral_codes[11]
FFPsyChannel * ch
single channel information
enum BandType band_type[128]
band types
AAC encoder quantization misc reusable function templates.
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
uint8_t is_mask[128]
Set if intensity stereo is used (used by encoder)
float scoefs[1024]
scaled coefficients