FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
aaccoder_mips.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012
3  * MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Author: Stanislav Ocovaj (socovaj@mips.com)
30  * Szabolcs Pal (sabolc@mips.com)
31  *
32  * AAC coefficients encoder optimized for MIPS floating-point architecture
33  *
34  * This file is part of FFmpeg.
35  *
36  * FFmpeg is free software; you can redistribute it and/or
37  * modify it under the terms of the GNU Lesser General Public
38  * License as published by the Free Software Foundation; either
39  * version 2.1 of the License, or (at your option) any later version.
40  *
41  * FFmpeg is distributed in the hope that it will be useful,
42  * but WITHOUT ANY WARRANTY; without even the implied warranty of
43  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44  * Lesser General Public License for more details.
45  *
46  * You should have received a copy of the GNU Lesser General Public
47  * License along with FFmpeg; if not, write to the Free Software
48  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
49  */
50 
51 /**
52  * @file
53  * Reference: libavcodec/aaccoder.c
54  */
55 
56 #include "libavutil/libm.h"
57 
58 #include <float.h>
59 #include "libavutil/mathematics.h"
60 #include "libavcodec/avcodec.h"
61 #include "libavcodec/put_bits.h"
62 #include "libavcodec/aac.h"
63 #include "libavcodec/aacenc.h"
64 #include "libavcodec/aactab.h"
65 #include "libavcodec/aacenctab.h"
67 
68 #if HAVE_INLINE_ASM
69 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
/**
 * One node of a band coding path.
 *
 * NOTE(review): the closing "} BandCodingPath;" line was missing from this
 * listing and has been restored; the field descriptions below are inferred
 * from the field names only — confirm against libavcodec/aaccoder.c.
 */
typedef struct BandCodingPath {
    int prev_idx; /* presumably the index of the previous node on the path */
    float cost;   /* presumably the accumulated cost of the path */
    int run;      /* presumably the run length associated with the node */
} BandCodingPath;
75 
/*
 * 81-entry (3^4) lookup table. Each entry equals the number of non-zero
 * digits in the base-3 representation of its index, i.e. how many of four
 * values in 0..2 are non-zero. Presumably the number of sign bits needed
 * for an unsigned-quad codebook entry — confirm against the users of this
 * table.
 */
76 static const uint8_t uquad_sign_bits[81] = {
77  0, 1, 1, 1, 2, 2, 1, 2, 2,
78  1, 2, 2, 2, 3, 3, 2, 3, 3,
79  1, 2, 2, 2, 3, 3, 2, 3, 3,
80  1, 2, 2, 2, 3, 3, 2, 3, 3,
81  2, 3, 3, 3, 4, 4, 3, 4, 4,
82  2, 3, 3, 3, 4, 4, 3, 4, 4,
83  1, 2, 2, 2, 3, 3, 2, 3, 3,
84  2, 3, 3, 3, 4, 4, 3, 4, 4,
85  2, 3, 3, 3, 4, 4, 3, 4, 4
86 };
87 
/*
 * 64-entry (8 x 8) lookup table. The entry at index 8 * a + b equals
 * (a != 0) + (b != 0), i.e. how many of the two values in 0..7 are
 * non-zero. Presumably the number of sign bits for an unsigned-pair
 * codebook entry — confirm against the users of this table.
 */
88 static const uint8_t upair7_sign_bits[64] = {
89  0, 1, 1, 1, 1, 1, 1, 1,
90  1, 2, 2, 2, 2, 2, 2, 2,
91  1, 2, 2, 2, 2, 2, 2, 2,
92  1, 2, 2, 2, 2, 2, 2, 2,
93  1, 2, 2, 2, 2, 2, 2, 2,
94  1, 2, 2, 2, 2, 2, 2, 2,
95  1, 2, 2, 2, 2, 2, 2, 2,
96  1, 2, 2, 2, 2, 2, 2, 2,
97 };
98 
/*
 * 169-entry (13 x 13) lookup table. The entry at index 13 * a + b equals
 * (a != 0) + (b != 0), i.e. how many of the two values in 0..12 are
 * non-zero. Presumably the number of sign bits for an unsigned-pair
 * codebook entry — confirm against the users of this table.
 */
99 static const uint8_t upair12_sign_bits[169] = {
100  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
103  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
104  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
111  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
112  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
113 };
114 
/*
 * 289-entry (17 x 17) lookup table. The entry at index 17 * a + b equals
 * (a != 0) + (b != 0), i.e. how many of the two values in 0..16 are
 * non-zero. Presumably the number of sign bits for an escape-codebook
 * entry — confirm against the users of this table.
 */
115 static const uint8_t esc_sign_bits[289] = {
116  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
117  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
125  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
126  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
127  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
128  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
129  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
133 };
134 
135 /**
136  * Functions derived from the template function and optimized for quantizing and encoding a band
137  */
/**
 * Quantize a band four coefficients at a time and write one codeword per
 * 4-tuple, with each quantized value limited to -1, 0 or +1 and the sign
 * folded into the codebook index.
 *
 * @param s         encoder context; s->scoefs is used as scratch for the
 *                  output of abs_pow34_v()
 * @param pb        bit writer handed to put_bits()
 * @param in        input coefficients, read as in[i] .. in[i+3]
 * @param out       if non-NULL, receives the dequantized values
 * @param scaled    incoming value is ignored; overwritten with s->scoefs
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the Q34 and IQ entries of the scale tables
 * @param cb        codebook number; tables are indexed with cb - 1
 * @param lambda    unused in this function
 * @param uplim     unused in this function
 * @param bits      unused in this function
 * @param energy    if non-NULL, receives the sum of squared dequantized values
 * @param ROUNDING  unused in this function: ROUND_STANDARD is always applied
 *                  NOTE(review): the ESC variant honours ROUNDING — confirm
 *                  that ignoring it here is intended
 */
138 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
139  PutBitContext *pb, const float *in, float *out,
140  const float *scaled, int size, int scale_idx,
141  int cb, const float lambda, const float uplim,
142  int *bits, float *energy, const float ROUNDING)
143 {
144  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
145  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
146  int i;
147  int qc1, qc2, qc3, qc4;
148  float qenergy = 0.0f;
149 
150  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
151  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
152  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
153 
/* The caller-supplied 'scaled' is discarded; the scaled input is recomputed into s->scoefs. */
154  abs_pow34_v(s->scoefs, in, size);
155  scaled = s->scoefs;
156  for (i = 0; i < size; i += 4) {
157  int curidx;
158  int *in_int = (int *)&in[i];
159  int t0, t1, t2, t3, t4, t5, t6, t7;
160  const float *vec;
161 
162  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
163  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
164  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
165  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
166 
/*
 * The asm below:
 *  - replaces each qcN with (0 < qcN), i.e. 0 or 1;
 *  - loads the raw 32-bit words of in[i..i+3] and isolates bit 31 of each;
 *  - negates qcN when bit 31 of the matching input word is set.
 * After it, each qcN is -1, 0 or +1.
 */
167  __asm__ volatile (
168  ".set push \n\t"
169  ".set noreorder \n\t"
170 
171  "slt %[qc1], $zero, %[qc1] \n\t"
172  "slt %[qc2], $zero, %[qc2] \n\t"
173  "slt %[qc3], $zero, %[qc3] \n\t"
174  "slt %[qc4], $zero, %[qc4] \n\t"
175  "lw %[t0], 0(%[in_int]) \n\t"
176  "lw %[t1], 4(%[in_int]) \n\t"
177  "lw %[t2], 8(%[in_int]) \n\t"
178  "lw %[t3], 12(%[in_int]) \n\t"
179  "srl %[t0], %[t0], 31 \n\t"
180  "srl %[t1], %[t1], 31 \n\t"
181  "srl %[t2], %[t2], 31 \n\t"
182  "srl %[t3], %[t3], 31 \n\t"
183  "subu %[t4], $zero, %[qc1] \n\t"
184  "subu %[t5], $zero, %[qc2] \n\t"
185  "subu %[t6], $zero, %[qc3] \n\t"
186  "subu %[t7], $zero, %[qc4] \n\t"
187  "movn %[qc1], %[t4], %[t0] \n\t"
188  "movn %[qc2], %[t5], %[t1] \n\t"
189  "movn %[qc3], %[t6], %[t2] \n\t"
190  "movn %[qc4], %[t7], %[t3] \n\t"
191 
192  ".set pop \n\t"
193 
194  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
195  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
196  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
197  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
198  : [in_int]"r"(in_int)
199  : "memory"
200  );
201 
/* Base-3 index over the four values; +40 (= 27 + 9 + 3 + 1) shifts the -1..1 range to 0..2 per digit. */
202  curidx = qc1;
203  curidx *= 3;
204  curidx += qc2;
205  curidx *= 3;
206  curidx += qc3;
207  curidx *= 3;
208  curidx += qc4;
209  curidx += 40;
210 
211  put_bits(pb, p_bits[curidx], p_codes[curidx]);
212 
213  if (out || energy) {
214  float e1,e2,e3,e4;
/* Four vector components per codebook entry, scaled by IQ. */
215  vec = &p_vec[curidx*4];
216  e1 = vec[0] * IQ;
217  e2 = vec[1] * IQ;
218  e3 = vec[2] * IQ;
219  e4 = vec[3] * IQ;
220  if (out) {
221  out[i+0] = e1;
222  out[i+1] = e2;
223  out[i+2] = e3;
224  out[i+3] = e4;
225  }
226  if (energy)
227  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
228  }
229  }
230  if (energy)
231  *energy = qenergy;
232 }
233 
/**
 * Quantize a band four coefficients at a time and write one codeword per
 * 4-tuple, with magnitudes limited to 0..2 and one explicit sign bit
 * appended for every non-zero value.
 *
 * @param s         encoder context; s->scoefs is used as scratch for the
 *                  output of abs_pow34_v()
 * @param pb        bit writer handed to put_bits()
 * @param in        input coefficients, read as in[i] .. in[i+3]
 * @param out       if non-NULL, receives the dequantized values
 * @param scaled    incoming value is ignored; overwritten with s->scoefs
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the Q34 and IQ entries of the scale tables
 * @param cb        codebook number; tables are indexed with cb - 1
 * @param lambda    unused in this function
 * @param uplim     unused in this function
 * @param bits      unused in this function
 * @param energy    if non-NULL, receives the sum of squared dequantized values
 * @param ROUNDING  unused in this function: ROUND_STANDARD is always applied
 *                  NOTE(review): the ESC variant honours ROUNDING — confirm
 *                  that ignoring it here is intended
 */
234 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
235  PutBitContext *pb, const float *in, float *out,
236  const float *scaled, int size, int scale_idx,
237  int cb, const float lambda, const float uplim,
238  int *bits, float *energy, const float ROUNDING)
239 {
240  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
241  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
242  int i;
243  int qc1, qc2, qc3, qc4;
244  float qenergy = 0.0f;
245 
246  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
247  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
248  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
249 
/* The caller-supplied 'scaled' is discarded; the scaled input is recomputed into s->scoefs. */
250  abs_pow34_v(s->scoefs, in, size);
251  scaled = s->scoefs;
252  for (i = 0; i < size; i += 4) {
253  int curidx, sign, count;
254  int *in_int = (int *)&in[i];
255  uint8_t v_bits;
256  unsigned int v_codes;
257  int t0, t1, t2, t3, t4;
258  const float *vec;
259 
260  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
261  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
262  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
263  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
264 
/*
 * The asm below:
 *  - limits each qcN to at most 2;
 *  - loads the raw 32-bit words of in[i..i+3] and tests each for < 0
 *    as an integer;
 *  - builds 'sign' by shifting in one such bit per non-zero qcN, in
 *    order qc1..qc4 (zero-valued qcN contribute no bit);
 *  - sets 'count' to the number of qcN that are greater than zero.
 */
265  __asm__ volatile (
266  ".set push \n\t"
267  ".set noreorder \n\t"
268 
269  "ori %[t4], $zero, 2 \n\t"
270  "ori %[sign], $zero, 0 \n\t"
271  "slt %[t0], %[t4], %[qc1] \n\t"
272  "slt %[t1], %[t4], %[qc2] \n\t"
273  "slt %[t2], %[t4], %[qc3] \n\t"
274  "slt %[t3], %[t4], %[qc4] \n\t"
275  "movn %[qc1], %[t4], %[t0] \n\t"
276  "movn %[qc2], %[t4], %[t1] \n\t"
277  "movn %[qc3], %[t4], %[t2] \n\t"
278  "movn %[qc4], %[t4], %[t3] \n\t"
279  "lw %[t0], 0(%[in_int]) \n\t"
280  "lw %[t1], 4(%[in_int]) \n\t"
281  "lw %[t2], 8(%[in_int]) \n\t"
282  "lw %[t3], 12(%[in_int]) \n\t"
283  "slt %[t0], %[t0], $zero \n\t"
284  "movn %[sign], %[t0], %[qc1] \n\t"
285  "slt %[t1], %[t1], $zero \n\t"
286  "slt %[t2], %[t2], $zero \n\t"
287  "slt %[t3], %[t3], $zero \n\t"
288  "sll %[t0], %[sign], 1 \n\t"
289  "or %[t0], %[t0], %[t1] \n\t"
290  "movn %[sign], %[t0], %[qc2] \n\t"
291  "slt %[t4], $zero, %[qc1] \n\t"
292  "slt %[t1], $zero, %[qc2] \n\t"
293  "slt %[count], $zero, %[qc3] \n\t"
294  "sll %[t0], %[sign], 1 \n\t"
295  "or %[t0], %[t0], %[t2] \n\t"
296  "movn %[sign], %[t0], %[qc3] \n\t"
297  "slt %[t2], $zero, %[qc4] \n\t"
298  "addu %[count], %[count], %[t4] \n\t"
299  "addu %[count], %[count], %[t1] \n\t"
300  "sll %[t0], %[sign], 1 \n\t"
301  "or %[t0], %[t0], %[t3] \n\t"
302  "movn %[sign], %[t0], %[qc4] \n\t"
303  "addu %[count], %[count], %[t2] \n\t"
304 
305  ".set pop \n\t"
306 
307  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
308  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
309  [sign]"=&r"(sign), [count]"=&r"(count),
310  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
311  [t4]"=&r"(t4)
312  : [in_int]"r"(in_int)
313  : "memory"
314  );
315 
/* Base-3 index over the four magnitudes (each 0..2). */
316  curidx = qc1;
317  curidx *= 3;
318  curidx += qc2;
319  curidx *= 3;
320  curidx += qc3;
321  curidx *= 3;
322  curidx += qc4;
323 
/* Codeword followed by 'count' sign bits, written with a single put_bits(). */
324  v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
325  v_bits = p_bits[curidx] + count;
326  put_bits(pb, v_bits, v_codes);
327 
328  if (out || energy) {
329  float e1,e2,e3,e4;
330  vec = &p_vec[curidx*4];
/* The table value is scaled by IQ and given the sign of the input coefficient. */
331  e1 = copysignf(vec[0] * IQ, in[i+0]);
332  e2 = copysignf(vec[1] * IQ, in[i+1]);
333  e3 = copysignf(vec[2] * IQ, in[i+2]);
334  e4 = copysignf(vec[3] * IQ, in[i+3]);
335  if (out) {
336  out[i+0] = e1;
337  out[i+1] = e2;
338  out[i+2] = e3;
339  out[i+3] = e4;
340  }
341  if (energy)
342  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
343  }
344  }
345  if (energy)
346  *energy = qenergy;
347 }
348 
/**
 * Quantize a band four coefficients at a time as two signed pairs, with
 * each quantized value limited to -4..4, and write both pair codewords
 * with a single put_bits() call.
 *
 * @param s         encoder context; s->scoefs is used as scratch for the
 *                  output of abs_pow34_v()
 * @param pb        bit writer handed to put_bits()
 * @param in        input coefficients, read as in[i] .. in[i+3]
 * @param out       if non-NULL, receives the dequantized values
 * @param scaled    incoming value is ignored; overwritten with s->scoefs
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the Q34 and IQ entries of the scale tables
 * @param cb        codebook number; tables are indexed with cb - 1
 * @param lambda    unused in this function
 * @param uplim     unused in this function
 * @param bits      unused in this function
 * @param energy    if non-NULL, receives the sum of squared dequantized values
 * @param ROUNDING  unused in this function: ROUND_STANDARD is always applied
 *                  NOTE(review): the ESC variant honours ROUNDING — confirm
 *                  that ignoring it here is intended
 */
349 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
350  PutBitContext *pb, const float *in, float *out,
351  const float *scaled, int size, int scale_idx,
352  int cb, const float lambda, const float uplim,
353  int *bits, float *energy, const float ROUNDING)
354 {
355  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
356  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
357  int i;
358  int qc1, qc2, qc3, qc4;
359  float qenergy = 0.0f;
360 
361  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
362  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
363  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
364 
/* The caller-supplied 'scaled' is discarded; the scaled input is recomputed into s->scoefs. */
365  abs_pow34_v(s->scoefs, in, size);
366  scaled = s->scoefs;
367  for (i = 0; i < size; i += 4) {
368  int curidx, curidx2;
369  int *in_int = (int *)&in[i];
370  uint8_t v_bits;
371  unsigned int v_codes;
372  int t0, t1, t2, t3, t4, t5, t6, t7;
373  const float *vec1, *vec2;
374 
375  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
376  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
377  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
378  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
379 
/*
 * The asm below:
 *  - limits each qcN to at most 4;
 *  - loads the raw 32-bit words of in[i..i+3] and isolates bit 31 of each;
 *  - negates qcN when bit 31 of the matching input word is set.
 */
380  __asm__ volatile (
381  ".set push \n\t"
382  ".set noreorder \n\t"
383 
384  "ori %[t4], $zero, 4 \n\t"
385  "slt %[t0], %[t4], %[qc1] \n\t"
386  "slt %[t1], %[t4], %[qc2] \n\t"
387  "slt %[t2], %[t4], %[qc3] \n\t"
388  "slt %[t3], %[t4], %[qc4] \n\t"
389  "movn %[qc1], %[t4], %[t0] \n\t"
390  "movn %[qc2], %[t4], %[t1] \n\t"
391  "movn %[qc3], %[t4], %[t2] \n\t"
392  "movn %[qc4], %[t4], %[t3] \n\t"
393  "lw %[t0], 0(%[in_int]) \n\t"
394  "lw %[t1], 4(%[in_int]) \n\t"
395  "lw %[t2], 8(%[in_int]) \n\t"
396  "lw %[t3], 12(%[in_int]) \n\t"
397  "srl %[t0], %[t0], 31 \n\t"
398  "srl %[t1], %[t1], 31 \n\t"
399  "srl %[t2], %[t2], 31 \n\t"
400  "srl %[t3], %[t3], 31 \n\t"
401  "subu %[t4], $zero, %[qc1] \n\t"
402  "subu %[t5], $zero, %[qc2] \n\t"
403  "subu %[t6], $zero, %[qc3] \n\t"
404  "subu %[t7], $zero, %[qc4] \n\t"
405  "movn %[qc1], %[t4], %[t0] \n\t"
406  "movn %[qc2], %[t5], %[t1] \n\t"
407  "movn %[qc3], %[t6], %[t2] \n\t"
408  "movn %[qc4], %[t7], %[t3] \n\t"
409 
410  ".set pop \n\t"
411 
412  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
413  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
414  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
415  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
416  : [in_int]"r"(in_int)
417  : "memory"
418  );
419 
/* Base-9 index per pair; +40 (= 4 * 9 + 4) shifts the -4..4 range to 0..8 per digit. */
420  curidx = 9 * qc1;
421  curidx += qc2 + 40;
422 
423  curidx2 = 9 * qc3;
424  curidx2 += qc4 + 40;
425 
/* Both pair codewords are concatenated (first pair in the upper bits) and written at once. */
426  v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
427  v_bits = p_bits[curidx] + p_bits[curidx2];
428  put_bits(pb, v_bits, v_codes);
429 
430  if (out || energy) {
431  float e1,e2,e3,e4;
/* Two vector components per codebook entry, scaled by IQ. */
432  vec1 = &p_vec[curidx*2 ];
433  vec2 = &p_vec[curidx2*2];
434  e1 = vec1[0] * IQ;
435  e2 = vec1[1] * IQ;
436  e3 = vec2[0] * IQ;
437  e4 = vec2[1] * IQ;
438  if (out) {
439  out[i+0] = e1;
440  out[i+1] = e2;
441  out[i+2] = e3;
442  out[i+3] = e4;
443  }
444  if (energy)
445  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
446  }
447  }
448  if (energy)
449  *energy = qenergy;
450 }
451 
452 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
453  PutBitContext *pb, const float *in, float *out,
454  const float *scaled, int size, int scale_idx,
455  int cb, const float lambda, const float uplim,
456  int *bits, float *energy, const float ROUNDING)
457 {
458  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
459  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
460  int i;
461  int qc1, qc2, qc3, qc4;
462  float qenergy = 0.0f;
463 
464  uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
465  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
466  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
467 
468  abs_pow34_v(s->scoefs, in, size);
469  scaled = s->scoefs;
470  for (i = 0; i < size; i += 4) {
471  int curidx1, curidx2, sign1, count1, sign2, count2;
472  int *in_int = (int *)&in[i];
473  uint8_t v_bits;
474  unsigned int v_codes;
475  int t0, t1, t2, t3, t4;
476  const float *vec1, *vec2;
477 
478  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
479  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
480  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
481  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
482 
483  __asm__ volatile (
484  ".set push \n\t"
485  ".set noreorder \n\t"
486 
487  "ori %[t4], $zero, 7 \n\t"
488  "ori %[sign1], $zero, 0 \n\t"
489  "ori %[sign2], $zero, 0 \n\t"
490  "slt %[t0], %[t4], %[qc1] \n\t"
491  "slt %[t1], %[t4], %[qc2] \n\t"
492  "slt %[t2], %[t4], %[qc3] \n\t"
493  "slt %[t3], %[t4], %[qc4] \n\t"
494  "movn %[qc1], %[t4], %[t0] \n\t"
495  "movn %[qc2], %[t4], %[t1] \n\t"
496  "movn %[qc3], %[t4], %[t2] \n\t"
497  "movn %[qc4], %[t4], %[t3] \n\t"
498  "lw %[t0], 0(%[in_int]) \n\t"
499  "lw %[t1], 4(%[in_int]) \n\t"
500  "lw %[t2], 8(%[in_int]) \n\t"
501  "lw %[t3], 12(%[in_int]) \n\t"
502  "slt %[t0], %[t0], $zero \n\t"
503  "movn %[sign1], %[t0], %[qc1] \n\t"
504  "slt %[t2], %[t2], $zero \n\t"
505  "movn %[sign2], %[t2], %[qc3] \n\t"
506  "slt %[t1], %[t1], $zero \n\t"
507  "sll %[t0], %[sign1], 1 \n\t"
508  "or %[t0], %[t0], %[t1] \n\t"
509  "movn %[sign1], %[t0], %[qc2] \n\t"
510  "slt %[t3], %[t3], $zero \n\t"
511  "sll %[t0], %[sign2], 1 \n\t"
512  "or %[t0], %[t0], %[t3] \n\t"
513  "movn %[sign2], %[t0], %[qc4] \n\t"
514  "slt %[count1], $zero, %[qc1] \n\t"
515  "slt %[t1], $zero, %[qc2] \n\t"
516  "slt %[count2], $zero, %[qc3] \n\t"
517  "slt %[t2], $zero, %[qc4] \n\t"
518  "addu %[count1], %[count1], %[t1] \n\t"
519  "addu %[count2], %[count2], %[t2] \n\t"
520 
521  ".set pop \n\t"
522 
523  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
524  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
525  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
526  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
527  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
528  [t4]"=&r"(t4)
529  : [in_int]"r"(in_int)
530  : "t0", "t1", "t2", "t3", "t4",
531  "memory"
532  );
533 
534  curidx1 = 8 * qc1;
535  curidx1 += qc2;
536 
537  v_codes = (p_codes[curidx1] << count1) | sign1;
538  v_bits = p_bits[curidx1] + count1;
539  put_bits(pb, v_bits, v_codes);
540 
541  curidx2 = 8 * qc3;
542  curidx2 += qc4;
543 
544  v_codes = (p_codes[curidx2] << count2) | sign2;
545  v_bits = p_bits[curidx2] + count2;
546  put_bits(pb, v_bits, v_codes);
547 
548  if (out || energy) {
549  float e1,e2,e3,e4;
550  vec1 = &p_vec[curidx1*2];
551  vec2 = &p_vec[curidx2*2];
552  e1 = copysignf(vec1[0] * IQ, in[i+0]);
553  e2 = copysignf(vec1[1] * IQ, in[i+1]);
554  e3 = copysignf(vec2[0] * IQ, in[i+2]);
555  e4 = copysignf(vec2[1] * IQ, in[i+3]);
556  if (out) {
557  out[i+0] = e1;
558  out[i+1] = e2;
559  out[i+2] = e3;
560  out[i+3] = e4;
561  }
562  if (energy)
563  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
564  }
565  }
566  if (energy)
567  *energy = qenergy;
568 }
569 
/**
 * Quantize a band four coefficients at a time as two unsigned pairs, with
 * magnitudes limited to 0..12, and write each pair codeword followed by one
 * sign bit per non-zero value.
 *
 * @param s         encoder context; s->scoefs is used as scratch for the
 *                  output of abs_pow34_v()
 * @param pb        bit writer handed to put_bits()
 * @param in        input coefficients, read as in[i] .. in[i+3]
 * @param out       if non-NULL, receives the dequantized values
 * @param scaled    incoming value is ignored; overwritten with s->scoefs
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the Q34 and IQ entries of the scale tables
 * @param cb        codebook number; tables are indexed with cb - 1
 * @param lambda    unused in this function
 * @param uplim     unused in this function
 * @param bits      unused in this function
 * @param energy    if non-NULL, receives the sum of squared dequantized values
 * @param ROUNDING  unused in this function: ROUND_STANDARD is always applied
 *                  NOTE(review): the ESC variant honours ROUNDING — confirm
 *                  that ignoring it here is intended
 */
570 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
571  PutBitContext *pb, const float *in, float *out,
572  const float *scaled, int size, int scale_idx,
573  int cb, const float lambda, const float uplim,
574  int *bits, float *energy, const float ROUNDING)
575 {
576  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
577  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
578  int i;
579  int qc1, qc2, qc3, qc4;
580  float qenergy = 0.0f;
581 
582  uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
583  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
584  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
585 
/* The caller-supplied 'scaled' is discarded; the scaled input is recomputed into s->scoefs. */
586  abs_pow34_v(s->scoefs, in, size);
587  scaled = s->scoefs;
588  for (i = 0; i < size; i += 4) {
589  int curidx1, curidx2, sign1, count1, sign2, count2;
590  int *in_int = (int *)&in[i];
591  uint8_t v_bits;
592  unsigned int v_codes;
593  int t0, t1, t2, t3, t4;
594  const float *vec1, *vec2;
595 
596  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
597  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
598  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
599  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
600 
/*
 * The asm below:
 *  - limits each qcN to at most 12;
 *  - loads the raw 32-bit words of in[i..i+3] and tests each for < 0
 *    as an integer;
 *  - builds sign1 (from qc1, qc2) and sign2 (from qc3, qc4) by shifting
 *    in one such bit per non-zero qcN;
 *  - sets count1 / count2 to the number of positive values in each pair.
 */
601  __asm__ volatile (
602  ".set push \n\t"
603  ".set noreorder \n\t"
604 
605  "ori %[t4], $zero, 12 \n\t"
606  "ori %[sign1], $zero, 0 \n\t"
607  "ori %[sign2], $zero, 0 \n\t"
608  "slt %[t0], %[t4], %[qc1] \n\t"
609  "slt %[t1], %[t4], %[qc2] \n\t"
610  "slt %[t2], %[t4], %[qc3] \n\t"
611  "slt %[t3], %[t4], %[qc4] \n\t"
612  "movn %[qc1], %[t4], %[t0] \n\t"
613  "movn %[qc2], %[t4], %[t1] \n\t"
614  "movn %[qc3], %[t4], %[t2] \n\t"
615  "movn %[qc4], %[t4], %[t3] \n\t"
616  "lw %[t0], 0(%[in_int]) \n\t"
617  "lw %[t1], 4(%[in_int]) \n\t"
618  "lw %[t2], 8(%[in_int]) \n\t"
619  "lw %[t3], 12(%[in_int]) \n\t"
620  "slt %[t0], %[t0], $zero \n\t"
621  "movn %[sign1], %[t0], %[qc1] \n\t"
622  "slt %[t2], %[t2], $zero \n\t"
623  "movn %[sign2], %[t2], %[qc3] \n\t"
624  "slt %[t1], %[t1], $zero \n\t"
625  "sll %[t0], %[sign1], 1 \n\t"
626  "or %[t0], %[t0], %[t1] \n\t"
627  "movn %[sign1], %[t0], %[qc2] \n\t"
628  "slt %[t3], %[t3], $zero \n\t"
629  "sll %[t0], %[sign2], 1 \n\t"
630  "or %[t0], %[t0], %[t3] \n\t"
631  "movn %[sign2], %[t0], %[qc4] \n\t"
632  "slt %[count1], $zero, %[qc1] \n\t"
633  "slt %[t1], $zero, %[qc2] \n\t"
634  "slt %[count2], $zero, %[qc3] \n\t"
635  "slt %[t2], $zero, %[qc4] \n\t"
636  "addu %[count1], %[count1], %[t1] \n\t"
637  "addu %[count2], %[count2], %[t2] \n\t"
638 
639  ".set pop \n\t"
640 
641  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
642  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
643  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
644  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
645  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
646  [t4]"=&r"(t4)
647  : [in_int]"r"(in_int)
648  : "memory"
649  );
650 
/* Base-13 index of the first pair (each magnitude 0..12). */
651  curidx1 = 13 * qc1;
652  curidx1 += qc2;
653 
/* Codeword followed by count1 sign bits. */
654  v_codes = (p_codes[curidx1] << count1) | sign1;
655  v_bits = p_bits[curidx1] + count1;
656  put_bits(pb, v_bits, v_codes);
657 
/* Same for the second pair. */
658  curidx2 = 13 * qc3;
659  curidx2 += qc4;
660 
661  v_codes = (p_codes[curidx2] << count2) | sign2;
662  v_bits = p_bits[curidx2] + count2;
663  put_bits(pb, v_bits, v_codes);
664 
665  if (out || energy) {
666  float e1,e2,e3,e4;
667  vec1 = &p_vec[curidx1*2];
668  vec2 = &p_vec[curidx2*2];
/* The table value is scaled by IQ and given the sign of the input coefficient. */
669  e1 = copysignf(vec1[0] * IQ, in[i+0]);
670  e2 = copysignf(vec1[1] * IQ, in[i+1]);
671  e3 = copysignf(vec2[0] * IQ, in[i+2]);
672  e4 = copysignf(vec2[1] * IQ, in[i+3]);
673  if (out) {
674  out[i+0] = e1;
675  out[i+1] = e2;
676  out[i+2] = e3;
677  out[i+3] = e4;
678  }
679  if (energy)
680  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
681  }
682  }
683  if (energy)
684  *energy = qenergy;
685 }
686 
/**
 * Quantize a band four coefficients at a time as two unsigned pairs with
 * magnitudes limited to 0..16 for the codebook index, and write each pair
 * codeword followed by one sign bit per non-zero value.
 *
 * Two code paths exist:
 *  - cb < 11: dequantized values come from the codebook vector table;
 *  - otherwise: an extra escape sequence is written for every component
 *    whose codebook vector value equals 64.0f, and dequantized values are
 *    computed as c * cbrtf(c) * IQ from the separately limited value c.
 *
 * Unlike the other quantize_and_encode_band_cost_*_mips variants visible in
 * this file, this function applies the ROUNDING argument.
 *
 * @param s         encoder context; s->scoefs is used as scratch for the
 *                  output of abs_pow34_v()
 * @param pb        bit writer handed to put_bits()
 * @param in        input coefficients, read as in[i] .. in[i+3]
 * @param out       if non-NULL, receives the dequantized values
 * @param scaled    incoming value is ignored; overwritten with s->scoefs
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the Q34 and IQ entries of the scale tables
 * @param cb        codebook number; tables are indexed with cb - 1
 * @param lambda    unused in this function
 * @param uplim     unused in this function
 * @param bits      unused in this function
 * @param energy    if non-NULL, receives the sum of squared dequantized values
 * @param ROUNDING  rounding offset added before the float-to-int conversion
 */
687 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
688  PutBitContext *pb, const float *in, float *out,
689  const float *scaled, int size, int scale_idx,
690  int cb, const float lambda, const float uplim,
691  int *bits, float *energy, const float ROUNDING)
692 {
693  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
694  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
695  int i;
696  int qc1, qc2, qc3, qc4;
697  float qenergy = 0.0f;
698 
699  uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
700  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
701  float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
702 
/* The caller-supplied 'scaled' is discarded; the scaled input is recomputed into s->scoefs. */
703  abs_pow34_v(s->scoefs, in, size);
704  scaled = s->scoefs;
705 
706  if (cb < 11) {
707  for (i = 0; i < size; i += 4) {
708  int curidx, curidx2, sign1, count1, sign2, count2;
709  int *in_int = (int *)&in[i];
710  uint8_t v_bits;
711  unsigned int v_codes;
712  int t0, t1, t2, t3, t4;
713  const float *vec1, *vec2;
714 
715  qc1 = scaled[i ] * Q34 + ROUNDING;
716  qc2 = scaled[i+1] * Q34 + ROUNDING;
717  qc3 = scaled[i+2] * Q34 + ROUNDING;
718  qc4 = scaled[i+3] * Q34 + ROUNDING;
719 
/*
 * The asm below:
 *  - limits each qcN to at most 16;
 *  - loads the raw 32-bit words of in[i..i+3] and tests each for < 0
 *    as an integer;
 *  - builds sign1 (from qc1, qc2) and sign2 (from qc3, qc4) by shifting
 *    in one such bit per non-zero qcN;
 *  - sets count1 / count2 to the number of positive values in each pair.
 */
720  __asm__ volatile (
721  ".set push \n\t"
722  ".set noreorder \n\t"
723 
724  "ori %[t4], $zero, 16 \n\t"
725  "ori %[sign1], $zero, 0 \n\t"
726  "ori %[sign2], $zero, 0 \n\t"
727  "slt %[t0], %[t4], %[qc1] \n\t"
728  "slt %[t1], %[t4], %[qc2] \n\t"
729  "slt %[t2], %[t4], %[qc3] \n\t"
730  "slt %[t3], %[t4], %[qc4] \n\t"
731  "movn %[qc1], %[t4], %[t0] \n\t"
732  "movn %[qc2], %[t4], %[t1] \n\t"
733  "movn %[qc3], %[t4], %[t2] \n\t"
734  "movn %[qc4], %[t4], %[t3] \n\t"
735  "lw %[t0], 0(%[in_int]) \n\t"
736  "lw %[t1], 4(%[in_int]) \n\t"
737  "lw %[t2], 8(%[in_int]) \n\t"
738  "lw %[t3], 12(%[in_int]) \n\t"
739  "slt %[t0], %[t0], $zero \n\t"
740  "movn %[sign1], %[t0], %[qc1] \n\t"
741  "slt %[t2], %[t2], $zero \n\t"
742  "movn %[sign2], %[t2], %[qc3] \n\t"
743  "slt %[t1], %[t1], $zero \n\t"
744  "sll %[t0], %[sign1], 1 \n\t"
745  "or %[t0], %[t0], %[t1] \n\t"
746  "movn %[sign1], %[t0], %[qc2] \n\t"
747  "slt %[t3], %[t3], $zero \n\t"
748  "sll %[t0], %[sign2], 1 \n\t"
749  "or %[t0], %[t0], %[t3] \n\t"
750  "movn %[sign2], %[t0], %[qc4] \n\t"
751  "slt %[count1], $zero, %[qc1] \n\t"
752  "slt %[t1], $zero, %[qc2] \n\t"
753  "slt %[count2], $zero, %[qc3] \n\t"
754  "slt %[t2], $zero, %[qc4] \n\t"
755  "addu %[count1], %[count1], %[t1] \n\t"
756  "addu %[count2], %[count2], %[t2] \n\t"
757 
758  ".set pop \n\t"
759 
760  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
761  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
762  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
763  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
764  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
765  [t4]"=&r"(t4)
766  : [in_int]"r"(in_int)
767  : "memory"
768  );
769 
/* Base-17 index per pair (each magnitude 0..16). */
770  curidx = 17 * qc1;
771  curidx += qc2;
772  curidx2 = 17 * qc3;
773  curidx2 += qc4;
774 
/* Codeword followed by the sign bits, once per pair. */
775  v_codes = (p_codes[curidx] << count1) | sign1;
776  v_bits = p_bits[curidx] + count1;
777  put_bits(pb, v_bits, v_codes);
778 
779  v_codes = (p_codes[curidx2] << count2) | sign2;
780  v_bits = p_bits[curidx2] + count2;
781  put_bits(pb, v_bits, v_codes);
782 
783  if (out || energy) {
784  float e1,e2,e3,e4;
785  vec1 = &p_vectors[curidx*2 ];
786  vec2 = &p_vectors[curidx2*2];
/* The table value is scaled by IQ and given the sign of the input coefficient. */
787  e1 = copysignf(vec1[0] * IQ, in[i+0]);
788  e2 = copysignf(vec1[1] * IQ, in[i+1]);
789  e3 = copysignf(vec2[0] * IQ, in[i+2]);
790  e4 = copysignf(vec2[1] * IQ, in[i+3]);
791  if (out) {
792  out[i+0] = e1;
793  out[i+1] = e2;
794  out[i+2] = e3;
795  out[i+3] = e4;
796  }
797  if (energy)
798  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
799  }
800  }
801  } else {
802  for (i = 0; i < size; i += 4) {
803  int curidx, curidx2, sign1, count1, sign2, count2;
804  int *in_int = (int *)&in[i];
805  uint8_t v_bits;
806  unsigned int v_codes;
807  int c1, c2, c3, c4;
808  int t0, t1, t2, t3, t4;
809 
810  qc1 = scaled[i ] * Q34 + ROUNDING;
811  qc2 = scaled[i+1] * Q34 + ROUNDING;
812  qc3 = scaled[i+2] * Q34 + ROUNDING;
813  qc4 = scaled[i+3] * Q34 + ROUNDING;
814 
/*
 * Same as the asm of the cb < 11 path, with one addition at the start:
 * before qcN is limited to 16, cN is derived from it with shll_s.w by 18
 * followed by a logical right shift by 18.
 * NOTE(review): shll_s.w is presumably the saturating left shift of the
 * MIPS DSP ASE, which would limit cN to 13 bits (at most 8191) — confirm
 * against the MIPS DSP ASE manual.
 */
815  __asm__ volatile (
816  ".set push \n\t"
817  ".set noreorder \n\t"
818 
819  "ori %[t4], $zero, 16 \n\t"
820  "ori %[sign1], $zero, 0 \n\t"
821  "ori %[sign2], $zero, 0 \n\t"
822  "shll_s.w %[c1], %[qc1], 18 \n\t"
823  "shll_s.w %[c2], %[qc2], 18 \n\t"
824  "shll_s.w %[c3], %[qc3], 18 \n\t"
825  "shll_s.w %[c4], %[qc4], 18 \n\t"
826  "srl %[c1], %[c1], 18 \n\t"
827  "srl %[c2], %[c2], 18 \n\t"
828  "srl %[c3], %[c3], 18 \n\t"
829  "srl %[c4], %[c4], 18 \n\t"
830  "slt %[t0], %[t4], %[qc1] \n\t"
831  "slt %[t1], %[t4], %[qc2] \n\t"
832  "slt %[t2], %[t4], %[qc3] \n\t"
833  "slt %[t3], %[t4], %[qc4] \n\t"
834  "movn %[qc1], %[t4], %[t0] \n\t"
835  "movn %[qc2], %[t4], %[t1] \n\t"
836  "movn %[qc3], %[t4], %[t2] \n\t"
837  "movn %[qc4], %[t4], %[t3] \n\t"
838  "lw %[t0], 0(%[in_int]) \n\t"
839  "lw %[t1], 4(%[in_int]) \n\t"
840  "lw %[t2], 8(%[in_int]) \n\t"
841  "lw %[t3], 12(%[in_int]) \n\t"
842  "slt %[t0], %[t0], $zero \n\t"
843  "movn %[sign1], %[t0], %[qc1] \n\t"
844  "slt %[t2], %[t2], $zero \n\t"
845  "movn %[sign2], %[t2], %[qc3] \n\t"
846  "slt %[t1], %[t1], $zero \n\t"
847  "sll %[t0], %[sign1], 1 \n\t"
848  "or %[t0], %[t0], %[t1] \n\t"
849  "movn %[sign1], %[t0], %[qc2] \n\t"
850  "slt %[t3], %[t3], $zero \n\t"
851  "sll %[t0], %[sign2], 1 \n\t"
852  "or %[t0], %[t0], %[t3] \n\t"
853  "movn %[sign2], %[t0], %[qc4] \n\t"
854  "slt %[count1], $zero, %[qc1] \n\t"
855  "slt %[t1], $zero, %[qc2] \n\t"
856  "slt %[count2], $zero, %[qc3] \n\t"
857  "slt %[t2], $zero, %[qc4] \n\t"
858  "addu %[count1], %[count1], %[t1] \n\t"
859  "addu %[count2], %[count2], %[t2] \n\t"
860 
861  ".set pop \n\t"
862 
863  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
864  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
865  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
866  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
867  [c1]"=&r"(c1), [c2]"=&r"(c2),
868  [c3]"=&r"(c3), [c4]"=&r"(c4),
869  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
870  [t4]"=&r"(t4)
871  : [in_int]"r"(in_int)
872  : "memory"
873  );
874 
/* Base-17 index per pair (each magnitude 0..16). */
875  curidx = 17 * qc1;
876  curidx += qc2;
877 
878  curidx2 = 17 * qc3;
879  curidx2 += qc4;
880 
/* First pair: codeword followed by the sign bits. */
881  v_codes = (p_codes[curidx] << count1) | sign1;
882  v_bits = p_bits[curidx] + count1;
883  put_bits(pb, v_bits, v_codes);
884 
/*
 * A vector component of 64.0f triggers an escape sequence for that
 * component: with len = av_log2(c), (len - 3) one-bits, a zero bit and the
 * low len bits of c are written, 2 * len - 3 bits in total.
 */
885  if (p_vectors[curidx*2 ] == 64.0f) {
886  int len = av_log2(c1);
887  v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
888  put_bits(pb, len * 2 - 3, v_codes);
889  }
890  if (p_vectors[curidx*2+1] == 64.0f) {
891  int len = av_log2(c2);
892  v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
893  put_bits(pb, len*2-3, v_codes);
894  }
895 
/* Second pair: codeword followed by the sign bits, then its escapes. */
896  v_codes = (p_codes[curidx2] << count2) | sign2;
897  v_bits = p_bits[curidx2] + count2;
898  put_bits(pb, v_bits, v_codes);
899 
900  if (p_vectors[curidx2*2 ] == 64.0f) {
901  int len = av_log2(c3);
902  v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
903  put_bits(pb, len* 2 - 3, v_codes);
904  }
905  if (p_vectors[curidx2*2+1] == 64.0f) {
906  int len = av_log2(c4);
907  v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
908  put_bits(pb, len * 2 - 3, v_codes);
909  }
910 
911  if (out || energy) {
912  float e1, e2, e3, e4;
/* Dequantize from cN directly (c * cbrtf(c) == c^(4/3)) instead of using the vector table. */
913  e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
914  e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
915  e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
916  e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
917  if (out) {
918  out[i+0] = e1;
919  out[i+1] = e2;
920  out[i+2] = e3;
921  out[i+3] = e4;
922  }
923  if (energy)
924  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
925  }
926  }
927  }
928  if (energy)
929  *energy = qenergy;
930 }
931 
932 static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
933  PutBitContext *pb, const float *in, float *out,
934  const float *scaled, int size, int scale_idx,
935  int cb, const float lambda, const float uplim,
936  int *bits, float *energy, const float ROUNDING) {
937  av_assert0(0);
938 }
939 
940 static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
941  PutBitContext *pb, const float *in, float *out,
942  const float *scaled, int size, int scale_idx,
943  int cb, const float lambda, const float uplim,
944  int *bits, float *energy, const float ROUNDING) {
945  int i;
946  if (bits)
947  *bits = 0;
948  if (out) {
949  for (i = 0; i < size; i += 4) {
950  out[i ] = 0.0f;
951  out[i+1] = 0.0f;
952  out[i+2] = 0.0f;
953  out[i+3] = 0.0f;
954  }
955  }
956  if (energy)
957  *energy = 0.0f;
958 }
959 
960 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
961  PutBitContext *pb, const float *in, float *out,
962  const float *scaled, int size, int scale_idx,
963  int cb, const float lambda, const float uplim,
964  int *bits, float *energy, const float ROUNDING) = {
965  quantize_and_encode_band_cost_ZERO_mips,
966  quantize_and_encode_band_cost_SQUAD_mips,
967  quantize_and_encode_band_cost_SQUAD_mips,
968  quantize_and_encode_band_cost_UQUAD_mips,
969  quantize_and_encode_band_cost_UQUAD_mips,
970  quantize_and_encode_band_cost_SPAIR_mips,
971  quantize_and_encode_band_cost_SPAIR_mips,
972  quantize_and_encode_band_cost_UPAIR7_mips,
973  quantize_and_encode_band_cost_UPAIR7_mips,
974  quantize_and_encode_band_cost_UPAIR12_mips,
975  quantize_and_encode_band_cost_UPAIR12_mips,
976  quantize_and_encode_band_cost_ESC_mips,
977  quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
978  quantize_and_encode_band_cost_ZERO_mips,
979  quantize_and_encode_band_cost_ZERO_mips,
980  quantize_and_encode_band_cost_ZERO_mips,
981 };
982 
/*
 * Forward a call to the routine that quantize_and_encode_band_cost_arr holds
 * for codebook cb. Note that cb is expanded twice: once as the table index
 * and once as an argument.
 */
#define quantize_and_encode_band_cost(s, pb, in, out, scaled, size,          \
                                      scale_idx, cb, lambda, uplim,          \
                                      bits, energy, ROUNDING)                \
    quantize_and_encode_band_cost_arr[cb](s, pb, in, out, scaled, size,      \
                                          scale_idx, cb, lambda, uplim,      \
                                          bits, energy, ROUNDING)
989 
990 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
991  const float *in, float *out, int size, int scale_idx,
992  int cb, const float lambda, int rtz)
993 {
994  quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
995  INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
996 }
997 
998 /**
999  * Functions developed from template function and optimized for getting the number of bits
1000  */
1001 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
1002  PutBitContext *pb, const float *in,
1003  const float *scaled, int size, int scale_idx,
1004  int cb, const float lambda, const float uplim,
1005  int *bits)
1006 {
1007  return 0;
1008 }
1009 
1010 static float get_band_numbits_NONE_mips(struct AACEncContext *s,
1011  PutBitContext *pb, const float *in,
1012  const float *scaled, int size, int scale_idx,
1013  int cb, const float lambda, const float uplim,
1014  int *bits)
1015 {
1016  av_assert0(0);
1017  return 0;
1018 }
1019 
/**
 * Count the codeword bits needed for a band coded with a signed 4-tuple
 * codebook. Nothing is written to a bitstream.
 *
 * Each value is quantized as (int)(scaled[k] * Q34 + ROUND_STANDARD), reduced
 * to 1 when positive and 0 otherwise, then negated when the sign bit of in[k]
 * is set, so every component is -1, 0 or 1. Four components are combined in
 * base 3, offset by 40, and used to index ff_aac_spectral_bits[cb-1].
 *
 * @param in        original coefficients; only their sign bits are read
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients; the loop consumes four per step
 * @param scale_idx scalefactor index that selects the Q34 multiplier
 * @param cb        codebook number; cb-1 selects the bit-length table
 * @return the accumulated bit count, converted to float
 *
 * s, pb, lambda, uplim and bits are not used by this function.
 */
1020 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
1021  PutBitContext *pb, const float *in,
1022  const float *scaled, int size, int scale_idx,
1023  int cb, const float lambda, const float uplim,
1024  int *bits)
1025 {
1026  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1027  int i;
1028  int qc1, qc2, qc3, qc4;
1029  int curbits = 0;
1030 
1031  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1032 
1033  for (i = 0; i < size; i += 4) {
1034  int curidx;
  /* Integer view of in[i..i+3] so the asm can read the raw sign bits. */
1035  int *in_int = (int *)&in[i];
1036  int t0, t1, t2, t3, t4, t5, t6, t7;
1037 
1038  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1039  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1040  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1041  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1042 
  /*
   * qcN = (qcN > 0); tN = sign bit of the matching input word;
   * qcN = -qcN when that sign bit is set.
   */
1043  __asm__ volatile (
1044  ".set push \n\t"
1045  ".set noreorder \n\t"
1046 
1047  "slt %[qc1], $zero, %[qc1] \n\t"
1048  "slt %[qc2], $zero, %[qc2] \n\t"
1049  "slt %[qc3], $zero, %[qc3] \n\t"
1050  "slt %[qc4], $zero, %[qc4] \n\t"
1051  "lw %[t0], 0(%[in_int]) \n\t"
1052  "lw %[t1], 4(%[in_int]) \n\t"
1053  "lw %[t2], 8(%[in_int]) \n\t"
1054  "lw %[t3], 12(%[in_int]) \n\t"
1055  "srl %[t0], %[t0], 31 \n\t"
1056  "srl %[t1], %[t1], 31 \n\t"
1057  "srl %[t2], %[t2], 31 \n\t"
1058  "srl %[t3], %[t3], 31 \n\t"
1059  "subu %[t4], $zero, %[qc1] \n\t"
1060  "subu %[t5], $zero, %[qc2] \n\t"
1061  "subu %[t6], $zero, %[qc3] \n\t"
1062  "subu %[t7], $zero, %[qc4] \n\t"
1063  "movn %[qc1], %[t4], %[t0] \n\t"
1064  "movn %[qc2], %[t5], %[t1] \n\t"
1065  "movn %[qc3], %[t6], %[t2] \n\t"
1066  "movn %[qc4], %[t7], %[t3] \n\t"
1067 
1068  ".set pop \n\t"
1069 
1070  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1071  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1072  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1073  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1074  : [in_int]"r"(in_int)
1075  : "memory"
1076  );
1077 
  /* Base-3 index over four components in {-1,0,1}; +40 shifts it to 0..80. */
1078  curidx = qc1;
1079  curidx *= 3;
1080  curidx += qc2;
1081  curidx *= 3;
1082  curidx += qc3;
1083  curidx *= 3;
1084  curidx += qc4;
1085  curidx += 40;
1086 
1087  curbits += p_bits[curidx];
1088  }
1089  return curbits;
1090 }
1091 
/**
 * Count the bits needed for a band coded with an unsigned 4-tuple codebook.
 * Nothing is written to a bitstream.
 *
 * Each value is quantized as (int)(scaled[k] * Q34 + ROUND_STANDARD) and
 * limited to at most 2. Four components are combined in base 3 into an index
 * used for both ff_aac_spectral_bits[cb-1] and uquad_sign_bits.
 *
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients; the loop consumes four per step
 * @param scale_idx scalefactor index that selects the Q34 multiplier
 * @param cb        codebook number; cb-1 selects the bit-length table
 * @return the accumulated bit count, converted to float
 *
 * s, pb, in, lambda, uplim and bits are not used by this function.
 */
1092 static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
1093  PutBitContext *pb, const float *in,
1094  const float *scaled, int size, int scale_idx,
1095  int cb, const float lambda, const float uplim,
1096  int *bits)
1097 {
1098  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1099  int i;
1100  int curbits = 0;
1101  int qc1, qc2, qc3, qc4;
1102 
1103  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1104 
1105  for (i = 0; i < size; i += 4) {
1106  int curidx;
1107  int t0, t1, t2, t3, t4;
1108 
1109  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1110  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1111  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1112  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1113 
  /* Clamp: qcN = 2 wherever qcN > 2. */
1114  __asm__ volatile (
1115  ".set push \n\t"
1116  ".set noreorder \n\t"
1117 
1118  "ori %[t4], $zero, 2 \n\t"
1119  "slt %[t0], %[t4], %[qc1] \n\t"
1120  "slt %[t1], %[t4], %[qc2] \n\t"
1121  "slt %[t2], %[t4], %[qc3] \n\t"
1122  "slt %[t3], %[t4], %[qc4] \n\t"
1123  "movn %[qc1], %[t4], %[t0] \n\t"
1124  "movn %[qc2], %[t4], %[t1] \n\t"
1125  "movn %[qc3], %[t4], %[t2] \n\t"
1126  "movn %[qc4], %[t4], %[t3] \n\t"
1127 
1128  ".set pop \n\t"
1129 
1130  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1131  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1132  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1133  [t4]"=&r"(t4)
1134  );
1135 
  /* Base-3 index built from the four clamped components. */
1136  curidx = qc1;
1137  curidx *= 3;
1138  curidx += qc2;
1139  curidx *= 3;
1140  curidx += qc3;
1141  curidx *= 3;
1142  curidx += qc4;
1143 
  /* Codeword length plus the per-entry value from uquad_sign_bits. */
1144  curbits += p_bits[curidx];
1145  curbits += uquad_sign_bits[curidx];
1146  }
1147  return curbits;
1148 }
1149 
/**
 * Count the codeword bits needed for a band coded with a signed pair
 * codebook. Nothing is written to a bitstream.
 *
 * Each value is quantized as (int)(scaled[k] * Q34 + ROUND_STANDARD), limited
 * to at most 4, then negated when the sign bit of in[k] is set. Every pair
 * (a, b) yields the index 9*a + b + 40 into ff_aac_spectral_bits[cb-1];
 * two pairs are handled per loop iteration.
 *
 * @param in        original coefficients; only their sign bits are read
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients; the loop consumes four per step
 * @param scale_idx scalefactor index that selects the Q34 multiplier
 * @param cb        codebook number; cb-1 selects the bit-length table
 * @return the accumulated bit count, converted to float
 *
 * s, pb, lambda, uplim and bits are not used by this function.
 */
1150 static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
1151  PutBitContext *pb, const float *in,
1152  const float *scaled, int size, int scale_idx,
1153  int cb, const float lambda, const float uplim,
1154  int *bits)
1155 {
1156  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1157  int i;
1158  int qc1, qc2, qc3, qc4;
1159  int curbits = 0;
1160 
1161  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1162 
1163  for (i = 0; i < size; i += 4) {
1164  int curidx, curidx2;
  /* Integer view of in[i..i+3] so the asm can read the raw sign bits. */
1165  int *in_int = (int *)&in[i];
1166  int t0, t1, t2, t3, t4, t5, t6, t7;
1167 
1168  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1169  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1170  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1171  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1172 
  /*
   * First clamp qcN to at most 4, then negate qcN where the sign bit of
   * the matching input word is set. t4 is reused: it first holds the
   * constant 4 and later -qc1.
   */
1173  __asm__ volatile (
1174  ".set push \n\t"
1175  ".set noreorder \n\t"
1176 
1177  "ori %[t4], $zero, 4 \n\t"
1178  "slt %[t0], %[t4], %[qc1] \n\t"
1179  "slt %[t1], %[t4], %[qc2] \n\t"
1180  "slt %[t2], %[t4], %[qc3] \n\t"
1181  "slt %[t3], %[t4], %[qc4] \n\t"
1182  "movn %[qc1], %[t4], %[t0] \n\t"
1183  "movn %[qc2], %[t4], %[t1] \n\t"
1184  "movn %[qc3], %[t4], %[t2] \n\t"
1185  "movn %[qc4], %[t4], %[t3] \n\t"
1186  "lw %[t0], 0(%[in_int]) \n\t"
1187  "lw %[t1], 4(%[in_int]) \n\t"
1188  "lw %[t2], 8(%[in_int]) \n\t"
1189  "lw %[t3], 12(%[in_int]) \n\t"
1190  "srl %[t0], %[t0], 31 \n\t"
1191  "srl %[t1], %[t1], 31 \n\t"
1192  "srl %[t2], %[t2], 31 \n\t"
1193  "srl %[t3], %[t3], 31 \n\t"
1194  "subu %[t4], $zero, %[qc1] \n\t"
1195  "subu %[t5], $zero, %[qc2] \n\t"
1196  "subu %[t6], $zero, %[qc3] \n\t"
1197  "subu %[t7], $zero, %[qc4] \n\t"
1198  "movn %[qc1], %[t4], %[t0] \n\t"
1199  "movn %[qc2], %[t5], %[t1] \n\t"
1200  "movn %[qc3], %[t6], %[t2] \n\t"
1201  "movn %[qc4], %[t7], %[t3] \n\t"
1202 
1203  ".set pop \n\t"
1204 
1205  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1206  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1207  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1208  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1209  : [in_int]"r"(in_int)
1210  : "memory"
1211  );
1212 
  /* Components lie in -4..4; 9*a + b + 40 maps each pair to 0..80. */
1213  curidx = 9 * qc1;
1214  curidx += qc2 + 40;
1215 
1216  curidx2 = 9 * qc3;
1217  curidx2 += qc4 + 40;
1218 
1219  curbits += p_bits[curidx] + p_bits[curidx2];
1220  }
1221  return curbits;
1222 }
1223 
/**
 * Count the bits needed for a band coded with an unsigned pair codebook
 * whose components are limited to 7. Nothing is written to a bitstream.
 *
 * Each value is quantized as (int)(scaled[k] * Q34 + ROUND_STANDARD) and
 * limited to at most 7. Every pair (a, b) yields the index 8*a + b, used
 * for both ff_aac_spectral_bits[cb-1] and upair7_sign_bits; two pairs are
 * handled per loop iteration.
 *
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients; the loop consumes four per step
 * @param scale_idx scalefactor index that selects the Q34 multiplier
 * @param cb        codebook number; cb-1 selects the bit-length table
 * @return the accumulated bit count, converted to float
 *
 * s, pb, in, lambda, uplim and bits are not used by this function.
 */
1224 static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
1225  PutBitContext *pb, const float *in,
1226  const float *scaled, int size, int scale_idx,
1227  int cb, const float lambda, const float uplim,
1228  int *bits)
1229 {
1230  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1231  int i;
1232  int qc1, qc2, qc3, qc4;
1233  int curbits = 0;
1234 
1235  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1236 
1237  for (i = 0; i < size; i += 4) {
1238  int curidx, curidx2;
1239  int t0, t1, t2, t3, t4;
1240 
1241  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1242  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1243  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1244  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1245 
  /* Clamp: qcN = 7 wherever qcN > 7. */
1246  __asm__ volatile (
1247  ".set push \n\t"
1248  ".set noreorder \n\t"
1249 
1250  "ori %[t4], $zero, 7 \n\t"
1251  "slt %[t0], %[t4], %[qc1] \n\t"
1252  "slt %[t1], %[t4], %[qc2] \n\t"
1253  "slt %[t2], %[t4], %[qc3] \n\t"
1254  "slt %[t3], %[t4], %[qc4] \n\t"
1255  "movn %[qc1], %[t4], %[t0] \n\t"
1256  "movn %[qc2], %[t4], %[t1] \n\t"
1257  "movn %[qc3], %[t4], %[t2] \n\t"
1258  "movn %[qc4], %[t4], %[t3] \n\t"
1259 
1260  ".set pop \n\t"
1261 
1262  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1263  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1264  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1265  [t4]"=&r"(t4)
1266  );
1267 
  /* One table index per pair: 8 * first + second. */
1268  curidx = 8 * qc1;
1269  curidx += qc2;
1270 
1271  curidx2 = 8 * qc3;
1272  curidx2 += qc4;
1273 
1274  curbits += p_bits[curidx] +
1275  upair7_sign_bits[curidx] +
1276  p_bits[curidx2] +
1277  upair7_sign_bits[curidx2];
1278  }
1279  return curbits;
1280 }
1281 
/**
 * Count the bits needed for a band coded with an unsigned pair codebook
 * whose components are limited to 12. Nothing is written to a bitstream.
 *
 * Each value is quantized as (int)(scaled[k] * Q34 + ROUND_STANDARD) and
 * limited to at most 12. Every pair (a, b) yields the index 13*a + b, used
 * for both ff_aac_spectral_bits[cb-1] and upair12_sign_bits; two pairs are
 * handled per loop iteration.
 *
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients; the loop consumes four per step
 * @param scale_idx scalefactor index that selects the Q34 multiplier
 * @param cb        codebook number; cb-1 selects the bit-length table
 * @return the accumulated bit count, converted to float
 *
 * s, pb, in, lambda, uplim and bits are not used by this function.
 */
1282 static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
1283  PutBitContext *pb, const float *in,
1284  const float *scaled, int size, int scale_idx,
1285  int cb, const float lambda, const float uplim,
1286  int *bits)
1287 {
1288  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1289  int i;
1290  int qc1, qc2, qc3, qc4;
1291  int curbits = 0;
1292 
1293  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1294 
1295  for (i = 0; i < size; i += 4) {
1296  int curidx, curidx2;
1297  int t0, t1, t2, t3, t4;
1298 
1299  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1300  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1301  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1302  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1303 
  /* Clamp: qcN = 12 wherever qcN > 12. */
1304  __asm__ volatile (
1305  ".set push \n\t"
1306  ".set noreorder \n\t"
1307 
1308  "ori %[t4], $zero, 12 \n\t"
1309  "slt %[t0], %[t4], %[qc1] \n\t"
1310  "slt %[t1], %[t4], %[qc2] \n\t"
1311  "slt %[t2], %[t4], %[qc3] \n\t"
1312  "slt %[t3], %[t4], %[qc4] \n\t"
1313  "movn %[qc1], %[t4], %[t0] \n\t"
1314  "movn %[qc2], %[t4], %[t1] \n\t"
1315  "movn %[qc3], %[t4], %[t2] \n\t"
1316  "movn %[qc4], %[t4], %[t3] \n\t"
1317 
1318  ".set pop \n\t"
1319 
1320  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1321  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1322  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1323  [t4]"=&r"(t4)
1324  );
1325 
  /* One table index per pair: 13 * first + second. */
1326  curidx = 13 * qc1;
1327  curidx += qc2;
1328 
1329  curidx2 = 13 * qc3;
1330  curidx2 += qc4;
1331 
1332  curbits += p_bits[curidx] +
1333  p_bits[curidx2] +
1334  upair12_sign_bits[curidx] +
1335  upair12_sign_bits[curidx2];
1336  }
1337  return curbits;
1338 }
1339 
/**
 * Count the bits needed for a band coded with the escape pair codebook.
 * Nothing is written to a bitstream.
 *
 * Each value is quantized as (int)(scaled[k] * Q34 + ROUND_STANDARD). Values
 * above 15 are replaced by 16 for the table lookup and additionally cost
 * 2 * floor(log2(c)) - 3 bits, where c is the quantized value after the
 * shll_s.w / srl round trip (see the note at the asm block). Every pair
 * (a, b) yields the index 17*a + b, used for both
 * ff_aac_spectral_bits[cb-1] and esc_sign_bits.
 *
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients; the loop consumes four per step
 * @param scale_idx scalefactor index that selects the Q34 multiplier
 * @param cb        codebook number; cb-1 selects the bit-length table
 * @return the accumulated bit count, converted to float
 *
 * s, pb, in, lambda, uplim and bits are not used by this function.
 */
1340 static float get_band_numbits_ESC_mips(struct AACEncContext *s,
1341  PutBitContext *pb, const float *in,
1342  const float *scaled, int size, int scale_idx,
1343  int cb, const float lambda, const float uplim,
1344  int *bits)
1345 {
1346  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1347  int i;
1348  int qc1, qc2, qc3, qc4;
1349  int curbits = 0;
1350 
1351  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1352 
1353  for (i = 0; i < size; i += 4) {
1354  int curidx, curidx2;
1355  int cond0, cond1, cond2, cond3;
1356  int c1, c2, c3, c4;
1357  int t4, t5;
1358 
1359  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1360  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1361  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1362  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1363 
  /*
   * Per component:
   *   cN    = (shll_s.w(qcN, 18)) >> 18   -- shll_s.w is the MIPS DSP ASE
   *           saturating shift, so this presumably caps cN at 8191
   *           (NOTE(review): confirm against the DSP ASE manual)
   *   condN = (qcN > 15); qcN = 16 when condN is set
   *   cN    = 2 * (31 - clz(cN)) - 3
   *   condN = -condN (all ones or zero); cN &= condN
   * so cN is non-zero only for components above 15.
   * t5 is reused: first the constant 16, later 31.
   */
1364  __asm__ volatile (
1365  ".set push \n\t"
1366  ".set noreorder \n\t"
1367 
1368  "ori %[t4], $zero, 15 \n\t"
1369  "ori %[t5], $zero, 16 \n\t"
1370  "shll_s.w %[c1], %[qc1], 18 \n\t"
1371  "shll_s.w %[c2], %[qc2], 18 \n\t"
1372  "shll_s.w %[c3], %[qc3], 18 \n\t"
1373  "shll_s.w %[c4], %[qc4], 18 \n\t"
1374  "srl %[c1], %[c1], 18 \n\t"
1375  "srl %[c2], %[c2], 18 \n\t"
1376  "srl %[c3], %[c3], 18 \n\t"
1377  "srl %[c4], %[c4], 18 \n\t"
1378  "slt %[cond0], %[t4], %[qc1] \n\t"
1379  "slt %[cond1], %[t4], %[qc2] \n\t"
1380  "slt %[cond2], %[t4], %[qc3] \n\t"
1381  "slt %[cond3], %[t4], %[qc4] \n\t"
1382  "movn %[qc1], %[t5], %[cond0] \n\t"
1383  "movn %[qc2], %[t5], %[cond1] \n\t"
1384  "movn %[qc3], %[t5], %[cond2] \n\t"
1385  "movn %[qc4], %[t5], %[cond3] \n\t"
1386  "ori %[t5], $zero, 31 \n\t"
1387  "clz %[c1], %[c1] \n\t"
1388  "clz %[c2], %[c2] \n\t"
1389  "clz %[c3], %[c3] \n\t"
1390  "clz %[c4], %[c4] \n\t"
1391  "subu %[c1], %[t5], %[c1] \n\t"
1392  "subu %[c2], %[t5], %[c2] \n\t"
1393  "subu %[c3], %[t5], %[c3] \n\t"
1394  "subu %[c4], %[t5], %[c4] \n\t"
1395  "sll %[c1], %[c1], 1 \n\t"
1396  "sll %[c2], %[c2], 1 \n\t"
1397  "sll %[c3], %[c3], 1 \n\t"
1398  "sll %[c4], %[c4], 1 \n\t"
1399  "addiu %[c1], %[c1], -3 \n\t"
1400  "addiu %[c2], %[c2], -3 \n\t"
1401  "addiu %[c3], %[c3], -3 \n\t"
1402  "addiu %[c4], %[c4], -3 \n\t"
1403  "subu %[cond0], $zero, %[cond0] \n\t"
1404  "subu %[cond1], $zero, %[cond1] \n\t"
1405  "subu %[cond2], $zero, %[cond2] \n\t"
1406  "subu %[cond3], $zero, %[cond3] \n\t"
1407  "and %[c1], %[c1], %[cond0] \n\t"
1408  "and %[c2], %[c2], %[cond1] \n\t"
1409  "and %[c3], %[c3], %[cond2] \n\t"
1410  "and %[c4], %[c4], %[cond3] \n\t"
1411 
1412  ".set pop \n\t"
1413 
1414  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1415  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1416  [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
1417  [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
1418  [c1]"=&r"(c1), [c2]"=&r"(c2),
1419  [c3]"=&r"(c3), [c4]"=&r"(c4),
1420  [t4]"=&r"(t4), [t5]"=&r"(t5)
1421  );
1422 
  /* One table index per pair: 17 * first + second (components 0..16). */
1423  curidx = 17 * qc1;
1424  curidx += qc2;
1425 
1426  curidx2 = 17 * qc3;
1427  curidx2 += qc4;
1428 
1429  curbits += p_bits[curidx];
1430  curbits += esc_sign_bits[curidx];
1431  curbits += p_bits[curidx2];
1432  curbits += esc_sign_bits[curidx2];
1433 
  /* Add the escape-sequence lengths computed in the asm block. */
1434  curbits += c1;
1435  curbits += c2;
1436  curbits += c3;
1437  curbits += c4;
1438  }
1439  return curbits;
1440 }
1441 
1442 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
1443  PutBitContext *pb, const float *in,
1444  const float *scaled, int size, int scale_idx,
1445  int cb, const float lambda, const float uplim,
1446  int *bits) = {
1447  get_band_numbits_ZERO_mips,
1448  get_band_numbits_SQUAD_mips,
1449  get_band_numbits_SQUAD_mips,
1450  get_band_numbits_UQUAD_mips,
1451  get_band_numbits_UQUAD_mips,
1452  get_band_numbits_SPAIR_mips,
1453  get_band_numbits_SPAIR_mips,
1454  get_band_numbits_UPAIR7_mips,
1455  get_band_numbits_UPAIR7_mips,
1456  get_band_numbits_UPAIR12_mips,
1457  get_band_numbits_UPAIR12_mips,
1458  get_band_numbits_ESC_mips,
1459  get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
1460  get_band_numbits_ZERO_mips,
1461  get_band_numbits_ZERO_mips,
1462  get_band_numbits_ZERO_mips,
1463 };
1464 
/*
 * Forward a call to the bit-counting routine that get_band_numbits_arr holds
 * for codebook cb. Note that cb is expanded twice: once as the table index
 * and once as an argument.
 */
#define get_band_numbits(s, pb, in, scaled, size, scale_idx, cb,             \
                         lambda, uplim, bits)                                \
    get_band_numbits_arr[cb](s, pb, in, scaled, size, scale_idx, cb,         \
                             lambda, uplim, bits)
1471 
/**
 * Return the number of bits needed to code a band with codebook cb, without
 * writing to a bitstream (the bit writer passed down is NULL).
 *
 * The work is delegated to the codebook-specific get_band_numbits_*_mips()
 * routine. Those routines accept a bits pointer but never store through it,
 * so the count is stored here to keep the out-parameter usable.
 *
 * @param bits   if non-NULL, receives the bit count converted to int
 * @param energy not used; *energy is left untouched
 * @param rtz    not used
 * @return the bit count reported by the codebook-specific routine
 */
static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
                                     const float *scaled, int size, int scale_idx,
                                     int cb, const float lambda, const float uplim,
                                     int *bits, float *energy, int rtz)
{
    const float numbits = get_band_numbits(s, NULL, in, scaled, size, scale_idx,
                                           cb, lambda, uplim, bits);

    /* Mirror what the get_band_cost_*_mips() routines do with *bits. */
    if (bits)
        *bits = numbits;
    return numbits;
}
1479 
1480 /**
1481  * Functions developed from template function and optimized for getting the band cost
1482  */
1483 #if HAVE_MIPSFPU
1484 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
1485  PutBitContext *pb, const float *in,
1486  const float *scaled, int size, int scale_idx,
1487  int cb, const float lambda, const float uplim,
1488  int *bits, float *energy)
1489 {
1490  int i;
1491  float cost = 0;
1492 
1493  for (i = 0; i < size; i += 4) {
1494  cost += in[i ] * in[i ];
1495  cost += in[i+1] * in[i+1];
1496  cost += in[i+2] * in[i+2];
1497  cost += in[i+3] * in[i+3];
1498  }
1499  if (bits)
1500  *bits = 0;
1501  if (energy)
1502  *energy = 0.0f;
1503  return cost * lambda;
1504 }
1505 
1506 static float get_band_cost_NONE_mips(struct AACEncContext *s,
1507  PutBitContext *pb, const float *in,
1508  const float *scaled, int size, int scale_idx,
1509  int cb, const float lambda, const float uplim,
1510  int *bits, float *energy)
1511 {
1512  av_assert0(0);
1513  return 0;
1514 }
1515 
/**
 * Compute the cost of coding a band with a signed 4-tuple codebook.
 * Nothing is written to a bitstream.
 *
 * Each value is quantized as (int)(scaled[k] * Q34 + ROUND_STANDARD), reduced
 * to 1 when positive and 0 otherwise, then negated when the sign bit of in[k]
 * is set. Four components form a base-3 index (offset 40) that selects a
 * bit length from ff_aac_spectral_bits[cb-1] and a 4-float vector from
 * ff_aac_codebook_vectors[cb-1]. The distortion is the sum of
 * (in[k] - vec[k] * IQ)^2 over the band.
 *
 * @param in        original coefficients
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients; the loop consumes four per step
 * @param scale_idx scalefactor index that selects Q34 and IQ
 * @param cb        codebook number; cb-1 selects the tables
 * @param lambda    weight applied to the distortion
 * @param bits      if non-NULL, receives the accumulated bit count
 * @param energy    if non-NULL, receives the sum of squared codebook vector
 *                  components multiplied by IQ*IQ
 * @return distortion * lambda + bit count
 *
 * s, pb and uplim are not used by this function.
 */
1516 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
1517  PutBitContext *pb, const float *in,
1518  const float *scaled, int size, int scale_idx,
1519  int cb, const float lambda, const float uplim,
1520  int *bits, float *energy)
1521 {
1522  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1523  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1524  int i;
1525  float cost = 0;
1526  float qenergy = 0.0f;
1527  int qc1, qc2, qc3, qc4;
1528  int curbits = 0;
1529 
1530  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1531  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1532 
1533  for (i = 0; i < size; i += 4) {
1534  const float *vec;
1535  int curidx;
  /* Two views of in[i..i+3]: integer words for sign bits, floats for the FPU. */
1536  int *in_int = (int *)&in[i];
1537  float *in_pos = (float *)&in[i];
1538  float di0, di1, di2, di3;
1539  int t0, t1, t2, t3, t4, t5, t6, t7;
1540 
1541  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1542  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1543  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1544  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1545 
  /*
   * qcN = (qcN > 0); tN = sign bit of the matching input word;
   * qcN = -qcN when that sign bit is set.
   */
1546  __asm__ volatile (
1547  ".set push \n\t"
1548  ".set noreorder \n\t"
1549 
1550  "slt %[qc1], $zero, %[qc1] \n\t"
1551  "slt %[qc2], $zero, %[qc2] \n\t"
1552  "slt %[qc3], $zero, %[qc3] \n\t"
1553  "slt %[qc4], $zero, %[qc4] \n\t"
1554  "lw %[t0], 0(%[in_int]) \n\t"
1555  "lw %[t1], 4(%[in_int]) \n\t"
1556  "lw %[t2], 8(%[in_int]) \n\t"
1557  "lw %[t3], 12(%[in_int]) \n\t"
1558  "srl %[t0], %[t0], 31 \n\t"
1559  "srl %[t1], %[t1], 31 \n\t"
1560  "srl %[t2], %[t2], 31 \n\t"
1561  "srl %[t3], %[t3], 31 \n\t"
1562  "subu %[t4], $zero, %[qc1] \n\t"
1563  "subu %[t5], $zero, %[qc2] \n\t"
1564  "subu %[t6], $zero, %[qc3] \n\t"
1565  "subu %[t7], $zero, %[qc4] \n\t"
1566  "movn %[qc1], %[t4], %[t0] \n\t"
1567  "movn %[qc2], %[t5], %[t1] \n\t"
1568  "movn %[qc3], %[t6], %[t2] \n\t"
1569  "movn %[qc4], %[t7], %[t3] \n\t"
1570 
1571  ".set pop \n\t"
1572 
1573  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1574  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1575  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1576  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1577  : [in_int]"r"(in_int)
1578  : "memory"
1579  );
1580 
  /* Base-3 index over four components in {-1,0,1}; +40 shifts it to 0..80. */
1581  curidx = qc1;
1582  curidx *= 3;
1583  curidx += qc2;
1584  curidx *= 3;
1585  curidx += qc3;
1586  curidx *= 3;
1587  curidx += qc4;
1588  curidx += 40;
1589 
1590  curbits += p_bits[curidx];
  /* Four floats per codebook entry. */
1591  vec = &p_codes[curidx*4];
1592 
1593  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1594  + vec[2]*vec[2] + vec[3]*vec[3];
1595 
  /* diN = in[i+N] - vec[N] * IQ, computed with nmsub.s; $f0-$f7 are scratch. */
1596  __asm__ volatile (
1597  ".set push \n\t"
1598  ".set noreorder \n\t"
1599 
1600  "lwc1 $f0, 0(%[in_pos]) \n\t"
1601  "lwc1 $f1, 0(%[vec]) \n\t"
1602  "lwc1 $f2, 4(%[in_pos]) \n\t"
1603  "lwc1 $f3, 4(%[vec]) \n\t"
1604  "lwc1 $f4, 8(%[in_pos]) \n\t"
1605  "lwc1 $f5, 8(%[vec]) \n\t"
1606  "lwc1 $f6, 12(%[in_pos]) \n\t"
1607  "lwc1 $f7, 12(%[vec]) \n\t"
1608  "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1609  "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1610  "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1611  "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1612 
1613  ".set pop \n\t"
1614 
1615  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1616  [di2]"=&f"(di2), [di3]"=&f"(di3)
1617  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1618  [IQ]"f"(IQ)
1619  : "$f0", "$f1", "$f2", "$f3",
1620  "$f4", "$f5", "$f6", "$f7",
1621  "memory"
1622  );
1623 
1624  cost += di0 * di0 + di1 * di1
1625  + di2 * di2 + di3 * di3;
1626  }
1627 
1628  if (bits)
1629  *bits = curbits;
1630  if (energy)
1631  *energy = qenergy * (IQ*IQ);
1632  return cost * lambda + curbits;
1633 }
1634 
/**
 * Compute the cost of coding a band with an unsigned 4-tuple codebook.
 * Nothing is written to a bitstream.
 *
 * Each value is quantized as (int)(scaled[k] * Q34 + ROUND_STANDARD) and
 * limited to at most 2. Four components form a base-3 index that selects a
 * bit length from ff_aac_spectral_bits[cb-1], an extra count from
 * uquad_sign_bits and a 4-float vector from ff_aac_codebook_vectors[cb-1].
 * The distortion is the sum of (|in[k]| - vec[k] * IQ)^2 over the band.
 *
 * @param in        original coefficients; only their magnitudes are used
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients; the loop consumes four per step
 * @param scale_idx scalefactor index that selects Q34 and IQ
 * @param cb        codebook number; cb-1 selects the tables
 * @param lambda    weight applied to the distortion
 * @param bits      if non-NULL, receives the accumulated bit count
 * @param energy    if non-NULL, receives the sum of squared codebook vector
 *                  components multiplied by IQ*IQ
 * @return distortion * lambda + bit count
 *
 * s, pb and uplim are not used by this function.
 */
1635 static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
1636  PutBitContext *pb, const float *in,
1637  const float *scaled, int size, int scale_idx,
1638  int cb, const float lambda, const float uplim,
1639  int *bits, float *energy)
1640 {
1641  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1642  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1643  int i;
1644  float cost = 0;
1645  float qenergy = 0.0f;
1646  int curbits = 0;
1647  int qc1, qc2, qc3, qc4;
1648 
1649  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1650  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1651 
1652  for (i = 0; i < size; i += 4) {
1653  const float *vec;
1654  int curidx;
1655  float *in_pos = (float *)&in[i];
1656  float di0, di1, di2, di3;
1657  int t0, t1, t2, t3, t4;
1658 
1659  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1660  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1661  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1662  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1663 
  /* Clamp: qcN = 2 wherever qcN > 2. */
1664  __asm__ volatile (
1665  ".set push \n\t"
1666  ".set noreorder \n\t"
1667 
1668  "ori %[t4], $zero, 2 \n\t"
1669  "slt %[t0], %[t4], %[qc1] \n\t"
1670  "slt %[t1], %[t4], %[qc2] \n\t"
1671  "slt %[t2], %[t4], %[qc3] \n\t"
1672  "slt %[t3], %[t4], %[qc4] \n\t"
1673  "movn %[qc1], %[t4], %[t0] \n\t"
1674  "movn %[qc2], %[t4], %[t1] \n\t"
1675  "movn %[qc3], %[t4], %[t2] \n\t"
1676  "movn %[qc4], %[t4], %[t3] \n\t"
1677 
1678  ".set pop \n\t"
1679 
1680  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1681  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1682  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1683  [t4]"=&r"(t4)
1684  );
1685 
  /* Base-3 index built from the four clamped components. */
1686  curidx = qc1;
1687  curidx *= 3;
1688  curidx += qc2;
1689  curidx *= 3;
1690  curidx += qc3;
1691  curidx *= 3;
1692  curidx += qc4;
1693 
1694  curbits += p_bits[curidx];
1695  curbits += uquad_sign_bits[curidx];
  /* Four floats per codebook entry. */
1696  vec = &p_codes[curidx*4];
1697 
1698  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1699  + vec[2]*vec[2] + vec[3]*vec[3];
1700 
  /* diN = |in[i+N]| - vec[N] * IQ (abs.s then nmsub.s); $f0-$f3 are scratch. */
1701  __asm__ volatile (
1702  ".set push \n\t"
1703  ".set noreorder \n\t"
1704 
1705  "lwc1 %[di0], 0(%[in_pos]) \n\t"
1706  "lwc1 %[di1], 4(%[in_pos]) \n\t"
1707  "lwc1 %[di2], 8(%[in_pos]) \n\t"
1708  "lwc1 %[di3], 12(%[in_pos]) \n\t"
1709  "abs.s %[di0], %[di0] \n\t"
1710  "abs.s %[di1], %[di1] \n\t"
1711  "abs.s %[di2], %[di2] \n\t"
1712  "abs.s %[di3], %[di3] \n\t"
1713  "lwc1 $f0, 0(%[vec]) \n\t"
1714  "lwc1 $f1, 4(%[vec]) \n\t"
1715  "lwc1 $f2, 8(%[vec]) \n\t"
1716  "lwc1 $f3, 12(%[vec]) \n\t"
1717  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1718  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1719  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1720  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1721 
1722  ".set pop \n\t"
1723 
1724  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1725  [di2]"=&f"(di2), [di3]"=&f"(di3)
1726  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1727  [IQ]"f"(IQ)
1728  : "$f0", "$f1", "$f2", "$f3",
1729  "memory"
1730  );
1731 
1732  cost += di0 * di0 + di1 * di1
1733  + di2 * di2 + di3 * di3;
1734  }
1735 
1736  if (bits)
1737  *bits = curbits;
1738  if (energy)
1739  *energy = qenergy * (IQ*IQ);
1740  return cost * lambda + curbits;
1741 }
1742 
/**
 * Compute the cost of coding a band with a signed pair codebook.
 * Nothing is written to a bitstream.
 *
 * Each value is quantized as (int)(scaled[k] * Q34 + ROUND_STANDARD), limited
 * to at most 4, then negated when the sign bit of in[k] is set. Every pair
 * (a, b) yields the index 9*a + b + 40, which selects a bit length from
 * ff_aac_spectral_bits[cb-1] and a 2-float vector from
 * ff_aac_codebook_vectors[cb-1]. The distortion is the sum of
 * (in[k] - vec[k] * IQ)^2 over the band. Two pairs are handled per iteration.
 *
 * @param in        original coefficients
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients; the loop consumes four per step
 * @param scale_idx scalefactor index that selects Q34 and IQ
 * @param cb        codebook number; cb-1 selects the tables
 * @param lambda    weight applied to the distortion
 * @param bits      if non-NULL, receives the accumulated bit count
 * @param energy    if non-NULL, receives the sum of squared codebook vector
 *                  components multiplied by IQ*IQ
 * @return distortion * lambda + bit count
 *
 * s, pb and uplim are not used by this function.
 */
1743 static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
1744  PutBitContext *pb, const float *in,
1745  const float *scaled, int size, int scale_idx,
1746  int cb, const float lambda, const float uplim,
1747  int *bits, float *energy)
1748 {
1749  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1750  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1751  int i;
1752  float cost = 0;
1753  float qenergy = 0.0f;
1754  int qc1, qc2, qc3, qc4;
1755  int curbits = 0;
1756 
1757  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1758  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1759 
1760  for (i = 0; i < size; i += 4) {
1761  const float *vec, *vec2;
1762  int curidx, curidx2;
  /* Two views of in[i..i+3]: integer words for sign bits, floats for the FPU. */
1763  int *in_int = (int *)&in[i];
1764  float *in_pos = (float *)&in[i];
1765  float di0, di1, di2, di3;
1766  int t0, t1, t2, t3, t4, t5, t6, t7;
1767 
1768  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1769  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1770  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1771  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1772 
  /*
   * First clamp qcN to at most 4, then negate qcN where the sign bit of
   * the matching input word is set. t4 is reused: it first holds the
   * constant 4 and later -qc1.
   */
1773  __asm__ volatile (
1774  ".set push \n\t"
1775  ".set noreorder \n\t"
1776 
1777  "ori %[t4], $zero, 4 \n\t"
1778  "slt %[t0], %[t4], %[qc1] \n\t"
1779  "slt %[t1], %[t4], %[qc2] \n\t"
1780  "slt %[t2], %[t4], %[qc3] \n\t"
1781  "slt %[t3], %[t4], %[qc4] \n\t"
1782  "movn %[qc1], %[t4], %[t0] \n\t"
1783  "movn %[qc2], %[t4], %[t1] \n\t"
1784  "movn %[qc3], %[t4], %[t2] \n\t"
1785  "movn %[qc4], %[t4], %[t3] \n\t"
1786  "lw %[t0], 0(%[in_int]) \n\t"
1787  "lw %[t1], 4(%[in_int]) \n\t"
1788  "lw %[t2], 8(%[in_int]) \n\t"
1789  "lw %[t3], 12(%[in_int]) \n\t"
1790  "srl %[t0], %[t0], 31 \n\t"
1791  "srl %[t1], %[t1], 31 \n\t"
1792  "srl %[t2], %[t2], 31 \n\t"
1793  "srl %[t3], %[t3], 31 \n\t"
1794  "subu %[t4], $zero, %[qc1] \n\t"
1795  "subu %[t5], $zero, %[qc2] \n\t"
1796  "subu %[t6], $zero, %[qc3] \n\t"
1797  "subu %[t7], $zero, %[qc4] \n\t"
1798  "movn %[qc1], %[t4], %[t0] \n\t"
1799  "movn %[qc2], %[t5], %[t1] \n\t"
1800  "movn %[qc3], %[t6], %[t2] \n\t"
1801  "movn %[qc4], %[t7], %[t3] \n\t"
1802 
1803  ".set pop \n\t"
1804 
1805  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1806  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1807  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1808  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1809  : [in_int]"r"(in_int)
1810  : "memory"
1811  );
1812 
  /* Components lie in -4..4; 9*a + b + 40 maps each pair to 0..80. */
1813  curidx = 9 * qc1;
1814  curidx += qc2 + 40;
1815 
1816  curidx2 = 9 * qc3;
1817  curidx2 += qc4 + 40;
1818 
1819  curbits += p_bits[curidx];
1820  curbits += p_bits[curidx2];
1821 
  /* Two floats per codebook entry. */
1822  vec = &p_codes[curidx*2];
1823  vec2 = &p_codes[curidx2*2];
1824 
1825  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1826  + vec2[0]*vec2[0] + vec2[1]*vec2[1];
1827 
  /*
   * di0/di1 = in[i+0..1] - vec[0..1] * IQ, di2/di3 = in[i+2..3] - vec2[0..1] * IQ,
   * computed with nmsub.s; $f0-$f7 are scratch.
   */
1828  __asm__ volatile (
1829  ".set push \n\t"
1830  ".set noreorder \n\t"
1831 
1832  "lwc1 $f0, 0(%[in_pos]) \n\t"
1833  "lwc1 $f1, 0(%[vec]) \n\t"
1834  "lwc1 $f2, 4(%[in_pos]) \n\t"
1835  "lwc1 $f3, 4(%[vec]) \n\t"
1836  "lwc1 $f4, 8(%[in_pos]) \n\t"
1837  "lwc1 $f5, 0(%[vec2]) \n\t"
1838  "lwc1 $f6, 12(%[in_pos]) \n\t"
1839  "lwc1 $f7, 4(%[vec2]) \n\t"
1840  "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1841  "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1842  "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1843  "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1844 
1845  ".set pop \n\t"
1846 
1847  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1848  [di2]"=&f"(di2), [di3]"=&f"(di3)
1849  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1850  [vec2]"r"(vec2), [IQ]"f"(IQ)
1851  : "$f0", "$f1", "$f2", "$f3",
1852  "$f4", "$f5", "$f6", "$f7",
1853  "memory"
1854  );
1855 
1856  cost += di0 * di0 + di1 * di1
1857  + di2 * di2 + di3 * di3;
1858  }
1859 
1860  if (bits)
1861  *bits = curbits;
1862  if (energy)
1863  *energy = qenergy * (IQ*IQ);
1864  return cost * lambda + curbits;
1865 }
1866 
/**
 * Compute the cost of coding a band with an unsigned pair codebook whose
 * components are limited to 7. Nothing is written to a bitstream.
 *
 * Each value is quantized as (int)(scaled[k] * Q34 + ROUND_STANDARD) and
 * limited to at most 7. Every pair (a, b) yields the index 8*a + b, which
 * selects a bit length from ff_aac_spectral_bits[cb-1], an extra count from
 * upair7_sign_bits and a 2-float vector from ff_aac_codebook_vectors[cb-1].
 * The distortion is the sum of (|in[k]| - vec[k] * IQ)^2 over the band.
 * Two pairs are handled per iteration.
 *
 * @param in        original coefficients
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients; the loop consumes four per step
 * @param scale_idx scalefactor index that selects Q34 and IQ
 * @param cb        codebook number; cb-1 selects the tables
 * @param lambda    weight applied to the distortion
 * @param bits      if non-NULL, receives the accumulated bit count
 * @param energy    if non-NULL, receives the sum of squared codebook vector
 *                  components multiplied by IQ*IQ
 * @return distortion * lambda + bit count
 *
 * s, pb and uplim are not used by this function.
 */
1867 static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
1868  PutBitContext *pb, const float *in,
1869  const float *scaled, int size, int scale_idx,
1870  int cb, const float lambda, const float uplim,
1871  int *bits, float *energy)
1872 {
1873  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1874  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1875  int i;
1876  float cost = 0;
1877  float qenergy = 0.0f;
1878  int qc1, qc2, qc3, qc4;
1879  int curbits = 0;
1880 
1881  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1882  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1883 
1884  for (i = 0; i < size; i += 4) {
1885  const float *vec, *vec2;
1886  int curidx, curidx2, sign1, count1, sign2, count2;
  /* Two views of in[i..i+3]: integer words for sign tests, floats for the FPU. */
1887  int *in_int = (int *)&in[i];
1888  float *in_pos = (float *)&in[i];
1889  float di0, di1, di2, di3;
1890  int t0, t1, t2, t3, t4;
1891 
1892  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1893  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1894  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1895  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1896 
  /*
   * Clamp qcN to at most 7. In addition the block packs, per pair, the sign
   * flags of the inputs whose quantized value is non-zero into sign1/sign2
   * and counts the positive quantized values into count1/count2.
   * NOTE(review): sign1, sign2, count1 and count2 are not read anywhere
   * after this asm statement in this function.
   */
1897  __asm__ volatile (
1898  ".set push \n\t"
1899  ".set noreorder \n\t"
1900 
1901  "ori %[t4], $zero, 7 \n\t"
1902  "ori %[sign1], $zero, 0 \n\t"
1903  "ori %[sign2], $zero, 0 \n\t"
1904  "slt %[t0], %[t4], %[qc1] \n\t"
1905  "slt %[t1], %[t4], %[qc2] \n\t"
1906  "slt %[t2], %[t4], %[qc3] \n\t"
1907  "slt %[t3], %[t4], %[qc4] \n\t"
1908  "movn %[qc1], %[t4], %[t0] \n\t"
1909  "movn %[qc2], %[t4], %[t1] \n\t"
1910  "movn %[qc3], %[t4], %[t2] \n\t"
1911  "movn %[qc4], %[t4], %[t3] \n\t"
1912  "lw %[t0], 0(%[in_int]) \n\t"
1913  "lw %[t1], 4(%[in_int]) \n\t"
1914  "lw %[t2], 8(%[in_int]) \n\t"
1915  "lw %[t3], 12(%[in_int]) \n\t"
1916  "slt %[t0], %[t0], $zero \n\t"
1917  "movn %[sign1], %[t0], %[qc1] \n\t"
1918  "slt %[t2], %[t2], $zero \n\t"
1919  "movn %[sign2], %[t2], %[qc3] \n\t"
1920  "slt %[t1], %[t1], $zero \n\t"
1921  "sll %[t0], %[sign1], 1 \n\t"
1922  "or %[t0], %[t0], %[t1] \n\t"
1923  "movn %[sign1], %[t0], %[qc2] \n\t"
1924  "slt %[t3], %[t3], $zero \n\t"
1925  "sll %[t0], %[sign2], 1 \n\t"
1926  "or %[t0], %[t0], %[t3] \n\t"
1927  "movn %[sign2], %[t0], %[qc4] \n\t"
1928  "slt %[count1], $zero, %[qc1] \n\t"
1929  "slt %[t1], $zero, %[qc2] \n\t"
1930  "slt %[count2], $zero, %[qc3] \n\t"
1931  "slt %[t2], $zero, %[qc4] \n\t"
1932  "addu %[count1], %[count1], %[t1] \n\t"
1933  "addu %[count2], %[count2], %[t2] \n\t"
1934 
1935  ".set pop \n\t"
1936 
1937  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1938  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1939  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1940  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1941  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1942  [t4]"=&r"(t4)
1943  : [in_int]"r"(in_int)
1944  : "memory"
1945  );
1946 
  /* One table index per pair: 8 * first + second. */
1947  curidx = 8 * qc1;
1948  curidx += qc2;
1949 
1950  curidx2 = 8 * qc3;
1951  curidx2 += qc4;
1952 
1953  curbits += p_bits[curidx];
1954  curbits += upair7_sign_bits[curidx];
  /* Two floats per codebook entry. */
1955  vec = &p_codes[curidx*2];
1956 
1957  curbits += p_bits[curidx2];
1958  curbits += upair7_sign_bits[curidx2];
1959  vec2 = &p_codes[curidx2*2];
1960 
1961  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1962  + vec2[0]*vec2[0] + vec2[1]*vec2[1];
1963 
  /*
   * di0/di1 = |in[i+0..1]| - vec[0..1] * IQ, di2/di3 = |in[i+2..3]| - vec2[0..1] * IQ
   * (abs.s then nmsub.s); $f0-$f3 are scratch.
   */
1964  __asm__ volatile (
1965  ".set push \n\t"
1966  ".set noreorder \n\t"
1967 
1968  "lwc1 %[di0], 0(%[in_pos]) \n\t"
1969  "lwc1 %[di1], 4(%[in_pos]) \n\t"
1970  "lwc1 %[di2], 8(%[in_pos]) \n\t"
1971  "lwc1 %[di3], 12(%[in_pos]) \n\t"
1972  "abs.s %[di0], %[di0] \n\t"
1973  "abs.s %[di1], %[di1] \n\t"
1974  "abs.s %[di2], %[di2] \n\t"
1975  "abs.s %[di3], %[di3] \n\t"
1976  "lwc1 $f0, 0(%[vec]) \n\t"
1977  "lwc1 $f1, 4(%[vec]) \n\t"
1978  "lwc1 $f2, 0(%[vec2]) \n\t"
1979  "lwc1 $f3, 4(%[vec2]) \n\t"
1980  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1981  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1982  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1983  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1984 
1985  ".set pop \n\t"
1986 
1987  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1988  [di2]"=&f"(di2), [di3]"=&f"(di3)
1989  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1990  [vec2]"r"(vec2), [IQ]"f"(IQ)
1991  : "$f0", "$f1", "$f2", "$f3",
1992  "memory"
1993  );
1994 
1995  cost += di0 * di0 + di1 * di1
1996  + di2 * di2 + di3 * di3;
1997  }
1998 
1999  if (bits)
2000  *bits = curbits;
2001  if (energy)
2002  *energy = qenergy * (IQ*IQ);
2003  return cost * lambda + curbits;
2004 }
2005 
2006 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
2007  PutBitContext *pb, const float *in,
2008  const float *scaled, int size, int scale_idx,
2009  int cb, const float lambda, const float uplim,
2010  int *bits, float *energy)
2011 {
2012  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2013  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
2014  int i;
2015  float cost = 0;
2016  float qenergy = 0.0f;
2017  int qc1, qc2, qc3, qc4;
2018  int curbits = 0;
2019 
2020  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
2021  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
2022 
2023  for (i = 0; i < size; i += 4) {
2024  const float *vec, *vec2;
2025  int curidx, curidx2;
2026  int sign1, count1, sign2, count2;
2027  int *in_int = (int *)&in[i];
2028  float *in_pos = (float *)&in[i];
2029  float di0, di1, di2, di3;
2030  int t0, t1, t2, t3, t4;
2031 
2032  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2033  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2034  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2035  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2036 
2037  __asm__ volatile (
2038  ".set push \n\t"
2039  ".set noreorder \n\t"
2040 
2041  "ori %[t4], $zero, 12 \n\t"
2042  "ori %[sign1], $zero, 0 \n\t"
2043  "ori %[sign2], $zero, 0 \n\t"
2044  "slt %[t0], %[t4], %[qc1] \n\t"
2045  "slt %[t1], %[t4], %[qc2] \n\t"
2046  "slt %[t2], %[t4], %[qc3] \n\t"
2047  "slt %[t3], %[t4], %[qc4] \n\t"
2048  "movn %[qc1], %[t4], %[t0] \n\t"
2049  "movn %[qc2], %[t4], %[t1] \n\t"
2050  "movn %[qc3], %[t4], %[t2] \n\t"
2051  "movn %[qc4], %[t4], %[t3] \n\t"
2052  "lw %[t0], 0(%[in_int]) \n\t"
2053  "lw %[t1], 4(%[in_int]) \n\t"
2054  "lw %[t2], 8(%[in_int]) \n\t"
2055  "lw %[t3], 12(%[in_int]) \n\t"
2056  "slt %[t0], %[t0], $zero \n\t"
2057  "movn %[sign1], %[t0], %[qc1] \n\t"
2058  "slt %[t2], %[t2], $zero \n\t"
2059  "movn %[sign2], %[t2], %[qc3] \n\t"
2060  "slt %[t1], %[t1], $zero \n\t"
2061  "sll %[t0], %[sign1], 1 \n\t"
2062  "or %[t0], %[t0], %[t1] \n\t"
2063  "movn %[sign1], %[t0], %[qc2] \n\t"
2064  "slt %[t3], %[t3], $zero \n\t"
2065  "sll %[t0], %[sign2], 1 \n\t"
2066  "or %[t0], %[t0], %[t3] \n\t"
2067  "movn %[sign2], %[t0], %[qc4] \n\t"
2068  "slt %[count1], $zero, %[qc1] \n\t"
2069  "slt %[t1], $zero, %[qc2] \n\t"
2070  "slt %[count2], $zero, %[qc3] \n\t"
2071  "slt %[t2], $zero, %[qc4] \n\t"
2072  "addu %[count1], %[count1], %[t1] \n\t"
2073  "addu %[count2], %[count2], %[t2] \n\t"
2074 
2075  ".set pop \n\t"
2076 
2077  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2078  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2079  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
2080  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
2081  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
2082  [t4]"=&r"(t4)
2083  : [in_int]"r"(in_int)
2084  : "memory"
2085  );
2086 
2087  curidx = 13 * qc1;
2088  curidx += qc2;
2089 
2090  curidx2 = 13 * qc3;
2091  curidx2 += qc4;
2092 
2093  curbits += p_bits[curidx];
2094  curbits += p_bits[curidx2];
2095  curbits += upair12_sign_bits[curidx];
2096  curbits += upair12_sign_bits[curidx2];
2097  vec = &p_codes[curidx*2];
2098  vec2 = &p_codes[curidx2*2];
2099 
2100  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
2101  + vec2[0]*vec2[0] + vec2[1]*vec2[1];
2102 
2103  __asm__ volatile (
2104  ".set push \n\t"
2105  ".set noreorder \n\t"
2106 
2107  "lwc1 %[di0], 0(%[in_pos]) \n\t"
2108  "lwc1 %[di1], 4(%[in_pos]) \n\t"
2109  "lwc1 %[di2], 8(%[in_pos]) \n\t"
2110  "lwc1 %[di3], 12(%[in_pos]) \n\t"
2111  "abs.s %[di0], %[di0] \n\t"
2112  "abs.s %[di1], %[di1] \n\t"
2113  "abs.s %[di2], %[di2] \n\t"
2114  "abs.s %[di3], %[di3] \n\t"
2115  "lwc1 $f0, 0(%[vec]) \n\t"
2116  "lwc1 $f1, 4(%[vec]) \n\t"
2117  "lwc1 $f2, 0(%[vec2]) \n\t"
2118  "lwc1 $f3, 4(%[vec2]) \n\t"
2119  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
2120  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
2121  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
2122  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
2123 
2124  ".set pop \n\t"
2125 
2126  : [di0]"=&f"(di0), [di1]"=&f"(di1),
2127  [di2]"=&f"(di2), [di3]"=&f"(di3)
2128  : [in_pos]"r"(in_pos), [vec]"r"(vec),
2129  [vec2]"r"(vec2), [IQ]"f"(IQ)
2130  : "$f0", "$f1", "$f2", "$f3",
2131  "memory"
2132  );
2133 
2134  cost += di0 * di0 + di1 * di1
2135  + di2 * di2 + di3 * di3;
2136  }
2137 
2138  if (bits)
2139  *bits = curbits;
2140  if (energy)
2141  *energy = qenergy * (IQ*IQ);
2142  return cost * lambda + curbits;
2143 }
2144 
2145 static float get_band_cost_ESC_mips(struct AACEncContext *s,
2146  PutBitContext *pb, const float *in,
2147  const float *scaled, int size, int scale_idx,
2148  int cb, const float lambda, const float uplim,
2149  int *bits, float *energy)
2150 {
2151  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2152  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
2153  const float CLIPPED_ESCAPE = 165140.0f * IQ;
2154  int i;
2155  float cost = 0;
2156  float qenergy = 0.0f;
2157  int qc1, qc2, qc3, qc4;
2158  int curbits = 0;
2159 
2160  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
2161  float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
2162 
2163  for (i = 0; i < size; i += 4) {
2164  const float *vec, *vec2;
2165  int curidx, curidx2;
2166  float t1, t2, t3, t4, V;
2167  float di1, di2, di3, di4;
2168  int cond0, cond1, cond2, cond3;
2169  int c1, c2, c3, c4;
2170  int t6, t7;
2171 
2172  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2173  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2174  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2175  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2176 
2177  __asm__ volatile (
2178  ".set push \n\t"
2179  ".set noreorder \n\t"
2180 
2181  "ori %[t6], $zero, 15 \n\t"
2182  "ori %[t7], $zero, 16 \n\t"
2183  "shll_s.w %[c1], %[qc1], 18 \n\t"
2184  "shll_s.w %[c2], %[qc2], 18 \n\t"
2185  "shll_s.w %[c3], %[qc3], 18 \n\t"
2186  "shll_s.w %[c4], %[qc4], 18 \n\t"
2187  "srl %[c1], %[c1], 18 \n\t"
2188  "srl %[c2], %[c2], 18 \n\t"
2189  "srl %[c3], %[c3], 18 \n\t"
2190  "srl %[c4], %[c4], 18 \n\t"
2191  "slt %[cond0], %[t6], %[qc1] \n\t"
2192  "slt %[cond1], %[t6], %[qc2] \n\t"
2193  "slt %[cond2], %[t6], %[qc3] \n\t"
2194  "slt %[cond3], %[t6], %[qc4] \n\t"
2195  "movn %[qc1], %[t7], %[cond0] \n\t"
2196  "movn %[qc2], %[t7], %[cond1] \n\t"
2197  "movn %[qc3], %[t7], %[cond2] \n\t"
2198  "movn %[qc4], %[t7], %[cond3] \n\t"
2199 
2200  ".set pop \n\t"
2201 
2202  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2203  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2204  [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
2205  [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
2206  [c1]"=&r"(c1), [c2]"=&r"(c2),
2207  [c3]"=&r"(c3), [c4]"=&r"(c4),
2208  [t6]"=&r"(t6), [t7]"=&r"(t7)
2209  );
2210 
2211  curidx = 17 * qc1;
2212  curidx += qc2;
2213 
2214  curidx2 = 17 * qc3;
2215  curidx2 += qc4;
2216 
2217  curbits += p_bits[curidx];
2218  curbits += esc_sign_bits[curidx];
2219  vec = &p_codes[curidx*2];
2220 
2221  curbits += p_bits[curidx2];
2222  curbits += esc_sign_bits[curidx2];
2223  vec2 = &p_codes[curidx2*2];
2224 
2225  curbits += (av_log2(c1) * 2 - 3) & (-cond0);
2226  curbits += (av_log2(c2) * 2 - 3) & (-cond1);
2227  curbits += (av_log2(c3) * 2 - 3) & (-cond2);
2228  curbits += (av_log2(c4) * 2 - 3) & (-cond3);
2229 
2230  t1 = fabsf(in[i ]);
2231  t2 = fabsf(in[i+1]);
2232  t3 = fabsf(in[i+2]);
2233  t4 = fabsf(in[i+3]);
2234 
2235  if (cond0) {
2236  if (t1 >= CLIPPED_ESCAPE) {
2237  di1 = t1 - CLIPPED_ESCAPE;
2238  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2239  } else {
2240  di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
2241  qenergy += V*V;
2242  }
2243  } else {
2244  di1 = t1 - (V = vec[0] * IQ);
2245  qenergy += V*V;
2246  }
2247 
2248  if (cond1) {
2249  if (t2 >= CLIPPED_ESCAPE) {
2250  di2 = t2 - CLIPPED_ESCAPE;
2251  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2252  } else {
2253  di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
2254  qenergy += V*V;
2255  }
2256  } else {
2257  di2 = t2 - (V = vec[1] * IQ);
2258  qenergy += V*V;
2259  }
2260 
2261  if (cond2) {
2262  if (t3 >= CLIPPED_ESCAPE) {
2263  di3 = t3 - CLIPPED_ESCAPE;
2264  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2265  } else {
2266  di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
2267  qenergy += V*V;
2268  }
2269  } else {
2270  di3 = t3 - (V = vec2[0] * IQ);
2271  qenergy += V*V;
2272  }
2273 
2274  if (cond3) {
2275  if (t4 >= CLIPPED_ESCAPE) {
2276  di4 = t4 - CLIPPED_ESCAPE;
2277  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2278  } else {
2279  di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
2280  qenergy += V*V;
2281  }
2282  } else {
2283  di4 = t4 - (V = vec2[1]*IQ);
2284  qenergy += V*V;
2285  }
2286 
2287  cost += di1 * di1 + di2 * di2
2288  + di3 * di3 + di4 * di4;
2289  }
2290 
2291  if (bits)
2292  *bits = curbits;
2293  return cost * lambda + curbits;
2294 }
2295 
2296 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
2297  PutBitContext *pb, const float *in,
2298  const float *scaled, int size, int scale_idx,
2299  int cb, const float lambda, const float uplim,
2300  int *bits, float *energy) = {
2301  get_band_cost_ZERO_mips,
2302  get_band_cost_SQUAD_mips,
2303  get_band_cost_SQUAD_mips,
2304  get_band_cost_UQUAD_mips,
2305  get_band_cost_UQUAD_mips,
2306  get_band_cost_SPAIR_mips,
2307  get_band_cost_SPAIR_mips,
2308  get_band_cost_UPAIR7_mips,
2309  get_band_cost_UPAIR7_mips,
2310  get_band_cost_UPAIR12_mips,
2311  get_band_cost_UPAIR12_mips,
2312  get_band_cost_ESC_mips,
2313  get_band_cost_NONE_mips, /* cb 12 doesn't exist */
2314  get_band_cost_ZERO_mips,
2315  get_band_cost_ZERO_mips,
2316  get_band_cost_ZERO_mips,
2317 };
2318 
/* Forward a band-cost query to the handler stored at index `cb` of
 * get_band_cost_arr; `cb` is also passed through as an argument. */
#define get_band_cost(ctx, pb, in, scaled, size, sf_idx, cb,            \
                      lambda, uplim, bits, energy)                      \
    get_band_cost_arr[(cb)](ctx, pb, in, scaled, size, sf_idx, cb,      \
                            lambda, uplim, bits, energy)
2325 
2326 static float quantize_band_cost(struct AACEncContext *s, const float *in,
2327  const float *scaled, int size, int scale_idx,
2328  int cb, const float lambda, const float uplim,
2329  int *bits, float *energy, int rtz)
2330 {
2331  return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
2332 }
2333 
2335 
2337 
2338 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
2339 {
2340  int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
2341  uint8_t nextband0[128], nextband1[128];
2342  float M[128], S[128];
2343  float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
2344  const float lambda = s->lambda;
2345  const float mslambda = FFMIN(1.0f, lambda / 120.f);
2346  SingleChannelElement *sce0 = &cpe->ch[0];
2347  SingleChannelElement *sce1 = &cpe->ch[1];
2348  if (!cpe->common_window)
2349  return;
2350 
2351  /** Scout out next nonzero bands */
2352  ff_init_nextband_map(sce0, nextband0);
2353  ff_init_nextband_map(sce1, nextband1);
2354 
2355  prev_mid = sce0->sf_idx[0];
2356  prev_side = sce1->sf_idx[0];
2357  for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
2358  start = 0;
2359  for (g = 0; g < sce0->ics.num_swb; g++) {
2360  float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
2361  if (!cpe->is_mask[w*16+g])
2362  cpe->ms_mask[w*16+g] = 0;
2363  if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
2364  float Mmax = 0.0f, Smax = 0.0f;
2365 
2366  /* Must compute mid/side SF and book for the whole window group */
2367  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2368  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
2369  M[i] = (sce0->coeffs[start+(w+w2)*128+i]
2370  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
2371  S[i] = M[i]
2372  - sce1->coeffs[start+(w+w2)*128+i];
2373  }
2374  abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2375  abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
2376  for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
2377  Mmax = FFMAX(Mmax, M34[i]);
2378  Smax = FFMAX(Smax, S34[i]);
2379  }
2380  }
2381 
2382  for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
2383  float dist1 = 0.0f, dist2 = 0.0f;
2384  int B0 = 0, B1 = 0;
2385  int minidx;
2386  int mididx, sididx;
2387  int midcb, sidcb;
2388 
2389  minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
2390  mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
2391  sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
2392  if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
2393  && ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
2394  || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
2395  /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
2396  continue;
2397  }
2398 
2399  midcb = find_min_book(Mmax, mididx);
2400  sidcb = find_min_book(Smax, sididx);
2401 
2402  /* No CB can be zero */
2403  midcb = FFMAX(1,midcb);
2404  sidcb = FFMAX(1,sidcb);
2405 
2406  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2407  FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
2408  FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
2409  float minthr = FFMIN(band0->threshold, band1->threshold);
2410  int b1,b2,b3,b4;
2411  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
2412  M[i] = (sce0->coeffs[start+(w+w2)*128+i]
2413  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
2414  S[i] = M[i]
2415  - sce1->coeffs[start+(w+w2)*128+i];
2416  }
2417 
2418  abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2419  abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2420  abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2421  abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
2422  dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
2423  L34,
2424  sce0->ics.swb_sizes[g],
2425  sce0->sf_idx[w*16+g],
2426  sce0->band_type[w*16+g],
2427  lambda / band0->threshold, INFINITY, &b1, NULL, 0);
2428  dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
2429  R34,
2430  sce1->ics.swb_sizes[g],
2431  sce1->sf_idx[w*16+g],
2432  sce1->band_type[w*16+g],
2433  lambda / band1->threshold, INFINITY, &b2, NULL, 0);
2434  dist2 += quantize_band_cost(s, M,
2435  M34,
2436  sce0->ics.swb_sizes[g],
2437  mididx,
2438  midcb,
2439  lambda / minthr, INFINITY, &b3, NULL, 0);
2440  dist2 += quantize_band_cost(s, S,
2441  S34,
2442  sce1->ics.swb_sizes[g],
2443  sididx,
2444  sidcb,
2445  mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
2446  B0 += b1+b2;
2447  B1 += b3+b4;
2448  dist1 -= b1+b2;
2449  dist2 -= b3+b4;
2450  }
2451  cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
2452  if (cpe->ms_mask[w*16+g]) {
2453  if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
2454  sce0->sf_idx[w*16+g] = mididx;
2455  sce1->sf_idx[w*16+g] = sididx;
2456  sce0->band_type[w*16+g] = midcb;
2457  sce1->band_type[w*16+g] = sidcb;
2458  } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
2459  /* ms_mask unneeded, and it confuses some decoders */
2460  cpe->ms_mask[w*16+g] = 0;
2461  }
2462  break;
2463  } else if (B1 > B0) {
2464  /* More boost won't fix this */
2465  break;
2466  }
2467  }
2468  }
2469  if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
2470  prev_mid = sce0->sf_idx[w*16+g];
2471  if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
2472  prev_side = sce1->sf_idx[w*16+g];
2473  start += sce0->ics.swb_sizes[g];
2474  }
2475  }
2476 }
2477 #endif /*HAVE_MIPSFPU */
2478 
2480 
2481 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
2482 #endif /* HAVE_INLINE_ASM */
2483 
2485 #if HAVE_INLINE_ASM
2486 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
2487  AACCoefficientsEncoder *e = c->coder;
2488  int option = c->options.coder;
2489 
2490  if (option == 2) {
2491  e->quantize_and_encode_band = quantize_and_encode_band_mips;
2493 #if HAVE_MIPSFPU
2495 #endif /* HAVE_MIPSFPU */
2496  }
2497 #if HAVE_MIPSFPU
2498  e->search_for_ms = search_for_ms_mips;
2499 #endif /* HAVE_MIPSFPU */
2500 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
2501 #endif /* HAVE_INLINE_ASM */
2502 }
#define NULL
Definition: coverity.c:32
const char * s
Definition: avisynth_c.h:768
const AACCoefficientsEncoder * coder
Definition: aacenc.h:397
Band types following are encoded differently from others.
Definition: aac.h:86
int coder
Definition: aacenc.h:44
AAC encoder trellis codebook selector.
static void abs_pow34_v(float *out, const float *in, const int size)
Definition: aacenc_utils.h:40
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:207
const char * g
Definition: vf_curves.c:112
FFPsyBand psy_bands[PSY_MAX_BANDS]
channel bands information
Definition: psymodel.h:61
#define SCALE_MAX_POS
scalefactor index maximum value
Definition: aac.h:150
int av_log2(unsigned v)
Definition: intmath.c:26
static av_always_inline float bval2bmax(float b)
approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f)))
Definition: aacenc_utils.h:188
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aac.h:278
int prev_idx
pointer to the previous path point
Definition: aaccoder.c:69
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Definition: aac.h:281
float lambda
Definition: aacenc.h:400
#define ROUND_TO_ZERO
Definition: aacenc_utils.h:37
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:87
#define quantize_and_encode_band_cost(s, pb, in, quant, scaled, size, scale_idx, cb, lambda, uplim, bits, energy, rtz)
static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
#define B1
Definition: faandct.c:41
#define t7
Definition: regdef.h:35
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
static int ff_sfdelta_can_replace(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int new_sf, int band)
Definition: aacenc_utils.h:246
static double cb(void *priv, double x, double y)
Definition: vf_geq.c:112
AACEncOptions options
encoding options
Definition: aacenc.h:378
#define M(a, b)
Definition: vp3dsp.c:44
AAC encoder context.
Definition: aacenc.h:376
uint8_t
SingleChannelElement ch[2]
Definition: aac.h:284
#define t0
Definition: regdef.h:28
void ff_aac_coder_init_mips(AACEncContext *c)
static const uint64_t c1
Definition: murmur3.c:49
static float(*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
single band psychoacoustic information
Definition: psymodel.h:50
ptrdiff_t size
Definition: opengl_enc.c:101
#define S(s, c, i)
#define t1
Definition: regdef.h:29
#define t3
Definition: regdef.h:31
GLsizei count
Definition: opengl_enc.c:109
int num_swb
number of scalefactor window bands
Definition: aac.h:183
#define FFMAX(a, b)
Definition: common.h:94
float cost
path cost
Definition: aaccoder.c:70
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
Definition: aacenc.h:57
const float *const ff_aac_codebook_vectors[]
Definition: aactab.c:918
float ff_aac_pow2sf_tab[428]
Definition: aactab.c:35
#define SCALE_DIV_512
scalefactor difference that corresponds to scale difference in 512 times
Definition: aac.h:148
float ff_aac_pow34sf_tab[428]
Definition: aactab.c:36
int cur_channel
current channel for coder context
Definition: aacenc.h:398
const uint8_t *const ff_aac_spectral_bits[11]
Definition: aactab.c:422
#define FFMIN(a, b)
Definition: common.h:96
uint8_t w
Definition: llviddspenc.c:38
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
static void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
Definition: aacenc_utils.h:199
void(* quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz)
Definition: aacenc.h:61
AAC definitions and structures.
AAC encoder twoloop coder.
static int quantize_band_cost_bits(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
PutBitContext pb
Definition: aacenc.h:379
#define ROUND_STANDARD
Definition: aacenc_utils.h:36
Libavcodec external API header.
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:77
static int find_min_book(float maxval, int sf)
Definition: aacenc_utils.h:92
IndividualChannelStream ics
Definition: aac.h:249
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
void(* encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
Definition: aacenc.h:59
static av_always_inline float cbrtf(float x)
Definition: libm.h:61
structure used in optimal codebook search
Definition: aaccoder.c:68
uint8_t group_len[8]
Definition: aac.h:179
Replacements for frequently missing libm functions.
option
Definition: libkvazaar.c:282
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:182
#define t5
Definition: regdef.h:33
FFPsyContext psy
Definition: aacenc.h:395
AAC encoder data.
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:257
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:256
INTFLOAT coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:262
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
Definition: aac.h:149
static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda)
two-loop quantizers search taken from ISO 13818-7 Appendix C
AAC encoder utilities.
#define t6
Definition: regdef.h:34
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:248
static double c[64]
ChannelElement * cpe
channel elements
Definition: aacenc.h:394
static const uint64_t c2
Definition: murmur3.c:50
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:275
const uint16_t *const ff_aac_spectral_codes[11]
Definition: aactab.c:417
#define t4
Definition: regdef.h:32
int len
FFPsyChannel * ch
single channel information
Definition: psymodel.h:93
enum BandType band_type[128]
band types
Definition: aac.h:252
AAC encoder quantization misc reusable function templates.
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
Definition: aac.h:154
FILE * out
Definition: movenc.c:54
void INT64 start
Definition: avisynth_c.h:690
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
uint8_t is_mask[128]
Set if intensity stereo is used (used by encoder)
Definition: aac.h:282
float threshold
Definition: psymodel.h:53
#define INFINITY
Definition: mathematics.h:67
AAC data declarations.
float scoefs[1024]
scaled coefficients
Definition: aacenc.h:408
#define B0
Definition: faandct.c:40
#define t2
Definition: regdef.h:30
#define V
Definition: avdct.c:30
bitstream writer API