Go to the documentation of this file.
   44 #define PSY_3GPP_THR_SPREAD_HI   1.5f // spreading factor for low-to-hi threshold spreading  (15 dB/Bark) 
   45 #define PSY_3GPP_THR_SPREAD_LOW  3.0f // spreading factor for hi-to-low threshold spreading  (30 dB/Bark) 
   47 #define PSY_3GPP_EN_SPREAD_HI_L1 2.0f 
   49 #define PSY_3GPP_EN_SPREAD_HI_L2 1.5f 
   51 #define PSY_3GPP_EN_SPREAD_HI_S  1.5f 
   53 #define PSY_3GPP_EN_SPREAD_LOW_L 3.0f 
   55 #define PSY_3GPP_EN_SPREAD_LOW_S 2.0f 
   57 #define PSY_3GPP_RPEMIN      0.01f 
   58 #define PSY_3GPP_RPELEV      2.0f 
   60 #define PSY_3GPP_C1          3.0f            
   61 #define PSY_3GPP_C2          1.3219281f      
   62 #define PSY_3GPP_C3          0.55935729f     
   64 #define PSY_SNR_1DB          7.9432821e-1f   
   65 #define PSY_SNR_25DB         3.1622776e-3f   
   67 #define PSY_3GPP_SAVE_SLOPE_L  -0.46666667f 
   68 #define PSY_3GPP_SAVE_SLOPE_S  -0.36363637f 
   69 #define PSY_3GPP_SAVE_ADD_L    -0.84285712f 
   70 #define PSY_3GPP_SAVE_ADD_S    -0.75f 
   71 #define PSY_3GPP_SPEND_SLOPE_L  0.66666669f 
   72 #define PSY_3GPP_SPEND_SLOPE_S  0.81818181f 
   73 #define PSY_3GPP_SPEND_ADD_L   -0.35f 
   74 #define PSY_3GPP_SPEND_ADD_S   -0.26111111f 
   75 #define PSY_3GPP_CLIP_LO_L      0.2f 
   76 #define PSY_3GPP_CLIP_LO_S      0.2f 
   77 #define PSY_3GPP_CLIP_HI_L      0.95f 
   78 #define PSY_3GPP_CLIP_HI_S      0.75f 
   80 #define PSY_3GPP_AH_THR_LONG    0.5f 
   81 #define PSY_3GPP_AH_THR_SHORT   0.63f 
   83 #define PSY_PE_FORGET_SLOPE  511 
   91 #define PSY_3GPP_BITS_TO_PE(bits) ((bits) * 1.18f) 
   92 #define PSY_3GPP_PE_TO_BITS(bits) ((bits) / 1.18f) 
   95 #define PSY_LAME_FIR_LEN 21          
   96 #define AAC_BLOCK_SIZE_LONG 1024    
 
   97 #define AAC_BLOCK_SIZE_SHORT 128    
 
   98 #define AAC_NUM_BLOCKS_SHORT 8      
 
   99 #define PSY_LAME_NUM_SUBBLOCKS 3    
 
  220     -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
 
  221     -3.36639e-17 * 2, -0.0438162 * 2,  -1.54175e-17 * 2, 0.0931738 * 2,
 
  222     -5.52212e-17 * 2, -0.313819 * 2
 
  235     int lower_range = 12, upper_range = 12;
 
  243     for (
i = 1; 
i < 13; 
i++) {
 
  284     return 13.3f * 
atanf(0.00076
f * 
f) + 3.5f * 
atanf((
f / 7500.0
f) * (
f / 7500.0
f));
 
  295     return    3.64 * pow(
f, -0.8)
 
  296             - 6.8  * 
exp(-0.6  * (
f - 3.4) * (
f - 3.4))
 
  297             + 6.0  * 
exp(-0.15 * (
f - 8.7) * (
f - 8.7))
 
  298             + (0.6 + 0.04 * add) * 0.001 * 
f * 
f * 
f * 
f;
 
  305     float prev, minscale, minath, minsnr, pe_min;
 
  309     const float num_bark   = 
calc_bark((
float)bandwidth);
 
  315     if (!
ctx->model_priv_data)
 
  317     pctx = 
ctx->model_priv_data;
 
  322         chan_bitrate = (
int)(chan_bitrate / 120.0 * (
ctx->avctx->global_quality ? 
ctx->avctx->global_quality : 120));
 
  330     ctx->bitres.size  -= 
ctx->bitres.size % 8;
 
  333     for (j = 0; j < 2; j++) {
 
  335         const uint8_t *band_sizes = 
ctx->bands[j];
 
  336         float line_to_frequency = 
ctx->avctx->sample_rate / (j ? 256.f : 2048.0f);
 
  337         float avg_chan_bits = chan_bitrate * (j ? 128.0f : 1024.0f) / 
ctx->avctx->sample_rate;
 
  346         for (
g = 0; 
g < 
ctx->num_bands[j]; 
g++) {
 
  349             coeffs[
g].
barks = (bark + prev) / 2.0;
 
  352         for (
g = 0; 
g < 
ctx->num_bands[j] - 1; 
g++) {
 
  354             float bark_width = coeffs[
g+1].
barks - coeffs->
barks;
 
  357             coeff->spread_low[1] = 
ff_exp10(-bark_width * en_spread_low);
 
  359             pe_min = bark_pe * bark_width;
 
  360             minsnr = 
exp2(pe_min / band_sizes[
g]) - 1.5f;
 
  364         for (
g = 0; 
g < 
ctx->num_bands[j]; 
g++) {
 
  365             minscale = 
ath(start * line_to_frequency, 
ATH_ADD);
 
  366             for (
i = 1; 
i < band_sizes[
g]; 
i++)
 
  368             coeffs[
g].
ath = minscale - minath;
 
  369             start += band_sizes[
g];
 
  401     0xB6, 0x6C, 0xD8, 0xB2, 0x66, 0xC6, 0x96, 0x36, 0x36
 
  409                                                  const int16_t *audio,
 
  415     int attack_ratio     = br <= 16000 ? 18 : 10;
 
  418     uint8_t grouping     = 0;
 
  424         int switch_to_eight = 0;
 
  425         float sum = 0.0, sum2 = 0.0;
 
  428         for (
i = 0; 
i < 8; 
i++) {
 
  429             for (j = 0; j < 128; j++) {
 
  436         for (
i = 0; 
i < 8; 
i++) {
 
  437             if (
s[
i] > pch->win_energy * attack_ratio) {
 
  443         pch->win_energy = pch->win_energy*7/8 + sum2/64;
 
  445         wi.window_type[1] = prev_type;
 
  453             grouping = pch->next_grouping;
 
  469         pch->next_window_seq = next_type;
 
  471         for (
i = 0; 
i < 3; 
i++)
 
  472             wi.window_type[
i] = prev_type;
 
  483         for (
i = 0; 
i < 8; 
i++) {
 
  484             if (!((grouping >> 
i) & 1))
 
  486             wi.grouping[lastgrp]++;
 
  503     float clipped_pe, bit_save, bit_spend, bit_factor, fill_level, forgetful_min_pe;
 
  507     fill_level = 
av_clipf((
float)
ctx->fill_level / 
size, clip_low, clip_high);
 
  509     bit_save   = (fill_level + bitsave_add) * bitsave_slope;
 
  510     assert(bit_save <= 0.3f && bit_save >= -0.05000001
f);
 
  511     bit_spend  = (fill_level + bitspend_add) * bitspend_slope;
 
  512     assert(bit_spend <= 0.5f && bit_spend >= -0.1
f);
 
  519     bit_factor = 1.0f - bit_save + ((bit_spend - bit_save) / (
ctx->pe.max - 
ctx->pe.min)) * (clipped_pe - 
ctx->pe.min);
 
  527     ctx->pe.min = 
FFMIN(pe, forgetful_min_pe);
 
  533         ctx->frame_bits * bit_factor,
 
  563     float thr_avg, reduction;
 
  565     if(active_lines == 0.0)
 
  568     thr_avg   = 
exp2f((
a - pe) / (4.0
f * active_lines));
 
  569     reduction = 
exp2f((
a - desired_pe) / (4.0
f * active_lines)) - thr_avg;
 
  571     return FFMAX(reduction, 0.0
f);
 
  577     float thr = band->
thr;
 
  581         thr = 
sqrtf(thr) + reduction;
 
  599 #ifndef calc_thr_3gpp 
  601                           const uint8_t *band_sizes, 
const float *coefs, 
const int cutoff)
 
  604     int start = 0, wstart = 0;
 
  607         for (
g = 0; 
g < num_bands; 
g++) {
 
  610             float form_factor = 0.0f;
 
  613             if (wstart < cutoff) {
 
  614                 for (
i = 0; 
i < band_sizes[
g]; 
i++) {
 
  615                     band->
energy += coefs[start+
i] * coefs[start+
i];
 
  623             start += band_sizes[
g];
 
  624             wstart += band_sizes[
g];
 
  630 #ifndef psy_hp_filter 
  644         hpfsmpl[
i] = (sum1 + sum2) * 32768.0
f;
 
  658     float desired_bits, desired_pe, delta_pe, reduction= 
NAN, spread_en[128] = {0};
 
  659     float a = 0.0f, active_lines = 0.0f, norm_fac = 0.0f;
 
  660     float pe = pctx->chan_bitrate > 32000 ? 0.0f : 
FFMAX(50.0
f, 100.0
f - pctx->chan_bitrate * 100.0f / 32000.0f);
 
  661     const int      num_bands   = 
ctx->num_bands[wi->num_windows == 8];
 
  662     const uint8_t *band_sizes  = 
ctx->bands[wi->num_windows == 8];
 
  663     AacPsyCoeffs  *coeffs      = pctx->psy_coef[wi->num_windows == 8];
 
  666     const int cutoff           = bandwidth * 2048 / wi->num_windows / 
ctx->avctx->sample_rate;
 
  669     calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs, cutoff);
 
  672     for (
w = 0; 
w < wi->num_windows*16; 
w += 16) {
 
  676         spread_en[0] = 
bands[0].energy;
 
  677         for (
g = 1; 
g < num_bands; 
g++) {
 
  679             spread_en[
w+
g] = 
FFMAX(
bands[
g].energy, spread_en[
w+
g-1] * coeffs[
g].spread_hi[1]);
 
  681         for (
g = num_bands - 2; 
g >= 0; 
g--) {
 
  683             spread_en[
w+
g] = 
FFMAX(spread_en[
w+
g], spread_en[
w+
g+1] * coeffs[
g].spread_low[1]);
 
  686         for (
g = 0; 
g < num_bands; 
g++) {
 
  701             if (spread_en[
w+
g] * avoid_hole_thr > band->
energy || coeffs[
g].min_snr > 1.0f)
 
  714         desired_pe = pe * (
ctx->avctx->global_quality ? 
ctx->avctx->global_quality : 120) / (2 * 2.5
f * 120.0
f);
 
  719         if (
ctx->bitres.bits > 0) {
 
  724         pctx->pe.max = 
FFMAX(pe, pctx->pe.max);
 
  725         pctx->pe.min = 
FFMIN(pe, pctx->pe.min);
 
  734         if (
ctx->bitres.bits > 0)
 
  739     ctx->bitres.alloc = desired_bits;
 
  741     if (desired_pe < pe) {
 
  743         for (
w = 0; 
w < wi->num_windows*16; 
w += 16) {
 
  748             for (
g = 0; 
g < num_bands; 
g++) {
 
  760         for (
i = 0; 
i < 2; 
i++) {
 
  761             float pe_no_ah = 0.0f, desired_pe_no_ah;
 
  762             active_lines = 
a = 0.0f;
 
  763             for (
w = 0; 
w < wi->num_windows*16; 
w += 16) {
 
  764                 for (
g = 0; 
g < num_bands; 
g++) {
 
  768                         pe_no_ah += band->
pe;
 
  774             desired_pe_no_ah = 
FFMAX(desired_pe - (pe - pe_no_ah), 0.0
f);
 
  775             if (active_lines > 0.0
f)
 
  779             for (
w = 0; 
w < wi->num_windows*16; 
w += 16) {
 
  780                 for (
g = 0; 
g < num_bands; 
g++) {
 
  783                     if (active_lines > 0.0
f)
 
  786                     if (band->
thr > 0.0f)
 
  793             delta_pe = desired_pe - pe;
 
  794             if (
fabs(delta_pe) > 0.05
f * desired_pe)
 
  798         if (pe < 1.15
f * desired_pe) {
 
  800             norm_fac = norm_fac ? 1.0f / norm_fac : 0;
 
  801             for (
w = 0; 
w < wi->num_windows*16; 
w += 16) {
 
  802                 for (
g = 0; 
g < num_bands; 
g++) {
 
  806                         float delta_sfb_pe = band->
norm_fac * norm_fac * delta_pe;
 
  807                         float thr = band->
thr;
 
  819             while (pe > desired_pe && 
g--) {
 
  820                 for (
w = 0; 
w < wi->num_windows*16; 
w+= 16) {
 
  833     for (
w = 0; 
w < wi->num_windows*16; 
w += 16) {
 
  834         for (
g = 0; 
g < num_bands; 
g++) {
 
  845     memcpy(pch->prev_band, pch->band, 
sizeof(pch->band));
 
  854     for (ch = 0; ch < group->
num_ch; ch++)
 
  881     ctx->next_window_seq = blocktype;
 
  885                                        const float *la, 
int channel, 
int prev_type)
 
  890     int uselongblock = 1;
 
  897         const float *pf = hpfsmpl;
 
  912             energy_short[0] += energy_subshort[
i];
 
  918             for (; pf < pfe; pf++)
 
  929             if (p > energy_subshort[
i + 1])
 
  930                 p = p / energy_subshort[
i + 1];
 
  931             else if (energy_subshort[
i + 1] > p * 10.0
f)
 
  932                 p = energy_subshort[
i + 1] / (p * 10.0f);
 
  941                 if (attack_intensity[
i] > pch->attack_threshold)
 
  949             const float u = energy_short[
i - 1];
 
  950             const float v = energy_short[
i];
 
  951             const float m = 
FFMAX(
u, v);
 
  953                 if (
u < 1.7
f * v && v < 1.7
f * 
u) {   
 
  954                     if (
i == 1 && attacks[0] < attacks[
i])
 
  959             att_sum += attacks[
i];
 
  962         if (attacks[0] <= pch->prev_attack)
 
  965         att_sum += attacks[0];
 
  967         if (pch->prev_attack == 3 || att_sum) {
 
  971                 if (attacks[
i] && attacks[
i-1])
 
  996         for (
i = 0; 
i < 8; 
i++) {
 
  997             if (!((pch->next_grouping >> 
i) & 1))
 
 1009     for (
i = 0; 
i < 9; 
i++) {
 
 1017     pch->prev_attack = attacks[8];
 
 1024     .
name    = 
"3GPP TS 26.403-inspired model",
 
  
float spread_low[2]
spreading factor for high-to-low threshold spreading in long frame
 
static av_always_inline double ff_exp10(double x)
Compute 10^x for floating point values.
 
static av_cold int psy_3gpp_init(FFPsyContext *ctx)
 
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
 
static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type)
Tell encoder which window types to use.
 
static float lame_calc_attack_threshold(int bitrate)
Calculate the ABR attack threshold from the above LAME psymodel table.
 
#define u(width, name, range_min, range_max)
 
#define PSY_PE_FORGET_SLOPE
 
static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
 
float thr
energy threshold
 
static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch, const uint8_t *band_sizes, const float *coefs, const int cutoff)
 
#define PSY_3GPP_PE_TO_BITS(bits)
 
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
 
static av_cold float calc_bark(float f)
Calculate Bark value for given line.
 
float nz_lines
number of non-zero spectral lines
 
#define PSY_3GPP_CLIP_LO_S
 
#define PSY_3GPP_AH_THR_LONG
 
int window_shape
window shape (sine/KBD/whatever)
 
static float calc_pe_3gpp(AacPsyBand *band)
 
float min
minimum allowed PE for bit factor calculation
 
#define PSY_3GPP_SPEND_SLOPE_L
 
#define PSY_3GPP_THR_SPREAD_HI
constants for 3GPP AAC psychoacoustic model
 
int fill_level
bit reservoir fill level
 
int nb_channels
Number of channels in this layout.
 
float spread_hi[2]
spreading factor for low-to-high threshold spreading in long frame
 
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about quality
 
static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
 
psychoacoustic model frame type-dependent coefficients
 
AVChannelLayout ch_layout
Audio channel layout.
 
static av_cold void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx)
LAME psy model specific initialization.
 
float st_lrm
short threshold for L, R, and M channels
 
#define PSY_3GPP_EN_SPREAD_HI_S
 
#define PSY_3GPP_SPEND_ADD_L
 
int flags
AV_CODEC_FLAG_*.
 
float barks
Bark value for each spectral band in long frame.
 
float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT *PSY_LAME_NUM_SUBBLOCKS]
 
static __device__ float fabsf(float a)
 
windowing related information
 
int64_t bit_rate
Total stream bitrate in bit/s, 0 if not available.
 
float previous
allowed PE of the previous frame
 
const FFPsyModel ff_aac_psy_model
 
uint8_t num_ch
number of channels in this group
 
LAME psy model preset struct.
 
#define PSY_3GPP_CLIP_HI_S
 
information for single band used by 3GPP TS26.403-inspired psychoacoustic model
 
int global_quality
Global quality for codecs which cannot change it per frame.
 
int flags
Flags modifying the (de)muxer behaviour.
 
struct AacPsyContext::@13 pe
 
int quality
Quality to map the rest of the values to.
 
float pe_const
constant part of the PE calculation
 
3GPP TS26.403-inspired psychoacoustic model specific data
 
static float calc_reduction_3gpp(float a, float desired_pe, float pe, float active_lines)
 
static const uint8_t window_grouping[9]
window grouping information stored as bits (0 - new group, 1 - group continues)
 
#define AAC_BLOCK_SIZE_SHORT
short block size
 
static const float bands[]
 
static av_cold float ath(float f, float add)
Calculate ATH value for given frequency.
 
static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size, int short_window)
 
#define PSY_3GPP_AH_THR_SHORT
 
static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
 
static float iir_filter(int in, float state[2])
IIR filter used in block switching decision.
 
static const PsyLamePreset psy_vbr_map[]
LAME psy model preset table for constant quality.
 
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
 
static __device__ float fabs(float a)
 
static const PsyLamePreset psy_abr_map[]
LAME psy model preset table for ABR.
 
int64_t bit_rate
the average bitrate
 
static av_cold void psy_3gpp_end(FFPsyContext *apc)
 
#define PSY_3GPP_BITS_TO_PE(bits)
 
single band psychoacoustic information
 
static __device__ float sqrtf(float a)
 
int grouping[8]
window grouping (for e.g. AAC)
 
float max
maximum allowed PE for bit factor calculation
 
float iir_state[2]
hi-pass IIR filter state
 
AacPsyCoeffs psy_coef[2][64]
 
float thr_quiet
threshold in quiet
 
#define AAC_BLOCK_SIZE_LONG
long block size
 
AacPsyBand band[128]
bands information
 
static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr, float reduction)
 
float ath
absolute threshold of hearing per bands
 
float active_lines
number of active spectral lines
 
#define AAC_NUM_BLOCKS_SHORT
number of blocks in a short sequence
 
#define PSY_LAME_FIR_LEN
LAME psy model FIR order.
 
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
 
#define PSY_3GPP_CLIP_LO_L
 
int avoid_holes
hole avoidance flag
 
#define PSY_3GPP_THR_SPREAD_LOW
 
#define PSY_3GPP_SAVE_ADD_S
 
#define PSY_3GPP_SPEND_ADD_S
 
static const float psy_fir_coeffs[]
LAME psy model FIR coefficient table.
 
float attack_threshold
attack threshold for this channel
 
#define i(width, name, range_min, range_max)
 
float norm_fac
normalization factor for linearization
 
#define PSY_3GPP_CLIP_HI_L
 
float pe
perceptual entropy
 
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
 
void * av_calloc(size_t nmemb, size_t size)
 
psychoacoustic information for an arbitrary group of channels
 
enum WindowSequence next_window_seq
window sequence to be used in the next frame
 
float win_energy
sliding average of channel energy
 
single/pair channel context for psychoacoustic model
 
float correction
PE correction factor.
 
void * model_priv_data
psychoacoustic model implementation private data
 
#define PSY_3GPP_SAVE_SLOPE_S
 
#define PSY_3GPP_EN_SPREAD_HI_L1
 
uint8_t next_grouping
stored grouping scheme for the next frame (in case of 8 short window sequence)
 
main external API structure.
 
#define PSY_LAME_NUM_SUBBLOCKS
Number of sub-blocks in each short block.
 
float global_quality
normalized global quality taken from avctx
 
static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, const float *coefs, const FFPsyWindowInfo *wi)
Calculate band thresholds as suggested in 3GPP TS26.403.
 
codec-specific psychoacoustic model implementation
 
int frame_bits
average bits per frame
 
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
 
static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
 
#define PSY_3GPP_EN_SPREAD_LOW_L
 
int chan_bitrate
bitrate per channel
 
#define PSY_3GPP_SAVE_SLOPE_L
 
static const double coeff[2][5]
 
#define PSY_3GPP_SPEND_SLOPE_S
 
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
 
#define PSY_3GPP_EN_SPREAD_LOW_S
 
int prev_attack
attack value for the last short block in the previous sequence
 
context used by psychoacoustic model
 
AacPsyBand prev_band[128]
bands information from the previous frame
 
int num_windows
number of windows in a frame
 
#define PSY_3GPP_SAVE_ADD_L