Go to the documentation of this file.
   47 #define FRAME_SIZE_SHIFT 2 
   48 #define FRAME_SIZE (120<<FRAME_SIZE_SHIFT) 
   49 #define WINDOW_SIZE (2*FRAME_SIZE) 
   50 #define FREQ_SIZE (FRAME_SIZE + 1) 
   52 #define PITCH_MIN_PERIOD 60 
   53 #define PITCH_MAX_PERIOD 768 
   54 #define PITCH_FRAME_SIZE 960 
   55 #define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE) 
   57 #define SQUARE(x) ((x)*(x)) 
   62 #define NB_DELTA_CEPS 6 
   64 #define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2) 
   66 #define WEIGHTS_SCALE (1.f/256) 
   68 #define MAX_NEURONS 128 
   70 #define ACTIVATION_TANH    0 
   71 #define ACTIVATION_SIGMOID 1 
   72 #define ACTIVATION_RELU    2 
  154 #define F_ACTIVATION_TANH       0 
  155 #define F_ACTIVATION_SIGMOID    1 
  156 #define F_ACTIVATION_RELU       2 
  160 #define FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0) 
  161 #define FREE_DENSE(name) do { \ 
  163         av_free((void *) model->name->input_weights); \ 
  164         av_free((void *) model->name->bias); \ 
  165         av_free((void *) model->name); \ 
  168 #define FREE_GRU(name) do { \ 
  170         av_free((void *) model->name->input_weights); \ 
  171         av_free((void *) model->name->recurrent_weights); \ 
  172         av_free((void *) model->name->bias); \ 
  173         av_free((void *) model->name); \ 
  199     if (fscanf(
f, 
"rnnoise-nu model file version %d\n", &in) != 1 || in != 1)
 
  206 #define ALLOC_LAYER(type, name) \ 
  207     name = av_calloc(1, sizeof(type)); \ 
  209         rnnoise_model_free(ret); \ 
  210         return AVERROR(ENOMEM); \ 
  221 #define INPUT_VAL(name) do { \ 
  222     if (fscanf(f, "%d", &in) != 1 || in < 0 || in > 128) { \ 
  223         rnnoise_model_free(ret); \ 
  224         return AVERROR(EINVAL); \ 
  229 #define INPUT_ACTIVATION(name) do { \ 
  231     INPUT_VAL(activation); \ 
  232     switch (activation) { \ 
  233     case F_ACTIVATION_SIGMOID: \ 
  234         name = ACTIVATION_SIGMOID; \ 
  236     case F_ACTIVATION_RELU: \ 
  237         name = ACTIVATION_RELU; \ 
  240         name = ACTIVATION_TANH; \ 
  244 #define INPUT_ARRAY(name, len) do { \ 
  245     float *values = av_calloc((len), sizeof(float)); \ 
  247         rnnoise_model_free(ret); \ 
  248         return AVERROR(ENOMEM); \ 
  251     for (int i = 0; i < (len); i++) { \ 
  252         if (fscanf(f, "%d", &in) != 1) { \ 
  253             rnnoise_model_free(ret); \ 
  254             return AVERROR(EINVAL); \ 
  260 #define INPUT_ARRAY3(name, len0, len1, len2) do { \ 
  261     float *values = av_calloc(FFALIGN((len0), 4) * FFALIGN((len1), 4) * (len2), sizeof(float)); \ 
  263         rnnoise_model_free(ret); \ 
  264         return AVERROR(ENOMEM); \ 
  267     for (int k = 0; k < (len0); k++) { \ 
  268         for (int i = 0; i < (len2); i++) { \ 
  269             for (int j = 0; j < (len1); j++) { \ 
  270                 if (fscanf(f, "%d", &in) != 1) { \ 
  271                     rnnoise_model_free(ret); \ 
  272                     return AVERROR(EINVAL); \ 
  274                 values[j * (len2) * FFALIGN((len0), 4) + i * FFALIGN((len0), 4) + k] = in; \ 
  280 #define NEW_LINE() do { \ 
  282     while ((c = fgetc(f)) != EOF) { \ 
  288 #define INPUT_DENSE(name) do { \ 
  289     INPUT_VAL(name->nb_inputs); \ 
  290     INPUT_VAL(name->nb_neurons); \ 
  291     ret->name ## _size = name->nb_neurons; \ 
  292     INPUT_ACTIVATION(name->activation); \ 
  294     INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \ 
  296     INPUT_ARRAY(name->bias, name->nb_neurons); \ 
  300 #define INPUT_GRU(name) do { \ 
  301     INPUT_VAL(name->nb_inputs); \ 
  302     INPUT_VAL(name->nb_neurons); \ 
  303     ret->name ## _size = name->nb_neurons; \ 
  304     INPUT_ACTIVATION(name->activation); \ 
  306     INPUT_ARRAY3(name->input_weights, name->nb_inputs, name->nb_neurons, 3); \ 
  308     INPUT_ARRAY3(name->recurrent_weights, name->nb_neurons, name->nb_neurons, 3); \ 
  310     INPUT_ARRAY(name->bias, name->nb_neurons * 3); \ 
  356     s->channels = 
inlink->ch_layout.nb_channels;
 
  363     for (
int i = 0; 
i < 
s->channels; 
i++) {
 
  376     for (
int i = 0; 
i < 
s->channels; 
i++) {
 
  393 static void biquad(
float *y, 
float mem[2], 
const float *x,
 
  394                    const float *
b, 
const float *
a, 
int N)
 
  396     for (
int i = 0; 
i < 
N; 
i++) {
 
  401         mem[0] = mem[1] + (
b[0]*
xi - 
a[0]*yi);
 
  402         mem[1] = (
b[1]*
xi - 
a[1]*yi);
 
  407 #define RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) 
  408 #define RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst)))) 
  409 #define RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) 
  421     st->
tx_fn(st->
tx, y, x, 
sizeof(
float));
 
  438     st->
txi_fn(st->
txi, y, x, 
sizeof(
float));
 
  446   0,  1,  2,  3,  4,   5, 6,  7,  8,  10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
 
  457         for (
int j = 0; j < band_size; j++) {
 
  458             float tmp, frac = (
float)j / band_size;
 
  462             sum[
i]     += (1.f - frac) * 
tmp;
 
  463             sum[
i + 1] +=        frac  * 
tmp;
 
  482         for (
int j = 0; j < band_size; j++) {
 
  483             float tmp, frac = (
float)j / band_size;
 
  487             sum[
i]     += (1 - frac) * 
tmp;
 
  488             sum[
i + 1] +=      frac  * 
tmp;
 
  515     const float mix = 
s->mix;
 
  516     const float imix = 1.f - 
FFMAX(
mix, 0.
f);
 
  528 static inline void xcorr_kernel(
const float *x, 
const float *y, 
float sum[4], 
int len)
 
  530     float y_0, y_1, y_2, y_3 = 0;
 
  537     for (j = 0; j < 
len - 3; j += 4) {
 
  598                                     const float *y, 
int N)
 
  602     for (
int i = 0; 
i < 
N; 
i++)
 
  609                              float *xcorr, 
int len, 
int max_pitch)
 
  613     for (
i = 0; 
i < max_pitch - 3; 
i += 4) {
 
  614         float sum[4] = { 0, 0, 0, 0};
 
  619         xcorr[
i + 1] = sum[1];
 
  620         xcorr[
i + 2] = sum[2];
 
  621         xcorr[
i + 3] = sum[3];
 
  624     for (; 
i < max_pitch; 
i++) {
 
  644         for (
int i = 0; 
i < n; 
i++)
 
  646         for (
int i = 0; 
i < overlap; 
i++) {
 
  656     for (
int k = 0; k <= lag; k++) {
 
  659         for (
int i = k + fastN; 
i < n; 
i++)
 
  660             d += xptr[
i] * xptr[
i-k];
 
  675         for (
int i = 0; 
i < p; 
i++) {
 
  678             for (
int j = 0; j < 
i; j++)
 
  679                 rr += (lpc[j] * ac[
i - j]);
 
  684             for (
int j = 0; j < (
i + 1) >> 1; j++) {
 
  688                 lpc[j]     = tmp1 + (
r*tmp2);
 
  689                 lpc[
i-1-j] = tmp2 + (
r*tmp1);
 
  694             if (
error < .001
f * ac[0])
 
  706     float num0, num1, num2, num3, num4;
 
  707     float mem0, mem1, mem2, mem3, mem4;
 
  720     for (
int i = 0; 
i < 
N; 
i++) {
 
  748     float lpc[4], mem[5]={0,0,0,0,0};
 
  752     for (
int i = 1; i < len >> 1; 
i++)
 
  753         x_lp[
i] = .5
f * (.5
f * (x[0][(2*
i-1)]+x[0][(2*
i+1)])+x[0][2*
i]);
 
  754     x_lp[0] = .5f * (.5f * (x[0][1])+x[0][0]);
 
  756         for (
int i = 1; i < len >> 1; 
i++)
 
  757             x_lp[
i] += (.5
f * (.5
f * (x[1][(2*
i-1)]+x[1][(2*
i+1)])+x[1][2*
i]));
 
  758         x_lp[0] += .5f * (.5f * (x[1][1])+x[1][0]);
 
  766     for (
int i = 1; 
i <= 4; 
i++) {
 
  768         ac[
i] -= ac[
i]*(.008f*
i)*(.008
f*
i);
 
  772     for (
int i = 0; 
i < 4; 
i++) {
 
  774         lpc[
i] = (lpc[
i] * 
tmp);
 
  777     lpc2[0] = lpc[0] + .8f;
 
  778     lpc2[1] = lpc[1] + (
c1 * lpc[0]);
 
  779     lpc2[2] = lpc[2] + (
c1 * lpc[1]);
 
  780     lpc2[3] = lpc[3] + (
c1 * lpc[2]);
 
  781     lpc2[4] = (
c1 * lpc[3]);
 
  785 static inline void dual_inner_prod(
const float *x, 
const float *y01, 
const float *y02,
 
  786                                    int N, 
float *xy1, 
float *xy2)
 
  788     float xy01 = 0, xy02 = 0;
 
  790     for (
int i = 0; 
i < 
N; 
i++) {
 
  791         xy01 += (x[
i] * y01[
i]);
 
  792         xy02 += (x[
i] * y02[
i]);
 
  801     return xy / 
sqrtf(1.
f + xx * yy);
 
  804 static const uint8_t 
second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
 
  806                              int *T0_, 
int prev_period, 
float prev_gain)
 
  813     float best_xy, best_yy;
 
  818     minperiod0 = minperiod;
 
  832     for (
i = 1; 
i <= maxperiod; 
i++) {
 
  833         yy = yy+(x[-
i] * x[-
i])-(x[
N-
i] * x[
N-
i]);
 
  834         yy_lookup[
i] = 
FFMAX(0, yy);
 
  841     for (k = 2; k <= 15; k++) {
 
  861         xy = .5f * (xy + xy2);
 
  862         yy = .5f * (yy_lookup[T1] + yy_lookup[T1b]);
 
  864         if (
FFABS(T1-prev_period)<=1)
 
  866         else if (
FFABS(T1-prev_period)<=2 && 5 * k * k < T0)
 
  867             cont = prev_gain * .5f;
 
  870         thresh = 
FFMAX(.3
f, (.7
f * g0) - cont);
 
  874             thresh = 
FFMAX(.4
f, (.85
f * g0) - cont);
 
  875         else if (T1<2*minperiod)
 
  876             thresh = 
FFMAX(.5
f, (.9
f * g0) - cont);
 
  885     best_xy = 
FFMAX(0, best_xy);
 
  886     if (best_yy <= best_xy)
 
  889         pg = best_xy/(best_yy + 1);
 
  891     for (k = 0; k < 3; k++)
 
  893     if ((xcorr[2]-xcorr[0]) > .7f * (xcorr[1]-xcorr[0]))
 
  895     else if ((xcorr[0]-xcorr[2]) > (.7f * (xcorr[1] - xcorr[2])))
 
  909                             int max_pitch, 
int *best_pitch)
 
  922     for (
int j = 0; j < 
len; j++)
 
  925     for (
int i = 0; 
i < max_pitch; 
i++) {
 
  934             num = xcorr16 * xcorr16;
 
  935             if ((num * best_den[1]) > (best_num[1] * Syy)) {
 
  936                 if ((num * best_den[0]) > (best_num[0] * Syy)) {
 
  937                     best_num[1] = best_num[0];
 
  938                     best_den[1] = best_den[0];
 
  939                     best_pitch[1] = best_pitch[0];
 
  956                          int len, 
int max_pitch, 
int *pitch)
 
  959     int best_pitch[2]={0,0};
 
  969     for (
int j = 0; j < len >> 2; j++)
 
  970         x_lp4[j] = x_lp[2*j];
 
  971     for (
int j = 0; j < lag >> 2; j++)
 
  981     for (
int i = 0; i < max_pitch >> 1; 
i++) {
 
  984         if (
FFABS(
i-2*best_pitch[0])>2 && 
FFABS(
i-2*best_pitch[1])>2)
 
  987         xcorr[
i] = 
FFMAX(-1, sum);
 
  993     if (best_pitch[0] > 0 && best_pitch[0] < (max_pitch >> 1) - 1) {
 
  996         a = xcorr[best_pitch[0] - 1];
 
  997         b = xcorr[best_pitch[0]];
 
  998         c = xcorr[best_pitch[0] + 1];
 
  999         if (
c - 
a > .7
f * (
b - 
a))
 
 1001         else if (
a - 
c > .7
f * (
b-
c))
 
 1009     *pitch = 2 * best_pitch[0] - 
offset;
 
 1023                                   float *Ex, 
float *Ep, 
float *Exp, 
float *features, 
const float *in)
 
 1026     float *ceps_0, *ceps_1, *ceps_2;
 
 1027     float spec_variability = 0;
 
 1035     float follow, logMax;
 
 1076         logMax = 
FFMAX(logMax, Ly[
i]);
 
 1077         follow = 
FFMAX(follow-1.5, Ly[
i]);
 
 1087     dct(
s, features, Ly);
 
 1095         ceps_0[
i] = features[
i];
 
 1099         features[
i] = ceps_0[
i] + ceps_1[
i] + ceps_2[
i];
 
 1108         float mindist = 1e15f;
 
 1109         for (
int j = 0; j < 
CEPS_MEM; j++) {
 
 1111             for (
int k = 0; k < 
NB_BANDS; k++) {
 
 1119                 mindist = 
FFMIN(mindist, dist);
 
 1122         spec_variability += mindist;
 
 1137         for (
int j = 0; j < band_size; j++) {
 
 1138             float frac = (
float)j / band_size;
 
 1146                          const float *Exp, 
const float *
g)
 
 1155         if (Exp[
i]>
g[
i]) 
r[
i] = 1;
 
 1162         X[
i].re += rf[
i]*
P[
i].re;
 
 1163         X[
i].im += rf[
i]*
P[
i].im;
 
 1167         norm[
i] = 
sqrtf(Ex[
i] / (1e-8+newE[
i]));
 
 1171         X[
i].re *= normf[
i];
 
 1172         X[
i].im *= normf[
i];
 
 1177     0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
 
 1178     0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
 
 1179     0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
 
 1180     0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
 
 1181     0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
 
 1182     0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
 
 1183     0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
 
 1184     0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
 
 1185     0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
 
 1186     0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
 
 1187     0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
 
 1188     0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
 
 1189     0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
 
 1190     0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
 
 1191     0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
 
 1192     0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
 
 1193     0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
 
 1194     0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
 
 1195     0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
 
 1196     0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
 
 1197     0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
 
 1198     0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
 
 1199     0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
 
 1200     0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
 
 1201     0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
 
 1202     0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
 
 1203     0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
 
 1204     0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
 
 1205     0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
 
 1206     0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
 
 1207     0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
 
 1208     0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
 
 1209     0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
 
 1210     0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
 
 1211     0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
 
 1212     0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
 
 1213     0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
 
 1214     0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
 
 1215     1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
 
 1216     1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
 
 1244     y = y + x*dy*(1 - y*x);
 
 1257     for (
int i = 0; 
i < 
N; 
i++) {
 
 1259         float sum = layer->
bias[
i];
 
 1261         for (
int j = 0; j < 
M; j++)
 
 1268         for (
int i = 0; 
i < 
N; 
i++)
 
 1271         for (
int i = 0; 
i < 
N; 
i++)
 
 1274         for (
int i = 0; 
i < 
N; 
i++)
 
 1290     const int stride = 3 * AN, istride = 3 * AM;
 
 1292     for (
int i = 0; 
i < 
N; 
i++) {
 
 1294         float sum = gru->
bias[
i];
 
 1301     for (
int i = 0; 
i < 
N; 
i++) {
 
 1303         float sum = gru->
bias[
N + 
i];
 
 1310     for (
int i = 0; 
i < 
N; 
i++) {
 
 1312         float sum = gru->
bias[2 * 
N + 
i];
 
 1315         for (
int j = 0; j < 
N; j++)
 
 1332 #define INPUT_SIZE 42 
 1375     static const float a_hp[2] = {-1.99599, 0.99600};
 
 1376     static const float b_hp[2] = {-2, 1};
 
 1382     if (!silence && !disabled) {
 
 1401     memcpy(history, in, 
FRAME_SIZE * 
sizeof(*history));
 
 1416     const int start = (
out->ch_layout.nb_channels * jobnr) / nb_jobs;
 
 1417     const int end = (
out->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
 
 1419     for (
int ch = start; ch < end; ch++) {
 
 1421                         (
float *)
out->extended_data[ch],
 
 1489     if (!*model || 
ret < 0)
 
 1514         for (
int j = 0; j < 
NB_BANDS; j++) {
 
 1517                 s->dct_table[j][
i] *= 
sqrtf(.5);
 
 1531     for (
int ch = 0; ch < 
s->channels && 
s->st; ch++) {
 
 1532         av_freep(&
s->st[ch].rnn[n].vad_gru_state);
 
 1533         av_freep(&
s->st[ch].rnn[n].noise_gru_state);
 
 1534         av_freep(&
s->st[ch].rnn[n].denoise_gru_state);
 
 1539                            char *res, 
int res_len, 
int flags)
 
 1553     for (
int ch = 0; ch < 
s->channels; ch++)
 
 1558         for (
int ch = 0; ch < 
s->channels; ch++)
 
 1574     for (
int ch = 0; ch < 
s->channels && 
s->st; ch++) {
 
 1596 #define OFFSET(x) offsetof(AudioRNNContext, x) 
 1597 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM 
 1610     .description   = 
NULL_IF_CONFIG_SMALL(
"Reduce noise from speech using Recurrent Neural Networks."),
 
 1612     .priv_class    = &arnndn_class,
 
  
static void error(const char *err)
static void compute_dense(const DenseLayer *layer, float *output, const float *input)
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
@ AV_SAMPLE_FMT_FLTP
float, planar
static void pitch_downsample(float *x[], float *x_lp, int len, int C)
static int mix(int c0, int c1)
float synthesis_mem[FRAME_SIZE]
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
static int activate(AVFilterContext *ctx)
static void dual_inner_prod(const float *x, const float *y01, const float *y02, int N, float *xy1, float *xy2)
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
static enum AVSampleFormat sample_fmts[]
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
static av_cold void uninit(AVFilterContext *ctx)
static void inverse_transform(DenoiseState *st, float *out, const AVComplexFloat *in)
This structure describes decoded (raw) audio or video data.
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
static const AVOption arnndn_options[]
#define FILTER_QUERY_FUNC(func)
static void frame_synthesis(AudioRNNContext *s, DenoiseState *st, float *out, const AVComplexFloat *y)
const char * name
Filter name.
int nb_channels
Number of channels in this layout.
static const float tansig_table[201]
A link between two filters.
static void find_best_pitch(float *xcorr, float *y, int len, int max_pitch, int *best_pitch)
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
#define RNN_CLEAR(dst, n)
static void compute_band_energy(float *bandE, const AVComplexFloat *X)
static void compute_rnn(AudioRNNContext *s, RNNState *rnn, float *gains, float *vad, const float *input)
static void free_model(AVFilterContext *ctx, int n)
float * denoise_gru_state
static int rnnoise_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
static SDL_Window * window
static void rnnoise_model_free(RNNModel *model)
float cepstral_mem[CEPS_MEM][NB_BANDS]
A filter pad used for either input or output.
static void compute_band_corr(float *bandE, const AVComplexFloat *X, const AVComplexFloat *P)
float history[FRAME_SIZE]
s EdgeDetect Foobar g libavfilter vf_edgedetect c libavfilter vf_foobar c edit libavfilter and add an entry for foobar following the pattern of the other filters edit libavfilter allfilters and add an entry for foobar following the pattern of the other filters configure make j< whatever > ffmpeg ffmpeg i you should get a foobar png with Lena edge detected That s your new playground is ready Some little details about what s going which in turn will define variables for the build system and the C
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
static void frame_analysis(AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, float *Ex, const float *in)
static __device__ float floor(float a)
static const AVFilterPad inputs[]
static float celt_inner_prod(const float *x, const float *y, int N)
#define av_assert0(cond)
assert() equivalent, that is always enabled.
@ AV_TX_FLOAT_FFT
Standard complex to complex FFT with sample data type of AVComplexFloat, AVComplexDouble or AVComplex...
#define xi(width, name, var, range_min, range_max, subs,...)
static int rnnoise_model_from_file(FILE *f, RNNModel **rnn)
const AVFilter ff_af_arnndn
static int config_input(AVFilterLink *inlink)
#define FILTER_INPUTS(array)
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Describe the class of an AVClass context structure.
int ff_inlink_consume_samples(AVFilterLink *link, unsigned min, unsigned max, AVFrame **rframe)
Take samples from the link's FIFO and update the link's stats.
#define LOCAL_ALIGNED_32(t, v,...)
static float sigmoid_approx(float x)
const DenseLayer * vad_output
const float * recurrent_weights
static const AVFilterPad outputs[]
static __device__ float sqrtf(float a)
const DenseLayer * input_dense
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
const float * input_weights
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N)
float pitch_buf[PITCH_BUF_SIZE]
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
static int celt_autocorr(const float *x, float *ac, const float *window, int overlap, int lag, int n)
static void celt_lpc(float *lpc, const float *ac, int p)
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
#define RNN_MOVE(dst, src, n)
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
FF_FILTER_FORWARD_WANTED(outlink, inlink)
const GRULayer * denoise_gru
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
#define ACTIVATION_SIGMOID
#define DECLARE_ALIGNED(n, t, v)
#define i(width, name, range_min, range_max)
#define RNN_COPY(dst, src, n)
uint8_t ** extended_data
pointers to the data planes/channels.
int ff_filter_get_nb_threads(AVFilterContext *ctx)
Get number of threads for current filter instance.
AVSampleFormat
Audio sample formats.
Used for passing data between threads.
static void interp_band_gain(float *g, const float *bandE)
static void dct(AudioRNNContext *s, float *out, const float *in)
float dct_table[FFALIGN(NB_BANDS, 4)][FFALIGN(NB_BANDS, 4)]
const char * name
Pad name.
FILE * avpriv_fopen_utf8(const char *path, const char *mode)
Open a file using a UTF-8 filename.
void * av_calloc(size_t nmemb, size_t size)
static int open_model(AVFilterContext *ctx, RNNModel **model)
#define FFSWAP(type, a, b)
static int compute_frame_features(AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, AVComplexFloat *P, float *Ex, float *Ep, float *Exp, float *features, const float *in)
const float * input_weights
float window[WINDOW_SIZE]
static const uint8_t second_check[16]
static float remove_doubling(float *x, int maxperiod, int minperiod, int N, int *T0_, int prev_period, float prev_gain)
static float compute_pitch_gain(float xy, float xx, float yy)
AVFILTER_DEFINE_CLASS(arnndn)
static void xcorr_kernel(const float *x, const float *y, float sum[4], int len)
static void pitch_search(const float *x_lp, float *y, int len, int max_pitch, int *pitch)
static void pitch_filter(AVComplexFloat *X, const AVComplexFloat *P, const float *Ex, const float *Ep, const float *Exp, const float *g)
static void celt_pitch_xcorr(const float *x, const float *y, float *xcorr, int len, int max_pitch)
static float rnnoise_channel(AudioRNNContext *s, DenoiseState *st, float *out, const float *in, int disabled)
static void celt_fir5(const float *x, const float *num, float *y, int N, float *mem)
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
float pitch_enh_buf[PITCH_BUF_SIZE]
static int shift(int a, int b)
#define AVFILTER_FLAG_SLICE_THREADS
The filter supports multithreading by splitting frames into multiple parts and processing them concur...
static float tansig_approx(float x)
static int query_formats(AVFilterContext *ctx)
static void forward_transform(DenoiseState *st, AVComplexFloat *out, const float *in)
AVChannelLayout ch_layout
channel layout of current buffer (see libavutil/channel_layout.h)
FF_FILTER_FORWARD_STATUS(inlink, outlink)
static const int16_t alpha[]
#define FILTER_OUTPUTS(array)
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
#define flags(name, subs,...)
const DenseLayer * denoise_output
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
#define ALLOC_LAYER(type, name)
static av_always_inline int ff_filter_execute(AVFilterContext *ctx, avfilter_action_func *func, void *arg, int *ret, int nb_jobs)
static void compute_gru(AudioRNNContext *s, const GRULayer *gru, float *state, const float *input)
static const uint8_t eband5ms[]
#define INPUT_DENSE(name)
const GRULayer * noise_gru
static av_cold int init(AVFilterContext *ctx)
float analysis_mem[FRAME_SIZE]