/* Frame geometry. FRAME_SIZE is 120 << FRAME_SIZE_SHIFT = 480; it is the
 * length of the analysis_mem, synthesis_mem and history float arrays. */
47 #define FRAME_SIZE_SHIFT 2
48 #define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
/* Twice the frame length (960); the length of the window[] array. */
49 #define WINDOW_SIZE (2*FRAME_SIZE)
/* One more than the frame length (481). */
50 #define FREQ_SIZE (FRAME_SIZE + 1)
/* Pitch analysis limits.
 * NOTE(review): units are presumably samples -- confirm. */
52 #define PITCH_MIN_PERIOD 60
53 #define PITCH_MAX_PERIOD 768
54 #define PITCH_FRAME_SIZE 960
/* 768 + 960 = 1728; the length of the pitch_buf and pitch_enh_buf arrays. */
55 #define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
/* Square of x. The argument is expanded twice, so do not pass an expression
 * with side effects. */
57 #define SQUARE(x) ((x)*(x))
/* Feature-vector length: NB_BANDS + 3 * NB_DELTA_CEPS + 2, i.e.
 * NB_BANDS + 20. NB_BANDS is defined outside this excerpt. */
62 #define NB_DELTA_CEPS 6
64 #define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
/* 1/256 as a float.
 * NOTE(review): presumably the factor that maps integer weights read from
 * the model file to floats -- confirm at the use site. */
66 #define WEIGHTS_SCALE (1.f/256)
/* NOTE(review): the model loader's INPUT_VAL check rejects values above the
 * literal 128; presumably that literal is meant to track this limit --
 * confirm. */
68 #define MAX_NEURONS 128
/* Activation identifiers assigned to a layer's activation field. */
70 #define ACTIVATION_TANH 0
71 #define ACTIVATION_SIGMOID 1
72 #define ACTIVATION_RELU 2
/* Activation identifiers as they appear in the model file. INPUT_ACTIVATION
 * maps F_ACTIVATION_SIGMOID and F_ACTIVATION_RELU to the matching
 * ACTIVATION_* value; its remaining branch assigns ACTIVATION_TANH. */
154 #define F_ACTIVATION_TANH 0
155 #define F_ACTIVATION_SIGMOID 1
156 #define F_ACTIVATION_RELU 2
/* Free ptr when it is non-NULL; wrapped in do { } while (0) so the macro
 * behaves as a single statement.
 * NOTE(review): the NULL guard is redundant (free(NULL) is a no-op), ptr is
 * expanded twice, and this calls free() while the neighbouring FREE_DENSE /
 * FREE_GRU macros call av_free() -- confirm whether this macro is still
 * needed and which allocator it should pair with. */
160 #define FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0)
161 #define FREE_DENSE(name) do { \
163 av_free((void *) model->name->input_weights); \
164 av_free((void *) model->name->bias); \
165 av_free((void *) model->name); \
168 #define FREE_GRU(name) do { \
170 av_free((void *) model->name->input_weights); \
171 av_free((void *) model->name->recurrent_weights); \
172 av_free((void *) model->name->bias); \
173 av_free((void *) model->name); \
199 if (fscanf(
f,
"rnnoise-nu model file version %d\n", &
in) != 1 ||
in != 1)
206 #define ALLOC_LAYER(type, name) \
207 name = av_calloc(1, sizeof(type)); \
209 rnnoise_model_free(ret); \
210 return AVERROR(ENOMEM); \
221 #define INPUT_VAL(name) do { \
222 if (fscanf(f, "%d", &in) != 1 || in < 0 || in > 128) { \
223 rnnoise_model_free(ret); \
224 return AVERROR(EINVAL); \
229 #define INPUT_ACTIVATION(name) do { \
231 INPUT_VAL(activation); \
232 switch (activation) { \
233 case F_ACTIVATION_SIGMOID: \
234 name = ACTIVATION_SIGMOID; \
236 case F_ACTIVATION_RELU: \
237 name = ACTIVATION_RELU; \
240 name = ACTIVATION_TANH; \
244 #define INPUT_ARRAY(name, len) do { \
245 float *values = av_calloc((len), sizeof(float)); \
247 rnnoise_model_free(ret); \
248 return AVERROR(ENOMEM); \
251 for (int i = 0; i < (len); i++) { \
252 if (fscanf(f, "%d", &in) != 1) { \
253 rnnoise_model_free(ret); \
254 return AVERROR(EINVAL); \
260 #define INPUT_ARRAY3(name, len0, len1, len2) do { \
261 float *values = av_calloc(FFALIGN((len0), 4) * FFALIGN((len1), 4) * (len2), sizeof(float)); \
263 rnnoise_model_free(ret); \
264 return AVERROR(ENOMEM); \
267 for (int k = 0; k < (len0); k++) { \
268 for (int i = 0; i < (len2); i++) { \
269 for (int j = 0; j < (len1); j++) { \
270 if (fscanf(f, "%d", &in) != 1) { \
271 rnnoise_model_free(ret); \
272 return AVERROR(EINVAL); \
274 values[j * (len2) * FFALIGN((len0), 4) + i * FFALIGN((len0), 4) + k] = in; \
280 #define NEW_LINE() do { \
282 while ((c = fgetc(f)) != EOF) { \
288 #define INPUT_DENSE(name) do { \
289 INPUT_VAL(name->nb_inputs); \
290 INPUT_VAL(name->nb_neurons); \
291 ret->name ## _size = name->nb_neurons; \
292 INPUT_ACTIVATION(name->activation); \
294 INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \
296 INPUT_ARRAY(name->bias, name->nb_neurons); \
300 #define INPUT_GRU(name) do { \
301 INPUT_VAL(name->nb_inputs); \
302 INPUT_VAL(name->nb_neurons); \
303 ret->name ## _size = name->nb_neurons; \
304 INPUT_ACTIVATION(name->activation); \
306 INPUT_ARRAY3(name->input_weights, name->nb_inputs, name->nb_neurons, 3); \
308 INPUT_ARRAY3(name->recurrent_weights, name->nb_neurons, name->nb_neurons, 3); \
310 INPUT_ARRAY(name->bias, name->nb_neurons * 3); \
375 for (
int i = 0;
i <
s->channels;
i++) {
388 for (
int i = 0;
i <
s->channels;
i++) {
405 static void biquad(
float *y,
float mem[2],
const float *x,
406 const float *
b,
const float *
a,
int N)
408 for (
int i = 0;
i <
N;
i++) {
413 mem[0] = mem[1] + (
b[0]*
xi -
a[0]*yi);
414 mem[1] = (
b[1]*
xi -
a[1]*yi);
/* Element-count wrappers around memmove / memset / memcpy: n is a number of
 * elements of type *dst, not a byte count. RNN_MOVE tolerates overlapping
 * ranges (memmove); RNN_COPY does not (memcpy).
 * The "0*((dst)-(src))" term adds zero to the size.
 * NOTE(review): it presumably exists so the pointer subtraction makes the
 * compiler reject dst/src of incompatible pointer types -- confirm. */
419 #define RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
/* Zero the first n elements of dst. */
420 #define RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
421 #define RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
433 st->
tx_fn(st->
tx, y, x,
sizeof(
float));
450 st->
txi_fn(st->
txi, y, x,
sizeof(
float));
458 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
469 for (
int j = 0; j < band_size; j++) {
470 float tmp, frac = (float)j / band_size;
474 sum[
i] += (1.f - frac) *
tmp;
475 sum[
i + 1] += frac *
tmp;
494 for (
int j = 0; j < band_size; j++) {
495 float tmp, frac = (float)j / band_size;
499 sum[
i] += (1 - frac) *
tmp;
500 sum[
i + 1] += frac *
tmp;
527 const float mix =
s->mix;
528 const float imix = 1.f -
FFMAX(
mix, 0.f);
540 static inline void xcorr_kernel(
const float *x,
const float *y,
float sum[4],
int len)
542 float y_0, y_1, y_2, y_3 = 0;
549 for (j = 0; j <
len - 3; j += 4) {
610 const float *y,
int N)
614 for (
int i = 0;
i <
N;
i++)
621 float *xcorr,
int len,
int max_pitch)
625 for (
i = 0;
i < max_pitch - 3;
i += 4) {
626 float sum[4] = { 0, 0, 0, 0};
631 xcorr[
i + 1] = sum[1];
632 xcorr[
i + 2] = sum[2];
633 xcorr[
i + 3] = sum[3];
636 for (;
i < max_pitch;
i++) {
656 for (
int i = 0;
i < n;
i++)
658 for (
int i = 0;
i < overlap;
i++) {
668 for (
int k = 0; k <= lag; k++) {
671 for (
int i = k + fastN;
i < n;
i++)
672 d += xptr[
i] * xptr[
i-k];
687 for (
int i = 0;
i < p;
i++) {
690 for (
int j = 0; j <
i; j++)
691 rr += (lpc[j] * ac[
i - j]);
696 for (
int j = 0; j < (
i + 1) >> 1; j++) {
700 lpc[j] = tmp1 + (
r*tmp2);
701 lpc[
i-1-j] = tmp2 + (
r*tmp1);
706 if (
error < .001f * ac[0])
718 float num0, num1, num2, num3, num4;
719 float mem0, mem1, mem2, mem3, mem4;
732 for (
int i = 0;
i <
N;
i++) {
760 float lpc[4], mem[5]={0,0,0,0,0};
764 for (
int i = 1; i < len >> 1;
i++)
765 x_lp[
i] = .5f * (.5f * (x[0][(2*
i-1)]+x[0][(2*
i+1)])+x[0][2*
i]);
766 x_lp[0] = .5f * (.5f * (x[0][1])+x[0][0]);
768 for (
int i = 1; i < len >> 1;
i++)
769 x_lp[
i] += (.5f * (.5f * (x[1][(2*
i-1)]+x[1][(2*
i+1)])+x[1][2*
i]));
770 x_lp[0] += .5f * (.5f * (x[1][1])+x[1][0]);
778 for (
int i = 1;
i <= 4;
i++) {
780 ac[
i] -= ac[
i]*(.008f*
i)*(.008f*
i);
784 for (
int i = 0;
i < 4;
i++) {
786 lpc[
i] = (lpc[
i] *
tmp);
789 lpc2[0] = lpc[0] + .8f;
790 lpc2[1] = lpc[1] + (
c1 * lpc[0]);
791 lpc2[2] = lpc[2] + (
c1 * lpc[1]);
792 lpc2[3] = lpc[3] + (
c1 * lpc[2]);
793 lpc2[4] = (
c1 * lpc[3]);
797 static inline void dual_inner_prod(
const float *x,
const float *y01,
const float *y02,
798 int N,
float *xy1,
float *xy2)
800 float xy01 = 0, xy02 = 0;
802 for (
int i = 0;
i <
N;
i++) {
803 xy01 += (x[
i] * y01[
i]);
804 xy02 += (x[
i] * y02[
i]);
813 return xy / sqrtf(1.f + xx * yy);
/* 16-entry lookup table (valid indices 0..15).
 * NOTE(review): presumably indexed by the k = 2..15 loop in
 * remove_doubling(); the use site is not visible here -- confirm. */
816 static const uint8_t second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
818 int *T0_,
int prev_period,
float prev_gain)
825 float best_xy, best_yy;
830 minperiod0 = minperiod;
844 for (
i = 1;
i <= maxperiod;
i++) {
845 yy = yy+(x[-
i] * x[-
i])-(x[
N-
i] * x[
N-
i]);
846 yy_lookup[
i] =
FFMAX(0, yy);
853 for (k = 2; k <= 15; k++) {
873 xy = .5f * (xy + xy2);
874 yy = .5f * (yy_lookup[T1] + yy_lookup[T1b]);
876 if (
FFABS(T1-prev_period)<=1)
878 else if (
FFABS(T1-prev_period)<=2 && 5 * k * k < T0)
879 cont = prev_gain * .5f;
882 thresh =
FFMAX(.3f, (.7f * g0) - cont);
886 thresh =
FFMAX(.4f, (.85f * g0) - cont);
887 else if (T1<2*minperiod)
888 thresh =
FFMAX(.5f, (.9f * g0) - cont);
897 best_xy =
FFMAX(0, best_xy);
898 if (best_yy <= best_xy)
901 pg = best_xy/(best_yy + 1);
903 for (k = 0; k < 3; k++)
905 if ((xcorr[2]-xcorr[0]) > .7f * (xcorr[1]-xcorr[0]))
907 else if ((xcorr[0]-xcorr[2]) > (.7f * (xcorr[1] - xcorr[2])))
921 int max_pitch,
int *best_pitch)
934 for (
int j = 0; j <
len; j++)
937 for (
int i = 0;
i < max_pitch;
i++) {
946 num = xcorr16 * xcorr16;
947 if ((num * best_den[1]) > (best_num[1] * Syy)) {
948 if ((num * best_den[0]) > (best_num[0] * Syy)) {
949 best_num[1] = best_num[0];
950 best_den[1] = best_den[0];
951 best_pitch[1] = best_pitch[0];
968 int len,
int max_pitch,
int *pitch)
971 int best_pitch[2]={0,0};
981 for (
int j = 0; j < len >> 2; j++)
982 x_lp4[j] = x_lp[2*j];
983 for (
int j = 0; j < lag >> 2; j++)
993 for (
int i = 0; i < max_pitch >> 1;
i++) {
996 if (
FFABS(
i-2*best_pitch[0])>2 &&
FFABS(
i-2*best_pitch[1])>2)
999 xcorr[
i] =
FFMAX(-1, sum);
1005 if (best_pitch[0] > 0 && best_pitch[0] < (max_pitch >> 1) - 1) {
1008 a = xcorr[best_pitch[0] - 1];
1009 b = xcorr[best_pitch[0]];
1010 c = xcorr[best_pitch[0] + 1];
1011 if (
c -
a > .7f * (
b -
a))
1013 else if (
a -
c > .7f * (
b-
c))
1021 *pitch = 2 * best_pitch[0] -
offset;
1030 out[
i] = sum * sqrtf(2.f / 22);
1035 float *Ex,
float *Ep,
float *Exp,
float *features,
const float *
in)
1038 float *ceps_0, *ceps_1, *ceps_2;
1039 float spec_variability = 0;
1047 float follow, logMax;
1072 Exp[
i] = Exp[
i] / sqrtf(.001f+Ex[
i]*Ep[
i]);
1088 logMax =
FFMAX(logMax, Ly[
i]);
1089 follow =
FFMAX(follow-1.5, Ly[
i]);
1099 dct(
s, features, Ly);
1107 ceps_0[
i] = features[
i];
1111 features[
i] = ceps_0[
i] + ceps_1[
i] + ceps_2[
i];
1120 float mindist = 1e15f;
1121 for (
int j = 0; j <
CEPS_MEM; j++) {
1123 for (
int k = 0; k <
NB_BANDS; k++) {
1131 mindist =
FFMIN(mindist, dist);
1134 spec_variability += mindist;
1149 for (
int j = 0; j < band_size; j++) {
1150 float frac = (float)j / band_size;
1158 const float *Exp,
const float *
g)
1167 if (Exp[
i]>
g[
i])
r[
i] = 1;
1170 r[
i] *= sqrtf(Ex[
i]/(1e-8+Ep[
i]));
1174 X[
i].re += rf[
i]*
P[
i].re;
1175 X[
i].im += rf[
i]*
P[
i].im;
1179 norm[
i] = sqrtf(Ex[
i] / (1e-8+newE[
i]));
1183 X[
i].re *= normf[
i];
1184 X[
i].im *= normf[
i];
1189 0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
1190 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
1191 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
1192 0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
1193 0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
1194 0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
1195 0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
1196 0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
1197 0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
1198 0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
1199 0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
1200 0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
1201 0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
1202 0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
1203 0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
1204 0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
1205 0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
1206 0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
1207 0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
1208 0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
1209 0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
1210 0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
1211 0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
1212 0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
1213 0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
1214 0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
1215 0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
1216 0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
1217 0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
1218 0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
1219 0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
1220 0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
1221 0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
1222 0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
1223 0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
1224 0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
1225 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
1226 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
1227 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1228 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1256 y = y + x*dy*(1 - y*x);
1269 for (
int i = 0;
i <
N;
i++) {
1271 float sum = layer->
bias[
i];
1273 for (
int j = 0; j <
M; j++)
1280 for (
int i = 0;
i <
N;
i++)
1283 for (
int i = 0;
i <
N;
i++)
1286 for (
int i = 0;
i <
N;
i++)
1287 output[
i] =
FFMAX(0, output[
i]);
1302 const int stride = 3 * AN, istride = 3 * AM;
1304 for (
int i = 0;
i <
N;
i++) {
1306 float sum = gru->
bias[
i];
1308 sum +=
s->fdsp->scalarproduct_float(gru->
input_weights +
i * istride, input, AM);
1313 for (
int i = 0;
i <
N;
i++) {
1315 float sum = gru->
bias[
N +
i];
1317 sum +=
s->fdsp->scalarproduct_float(gru->
input_weights + AM +
i * istride, input, AM);
1322 for (
int i = 0;
i <
N;
i++) {
1324 float sum = gru->
bias[2 *
N +
i];
1326 sum +=
s->fdsp->scalarproduct_float(gru->
input_weights + 2 * AM +
i * istride, input, AM);
1327 for (
int j = 0; j <
N; j++)
/* Fixed length of 42.
 * NOTE(review): presumably the RNN input vector length and expected to equal
 * NB_FEATURES -- confirm against the NB_BANDS definition. */
1344 #define INPUT_SIZE 42
1387 static const float a_hp[2] = {-1.99599, 0.99600};
1388 static const float b_hp[2] = {-2, 1};
1394 if (!silence && !disabled) {
1428 const int start = (
out->channels * jobnr) / nb_jobs;
1429 const int end = (
out->channels * (jobnr+1)) / nb_jobs;
1431 for (
int ch = start; ch < end; ch++) {
1433 (
float *)
out->extended_data[ch],
1434 (
const float *)
in->extended_data[ch],
1501 if (!*model || ret < 0)
1526 for (
int j = 0; j <
NB_BANDS; j++) {
1529 s->dct_table[j][
i] *= sqrtf(.5);
1543 for (
int ch = 0; ch <
s->channels &&
s->st; ch++) {
1544 av_freep(&
s->st[ch].rnn[n].vad_gru_state);
1545 av_freep(&
s->st[ch].rnn[n].noise_gru_state);
1546 av_freep(&
s->st[ch].rnn[n].denoise_gru_state);
1551 char *res,
int res_len,
int flags)
1565 for (
int ch = 0; ch <
s->channels; ch++)
1570 for (
int ch = 0; ch <
s->channels; ch++)
1586 for (
int ch = 0; ch <
s->channels &&
s->st; ch++) {
/* Byte offset of member x within AudioRNNContext. */
1610 #define OFFSET(x) offsetof(AudioRNNContext, x)
/* Bitwise OR of the audio, filtering and runtime AV_OPT_FLAG_* bits.
 * NOTE(review): the expansion is not parenthesized, so combining AF with a
 * higher-precedence operator (e.g. AF & mask) would bind to the last flag
 * only -- use it as a standalone expression. */
1611 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
1624 .description =
NULL_IF_CONFIG_SMALL(
"Reduce noise from speech using Recurrent Neural Networks."),
1627 .priv_class = &arnndn_class,
static enum AVSampleFormat sample_fmts[]
static float compute_pitch_gain(float xy, float xx, float yy)
static int compute_frame_features(AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, AVComplexFloat *P, float *Ex, float *Ep, float *Exp, float *features, const float *in)
static const uint8_t second_check[16]
static void dual_inner_prod(const float *x, const float *y01, const float *y02, int N, float *xy1, float *xy2)
static float remove_doubling(float *x, int maxperiod, int minperiod, int N, int *T0_, int prev_period, float prev_gain)
static void inverse_transform(DenoiseState *st, float *out, const AVComplexFloat *in)
static int celt_autocorr(const float *x, float *ac, const float *window, int overlap, int lag, int n)
static void forward_transform(DenoiseState *st, AVComplexFloat *out, const float *in)
static void pitch_downsample(float *x[], float *x_lp, int len, int C)
AVFILTER_DEFINE_CLASS(arnndn)
static int rnnoise_model_from_file(FILE *f, RNNModel **rnn)
#define ACTIVATION_SIGMOID
static int open_model(AVFilterContext *ctx, RNNModel **model)
#define ALLOC_LAYER(type, name)
static int query_formats(AVFilterContext *ctx)
static float celt_inner_prod(const float *x, const float *y, int N)
static void free_model(AVFilterContext *ctx, int n)
static void pitch_filter(AVComplexFloat *X, const AVComplexFloat *P, const float *Ex, const float *Ep, const float *Exp, const float *g)
static int config_input(AVFilterLink *inlink)
#define RNN_CLEAR(dst, n)
static void frame_synthesis(AudioRNNContext *s, DenoiseState *st, float *out, const AVComplexFloat *y)
static float sigmoid_approx(float x)
static const AVFilterPad inputs[]
static void compute_gru(AudioRNNContext *s, const GRULayer *gru, float *state, const float *input)
#define INPUT_DENSE(name)
static void interp_band_gain(float *g, const float *bandE)
#define RNN_COPY(dst, src, n)
static const AVFilterPad outputs[]
static void compute_band_corr(float *bandE, const AVComplexFloat *X, const AVComplexFloat *P)
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
static void celt_lpc(float *lpc, const float *ac, int p)
static void dct(AudioRNNContext *s, float *out, const float *in)
static const AVOption arnndn_options[]
static int rnnoise_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
static int activate(AVFilterContext *ctx)
static void compute_band_energy(float *bandE, const AVComplexFloat *X)
static av_cold int init(AVFilterContext *ctx)
static void frame_analysis(AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, float *Ex, const float *in)
#define RNN_MOVE(dst, src, n)
static av_cold void uninit(AVFilterContext *ctx)
static float rnnoise_channel(AudioRNNContext *s, DenoiseState *st, float *out, const float *in, int disabled)
static void rnnoise_model_free(RNNModel *model)
static void celt_fir5(const float *x, const float *num, float *y, int N, float *mem)
static void compute_dense(const DenseLayer *layer, float *output, const float *input)
static void compute_rnn(AudioRNNContext *s, RNNState *rnn, float *gains, float *vad, const float *input)
static void celt_pitch_xcorr(const float *x, const float *y, float *xcorr, int len, int max_pitch)
static float tansig_approx(float x)
static void xcorr_kernel(const float *x, const float *y, float sum[4], int len)
static void find_best_pitch(float *xcorr, float *y, int len, int max_pitch, int *best_pitch)
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N)
static void pitch_search(const float *x_lp, float *y, int len, int max_pitch, int *pitch)
static const uint8_t eband5ms[]
static const float tansig_table[201]
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
simple assert() macros that are a bit more flexible than ISO C assert().
#define av_assert0(cond)
assert() equivalent, that is always enabled.
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
int ff_inlink_consume_samples(AVFilterLink *link, unsigned min, unsigned max, AVFrame **rframe)
Take samples from the link's FIFO and update the link's stats.
int ff_filter_get_nb_threads(AVFilterContext *ctx)
Get number of threads for current filter instance.
Main libavfilter public API header.
#define flags(name, subs,...)
#define xi(width, name, var, range_min, range_max, subs,...)
#define FFSWAP(type, a, b)
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static __device__ float floor(float a)
static SDL_Window * window
#define FF_FILTER_FORWARD_WANTED(outlink, inlink)
Forward the frame_wanted_out flag from an output link to an input link.
#define FF_FILTER_FORWARD_STATUS(inlink, outlink)
Acknowledge the status on an input link and forward it to an output link.
#define FFERROR_NOT_READY
Filters implementation helper functions.
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
#define AVFILTER_FLAG_SLICE_THREADS
The filter supports multithreading by splitting frames into multiple parts and processing them concur...
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
void * av_calloc(size_t nmemb, size_t size)
Non-inlined equivalent of av_mallocz_array().
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
FILE * av_fopen_utf8(const char *path, const char *mode)
Open a file using a UTF-8 filename.
AVSampleFormat
Audio sample formats.
@ AV_SAMPLE_FMT_FLTP
float, planar
static const int16_t alpha[]
static int mix(int c0, int c1)
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
#define LOCAL_ALIGNED_32(t, v,...)
enum MovChannelLayoutTag * layouts
static int shift(int a, int b)
Describe the class of an AVClass context structure.
A list of supported channel layouts.
A link between two filters.
int channels
Number of channels.
AVFilterContext * dst
dest filter
A filter pad used for either input or output.
const char * name
Pad name.
const char * name
Filter name.
AVFormatInternal * internal
An opaque field for libavformat internal usage.
This structure describes decoded (raw) audio or video data.
float window[WINDOW_SIZE]
float dct_table[FFALIGN(NB_BANDS, 4)][FFALIGN(NB_BANDS, 4)]
float pitch_enh_buf[PITCH_BUF_SIZE]
float analysis_mem[FRAME_SIZE]
float cepstral_mem[CEPS_MEM][NB_BANDS]
float synthesis_mem[FRAME_SIZE]
float pitch_buf[PITCH_BUF_SIZE]
float history[FRAME_SIZE]
const float * input_weights
const float * recurrent_weights
const float * input_weights
const DenseLayer * denoise_output
const DenseLayer * input_dense
const DenseLayer * vad_output
const GRULayer * noise_gru
const GRULayer * denoise_gru
float * denoise_gru_state
Used for passing data between threads.
static void error(const char *err)
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets ctx to NULL, does nothing when ctx == NULL.
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
@ AV_TX_FLOAT_FFT
Standard complex to complex FFT with sample data type AVComplexFloat.
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
static const uint8_t offset[127][2]