From d5755744c3e2b70e9f04704ae9d18b928d9fa456 Mon Sep 17 00:00:00 2001 From: Arun Raghavan Date: Wed, 2 Dec 2020 18:31:44 -0500 Subject: [PATCH 1/2] webrtcdsp: Update code for webrtc-audio-processing-1 Updated API usage appropriately, and now we have a versioned package to track breaking vs. non-breaking updates. Deprecates a number of properties (and we have to plug in our own values for related enums which are now gone): * echo-suppression-level * experimental-agc * extended-filter * delay-agnostic * voice-detection-frame-size-ms * voice-detection-likelihood Part-of: --- .../ext/webrtcdsp/gstwebrtcdsp.cpp | 271 +++++++----------- .../ext/webrtcdsp/gstwebrtcechoprobe.cpp | 87 +++--- .../ext/webrtcdsp/gstwebrtcechoprobe.h | 9 +- .../gst-plugins-bad/ext/webrtcdsp/meson.build | 4 +- 4 files changed, 164 insertions(+), 207 deletions(-) diff --git a/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcdsp.cpp b/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcdsp.cpp index 7ee09488fb7..c9a7cdae2f4 100644 --- a/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcdsp.cpp +++ b/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcdsp.cpp @@ -71,9 +71,7 @@ #include "gstwebrtcdsp.h" #include "gstwebrtcechoprobe.h" -#include <webrtc/modules/audio_processing/include/audio_processing.h> -#include <webrtc/modules/interface/module_common_types.h> -#include <webrtc/system_wrappers/include/trace.h> +#include <modules/audio_processing/include/audio_processing.h> GST_DEBUG_CATEGORY (webrtc_dsp_debug); #define GST_CAT_DEFAULT (webrtc_dsp_debug) @@ -82,10 +80,9 @@ GST_DEBUG_CATEGORY (webrtc_dsp_debug); #define DEFAULT_COMPRESSION_GAIN_DB 9 #define DEFAULT_STARTUP_MIN_VOLUME 12 #define DEFAULT_LIMITER TRUE -#define DEFAULT_GAIN_CONTROL_MODE webrtc::GainControl::kAdaptiveDigital +#define DEFAULT_GAIN_CONTROL_MODE webrtc::AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital #define DEFAULT_VOICE_DETECTION FALSE #define DEFAULT_VOICE_DETECTION_FRAME_SIZE_MS 10 -#define DEFAULT_VOICE_DETECTION_LIKELIHOOD webrtc::VoiceDetection::kLowLikelihood static GstStaticPadTemplate gst_webrtc_dsp_sink_template = GST_STATIC_PAD_TEMPLATE ("sink", @@ -119,7 +116,7 @@ GST_STATIC_PAD_TEMPLATE 
("src", "channels = (int) [1, MAX]") ); -typedef webrtc::EchoCancellation::SuppressionLevel GstWebrtcEchoSuppressionLevel; +typedef int GstWebrtcEchoSuppressionLevel; #define GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL \ (gst_webrtc_echo_suppression_level_get_type ()) static GType @@ -127,10 +124,9 @@ gst_webrtc_echo_suppression_level_get_type (void) { static GType suppression_level_type = 0; static const GEnumValue level_types[] = { - {webrtc::EchoCancellation::kLowSuppression, "Low Suppression", "low"}, - {webrtc::EchoCancellation::kModerateSuppression, - "Moderate Suppression", "moderate"}, - {webrtc::EchoCancellation::kHighSuppression, "high Suppression", "high"}, + {1, "Low Suppression", "low"}, + {2, "Moderate Suppression", "moderate"}, + {3, "high Suppression", "high"}, {0, NULL, NULL} }; @@ -141,7 +137,7 @@ gst_webrtc_echo_suppression_level_get_type (void) return suppression_level_type; } -typedef webrtc::NoiseSuppression::Level GstWebrtcNoiseSuppressionLevel; +typedef webrtc::AudioProcessing::Config::NoiseSuppression::Level GstWebrtcNoiseSuppressionLevel; #define GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL \ (gst_webrtc_noise_suppression_level_get_type ()) static GType @@ -149,10 +145,10 @@ gst_webrtc_noise_suppression_level_get_type (void) { static GType suppression_level_type = 0; static const GEnumValue level_types[] = { - {webrtc::NoiseSuppression::kLow, "Low Suppression", "low"}, - {webrtc::NoiseSuppression::kModerate, "Moderate Suppression", "moderate"}, - {webrtc::NoiseSuppression::kHigh, "High Suppression", "high"}, - {webrtc::NoiseSuppression::kVeryHigh, "Very High Suppression", + {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kLow, "Low Suppression", "low"}, + {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kModerate, "Moderate Suppression", "moderate"}, + {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kHigh, "High Suppression", "high"}, + {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kVeryHigh, "Very High 
Suppression", "very-high"}, {0, NULL, NULL} }; @@ -164,7 +160,7 @@ gst_webrtc_noise_suppression_level_get_type (void) return suppression_level_type; } -typedef webrtc::GainControl::Mode GstWebrtcGainControlMode; +typedef webrtc::AudioProcessing::Config::GainController1::Mode GstWebrtcGainControlMode; #define GST_TYPE_WEBRTC_GAIN_CONTROL_MODE \ (gst_webrtc_gain_control_mode_get_type ()) static GType @@ -172,8 +168,9 @@ gst_webrtc_gain_control_mode_get_type (void) { static GType gain_control_mode_type = 0; static const GEnumValue mode_types[] = { - {webrtc::GainControl::kAdaptiveDigital, "Adaptive Digital", "adaptive-digital"}, - {webrtc::GainControl::kFixedDigital, "Fixed Digital", "fixed-digital"}, + {webrtc::AudioProcessing::Config::GainController1::kAdaptiveDigital, "Adaptive Digital", "adaptive-digital"}, + {webrtc::AudioProcessing::Config::GainController1::kFixedDigital, "Fixed Digital", "fixed-digital"}, + {webrtc::AudioProcessing::Config::GainController1::kAdaptiveAnalog, "Adaptive Analog", "adaptive-analog"}, {0, NULL, NULL} }; @@ -184,7 +181,7 @@ gst_webrtc_gain_control_mode_get_type (void) return gain_control_mode_type; } -typedef webrtc::VoiceDetection::Likelihood GstWebrtcVoiceDetectionLikelihood; +typedef int GstWebrtcVoiceDetectionLikelihood; #define GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD \ (gst_webrtc_voice_detection_likelihood_get_type ()) static GType @@ -192,10 +189,10 @@ gst_webrtc_voice_detection_likelihood_get_type (void) { static GType likelihood_type = 0; static const GEnumValue likelihood_types[] = { - {webrtc::VoiceDetection::kVeryLowLikelihood, "Very Low Likelihood", "very-low"}, - {webrtc::VoiceDetection::kLowLikelihood, "Low Likelihood", "low"}, - {webrtc::VoiceDetection::kModerateLikelihood, "Moderate Likelihood", "moderate"}, - {webrtc::VoiceDetection::kHighLikelihood, "High Likelihood", "high"}, + {1, "Very Low Likelihood", "very-low"}, + {2, "Low Likelihood", "low"}, + {3, "Moderate Likelihood", "moderate"}, + {4, "High 
Likelihood", "high"}, {0, NULL, NULL} }; @@ -227,6 +224,7 @@ enum PROP_VOICE_DETECTION, PROP_VOICE_DETECTION_FRAME_SIZE_MS, PROP_VOICE_DETECTION_LIKELIHOOD, + PROP_EXTRA_DELAY_MS, }; /** @@ -248,7 +246,7 @@ struct _GstWebrtcDsp /* Protected by the stream lock */ GstAdapter *adapter; GstPlanarAudioAdapter *padapter; - webrtc::AudioProcessing * apm; + webrtc::AudioProcessing *apm; /* Protected by the object lock */ gchar *probe_name; @@ -257,21 +255,15 @@ struct _GstWebrtcDsp /* Properties */ gboolean high_pass_filter; gboolean echo_cancel; - webrtc::EchoCancellation::SuppressionLevel echo_suppression_level; gboolean noise_suppression; - webrtc::NoiseSuppression::Level noise_suppression_level; + webrtc::AudioProcessing::Config::NoiseSuppression::Level noise_suppression_level; gboolean gain_control; - gboolean experimental_agc; - gboolean extended_filter; - gboolean delay_agnostic; gint target_level_dbfs; gint compression_gain_db; gint startup_min_volume; gboolean limiter; - webrtc::GainControl::Mode gain_control_mode; + webrtc::AudioProcessing::Config::GainController1::Mode gain_control_mode; gboolean voice_detection; - gint voice_detection_frame_size_ms; - webrtc::VoiceDetection::Likelihood voice_detection_likelihood; }; G_DEFINE_TYPE_WITH_CODE (GstWebrtcDsp, gst_webrtc_dsp, GST_TYPE_AUDIO_FILTER, @@ -376,9 +368,9 @@ gst_webrtc_dsp_analyze_reverse_stream (GstWebrtcDsp * self, GstClockTime rec_time) { GstWebrtcEchoProbe *probe = NULL; - webrtc::AudioProcessing * apm; - webrtc::AudioFrame frame; + webrtc::AudioProcessing *apm; GstBuffer *buf = NULL; + GstAudioBuffer abuf; GstFlowReturn ret = GST_FLOW_OK; gint err, delay; @@ -391,48 +383,44 @@ gst_webrtc_dsp_analyze_reverse_stream (GstWebrtcDsp * self, if (!probe) return GST_FLOW_OK; + webrtc::StreamConfig config (probe->info.rate, probe->info.channels, + false); apm = self->apm; - if (self->delay_agnostic) - rec_time = GST_CLOCK_TIME_NONE; - -again: - delay = gst_webrtc_echo_probe_read (probe, rec_time, (gpointer) 
&frame, &buf); + delay = gst_webrtc_echo_probe_read (probe, rec_time, &buf); apm->set_stream_delay_ms (delay); + g_return_val_if_fail (buf != NULL, GST_FLOW_ERROR); + if (delay < 0) goto done; - if (frame.sample_rate_hz_ != self->info.rate) { + if (probe->info.rate != self->info.rate) { GST_ELEMENT_ERROR (self, STREAM, FORMAT, ("Echo Probe has rate %i , while the DSP is running at rate %i," " use a caps filter to ensure those are the same.", - frame.sample_rate_hz_, self->info.rate), (NULL)); + probe->info.rate, self->info.rate), (NULL)); ret = GST_FLOW_ERROR; goto done; } - if (buf) { - webrtc::StreamConfig config (frame.sample_rate_hz_, frame.num_channels_, - false); - GstAudioBuffer abuf; - float * const * data; + gst_audio_buffer_map (&abuf, &self->info, buf, GST_MAP_READWRITE); + + if (probe->interleaved) { + int16_t * const data = (int16_t * const) abuf.planes[0]; - gst_audio_buffer_map (&abuf, &self->info, buf, GST_MAP_READWRITE); - data = (float * const *) abuf.planes; if ((err = apm->ProcessReverseStream (data, config, config, data)) < 0) GST_WARNING_OBJECT (self, "Reverse stream analyses failed: %s.", webrtc_error_to_string (err)); - gst_audio_buffer_unmap (&abuf); - gst_buffer_replace (&buf, NULL); } else { - if ((err = apm->AnalyzeReverseStream (&frame)) < 0) + float * const * data = (float * const *) abuf.planes; + + if ((err = apm->ProcessReverseStream (data, config, config, data)) < 0) GST_WARNING_OBJECT (self, "Reverse stream analyses failed: %s.", webrtc_error_to_string (err)); } - if (self->delay_agnostic) - goto again; + gst_audio_buffer_unmap (&abuf); done: gst_object_unref (probe); @@ -443,16 +431,14 @@ done: static void gst_webrtc_vad_post_activity (GstWebrtcDsp *self, GstBuffer *buffer, - gboolean stream_has_voice) + gboolean stream_has_voice, guint8 level) { GstClockTime timestamp = GST_BUFFER_PTS (buffer); GstBaseTransform *trans = GST_BASE_TRANSFORM_CAST (self); GstStructure *s; GstClockTime stream_time; GstAudioLevelMeta *meta; - guint8 
level; - level = self->apm->level_estimator ()->RMS (); meta = gst_buffer_get_audio_level_meta (buffer); if (meta) { meta->voice_activity = stream_has_voice; @@ -481,6 +467,7 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self, { GstAudioBuffer abuf; webrtc::AudioProcessing * apm = self->apm; + webrtc::StreamConfig config (self->info.rate, self->info.channels, false); gint err; if (!gst_audio_buffer_map (&abuf, &self->info, buffer, @@ -490,19 +477,10 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self, } if (self->interleaved) { - webrtc::AudioFrame frame; - frame.num_channels_ = self->info.channels; - frame.sample_rate_hz_ = self->info.rate; - frame.samples_per_channel_ = self->period_samples; - - memcpy (frame.data_, abuf.planes[0], self->period_size); - err = apm->ProcessStream (&frame); - if (err >= 0) - memcpy (abuf.planes[0], frame.data_, self->period_size); + int16_t * const data = (int16_t * const) abuf.planes[0]; + err = apm->ProcessStream (data, config, config, data); } else { float * const * data = (float * const *) abuf.planes; - webrtc::StreamConfig config (self->info.rate, self->info.channels, false); - err = apm->ProcessStream (data, config, config, data); } @@ -511,10 +489,13 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self, webrtc_error_to_string (err)); } else { if (self->voice_detection) { - gboolean stream_has_voice = apm->voice_detection ()->stream_has_voice (); + webrtc::AudioProcessingStats stats = apm->GetStatistics (); + gboolean stream_has_voice = stats.voice_detected && *stats.voice_detected; + // The meta takes the value as -dbov, so we negate + guint8 level = stats.output_rms_dbfs ? 
(guint8) -(*stats.output_rms_dbfs) : 127; if (stream_has_voice != self->stream_has_voice) - gst_webrtc_vad_post_activity (self, buffer, stream_has_voice); + gst_webrtc_vad_post_activity (self, buffer, stream_has_voice, level); self->stream_has_voice = stream_has_voice; } @@ -583,21 +564,9 @@ static gboolean gst_webrtc_dsp_start (GstBaseTransform * btrans) { GstWebrtcDsp *self = GST_WEBRTC_DSP (btrans); - webrtc::Config config; GST_OBJECT_LOCK (self); - config.Set < webrtc::ExtendedFilter > - (new webrtc::ExtendedFilter (self->extended_filter)); - config.Set < webrtc::ExperimentalAgc > - (new webrtc::ExperimentalAgc (self->experimental_agc, self->startup_min_volume)); - config.Set < webrtc::DelayAgnostic > - (new webrtc::DelayAgnostic (self->delay_agnostic)); - - /* TODO Intelligibility enhancer, Beamforming, etc. */ - - self->apm = webrtc::AudioProcessing::Create (config); - if (self->echo_cancel) { self->probe = gst_webrtc_acquire_echo_probe (self->probe_name); @@ -618,10 +587,8 @@ static gboolean gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) { GstWebrtcDsp *self = GST_WEBRTC_DSP (filter); - webrtc::AudioProcessing * apm; - webrtc::ProcessingConfig pconfig; + webrtc::AudioProcessing::Config config; GstAudioInfo probe_info = *info; - gint err = 0; GST_LOG_OBJECT (self, "setting format to %s with %i Hz and %i channels", info->finfo->description, info->rate, info->channels); @@ -633,7 +600,7 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) self->info = *info; self->interleaved = (info->layout == GST_AUDIO_LAYOUT_INTERLEAVED); - apm = self->apm; + self->apm = webrtc::AudioProcessingBuilder().Create(); if (!self->interleaved) gst_planar_audio_adapter_configure (self->padapter, info); @@ -642,8 +609,7 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) self->period_samples = info->rate / 100; self->period_size = self->period_samples * info->bpf; - if (self->interleaved && - 
(webrtc::AudioFrame::kMaxDataSizeSamples * 2) < self->period_size) + if (self->interleaved && (self->period_size > MAX_DATA_SIZE_SAMPLES * 2)) goto period_too_big; if (self->probe) { @@ -658,40 +624,31 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) GST_WEBRTC_ECHO_PROBE_UNLOCK (self->probe); } - /* input stream */ - pconfig.streams[webrtc::ProcessingConfig::kInputStream] = - webrtc::StreamConfig (info->rate, info->channels, false); - /* output stream */ - pconfig.streams[webrtc::ProcessingConfig::kOutputStream] = - webrtc::StreamConfig (info->rate, info->channels, false); - /* reverse input stream */ - pconfig.streams[webrtc::ProcessingConfig::kReverseInputStream] = - webrtc::StreamConfig (probe_info.rate, probe_info.channels, false); - /* reverse output stream */ - pconfig.streams[webrtc::ProcessingConfig::kReverseOutputStream] = - webrtc::StreamConfig (probe_info.rate, probe_info.channels, false); - - if ((err = apm->Initialize (pconfig)) < 0) - goto initialize_failed; - /* Setup Filters */ + // TODO: expose pre_amplifier + if (self->high_pass_filter) { GST_DEBUG_OBJECT (self, "Enabling High Pass filter"); - apm->high_pass_filter ()->Enable (true); + config.high_pass_filter.enabled = true; } if (self->echo_cancel) { GST_DEBUG_OBJECT (self, "Enabling Echo Cancellation"); - apm->echo_cancellation ()->enable_drift_compensation (false); - apm->echo_cancellation () - ->set_suppression_level (self->echo_suppression_level); - apm->echo_cancellation ()->Enable (true); + config.echo_canceller.enabled = true; } if (self->noise_suppression) { GST_DEBUG_OBJECT (self, "Enabling Noise Suppression"); - apm->noise_suppression ()->set_level (self->noise_suppression_level); - apm->noise_suppression ()->Enable (true); + config.noise_suppression.enabled = true; + config.noise_suppression.level = self->noise_suppression_level; + } + + // TODO: expose transient suppression + + if (self->voice_detection) { + GST_DEBUG_OBJECT (self, "Enabling Voice Activity 
Detection"); + config.voice_detection.enabled = true; + self->stream_has_voice = FALSE; } if (self->gain_control) { @@ -706,30 +663,17 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) g_type_class_unref (mode_class); - apm->gain_control ()->set_mode (self->gain_control_mode); - apm->gain_control ()->set_target_level_dbfs (self->target_level_dbfs); - apm->gain_control ()->set_compression_gain_db (self->compression_gain_db); - apm->gain_control ()->enable_limiter (self->limiter); - apm->gain_control ()->Enable (true); + config.gain_controller1.enabled = true; + config.gain_controller1.target_level_dbfs = self->target_level_dbfs; + config.gain_controller1.compression_gain_db = self->compression_gain_db; + config.gain_controller1.enable_limiter = self->limiter; + config.level_estimation.enabled = true; } - if (self->voice_detection) { - GEnumClass *likelihood_class = (GEnumClass *) - g_type_class_ref (GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD); - GST_DEBUG_OBJECT (self, "Enabling Voice Activity Detection, frame size " - "%d milliseconds, likelihood: %s", self->voice_detection_frame_size_ms, - g_enum_get_value (likelihood_class, - self->voice_detection_likelihood)->value_name); - g_type_class_unref (likelihood_class); + // TODO: expose gain controller 2 + // TODO: expose residual echo detector - self->stream_has_voice = FALSE; - - apm->voice_detection ()->Enable (true); - apm->voice_detection ()->set_likelihood (self->voice_detection_likelihood); - apm->voice_detection ()->set_frame_size_ms ( - self->voice_detection_frame_size_ms); - apm->level_estimator ()->Enable (true); - } + self->apm->ApplyConfig (config); GST_OBJECT_UNLOCK (self); @@ -738,9 +682,9 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) period_too_big: GST_OBJECT_UNLOCK (self); GST_WARNING_OBJECT (self, "webrtcdsp format produce too big period " - "(maximum is %" G_GSIZE_FORMAT " samples and we have %u samples), " + "(maximum is %d samples and we 
have %u samples), " "reduce the number of channels or the rate.", - webrtc::AudioFrame::kMaxDataSizeSamples, self->period_size / 2); + MAX_DATA_SIZE_SAMPLES, self->period_size / 2); return FALSE; probe_has_wrong_rate: @@ -751,14 +695,6 @@ probe_has_wrong_rate: " use a caps filter to ensure those are the same.", probe_info.rate, info->rate), (NULL)); return FALSE; - -initialize_failed: - GST_OBJECT_UNLOCK (self); - GST_ELEMENT_ERROR (self, LIBRARY, INIT, - ("Failed to initialize WebRTC Audio Processing library"), - ("webrtc::AudioProcessing::Initialize() failed: %s", - webrtc_error_to_string (err))); - return FALSE; } static gboolean @@ -803,8 +739,6 @@ gst_webrtc_dsp_set_property (GObject * object, self->echo_cancel = g_value_get_boolean (value); break; case PROP_ECHO_SUPPRESSION_LEVEL: - self->echo_suppression_level = - (GstWebrtcEchoSuppressionLevel) g_value_get_enum (value); break; case PROP_NOISE_SUPPRESSION: self->noise_suppression = g_value_get_boolean (value); @@ -817,13 +751,10 @@ gst_webrtc_dsp_set_property (GObject * object, self->gain_control = g_value_get_boolean (value); break; case PROP_EXPERIMENTAL_AGC: - self->experimental_agc = g_value_get_boolean (value); break; case PROP_EXTENDED_FILTER: - self->extended_filter = g_value_get_boolean (value); break; case PROP_DELAY_AGNOSTIC: - self->delay_agnostic = g_value_get_boolean (value); break; case PROP_TARGET_LEVEL_DBFS: self->target_level_dbfs = g_value_get_int (value); @@ -845,11 +776,8 @@ gst_webrtc_dsp_set_property (GObject * object, self->voice_detection = g_value_get_boolean (value); break; case PROP_VOICE_DETECTION_FRAME_SIZE_MS: - self->voice_detection_frame_size_ms = g_value_get_int (value); break; case PROP_VOICE_DETECTION_LIKELIHOOD: - self->voice_detection_likelihood = - (GstWebrtcVoiceDetectionLikelihood) g_value_get_enum (value); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); @@ -876,7 +804,7 @@ gst_webrtc_dsp_get_property (GObject * object, g_value_set_boolean 
(value, self->echo_cancel); break; case PROP_ECHO_SUPPRESSION_LEVEL: - g_value_set_enum (value, self->echo_suppression_level); + g_value_set_enum (value, (GstWebrtcEchoSuppressionLevel) 2); break; case PROP_NOISE_SUPPRESSION: g_value_set_boolean (value, self->noise_suppression); @@ -888,13 +816,13 @@ gst_webrtc_dsp_get_property (GObject * object, g_value_set_boolean (value, self->gain_control); break; case PROP_EXPERIMENTAL_AGC: - g_value_set_boolean (value, self->experimental_agc); + g_value_set_boolean (value, false); break; case PROP_EXTENDED_FILTER: - g_value_set_boolean (value, self->extended_filter); + g_value_set_boolean (value, false); break; case PROP_DELAY_AGNOSTIC: - g_value_set_boolean (value, self->delay_agnostic); + g_value_set_boolean (value, false); break; case PROP_TARGET_LEVEL_DBFS: g_value_set_int (value, self->target_level_dbfs); @@ -915,10 +843,10 @@ gst_webrtc_dsp_get_property (GObject * object, g_value_set_boolean (value, self->voice_detection); break; case PROP_VOICE_DETECTION_FRAME_SIZE_MS: - g_value_set_int (value, self->voice_detection_frame_size_ms); + g_value_set_int (value, 0); break; case PROP_VOICE_DETECTION_LIKELIHOOD: - g_value_set_enum (value, self->voice_detection_likelihood); + g_value_set_enum (value, 2); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); @@ -1005,13 +933,13 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass) g_object_class_install_property (gobject_class, PROP_ECHO_SUPPRESSION_LEVEL, - g_param_spec_enum ("echo-suppression-level", "Echo Suppression Level", + g_param_spec_enum ("echo-suppression-level", + "Echo Suppression Level (does nothing)", "Controls the aggressiveness of the suppressor. 
A higher level " "trades off double-talk performance for increased echo suppression.", - GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL, - webrtc::EchoCancellation::kModerateSuppression, + GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL, 2, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | - G_PARAM_CONSTRUCT))); + G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); g_object_class_install_property (gobject_class, PROP_NOISE_SUPPRESSION, @@ -1026,7 +954,7 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass) "Controls the aggressiveness of the suppression. Increasing the " "level will reduce the noise level at the expense of a higher " "speech distortion.", GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL, - webrtc::EchoCancellation::kModerateSuppression, + webrtc::AudioProcessing::Config::NoiseSuppression::Level::kModerate, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | G_PARAM_CONSTRUCT))); @@ -1039,24 +967,26 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass) g_object_class_install_property (gobject_class, PROP_EXPERIMENTAL_AGC, - g_param_spec_boolean ("experimental-agc", "Experimental AGC", + g_param_spec_boolean ("experimental-agc", + "Experimental AGC (does nothing)", "Enable or disable experimental automatic gain control.", FALSE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | - G_PARAM_CONSTRUCT))); + G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); g_object_class_install_property (gobject_class, PROP_EXTENDED_FILTER, g_param_spec_boolean ("extended-filter", "Extended Filter", "Enable or disable the extended filter.", TRUE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | - G_PARAM_CONSTRUCT))); + G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); g_object_class_install_property (gobject_class, PROP_DELAY_AGNOSTIC, - g_param_spec_boolean ("delay-agnostic", "Delay Agnostic", + g_param_spec_boolean ("delay-agnostic", + "Delay agnostic mode (does nothing)", "Enable or disable the delay agnostic mode.", FALSE, (GParamFlags) (G_PARAM_READWRITE | 
G_PARAM_STATIC_STRINGS | - G_PARAM_CONSTRUCT))); + G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); g_object_class_install_property (gobject_class, PROP_TARGET_LEVEL_DBFS, @@ -1111,24 +1041,23 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass) g_object_class_install_property (gobject_class, PROP_VOICE_DETECTION_FRAME_SIZE_MS, g_param_spec_int ("voice-detection-frame-size-ms", - "Voice Detection Frame Size Milliseconds", + "Voice detection frame size in milliseconds (does nothing)", "Sets the |size| of the frames in ms on which the VAD will operate. " "Larger frames will improve detection accuracy, but reduce the " "frequency of updates", 10, 30, DEFAULT_VOICE_DETECTION_FRAME_SIZE_MS, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | - G_PARAM_CONSTRUCT))); + G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); g_object_class_install_property (gobject_class, PROP_VOICE_DETECTION_LIKELIHOOD, g_param_spec_enum ("voice-detection-likelihood", - "Voice Detection Likelihood", + "Voice detection likelihood (does nothing)", "Specifies the likelihood that a frame will be declared to contain " "voice.", - GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD, - DEFAULT_VOICE_DETECTION_LIKELIHOOD, + GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD, 2, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | - G_PARAM_CONSTRUCT))); + G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); gst_type_mark_as_plugin_api (GST_TYPE_WEBRTC_GAIN_CONTROL_MODE, (GstPluginAPIFlags) 0); gst_type_mark_as_plugin_api (GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL, (GstPluginAPIFlags) 0); diff --git a/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.cpp b/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.cpp index acdb3d8a7d5..8e8ca064c48 100644 --- a/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.cpp +++ b/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.cpp @@ -33,7 +33,8 @@ #include "gstwebrtcechoprobe.h" -#include <webrtc/modules/interface/module_common_types.h> +#include <modules/audio_processing/include/audio_processing.h> + #include <gst/audio/audio.h> GST_DEBUG_CATEGORY_EXTERN 
(webrtc_dsp_debug); @@ -102,7 +103,7 @@ gst_webrtc_echo_probe_setup (GstAudioFilter * filter, const GstAudioInfo * info) self->period_size = self->period_samples * info->bpf; if (self->interleaved && - (webrtc::AudioFrame::kMaxDataSizeSamples * 2) < self->period_size) + (MAX_DATA_SIZE_SAMPLES * 2) < self->period_size) goto period_too_big; GST_WEBRTC_ECHO_PROBE_UNLOCK (self); @@ -112,9 +113,9 @@ gst_webrtc_echo_probe_setup (GstAudioFilter * filter, const GstAudioInfo * info) period_too_big: GST_WEBRTC_ECHO_PROBE_UNLOCK (self); GST_WARNING_OBJECT (self, "webrtcdsp format produce too big period " - "(maximum is %" G_GSIZE_FORMAT " samples and we have %u samples), " + "(maximum is %d samples and we have %u samples), " "reduce the number of channels or the rate.", - webrtc::AudioFrame::kMaxDataSizeSamples, self->period_size / 2); + MAX_DATA_SIZE_SAMPLES, self->period_size / 2); return FALSE; } @@ -303,18 +304,20 @@ gst_webrtc_release_echo_probe (GstWebrtcEchoProbe * probe) gint gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, GstClockTime rec_time, - gpointer _frame, GstBuffer ** buf) + GstBuffer ** buf) { - webrtc::AudioFrame * frame = (webrtc::AudioFrame *) _frame; GstClockTimeDiff diff; - gsize avail, skip, offset, size; + gsize avail, skip, offset, size = 0; gint delay = -1; GST_WEBRTC_ECHO_PROBE_LOCK (self); + /* We always return a buffer -- if don't have data (size == 0), we generate a + * silence buffer */ + if (!GST_CLOCK_TIME_IS_VALID (self->latency) || !GST_AUDIO_INFO_IS_VALID (&self->info)) - goto done; + goto copy; if (self->interleaved) avail = gst_adapter_available (self->adapter) / self->info.bpf; @@ -324,7 +327,7 @@ gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, GstClockTime rec_time, /* In delay agnostic mode, just return 10ms of data */ if (!GST_CLOCK_TIME_IS_VALID (rec_time)) { if (avail < self->period_samples) - goto done; + goto copy; size = self->period_samples; skip = 0; @@ -371,23 +374,51 @@ gst_webrtc_echo_probe_read 
(GstWebrtcEchoProbe * self, GstClockTime rec_time, size = MIN (avail - offset, self->period_samples - skip); copy: - if (self->interleaved) { - skip *= self->info.bpf; - offset *= self->info.bpf; - size *= self->info.bpf; - - if (size < self->period_size) - memset (frame->data_, 0, self->period_size); - - if (size) { - gst_adapter_copy (self->adapter, (guint8 *) frame->data_ + skip, - offset, size); - gst_adapter_flush (self->adapter, offset + size); - } + if (!size) { + /* No data, provide a period's worth of silence */ + *buf = gst_buffer_new_allocate (NULL, self->period_size, NULL); + gst_buffer_memset (*buf, 0, 0, self->period_size); + gst_buffer_add_audio_meta (*buf, &self->info, self->period_samples, + NULL); } else { + /* We have some actual data, pop period_samples' worth if have it, else pad + * with silence and provide what we do have */ GstBuffer *ret, *taken, *tmp; - if (size) { + if (self->interleaved) { + skip *= self->info.bpf; + offset *= self->info.bpf; + size *= self->info.bpf; + + gst_adapter_flush (self->adapter, offset); + + /* we need to fill silence at the beginning and/or the end of the + * buffer in order to have period_samples in the buffer */ + if (size < self->period_size) { + gsize padding = self->period_size - (skip + size); + + taken = gst_adapter_take_buffer (self->adapter, size); + ret = gst_buffer_new (); + + /* need some silence at the beginning */ + if (skip) { + tmp = gst_buffer_new_allocate (NULL, skip, NULL); + gst_buffer_memset (tmp, 0, 0, skip); + ret = gst_buffer_append (ret, tmp); + } + + ret = gst_buffer_append (ret, taken); + + /* need some silence at the end */ + if (padding) { + tmp = gst_buffer_new_allocate (NULL, padding, NULL); + gst_buffer_memset (tmp, 0, 0, padding); + ret = gst_buffer_append (ret, tmp); + } + } else { + ret = gst_adapter_take_buffer (self->adapter, size); + } + } else { gst_planar_audio_adapter_flush (self->padapter, offset); /* we need to fill silence at the beginning and/or the end of each @@ 
-430,23 +461,13 @@ copy: ret = gst_planar_audio_adapter_take_buffer (self->padapter, size, GST_MAP_READWRITE); } - } else { - ret = gst_buffer_new_allocate (NULL, self->period_size, NULL); - gst_buffer_memset (ret, 0, 0, self->period_size); - gst_buffer_add_audio_meta (ret, &self->info, self->period_samples, - NULL); } *buf = ret; } - frame->num_channels_ = self->info.channels; - frame->sample_rate_hz_ = self->info.rate; - frame->samples_per_channel_ = self->period_samples; - delay = self->delay; -done: GST_WEBRTC_ECHO_PROBE_UNLOCK (self); return delay; diff --git a/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.h b/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.h index 36fd34f1794..488c0e958f3 100644 --- a/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.h +++ b/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.h @@ -45,6 +45,12 @@ G_BEGIN_DECLS #define GST_WEBRTC_ECHO_PROBE_LOCK(obj) g_mutex_lock (&GST_WEBRTC_ECHO_PROBE (obj)->lock) #define GST_WEBRTC_ECHO_PROBE_UNLOCK(obj) g_mutex_unlock (&GST_WEBRTC_ECHO_PROBE (obj)->lock) +/* From the webrtc audio_frame.h definition of kMaxDataSizeSamples: + * Stereo, 32 kHz, 120 ms (2 * 32 * 120) + * Stereo, 192 kHz, 20 ms (2 * 192 * 20) + */ +#define MAX_DATA_SIZE_SAMPLES 7680 + typedef struct _GstWebrtcEchoProbe GstWebrtcEchoProbe; typedef struct _GstWebrtcEchoProbeClass GstWebrtcEchoProbeClass; @@ -71,6 +77,7 @@ struct _GstWebrtcEchoProbe GstClockTime latency; gint delay; gboolean interleaved; + gint extra_delay; GstSegment segment; GstAdapter *adapter; @@ -92,7 +99,7 @@ GST_ELEMENT_REGISTER_DECLARE (webrtcechoprobe); GstWebrtcEchoProbe *gst_webrtc_acquire_echo_probe (const gchar * name); void gst_webrtc_release_echo_probe (GstWebrtcEchoProbe * probe); gint gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, - GstClockTime rec_time, gpointer frame, GstBuffer ** buf); + GstClockTime rec_time, GstBuffer ** buf); G_END_DECLS #endif /* __GST_WEBRTC_ECHO_PROBE_H__ */ diff 
--git a/subprojects/gst-plugins-bad/ext/webrtcdsp/meson.build b/subprojects/gst-plugins-bad/ext/webrtcdsp/meson.build index 5aeae69a44d..09565e27c73 100644 --- a/subprojects/gst-plugins-bad/ext/webrtcdsp/meson.build +++ b/subprojects/gst-plugins-bad/ext/webrtcdsp/meson.build @@ -4,7 +4,7 @@ webrtc_sources = [ 'gstwebrtcdspplugin.cpp' ] -webrtc_dep = dependency('webrtc-audio-processing', version : ['>= 0.2', '< 0.4'], +webrtc_dep = dependency('webrtc-audio-processing-1', version : ['>= 1.0'], required : get_option('webrtcdsp')) if not gnustl_dep.found() and get_option('webrtcdsp').enabled() @@ -20,7 +20,7 @@ if webrtc_dep.found() and gnustl_dep.found() dependencies : [gstbase_dep, gstaudio_dep, gstbadaudio_dep, webrtc_dep, gnustl_dep], install : true, install_dir : plugins_install_dir, - override_options : ['cpp_std=c++11'], + override_options : ['cpp_std=c++17'], ) plugins += [gstwebrtcdsp] endif -- GitLab #From 37aab17be305b8033e682276ad9d4ea2d0ab9ee2 Mon Sep 17 00:00:00 2001 #From: Nirbheek Chauhan #Date: Wed, 31 May 2023 17:51:38 +0530 #Subject: [PATCH 2/2] meson: Update webrtc-audio-processing wrap to 1.1 # #Part-of: #--- # subprojects/webrtc-audio-processing.wrap | 8 ++++---- # 1 file changed, 4 insertions(+), 4 deletions(-) # #diff --git a/subprojects/webrtc-audio-processing.wrap b/subprojects/webrtc-audio-processing.wrap #index 11e9390bc53..bba7dd0b516 100644 #--- a/subprojects/webrtc-audio-processing.wrap #+++ b/subprojects/webrtc-audio-processing.wrap #@@ -1,8 +1,8 @@ # [wrap-git] #-directory=webrtc-audio-processing #-url=https://gitlab.freedesktop.org/pulseaudio/webrtc-audio-processing.git #-push-url=git@gitlab.freedesktop.org:pulseaudio/webrtc-audio-processing.git #-revision=v1.0 #+directory = webrtc-audio-processing #+url = https://gitlab.freedesktop.org/pulseaudio/webrtc-audio-processing.git #+push-url = git@gitlab.freedesktop.org:pulseaudio/webrtc-audio-processing.git #+revision = v1.1 # # [provide] # dependency_names = webrtc-audio-coding-1, 
webrtc-audio-processing-1 #-- #GitLab #