ffmpeg_normalize

 1from ._errors import FFmpegNormalizeError
 2from ._ffmpeg_normalize import FFmpegNormalize
 3from ._media_file import MediaFile
 4from ._streams import AudioStream, MediaStream, SubtitleStream, VideoStream
 5from ._version import __version__
 6
 7__module_name__ = "ffmpeg_normalize"
 8
 9__all__ = [
10    "FFmpegNormalize",
11    "FFmpegNormalizeError",
12    "MediaFile",
13    "AudioStream",
14    "VideoStream",
15    "SubtitleStream",
16    "MediaStream",
17    "__version__",
18]
class FFmpegNormalize:
 51class FFmpegNormalize:
 52    """
 53    ffmpeg-normalize class.
 54
 55    Args:
 56        normalization_type (str, optional): Normalization type. Defaults to "ebu".
 57        target_level (float, optional): Target level. Defaults to -23.0.
 58        print_stats (bool, optional): Print loudnorm stats. Defaults to False.
 59        loudness_range_target (float, optional): Loudness range target. Defaults to 7.0.
 60        keep_loudness_range_target (bool, optional): Keep loudness range target. Defaults to False.
 61        keep_lra_above_loudness_range_target (bool, optional): Keep input loudness range above loudness range target. Defaults to False.
 62        true_peak (float, optional): True peak. Defaults to -2.0.
 63        offset (float, optional): Offset. Defaults to 0.0.
 64        lower_only (bool, optional): Whether the audio should not increase in loudness. Defaults to False.
 65        auto_lower_loudness_target (bool, optional): Automatically lower EBU Integrated Loudness Target.
 66        dual_mono (bool, optional): Dual mono. Defaults to False.
 67        dynamic (bool, optional): Use dynamic EBU R128 normalization. This is a one-pass algorithm and skips the initial media scan. Defaults to False.
 68        audio_codec (str, optional): Audio codec. Defaults to "pcm_s16le".
 69        audio_bitrate (float, optional): Audio bitrate. Defaults to None.
 70        sample_rate (int, optional): Sample rate. Defaults to None.
 71        audio_channels (int | None, optional): Audio channels. Defaults to None.
 72        keep_original_audio (bool, optional): Keep original audio. Defaults to False.
 73        pre_filter (str, optional): Pre filter. Defaults to None.
 74        post_filter (str, optional): Post filter. Defaults to None.
 75        video_codec (str, optional): Video codec. Defaults to "copy".
 76        video_disable (bool, optional): Disable video. Defaults to False.
 77        subtitle_disable (bool, optional): Disable subtitles. Defaults to False.
 78        metadata_disable (bool, optional): Disable metadata. Defaults to False.
 79        chapters_disable (bool, optional): Disable chapters. Defaults to False.
 80        extra_input_options (list, optional): Extra input options. Defaults to None.
 81        extra_output_options (list, optional): Extra output options. Defaults to None.
 82        output_format (str, optional): Output format. Defaults to None.
 83        extension (str, optional): Output file extension to use for output files that were not explicitly specified. Defaults to "mkv".
 84        dry_run (bool, optional): Dry run. Defaults to False.
 85        debug (bool, optional): Debug. Defaults to False.
 86        progress (bool, optional): Progress. Defaults to False.
 87        replaygain (bool, optional): Write ReplayGain tags without normalizing. Defaults to False.
 88
 89    Raises:
 90        FFmpegNormalizeError: If the ffmpeg executable is not found or does not support the loudnorm filter.
 91    """
 92
 93    def __init__(
 94        self,
 95        normalization_type: Literal["ebu", "rms", "peak"] = "ebu",
 96        target_level: float = -23.0,
 97        print_stats: bool = False,
 98        # threshold=0.5,
 99        loudness_range_target: float = 7.0,
100        keep_loudness_range_target: bool = False,
101        keep_lra_above_loudness_range_target: bool = False,
102        true_peak: float = -2.0,
103        offset: float = 0.0,
104        lower_only: bool = False,
105        auto_lower_loudness_target: bool = False,
106        dual_mono: bool = False,
107        dynamic: bool = False,
108        audio_codec: str = "pcm_s16le",
109        audio_bitrate: float | None = None,
110        sample_rate: float | int | None = None,
111        audio_channels: int | None = None,
112        keep_original_audio: bool = False,
113        pre_filter: str | None = None,
114        post_filter: str | None = None,
115        video_codec: str = "copy",
116        video_disable: bool = False,
117        subtitle_disable: bool = False,
118        metadata_disable: bool = False,
119        chapters_disable: bool = False,
120        extra_input_options: list[str] | None = None,
121        extra_output_options: list[str] | None = None,
122        output_format: str | None = None,
123        extension: str = "mkv",
124        dry_run: bool = False,
125        debug: bool = False,
126        progress: bool = False,
127        replaygain: bool = False,
128    ):
129        self.ffmpeg_exe = get_ffmpeg_exe()
130        self.has_loudnorm_capabilities = ffmpeg_has_loudnorm()
131
132        if normalization_type not in NORMALIZATION_TYPES:
133            raise FFmpegNormalizeError(
134                "Normalization type must be: 'ebu', 'rms', or 'peak'"
135            )
136        self.normalization_type = normalization_type
137
138        if not self.has_loudnorm_capabilities and self.normalization_type == "ebu":
139            raise FFmpegNormalizeError(
140                "Your ffmpeg does not support the 'loudnorm' EBU R128 filter. "
141                "Please install ffmpeg v4.2 or above, or choose another normalization type."
142            )
143
144        if self.normalization_type == "ebu":
145            self.target_level = check_range(target_level, -70, -5, name="target_level")
146        else:
147            self.target_level = check_range(target_level, -99, 0, name="target_level")
148
149        self.print_stats = print_stats
150
151        # self.threshold = float(threshold)
152
153        self.loudness_range_target = check_range(
154            loudness_range_target, 1, 50, name="loudness_range_target"
155        )
156
157        self.keep_loudness_range_target = keep_loudness_range_target
158
159        if self.keep_loudness_range_target and loudness_range_target != 7.0:
160            _logger.warning(
161                "Setting --keep-loudness-range-target will override your set loudness range target value! "
162                "Remove --keep-loudness-range-target or remove the --lrt/--loudness-range-target option."
163            )
164
165        self.keep_lra_above_loudness_range_target = keep_lra_above_loudness_range_target
166
167        if (
168            self.keep_loudness_range_target
169            and self.keep_lra_above_loudness_range_target
170        ):
171            raise FFmpegNormalizeError(
172                "Options --keep-loudness-range-target and --keep-lra-above-loudness-range-target are mutually exclusive! "
173                "Please choose just one of the two options."
174            )
175
176        self.true_peak = check_range(true_peak, -9, 0, name="true_peak")
177        self.offset = check_range(offset, -99, 99, name="offset")
178        self.lower_only = lower_only
179        self.auto_lower_loudness_target = auto_lower_loudness_target
180
181        # Ensure library user is passing correct types
182        assert isinstance(dual_mono, bool), "dual_mono must be bool"
183        assert isinstance(dynamic, bool), "dynamic must be bool"
184
185        self.dual_mono = dual_mono
186        self.dynamic = dynamic
187        self.sample_rate = None if sample_rate is None else int(sample_rate)
188        self.audio_channels = None if audio_channels is None else int(audio_channels)
189
190        self.audio_codec = audio_codec
191        self.audio_bitrate = audio_bitrate
192        self.keep_original_audio = keep_original_audio
193        self.video_codec = video_codec
194        self.video_disable = video_disable
195        self.subtitle_disable = subtitle_disable
196        self.metadata_disable = metadata_disable
197        self.chapters_disable = chapters_disable
198
199        self.extra_input_options = extra_input_options
200        self.extra_output_options = extra_output_options
201        self.pre_filter = pre_filter
202        self.post_filter = post_filter
203
204        self.output_format = output_format
205        self.extension = extension
206        self.dry_run = dry_run
207        self.debug = debug
208        self.progress = progress
209        self.replaygain = replaygain
210
211        if (
212            self.audio_codec is None or "pcm" in self.audio_codec
213        ) and self.output_format in PCM_INCOMPATIBLE_FORMATS:
214            raise FFmpegNormalizeError(
215                f"Output format {self.output_format} does not support PCM audio. "
216                "Please choose a suitable audio codec with the -c:a option."
217            )
218
219        # replaygain only works for EBU for now
220        if self.replaygain and self.normalization_type != "ebu":
221            raise FFmpegNormalizeError(
222                "ReplayGain only works for EBU normalization type for now."
223            )
224
225        self.stats: list[LoudnessStatisticsWithMetadata] = []
226        self.media_files: list[MediaFile] = []
227        self.file_count = 0
228
229    def add_media_file(self, input_file: str, output_file: str) -> None:
230        """
231        Add a media file to normalize
232
233        Args:
234            input_file (str): Path to input file
235            output_file (str): Path to output file
236        """
237        if not os.path.exists(input_file):
238            raise FFmpegNormalizeError(f"file {input_file} does not exist")
239
240        ext = os.path.splitext(output_file)[1][1:]
241        if (
242            self.audio_codec is None or "pcm" in self.audio_codec
243        ) and ext in PCM_INCOMPATIBLE_EXTS:
244            raise FFmpegNormalizeError(
245                f"Output extension {ext} does not support PCM audio. "
246                "Please choose a suitable audio codec with the -c:a option."
247            )
248
249        self.media_files.append(MediaFile(self, input_file, output_file))
250        self.file_count += 1
251
252    def run_normalization(self) -> None:
253        """
254        Run the normalization procedures
255        """
256        for index, media_file in enumerate(
257            tqdm(self.media_files, desc="File", disable=not self.progress, position=0)
258        ):
259            _logger.info(
260                f"Normalizing file {media_file} ({index + 1} of {self.file_count})"
261            )
262
263            try:
264                media_file.run_normalization()
265            except Exception as e:
266                if len(self.media_files) > 1:
267                    # simply warn and do not die
268                    _logger.error(
269                        f"Error processing input file {media_file}, will "
270                        f"continue batch-processing. Error was: {e}"
271                    )
272                else:
273                    # raise the error so the program will exit
274                    raise e
275
276        if self.print_stats:
277            json.dump(
278                list(
279                    chain.from_iterable(
280                        media_file.get_stats() for media_file in self.media_files
281                    )
282                ),
283                sys.stdout,
284                indent=4,
285            )
286            print()

ffmpeg-normalize class.

Arguments:
  • normalization_type (str, optional): Normalization type. Defaults to "ebu".
  • target_level (float, optional): Target level. Defaults to -23.0.
  • print_stats (bool, optional): Print loudnorm stats. Defaults to False.
  • loudness_range_target (float, optional): Loudness range target. Defaults to 7.0.
  • keep_loudness_range_target (bool, optional): Keep loudness range target. Defaults to False.
  • keep_lra_above_loudness_range_target (bool, optional): Keep input loudness range above loudness range target. Defaults to False.
  • true_peak (float, optional): True peak. Defaults to -2.0.
  • offset (float, optional): Offset. Defaults to 0.0.
  • lower_only (bool, optional): Whether the audio should not increase in loudness. Defaults to False.
  • auto_lower_loudness_target (bool, optional): Automatically lower EBU Integrated Loudness Target. Defaults to False.
  • dual_mono (bool, optional): Dual mono. Defaults to False.
  • dynamic (bool, optional): Use dynamic EBU R128 normalization. This is a one-pass algorithm and skips the initial media scan. Defaults to False.
  • audio_codec (str, optional): Audio codec. Defaults to "pcm_s16le".
  • audio_bitrate (float, optional): Audio bitrate. Defaults to None.
  • sample_rate (int, optional): Sample rate. Defaults to None.
  • audio_channels (int | None, optional): Audio channels. Defaults to None.
  • keep_original_audio (bool, optional): Keep original audio. Defaults to False.
  • pre_filter (str, optional): Pre filter. Defaults to None.
  • post_filter (str, optional): Post filter. Defaults to None.
  • video_codec (str, optional): Video codec. Defaults to "copy".
  • video_disable (bool, optional): Disable video. Defaults to False.
  • subtitle_disable (bool, optional): Disable subtitles. Defaults to False.
  • metadata_disable (bool, optional): Disable metadata. Defaults to False.
  • chapters_disable (bool, optional): Disable chapters. Defaults to False.
  • extra_input_options (list, optional): Extra input options. Defaults to None.
  • extra_output_options (list, optional): Extra output options. Defaults to None.
  • output_format (str, optional): Output format. Defaults to None.
  • extension (str, optional): Output file extension to use for output files that were not explicitly specified. Defaults to "mkv".
  • dry_run (bool, optional): Dry run. Defaults to False.
  • debug (bool, optional): Debug. Defaults to False.
  • progress (bool, optional): Progress. Defaults to False.
  • replaygain (bool, optional): Write ReplayGain tags without normalizing. Defaults to False.
Raises:
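  • FFmpegNormalizeError: If the ffmpeg executable is not found or does not support the loudnorm filter, if the normalization type is unknown, if mutually exclusive options are combined, if the output format does not support PCM audio, or if ReplayGain is requested for a non-EBU normalization type.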
FFmpegNormalize( normalization_type: Literal['ebu', 'rms', 'peak'] = 'ebu', target_level: float = -23.0, print_stats: bool = False, loudness_range_target: float = 7.0, keep_loudness_range_target: bool = False, keep_lra_above_loudness_range_target: bool = False, true_peak: float = -2.0, offset: float = 0.0, lower_only: bool = False, auto_lower_loudness_target: bool = False, dual_mono: bool = False, dynamic: bool = False, audio_codec: str = 'pcm_s16le', audio_bitrate: float | None = None, sample_rate: float | int | None = None, audio_channels: int | None = None, keep_original_audio: bool = False, pre_filter: str | None = None, post_filter: str | None = None, video_codec: str = 'copy', video_disable: bool = False, subtitle_disable: bool = False, metadata_disable: bool = False, chapters_disable: bool = False, extra_input_options: list[str] | None = None, extra_output_options: list[str] | None = None, output_format: str | None = None, extension: str = 'mkv', dry_run: bool = False, debug: bool = False, progress: bool = False, replaygain: bool = False)
 93    def __init__(
 94        self,
 95        normalization_type: Literal["ebu", "rms", "peak"] = "ebu",
 96        target_level: float = -23.0,
 97        print_stats: bool = False,
 98        # threshold=0.5,
 99        loudness_range_target: float = 7.0,
100        keep_loudness_range_target: bool = False,
101        keep_lra_above_loudness_range_target: bool = False,
102        true_peak: float = -2.0,
103        offset: float = 0.0,
104        lower_only: bool = False,
105        auto_lower_loudness_target: bool = False,
106        dual_mono: bool = False,
107        dynamic: bool = False,
108        audio_codec: str = "pcm_s16le",
109        audio_bitrate: float | None = None,
110        sample_rate: float | int | None = None,
111        audio_channels: int | None = None,
112        keep_original_audio: bool = False,
113        pre_filter: str | None = None,
114        post_filter: str | None = None,
115        video_codec: str = "copy",
116        video_disable: bool = False,
117        subtitle_disable: bool = False,
118        metadata_disable: bool = False,
119        chapters_disable: bool = False,
120        extra_input_options: list[str] | None = None,
121        extra_output_options: list[str] | None = None,
122        output_format: str | None = None,
123        extension: str = "mkv",
124        dry_run: bool = False,
125        debug: bool = False,
126        progress: bool = False,
127        replaygain: bool = False,
128    ):
129        self.ffmpeg_exe = get_ffmpeg_exe()
130        self.has_loudnorm_capabilities = ffmpeg_has_loudnorm()
131
132        if normalization_type not in NORMALIZATION_TYPES:
133            raise FFmpegNormalizeError(
134                "Normalization type must be: 'ebu', 'rms', or 'peak'"
135            )
136        self.normalization_type = normalization_type
137
138        if not self.has_loudnorm_capabilities and self.normalization_type == "ebu":
139            raise FFmpegNormalizeError(
140                "Your ffmpeg does not support the 'loudnorm' EBU R128 filter. "
141                "Please install ffmpeg v4.2 or above, or choose another normalization type."
142            )
143
144        if self.normalization_type == "ebu":
145            self.target_level = check_range(target_level, -70, -5, name="target_level")
146        else:
147            self.target_level = check_range(target_level, -99, 0, name="target_level")
148
149        self.print_stats = print_stats
150
151        # self.threshold = float(threshold)
152
153        self.loudness_range_target = check_range(
154            loudness_range_target, 1, 50, name="loudness_range_target"
155        )
156
157        self.keep_loudness_range_target = keep_loudness_range_target
158
159        if self.keep_loudness_range_target and loudness_range_target != 7.0:
160            _logger.warning(
161                "Setting --keep-loudness-range-target will override your set loudness range target value! "
162                "Remove --keep-loudness-range-target or remove the --lrt/--loudness-range-target option."
163            )
164
165        self.keep_lra_above_loudness_range_target = keep_lra_above_loudness_range_target
166
167        if (
168            self.keep_loudness_range_target
169            and self.keep_lra_above_loudness_range_target
170        ):
171            raise FFmpegNormalizeError(
172                "Options --keep-loudness-range-target and --keep-lra-above-loudness-range-target are mutually exclusive! "
173                "Please choose just one of the two options."
174            )
175
176        self.true_peak = check_range(true_peak, -9, 0, name="true_peak")
177        self.offset = check_range(offset, -99, 99, name="offset")
178        self.lower_only = lower_only
179        self.auto_lower_loudness_target = auto_lower_loudness_target
180
181        # Ensure library user is passing correct types
182        assert isinstance(dual_mono, bool), "dual_mono must be bool"
183        assert isinstance(dynamic, bool), "dynamic must be bool"
184
185        self.dual_mono = dual_mono
186        self.dynamic = dynamic
187        self.sample_rate = None if sample_rate is None else int(sample_rate)
188        self.audio_channels = None if audio_channels is None else int(audio_channels)
189
190        self.audio_codec = audio_codec
191        self.audio_bitrate = audio_bitrate
192        self.keep_original_audio = keep_original_audio
193        self.video_codec = video_codec
194        self.video_disable = video_disable
195        self.subtitle_disable = subtitle_disable
196        self.metadata_disable = metadata_disable
197        self.chapters_disable = chapters_disable
198
199        self.extra_input_options = extra_input_options
200        self.extra_output_options = extra_output_options
201        self.pre_filter = pre_filter
202        self.post_filter = post_filter
203
204        self.output_format = output_format
205        self.extension = extension
206        self.dry_run = dry_run
207        self.debug = debug
208        self.progress = progress
209        self.replaygain = replaygain
210
211        if (
212            self.audio_codec is None or "pcm" in self.audio_codec
213        ) and self.output_format in PCM_INCOMPATIBLE_FORMATS:
214            raise FFmpegNormalizeError(
215                f"Output format {self.output_format} does not support PCM audio. "
216                "Please choose a suitable audio codec with the -c:a option."
217            )
218
219        # replaygain only works for EBU for now
220        if self.replaygain and self.normalization_type != "ebu":
221            raise FFmpegNormalizeError(
222                "ReplayGain only works for EBU normalization type for now."
223            )
224
225        self.stats: list[LoudnessStatisticsWithMetadata] = []
226        self.media_files: list[MediaFile] = []
227        self.file_count = 0
ffmpeg_exe
has_loudnorm_capabilities
normalization_type
print_stats
loudness_range_target
keep_loudness_range_target
keep_lra_above_loudness_range_target
true_peak
offset
lower_only
auto_lower_loudness_target
dual_mono
dynamic
sample_rate
audio_channels
audio_codec
audio_bitrate
keep_original_audio
video_codec
video_disable
subtitle_disable
metadata_disable
chapters_disable
extra_input_options
extra_output_options
pre_filter
post_filter
output_format
extension
dry_run
debug
progress
replaygain
stats: list[ffmpeg_normalize._streams.LoudnessStatisticsWithMetadata]
media_files: list[MediaFile]
file_count
def add_media_file(self, input_file: str, output_file: str) -> None:
229    def add_media_file(self, input_file: str, output_file: str) -> None:
230        """
231        Add a media file to normalize
232
233        Args:
234            input_file (str): Path to input file
235            output_file (str): Path to output file
236        """
237        if not os.path.exists(input_file):
238            raise FFmpegNormalizeError(f"file {input_file} does not exist")
239
240        ext = os.path.splitext(output_file)[1][1:]
241        if (
242            self.audio_codec is None or "pcm" in self.audio_codec
243        ) and ext in PCM_INCOMPATIBLE_EXTS:
244            raise FFmpegNormalizeError(
245                f"Output extension {ext} does not support PCM audio. "
246                "Please choose a suitable audio codec with the -c:a option."
247            )
248
249        self.media_files.append(MediaFile(self, input_file, output_file))
250        self.file_count += 1

Add a media file to normalize

Arguments:
  • input_file (str): Path to input file
  • output_file (str): Path to output file
def run_normalization(self) -> None:
252    def run_normalization(self) -> None:
253        """
254        Run the normalization procedures
255        """
256        for index, media_file in enumerate(
257            tqdm(self.media_files, desc="File", disable=not self.progress, position=0)
258        ):
259            _logger.info(
260                f"Normalizing file {media_file} ({index + 1} of {self.file_count})"
261            )
262
263            try:
264                media_file.run_normalization()
265            except Exception as e:
266                if len(self.media_files) > 1:
267                    # simply warn and do not die
268                    _logger.error(
269                        f"Error processing input file {media_file}, will "
270                        f"continue batch-processing. Error was: {e}"
271                    )
272                else:
273                    # raise the error so the program will exit
274                    raise e
275
276        if self.print_stats:
277            json.dump(
278                list(
279                    chain.from_iterable(
280                        media_file.get_stats() for media_file in self.media_files
281                    )
282                ),
283                sys.stdout,
284                indent=4,
285            )
286            print()

Run the normalization procedures

class FFmpegNormalizeError(builtins.Exception):
class FFmpegNormalizeError(Exception):
    """Error raised by ffmpeg-normalize, e.g. for invalid options or unusable input files."""

Error raised by ffmpeg-normalize, e.g. for invalid options or unusable input files.

class MediaFile:
 56class MediaFile:
 57    """
 58    Class that holds a file, its streams and adjustments
 59    """
 60
 61    def __init__(
 62        self, ffmpeg_normalize: FFmpegNormalize, input_file: str, output_file: str
 63    ):
 64        """
 65        Initialize a media file for later normalization by parsing the streams.
 66
 67        Args:
 68            ffmpeg_normalize (FFmpegNormalize): reference to overall settings
 69            input_file (str): Path to input file
 70            output_file (str): Path to output file
 71        """
 72        self.ffmpeg_normalize = ffmpeg_normalize
 73        self.skip = False
 74        self.input_file = input_file
 75        self.output_file = output_file
 76        current_ext = os.path.splitext(output_file)[1][1:]
 77        # we need to check if it's empty, e.g. /dev/null or NUL
 78        if current_ext == "" or self.output_file == os.devnull:
 79            _logger.debug(
 80                f"Current extension is unset, or output file is a null device, using extension: {self.ffmpeg_normalize.extension}"
 81            )
 82            self.output_ext = self.ffmpeg_normalize.extension
 83        else:
 84            _logger.debug(
 85                f"Current extension is set from output file, using extension: {current_ext}"
 86            )
 87            self.output_ext = current_ext
 88        self.streams: StreamDict = {"audio": {}, "video": {}, "subtitle": {}}
 89        self.temp_file: Union[str, None] = None
 90
 91        self.parse_streams()
 92
 93    def _stream_ids(self) -> list[int]:
 94        """
 95        Get all stream IDs of this file.
 96
 97        Returns:
 98            list: List of stream IDs
 99        """
100        return (
101            list(self.streams["audio"].keys())
102            + list(self.streams["video"].keys())
103            + list(self.streams["subtitle"].keys())
104        )
105
106    def __repr__(self) -> str:
107        return os.path.basename(self.input_file)
108
109    def parse_streams(self) -> None:
110        """
111        Try to parse all input streams from file and set them in self.streams.
112
113        Raises:
114            FFmpegNormalizeError: If no audio streams are found
115        """
116        _logger.debug(f"Parsing streams of {self.input_file}")
117
118        cmd = [
119            self.ffmpeg_normalize.ffmpeg_exe,
120            "-i",
121            self.input_file,
122            "-c",
123            "copy",
124            "-t",
125            "0",
126            "-map",
127            "0",
128            "-f",
129            "null",
130            os.devnull,
131        ]
132
133        output = CommandRunner().run_command(cmd).get_output()
134
135        _logger.debug("Stream parsing command output:")
136        _logger.debug(output)
137
138        output_lines = [line.strip() for line in output.split("\n")]
139
140        duration = None
141        for line in output_lines:
142            if "Duration" in line:
143                if duration_search := DUR_REGEX.search(line):
144                    duration = _to_ms(**duration_search.groupdict()) / 1000
145                    _logger.debug(f"Found duration: {duration} s")
146                else:
147                    _logger.warning("Could not extract duration from input file!")
148
149            if not line.startswith("Stream"):
150                continue
151
152            if stream_id_match := re.search(r"#0:([\d]+)", line):
153                stream_id = int(stream_id_match.group(1))
154                if stream_id in self._stream_ids():
155                    continue
156            else:
157                continue
158
159            if "Audio" in line:
160                _logger.debug(f"Found audio stream at index {stream_id}")
161                sample_rate_match = re.search(r"(\d+) Hz", line)
162                sample_rate = (
163                    int(sample_rate_match.group(1)) if sample_rate_match else None
164                )
165                bit_depth_match = re.search(r"[sfu](\d+)(p|le|be)?", line)
166                bit_depth = int(bit_depth_match.group(1)) if bit_depth_match else None
167                self.streams["audio"][stream_id] = AudioStream(
168                    self.ffmpeg_normalize,
169                    self,
170                    stream_id,
171                    sample_rate,
172                    bit_depth,
173                    duration,
174                )
175
176            elif "Video" in line:
177                _logger.debug(f"Found video stream at index {stream_id}")
178                self.streams["video"][stream_id] = VideoStream(
179                    self.ffmpeg_normalize, self, stream_id
180                )
181
182            elif "Subtitle" in line:
183                _logger.debug(f"Found subtitle stream at index {stream_id}")
184                self.streams["subtitle"][stream_id] = SubtitleStream(
185                    self.ffmpeg_normalize, self, stream_id
186                )
187
188        if not self.streams["audio"]:
189            raise FFmpegNormalizeError(
190                f"Input file {self.input_file} does not contain any audio streams"
191            )
192
193        if (
194            self.output_ext.lower() in ONE_STREAM
195            and len(self.streams["audio"].values()) > 1
196        ):
197            _logger.warning(
198                "Output file only supports one stream. Keeping only first audio stream."
199            )
200            first_stream = list(self.streams["audio"].values())[0]
201            self.streams["audio"] = {first_stream.stream_id: first_stream}
202            self.streams["video"] = {}
203            self.streams["subtitle"] = {}
204
def run_normalization(self) -> None:
    """
    Run the normalization process for this file.

    The first pass is run unless dynamic EBU mode is active, followed by the
    second pass. When ReplayGain is enabled, ``self.output_file`` is
    redirected to a temporary path and the ReplayGain step is run afterwards
    instead of reporting a written output file.
    """
    _logger.debug(f"Running normalization for {self.input_file}")

    settings = self.ffmpeg_normalize

    # run the first pass to get loudness stats, unless in dynamic EBU mode
    if settings.dynamic and settings.normalization_type == "ebu":
        _logger.debug(
            "Dynamic EBU mode: First pass will not run, as it is not needed."
        )
    else:
        self._first_pass()

    # for second pass, create a temp file
    temp_dir = mkdtemp()
    try:
        self.temp_file = os.path.join(temp_dir, f"out.{self.output_ext}")

        if settings.replaygain:
            _logger.debug(
                "ReplayGain mode: Second pass will run with temporary file to get stats."
            )
            self.output_file = self.temp_file

        # run the second pass as a whole.
        if settings.progress:
            with tqdm(
                total=100,
                position=1,
                desc="Second Pass",
                bar_format=TQDM_BAR_FORMAT,
            ) as pbar:
                for progress in self._second_pass():
                    pbar.update(progress - pbar.n)
        else:
            for _ in self._second_pass():
                pass
    finally:
        # Always remove the temp dir, also when the second pass raises, so that
        # no directory is left behind. ignore_errors covers a missing directory.
        rmtree(temp_dir, ignore_errors=True)

    # This will use stats from ebu_pass2 if available (from the main second pass),
    # or fall back to ebu_pass1.
    if settings.replaygain:
        _logger.debug(
            "ReplayGain tagging is enabled. Proceeding with tag calculation/application."
        )
        self._run_replaygain()
    else:
        _logger.info(f"Normalized file written to {self.output_file}")
260
def _run_replaygain(self) -> None:
    """
    Calculate the ReplayGain values for this file and write them as tags.

    The statistics of the first audio stream are used: second pass EBU
    statistics when present, otherwise first pass EBU statistics. Nothing is
    written when no usable statistics exist or when dry-run mode is enabled.
    """
    _logger.debug(f"Running replaygain for {self.input_file}")

    audio_streams = list(self.streams["audio"].values())

    # Prefer the second pass statistics, which account for pre-filters. They
    # are populated by the main second pass if it runs (not a dry run) and
    # normalization_type is 'ebu'.
    loudness_stats_source = "ebu_pass2"
    chosen_stats = audio_streams[0].loudness_statistics.get("ebu_pass2")

    if chosen_stats is None:
        _logger.warning(
            "ReplayGain: Second pass EBU statistics (ebu_pass2) not found. "
            "Falling back to first pass EBU statistics (ebu_pass1). "
            "This may not account for pre-filters if any are used."
        )
        loudness_stats_source = "ebu_pass1"
        chosen_stats = audio_streams[0].loudness_statistics.get("ebu_pass1")

        if chosen_stats is None:
            _logger.error(
                f"ReplayGain: No loudness statistics available from {loudness_stats_source} (and fallback) for stream 0. "
                "Cannot calculate ReplayGain tags."
            )
            return

    _logger.debug(
        f"Using statistics from {loudness_stats_source} for ReplayGain calculation."
    )

    # the tag is derived from the first audio stream only
    if len(audio_streams) > 1:
        _logger.warning(
            f"Your input file has {len(audio_streams)} audio streams. "
            "Only the first audio stream's replaygain tag will be applied. "
            "All audio streams will receive the same tag."
        )

    wanted_level = self.ffmpeg_normalize.target_level
    # For ebu_pass2, these are measurements *after* pre-filter but *before* loudnorm adjustment.
    measured_loudness = chosen_stats.get("input_i")
    measured_peak = chosen_stats.get("input_tp")

    if measured_loudness is None or measured_peak is None:
        _logger.error(
            f"ReplayGain: 'input_i' or 'input_tp' missing from {loudness_stats_source} statistics. "
            "Cannot calculate ReplayGain tags."
        )
        return

    track_gain = -(measured_loudness - wanted_level)  # dB
    track_peak = 10 ** (measured_peak / 20)  # linear scale

    _logger.debug(f"Calculated Track gain: {track_gain:.2f} dB")
    _logger.debug(f"Calculated Track peak: {track_peak:.2f}")

    # this uses the overall dry_run state
    if self.ffmpeg_normalize.dry_run:
        _logger.warning(
            "Overall dry_run is enabled, not actually writing ReplayGain tags to the file. "
            "Tag calculation based on available stats was performed."
        )
        return

    self._write_replaygain_tags(track_gain, track_peak)
330
def _write_replaygain_tags(self, track_gain: float, track_peak: float) -> None:
    """
    Write the replaygain tags to the input file.

    The tag format is chosen from the input file extension (compared
    case-insensitively): .mp3, .mp4/.m4a/.m4v/.mov, .ogg and .opus are
    handled, any other extension is reported with an error log and nothing
    is written.

    This is based on the code from bohning/usdb_syncer, licensed under the MIT license.
    See: https://github.com/bohning/usdb_syncer/blob/2fa638c4f487dffe9f5364f91e156ba54cb20233/src/usdb_syncer/resource_dl.py

    Args:
        track_gain (float): Track gain in dB
        track_peak (float): Track peak on a linear scale
    """
    _logger.debug(f"Writing ReplayGain tags to {self.input_file}")

    gain_text = f"{track_gain:.2f} dB"
    peak_text = f"{track_peak:.6f}"

    input_file_ext = os.path.splitext(self.input_file)[1]
    # compare in lower case so that e.g. "SONG.MP3" is handled like "song.mp3"
    ext = input_file_ext.lower()

    if ext == ".mp3":
        mp3 = MP3(self.input_file, ID3=ID3)
        # The tag container is dict-like, so an existing but empty tag is
        # falsy. Only a missing container (None) needs to be created.
        if mp3.tags is None:
            mp3.add_tags()
        if mp3.tags is None:
            _logger.error(
                f"Could not create a tag container for {self.input_file}, not writing ReplayGain tags."
            )
            return
        mp3.tags.add(TXXX(desc="REPLAYGAIN_TRACK_GAIN", text=[gain_text]))
        mp3.tags.add(TXXX(desc="REPLAYGAIN_TRACK_PEAK", text=[peak_text]))
        mp3.save()
    elif ext in (".mp4", ".m4a", ".m4v", ".mov"):
        mp4 = MP4(self.input_file)
        # Same as above: a freshly added tag container is empty and would be
        # falsy, so test for None instead of truthiness.
        if mp4.tags is None:
            mp4.add_tags()
        if mp4.tags is None:
            _logger.error(
                f"Could not create a tag container for {self.input_file}, not writing ReplayGain tags."
            )
            return
        mp4.tags["----:com.apple.iTunes:REPLAYGAIN_TRACK_GAIN"] = [
            gain_text.encode()
        ]
        mp4.tags["----:com.apple.iTunes:REPLAYGAIN_TRACK_PEAK"] = [
            peak_text.encode()
        ]
        mp4.save()
    elif ext == ".ogg":
        ogg = OggVorbis(self.input_file)
        ogg["REPLAYGAIN_TRACK_GAIN"] = [gain_text]
        ogg["REPLAYGAIN_TRACK_PEAK"] = [peak_text]
        ogg.save()
    elif ext == ".opus":
        opus = OggOpus(self.input_file)
        # See https://datatracker.ietf.org/doc/html/rfc7845#section-5.2.1
        opus["R128_TRACK_GAIN"] = [str(round(256 * track_gain))]
        opus.save()
    else:
        _logger.error(
            f"Unsupported input file extension: {input_file_ext} for writing replaygain tags. "
            "Only .mp3, .mp4/.m4a, .ogg, .opus are supported. "
            "If you think this should support more formats, please let me know at "
            "https://github.com/slhck/ffmpeg-normalize/issues"
        )
        return

    _logger.info(
        f"Successfully wrote replaygain tags to input file {self.input_file}"
    )
385
def _can_write_output_video(self) -> bool:
    """
    Tell whether video streams may be written to the output file.

    Returns:
        bool: False if the output extension is an audio-only format or video
            has been disabled in the settings, True otherwise
    """
    is_audio_only = self.output_ext.lower() in AUDIO_ONLY_FORMATS
    return not is_audio_only and not self.ffmpeg_normalize.video_disable
397
def _first_pass(self) -> None:
    """
    Run the first (analysis) pass for every audio stream of this file.

    EBU normalization uses the stream's loudnorm statistics parser, every
    other normalization type uses the astats parser. A progress bar is shown
    per stream when progress reporting is enabled.
    """
    _logger.debug(f"Parsing normalization info for {self.input_file}")

    for index, audio_stream in enumerate(self.streams["audio"].values()):
        analyze = (
            audio_stream.parse_loudnorm_stats
            if self.ffmpeg_normalize.normalization_type == "ebu"
            else audio_stream.parse_astats
        )

        if not self.ffmpeg_normalize.progress:
            # the parser is a generator and has to be exhausted to do its work
            for _ in analyze():
                pass
            continue

        with tqdm(
            total=100,
            position=1,
            desc=f"Stream {index + 1}/{len(self.streams['audio'].values())}",
            bar_format=TQDM_BAR_FORMAT,
        ) as pbar:
            for percent in analyze():
                pbar.update(percent - pbar.n)
422
def _get_audio_filter_cmd(self) -> tuple[str, list[str]]:
    """
    Build the filter_complex expression for all audio streams.

    Each audio stream gets its own chain of optional pre-filter,
    normalization filter and optional post-filter. In lower-only mode a
    stream whose measured level is below the target level gets ``acopy``
    in place of the normalization filter.

    Returns:
        tuple[str, list[str]]: filter_complex command and the required output labels
    """
    settings = self.ffmpeg_normalize
    chains = []
    labels = []

    for stream in self.streams["audio"].values():
        below_target = False
        if settings.lower_only:
            stats = stream.loudness_statistics
            if settings.normalization_type == "ebu":
                below_target = (
                    stats["ebu_pass1"] is not None
                    and stats["ebu_pass1"]["input_i"] < settings.target_level
                )
            elif settings.normalization_type == "peak":
                below_target = (
                    stats["max"] is not None
                    and stats["max"] < settings.target_level
                )
            elif settings.normalization_type == "rms":
                below_target = (
                    stats["mean"] is not None
                    and stats["mean"] < settings.target_level
                )

        if below_target:
            _logger.warning(
                f"Stream {stream.stream_id} had measured input loudness lower than target, skipping normalization."
            )
            normalization_filter = "acopy"
        elif settings.normalization_type == "ebu":
            normalization_filter = stream.get_second_pass_opts_ebu()
        else:
            normalization_filter = stream.get_second_pass_opts_peakrms()

        source_label = f"[0:{stream.stream_id}]"
        sink_label = f"[norm{stream.stream_id}]"
        labels.append(sink_label)

        # pre- and post-filter are optional, the normalization filter is always present
        stages = [settings.pre_filter] if settings.pre_filter else []
        stages.append(normalization_filter)
        if settings.post_filter:
            stages.append(settings.post_filter)

        chains.append(source_label + ",".join(stages) + sink_label)

    return ";".join(chains), labels
488
def _second_pass(self) -> Iterator[float]:
    """
    Construct the second pass command and run it.

    The command is written to a temporary file which is moved to
    ``self.output_file`` on success, unless the output is the null device or
    ReplayGain mode is enabled. In dry-run mode the command is only handed to
    a dry ``CommandRunner`` and a single ``100`` is yielded.

    Yields:
        float: The values produced by ``CommandRunner.run_ffmpeg_command``
            (``run_normalization`` uses them to update its progress bar).

    Raises:
        Exception: Any error raised while running the command is logged and
            re-raised unchanged.

    FIXME: make this method simpler
    """
    _logger.info(f"Running second pass for {self.input_file}")

    # get the target output stream types depending on the options
    output_stream_types: list[Literal["audio", "video", "subtitle"]] = ["audio"]
    if self._can_write_output_video():
        output_stream_types.append("video")
    if not self.ffmpeg_normalize.subtitle_disable:
        output_stream_types.append("subtitle")

    # base command, here we will add all other options
    cmd = [self.ffmpeg_normalize.ffmpeg_exe, "-hide_banner", "-y"]

    # extra options (if any)
    if self.ffmpeg_normalize.extra_input_options:
        cmd.extend(self.ffmpeg_normalize.extra_input_options)

    # get complex filter command
    audio_filter_cmd, output_labels = self._get_audio_filter_cmd()

    # add input file and basic filter
    cmd.extend(["-i", self.input_file, "-filter_complex", audio_filter_cmd])

    # map metadata, only if needed
    if self.ffmpeg_normalize.metadata_disable:
        cmd.extend(["-map_metadata", "-1"])
    else:
        # map global metadata
        cmd.extend(["-map_metadata", "0"])
        # map per-stream metadata (e.g. language tags)
        for stream_type in output_stream_types:
            stream_key = stream_type[0]
            if stream_type not in self.streams:
                continue
            for idx in range(len(self.streams[stream_type])):
                cmd.extend(
                    [
                        f"-map_metadata:s:{stream_key}:{idx}",
                        f"0:s:{stream_key}:{idx}",
                    ]
                )

    # map chapters if needed
    if self.ffmpeg_normalize.chapters_disable:
        cmd.extend(["-map_chapters", "-1"])
    else:
        cmd.extend(["-map_chapters", "0"])

    # collect all '-map' and codecs needed for output video based on input video
    if self.streams["video"]:
        if self._can_write_output_video():
            for s in self.streams["video"].keys():
                cmd.extend(["-map", f"0:{s}"])
            # set codec (copy by default)
            cmd.extend(["-c:v", self.ffmpeg_normalize.video_codec])
        elif not self.ffmpeg_normalize.video_disable:
            _logger.warning(
                f"The chosen output extension {self.output_ext} does not support video/cover art. It will be disabled."
            )

    # ... and map the output of the normalization filters
    for ol in output_labels:
        cmd.extend(["-map", ol])

    # set audio codec (never copy)
    if self.ffmpeg_normalize.audio_codec:
        cmd.extend(["-c:a", self.ffmpeg_normalize.audio_codec])
    else:
        for index, audio_stream in enumerate(self.streams["audio"].values()):
            cmd.extend([f"-c:a:{index}", audio_stream.get_pcm_codec()])

    # other audio options (if any)
    if self.ffmpeg_normalize.audio_bitrate:
        if self.ffmpeg_normalize.audio_codec == "libvorbis":
            # libvorbis takes just a "-b" option, for some reason
            # https://github.com/slhck/ffmpeg-normalize/issues/277
            cmd.extend(["-b", str(self.ffmpeg_normalize.audio_bitrate)])
        else:
            cmd.extend(["-b:a", str(self.ffmpeg_normalize.audio_bitrate)])
    if self.ffmpeg_normalize.sample_rate:
        cmd.extend(["-ar", str(self.ffmpeg_normalize.sample_rate)])
    if self.ffmpeg_normalize.audio_channels:
        cmd.extend(["-ac", str(self.ffmpeg_normalize.audio_channels)])

    # ... and subtitles
    if not self.ffmpeg_normalize.subtitle_disable:
        for s in self.streams["subtitle"].keys():
            cmd.extend(["-map", f"0:{s}"])
        # copy subtitles
        cmd.extend(["-c:s", "copy"])

    if self.ffmpeg_normalize.keep_original_audio:
        # the original audio streams are mapped after the normalized ones, so
        # their codec option indices start after the normalized streams
        highest_index = len(self.streams["audio"])
        for index in range(highest_index):
            cmd.extend(["-map", f"0:a:{index}"])
            cmd.extend([f"-c:a:{highest_index + index}", "copy"])

    # extra options (if any)
    if self.ffmpeg_normalize.extra_output_options:
        cmd.extend(self.ffmpeg_normalize.extra_output_options)

    # output format (if any)
    if self.ffmpeg_normalize.output_format:
        cmd.extend(["-f", self.ffmpeg_normalize.output_format])

    # if dry run, only show sample command
    if self.ffmpeg_normalize.dry_run:
        cmd.append(self.output_file)
        _logger.warning("Dry run used, not actually running second-pass command")
        CommandRunner(dry=True).run_command(cmd)
        yield 100
        return

    # track temp_dir for cleanup
    temp_dir = None
    temp_file = None

    # special case: if output is a null device, write directly to it
    if self.output_file == os.devnull:
        cmd.append(self.output_file)
    else:
        temp_dir = mkdtemp()
        temp_file = os.path.join(temp_dir, f"out.{self.output_ext}")
        cmd.append(temp_file)

    cmd_runner = CommandRunner()
    try:
        yield from cmd_runner.run_ffmpeg_command(cmd)
    except Exception as e:
        _logger.error(f"Error while running command {shlex.join(cmd)}! Error: {e}")
        # bare raise keeps the original traceback intact
        raise
    else:
        # only move the temp file if it's not a null device and ReplayGain is not enabled!
        if (
            self.output_file != os.devnull
            and temp_file
            and not self.ffmpeg_normalize.replaygain
        ):
            _logger.debug(
                f"Moving temporary file from {temp_file} to {self.output_file}"
            )
            move(temp_file, self.output_file)
    finally:
        # clean up temp directory if it was created
        if temp_dir and os.path.exists(temp_dir):
            rmtree(temp_dir, ignore_errors=True)

    output = cmd_runner.get_output()
    # in the second pass, we do not normalize stream-by-stream, so we set the stats based on the
    # overall output (which includes multiple loudnorm stats)
    if self.ffmpeg_normalize.normalization_type == "ebu":
        ebu_pass_2_stats = list(
            AudioStream.prune_and_parse_loudnorm_output(output).values()
        )
        audio_streams = list(self.streams["audio"].values())
        # Fewer result blocks than streams can occur, e.g. when a stream was
        # passed through with "acopy". Indexing by position would then raise
        # an IndexError, so pair them up only as far as results exist.
        if len(ebu_pass_2_stats) < len(audio_streams):
            _logger.warning(
                f"Found second pass statistics for {len(ebu_pass_2_stats)} of {len(audio_streams)} audio streams; "
                "statistics are assigned in order as far as available."
            )
        for audio_stream, second_pass_stats in zip(audio_streams, ebu_pass_2_stats):
            audio_stream.set_second_pass_stats(second_pass_stats)

    # warn if linear normalization was requested but any of the second pass stats
    # report "normalization_type" == "dynamic"
    if self.ffmpeg_normalize.dynamic is False:
        for audio_stream in self.streams["audio"].values():
            pass2_stats = audio_stream.get_stats()["ebu_pass2"]
            if pass2_stats is None:
                continue
            if pass2_stats.get("normalization_type") == "dynamic":
                _logger.warning(
                    "You specified linear normalization, but the loudnorm filter reverted to dynamic normalization. "
                    "This may lead to unexpected results. "
                    "Consider your input settings, e.g. choose a lower target level or higher target loudness range."
                )

    _logger.debug("Normalization finished")
662
def get_stats(self) -> Iterable[LoudnessStatisticsWithMetadata]:
    """
    Return the statistics of all audio streams of this file.

    Returns:
        Iterable[LoudnessStatisticsWithMetadata]: a lazy iterable with the
            result of ``get_stats()`` of each audio stream, in stream order
    """
    audio_streams = self.streams["audio"].values()
    return (stream.get_stats() for stream in audio_streams)

Class that holds a file, its streams and adjustments

MediaFile( ffmpeg_normalize: FFmpegNormalize, input_file: str, output_file: str)
61    def __init__(
62        self, ffmpeg_normalize: FFmpegNormalize, input_file: str, output_file: str
63    ):
64        """
65        Initialize a media file for later normalization by parsing the streams.
66
67        Args:
68            ffmpeg_normalize (FFmpegNormalize): reference to overall settings
69            input_file (str): Path to input file
70            output_file (str): Path to output file
71        """
72        self.ffmpeg_normalize = ffmpeg_normalize
73        self.skip = False
74        self.input_file = input_file
75        self.output_file = output_file
76        current_ext = os.path.splitext(output_file)[1][1:]
77        # we need to check if it's empty, e.g. /dev/null or NUL
78        if current_ext == "" or self.output_file == os.devnull:
79            _logger.debug(
80                f"Current extension is unset, or output file is a null device, using extension: {self.ffmpeg_normalize.extension}"
81            )
82            self.output_ext = self.ffmpeg_normalize.extension
83        else:
84            _logger.debug(
85                f"Current extension is set from output file, using extension: {current_ext}"
86            )
87            self.output_ext = current_ext
88        self.streams: StreamDict = {"audio": {}, "video": {}, "subtitle": {}}
89        self.temp_file: Union[str, None] = None
90
91        self.parse_streams()

Initialize a media file for later normalization by parsing the streams.

Arguments:
  • ffmpeg_normalize (FFmpegNormalize): reference to overall settings
  • input_file (str): Path to input file
  • output_file (str): Path to output file
ffmpeg_normalize
skip
input_file
output_file
streams: ffmpeg_normalize._media_file.StreamDict
temp_file: Optional[str]
def parse_streams(self) -> None:
def parse_streams(self) -> None:
    """
    Read the streams of the input file from ffmpeg's output into self.streams.

    Audio, video and subtitle streams are registered under their stream
    index. If the output extension is listed in ONE_STREAM and more than one
    audio stream was found, only the first audio stream is kept and video and
    subtitle streams are dropped.

    Raises:
        FFmpegNormalizeError: If no audio streams are found
    """
    _logger.debug(f"Parsing streams of {self.input_file}")

    probe_cmd = [self.ffmpeg_normalize.ffmpeg_exe, "-i", self.input_file]
    probe_cmd += ["-c", "copy", "-t", "0", "-map", "0", "-f", "null", os.devnull]

    output = CommandRunner().run_command(probe_cmd).get_output()

    _logger.debug("Stream parsing command output:")
    _logger.debug(output)

    duration = None
    for raw_line in output.split("\n"):
        line = raw_line.strip()

        if "Duration" in line:
            duration_search = DUR_REGEX.search(line)
            if duration_search is not None:
                duration = _to_ms(**duration_search.groupdict()) / 1000
                _logger.debug(f"Found duration: {duration} s")
            else:
                _logger.warning("Could not extract duration from input file!")

        if not line.startswith("Stream"):
            continue

        stream_id_match = re.search(r"#0:([\d]+)", line)
        if stream_id_match is None:
            continue
        stream_id = int(stream_id_match.group(1))
        # a stream index that is already registered is not parsed again
        if stream_id in self._stream_ids():
            continue

        if "Audio" in line:
            _logger.debug(f"Found audio stream at index {stream_id}")
            rate_match = re.search(r"(\d+) Hz", line)
            depth_match = re.search(r"[sfu](\d+)(p|le|be)?", line)
            self.streams["audio"][stream_id] = AudioStream(
                self.ffmpeg_normalize,
                self,
                stream_id,
                int(rate_match.group(1)) if rate_match else None,
                int(depth_match.group(1)) if depth_match else None,
                duration,
            )
        elif "Video" in line:
            _logger.debug(f"Found video stream at index {stream_id}")
            self.streams["video"][stream_id] = VideoStream(
                self.ffmpeg_normalize, self, stream_id
            )
        elif "Subtitle" in line:
            _logger.debug(f"Found subtitle stream at index {stream_id}")
            self.streams["subtitle"][stream_id] = SubtitleStream(
                self.ffmpeg_normalize, self, stream_id
            )

    if not self.streams["audio"]:
        raise FFmpegNormalizeError(
            f"Input file {self.input_file} does not contain any audio streams"
        )

    single_stream_output = self.output_ext.lower() in ONE_STREAM
    if single_stream_output and len(self.streams["audio"].values()) > 1:
        _logger.warning(
            "Output file only supports one stream. Keeping only first audio stream."
        )
        kept_stream = next(iter(self.streams["audio"].values()))
        self.streams["audio"] = {kept_stream.stream_id: kept_stream}
        self.streams["video"] = {}
        self.streams["subtitle"] = {}

Try to parse all input streams from file and set them in self.streams.

Raises:
  • FFmpegNormalizeError: If no audio streams are found
def run_normalization(self) -> None:
def run_normalization(self) -> None:
    """
    Run the normalization process for this file.

    The first pass is run unless dynamic EBU mode is active, followed by the
    second pass. When ReplayGain is enabled, ``self.output_file`` is
    redirected to a temporary path and the ReplayGain step is run afterwards
    instead of reporting a written output file.
    """
    _logger.debug(f"Running normalization for {self.input_file}")

    settings = self.ffmpeg_normalize

    # run the first pass to get loudness stats, unless in dynamic EBU mode
    if settings.dynamic and settings.normalization_type == "ebu":
        _logger.debug(
            "Dynamic EBU mode: First pass will not run, as it is not needed."
        )
    else:
        self._first_pass()

    # for second pass, create a temp file
    temp_dir = mkdtemp()
    try:
        self.temp_file = os.path.join(temp_dir, f"out.{self.output_ext}")

        if settings.replaygain:
            _logger.debug(
                "ReplayGain mode: Second pass will run with temporary file to get stats."
            )
            self.output_file = self.temp_file

        # run the second pass as a whole.
        if settings.progress:
            with tqdm(
                total=100,
                position=1,
                desc="Second Pass",
                bar_format=TQDM_BAR_FORMAT,
            ) as pbar:
                for progress in self._second_pass():
                    pbar.update(progress - pbar.n)
        else:
            for _ in self._second_pass():
                pass
    finally:
        # Always remove the temp dir, also when the second pass raises, so that
        # no directory is left behind. ignore_errors covers a missing directory.
        rmtree(temp_dir, ignore_errors=True)

    # This will use stats from ebu_pass2 if available (from the main second pass),
    # or fall back to ebu_pass1.
    if settings.replaygain:
        _logger.debug(
            "ReplayGain tagging is enabled. Proceeding with tag calculation/application."
        )
        self._run_replaygain()
    else:
        _logger.info(f"Normalized file written to {self.output_file}")

Run the normalization process for this file.

def get_stats( self) -> Iterable[ffmpeg_normalize._streams.LoudnessStatisticsWithMetadata]:
def get_stats(self) -> Iterable[LoudnessStatisticsWithMetadata]:
    """
    Return the statistics of all audio streams of this file.

    Returns:
        Iterable[LoudnessStatisticsWithMetadata]: a lazy iterable with the
            result of ``get_stats()`` of each audio stream, in stream order
    """
    audio_streams = self.streams["audio"].values()
    return (stream.get_stats() for stream in audio_streams)
class AudioStream(ffmpeg_normalize.MediaStream):
 94class AudioStream(MediaStream):
 95    def __init__(
 96        self,
 97        ffmpeg_normalize: FFmpegNormalize,
 98        media_file: MediaFile,
 99        stream_id: int,
100        sample_rate: int | None,
101        bit_depth: int | None,
102        duration: float | None,
103    ):
104        """
105        Create an AudioStream object.
106
107        Args:
108            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
109            media_file (MediaFile): The MediaFile object.
110            stream_id (int): The stream ID.
111            sample_rate (int): sample rate in Hz
112            bit_depth (int): bit depth in bits
113            duration (float): duration in seconds
114        """
115        super().__init__(ffmpeg_normalize, media_file, "audio", stream_id)
116
117        self.loudness_statistics: LoudnessStatistics = {
118            "ebu_pass1": None,
119            "ebu_pass2": None,
120            "mean": None,
121            "max": None,
122        }
123
124        self.sample_rate = sample_rate
125        self.bit_depth = bit_depth
126
127        self.duration = duration
128
129    @staticmethod
130    def _constrain(
131        number: float, min_range: float, max_range: float, name: str | None = None
132    ) -> float:
133        """
134        Constrain a number between two values.
135
136        Args:
137            number (float): The number to constrain.
138            min_range (float): The minimum value.
139            max_range (float): The maximum value.
140            name (str): The name of the number (for logging).
141
142        Returns:
143            float: The constrained number.
144
145        Raises:
146            ValueError: If min_range is greater than max_range.
147        """
148        if min_range > max_range:
149            raise ValueError("min must be smaller than max")
150        result = max(min(number, max_range), min_range)
151        if result != number and name is not None:
152            _logger.warning(
153                f"Constraining {name} to range of [{min_range}, {max_range}]: {number} -> {result}"
154            )
155        return result
156
157    def get_stats(self) -> LoudnessStatisticsWithMetadata:
158        """
159        Return loudness statistics for the stream.
160
161        Returns:
162            dict: A dictionary containing the loudness statistics.
163        """
164        stats: LoudnessStatisticsWithMetadata = {
165            "input_file": self.media_file.input_file,
166            "output_file": self.media_file.output_file,
167            "stream_id": self.stream_id,
168            "ebu_pass1": self.loudness_statistics["ebu_pass1"],
169            "ebu_pass2": self.loudness_statistics["ebu_pass2"],
170            "mean": self.loudness_statistics["mean"],
171            "max": self.loudness_statistics["max"],
172        }
173        return stats
174
175    def set_second_pass_stats(self, stats: EbuLoudnessStatistics) -> None:
176        """
177        Set the EBU loudness statistics for the second pass.
178
179        Args:
180            stats (dict): The EBU loudness statistics.
181        """
182        _logger.debug(
183            f"Setting second pass stats for stream {self.stream_id} from {stats}"
184        )
185        self.loudness_statistics["ebu_pass2"] = stats
186
187    def get_pcm_codec(self) -> str:
188        """
189        Get the PCM codec string for the stream.
190
191        Returns:
192            str: The PCM codec string.
193        """
194        if not self.bit_depth:
195            return "pcm_s16le"
196        elif self.bit_depth <= 8:
197            return "pcm_s8"
198        elif self.bit_depth in [16, 24, 32, 64]:
199            return f"pcm_s{self.bit_depth}le"
200        else:
201            _logger.warning(
202                f"Unsupported bit depth {self.bit_depth}, falling back to pcm_s16le"
203            )
204            return "pcm_s16le"
205
206    def _get_filter_str_with_pre_filter(self, current_filter: str) -> str:
207        """
208        Get a filter string for current_filter, with the pre-filter
209        added before. Applies the input label before.
210
211        Args:
212            current_filter (str): The current filter.
213
214        Returns:
215            str: The filter string.
216        """
217        input_label = f"[0:{self.stream_id}]"
218        filter_chain = []
219        if self.media_file.ffmpeg_normalize.pre_filter:
220            filter_chain.append(self.media_file.ffmpeg_normalize.pre_filter)
221        filter_chain.append(current_filter)
222        filter_str = input_label + ",".join(filter_chain)
223        return filter_str
224
225    def parse_astats(self) -> Iterator[float]:
226        """
227        Use ffmpeg with astats filter to get the mean (RMS) and max (peak) volume of the input file.
228
229        Yields:
230            float: The progress of the command.
231        """
232        _logger.info(f"Running first pass astats filter for stream {self.stream_id}")
233
234        filter_str = self._get_filter_str_with_pre_filter(
235            "astats=measure_overall=Peak_level+RMS_level:measure_perchannel=0"
236        )
237
238        cmd = [
239            self.media_file.ffmpeg_normalize.ffmpeg_exe,
240            "-hide_banner",
241            "-y",
242            "-i",
243            self.media_file.input_file,
244            "-filter_complex",
245            filter_str,
246            "-vn",
247            "-sn",
248            "-f",
249            "null",
250            os.devnull,
251        ]
252
253        cmd_runner = CommandRunner()
254        yield from cmd_runner.run_ffmpeg_command(cmd)
255        output = cmd_runner.get_output()
256
257        _logger.debug(
258            f"astats command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
259        )
260
261        mean_volume_matches = re.findall(r"RMS level dB: ([\-\d\.]+)", output)
262        if mean_volume_matches:
263            if mean_volume_matches[0] == "-":
264                self.loudness_statistics["mean"] = float("-inf")
265            else:
266                self.loudness_statistics["mean"] = float(mean_volume_matches[0])
267        else:
268            raise FFmpegNormalizeError(
269                f"Could not get mean volume for {self.media_file.input_file}"
270            )
271
272        max_volume_matches = re.findall(r"Peak level dB: ([\-\d\.]+)", output)
273        if max_volume_matches:
274            if max_volume_matches[0] == "-":
275                self.loudness_statistics["max"] = float("-inf")
276            else:
277                self.loudness_statistics["max"] = float(max_volume_matches[0])
278        else:
279            raise FFmpegNormalizeError(
280                f"Could not get max volume for {self.media_file.input_file}"
281            )
282
283    def parse_loudnorm_stats(self) -> Iterator[float]:
284        """
285        Run a first pass loudnorm filter to get measured data.
286
287        Yields:
288            float: The progress of the command.
289        """
290        _logger.info(f"Running first pass loudnorm filter for stream {self.stream_id}")
291
292        opts = {
293            "i": self.media_file.ffmpeg_normalize.target_level,
294            "lra": self.media_file.ffmpeg_normalize.loudness_range_target,
295            "tp": self.media_file.ffmpeg_normalize.true_peak,
296            "offset": self.media_file.ffmpeg_normalize.offset,
297            "print_format": "json",
298        }
299
300        if self.media_file.ffmpeg_normalize.dual_mono:
301            opts["dual_mono"] = "true"
302
303        filter_str = self._get_filter_str_with_pre_filter(
304            "loudnorm=" + dict_to_filter_opts(opts)
305        )
306
307        cmd = [
308            self.media_file.ffmpeg_normalize.ffmpeg_exe,
309            "-hide_banner",
310            "-y",
311            "-i",
312            self.media_file.input_file,
313            "-map",
314            f"0:{self.stream_id}",
315            "-filter_complex",
316            filter_str,
317            "-vn",
318            "-sn",
319            "-f",
320            "null",
321            os.devnull,
322        ]
323
324        cmd_runner = CommandRunner()
325        yield from cmd_runner.run_ffmpeg_command(cmd)
326        output = cmd_runner.get_output()
327
328        _logger.debug(
329            f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
330        )
331
332        # only one stream
333        self.loudness_statistics["ebu_pass1"] = next(
334            iter(AudioStream.prune_and_parse_loudnorm_output(output).values())
335        )
336
337    @staticmethod
338    def prune_and_parse_loudnorm_output(
339        output: str,
340    ) -> dict[int, EbuLoudnessStatistics]:
341        """
342        Prune ffmpeg progress lines from output and parse the loudnorm filter output.
343        There may be multiple outputs if multiple streams were processed.
344
345        Args:
346            output (str): The output from ffmpeg.
347
348        Returns:
349            dict[int, EbuLoudnessStatistics]: The EBU loudness statistics.
350        """
351        _logger.debug("Parsing loudnorm stats from output")
352        pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output)
353        output_lines = [line.strip() for line in pruned_output.split("\n")]
354        return AudioStream._parse_loudnorm_output(output_lines)
355
356    @staticmethod
357    def _parse_loudnorm_output(
358        output_lines: list[str],
359    ) -> dict[int, EbuLoudnessStatistics]:
360        """
361        Parse the output of a loudnorm filter to get the EBU loudness statistics.
362
363        Args:
364            output_lines (list[str]): The output lines of the loudnorm filter.
365
366        Raises:
367            FFmpegNormalizeError: When the output could not be parsed.
368
369        Returns:
370            dict[int, EbuLoudnessStatistics]: stream index and the EBU loudness statistics, if found.
371        """
372        result = dict[int, EbuLoudnessStatistics]()
373        stream_index = -1
374        loudnorm_start = 0
375        for index, line in enumerate(output_lines):
376            if stream_index < 0:
377                if m := _loudnorm_pattern.match(line):
378                    loudnorm_start = index + 1
379                    stream_index = int(m.group(1))
380            else:
381                if line.startswith("}"):
382                    loudnorm_end = index + 1
383                    loudnorm_data = "\n".join(output_lines[loudnorm_start:loudnorm_end])
384
385                    try:
386                        loudnorm_stats = json.loads(loudnorm_data)
387
388                        _logger.debug(
389                            f"Loudnorm stats for stream {stream_index} parsed: {loudnorm_data}"
390                        )
391
392                        for key in [
393                            "input_i",
394                            "input_tp",
395                            "input_lra",
396                            "input_thresh",
397                            "output_i",
398                            "output_tp",
399                            "output_lra",
400                            "output_thresh",
401                            "target_offset",
402                            "normalization_type",
403                        ]:
404                            if key not in loudnorm_stats:
405                                continue
406                            if key == "normalization_type":
407                                loudnorm_stats[key] = loudnorm_stats[key].lower()
408                            # handle infinite values
409                            elif float(loudnorm_stats[key]) == -float("inf"):
410                                loudnorm_stats[key] = -99
411                            elif float(loudnorm_stats[key]) == float("inf"):
412                                loudnorm_stats[key] = 0
413                            else:
414                                # convert to floats
415                                loudnorm_stats[key] = float(loudnorm_stats[key])
416
417                        result[stream_index] = cast(
418                            EbuLoudnessStatistics, loudnorm_stats
419                        )
420                        stream_index = -1
421                    except Exception as e:
422                        raise FFmpegNormalizeError(
423                            f"Could not parse loudnorm stats; wrong JSON format in string: {e}"
424                        )
425        return result
426
    def get_second_pass_opts_ebu(self) -> str:
        """
        Return second pass loudnorm filter options string for ffmpeg.

        In dynamic mode the filter string is built from the configured targets
        only. Otherwise the first pass measurements stored under
        ``self.loudness_statistics["ebu_pass1"]`` are passed to the filter as
        ``measured_*`` options.

        Note that this method has side effects: it may set the stored first
        pass ``input_i`` to 0, and it may overwrite ``loudness_range_target`` on
        the FFmpegNormalize object referenced by the media file.

        Returns:
            str: The filter string, starting with "loudnorm=".

        Raises:
            FFmpegNormalizeError: If dynamic mode is off and no first pass
                statistics are available.
        """

        # In dynamic mode, we can do everything in one pass, and we do not have first pass stats
        if self.media_file.ffmpeg_normalize.dynamic:
            if not self.ffmpeg_normalize.sample_rate:
                _logger.warning(
                    "In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. "
                    "Specify -ar/--sample-rate to override it."
                )

            opts = {
                "i": self.media_file.ffmpeg_normalize.target_level,
                "lra": self.media_file.ffmpeg_normalize.loudness_range_target,
                "tp": self.media_file.ffmpeg_normalize.true_peak,
                "offset": self.media_file.ffmpeg_normalize.offset,
                "linear": "false",
                "print_format": "json",
            }

            if self.media_file.ffmpeg_normalize.dual_mono:
                opts["dual_mono"] = "true"

            return "loudnorm=" + dict_to_filter_opts(opts)

        # From here on the first pass measurements are required.
        if not self.loudness_statistics["ebu_pass1"]:
            raise FFmpegNormalizeError(
                "First pass not run, you must call parse_loudnorm_stats first"
            )

        # A positive measured loudness is capped at 0 in the stored statistics.
        if float(self.loudness_statistics["ebu_pass1"]["input_i"]) > 0:
            _logger.warning(
                "Input file had measured input loudness greater than zero "
                f"({self.loudness_statistics['ebu_pass1']['input_i']}), capping at 0"
            )
            self.loudness_statistics["ebu_pass1"]["input_i"] = 0

        # The dynamic branch above has already returned, so this starts out falsy;
        # below it is only used to decide which warnings to log.
        will_use_dynamic_mode = self.media_file.ffmpeg_normalize.dynamic

        # Option: use the measured loudness range (limited to [1, 50]) as the target.
        if self.media_file.ffmpeg_normalize.keep_loudness_range_target:
            _logger.debug(
                "Keeping target loudness range in second pass loudnorm filter"
            )
            input_lra = self.loudness_statistics["ebu_pass1"]["input_lra"]
            if input_lra < 1 or input_lra > 50:
                _logger.warning(
                    "Input file had measured loudness range outside of [1,50] "
                    f"({input_lra}), capping to allowed range"
                )

            # No name is passed to _constrain, as the warning was already logged above.
            self.media_file.ffmpeg_normalize.loudness_range_target = self._constrain(
                self.loudness_statistics["ebu_pass1"]["input_lra"], 1, 50
            )

        # Option: raise the target to the measured loudness range if the
        # measured range is above the configured target.
        if self.media_file.ffmpeg_normalize.keep_lra_above_loudness_range_target:
            if (
                self.loudness_statistics["ebu_pass1"]["input_lra"]
                <= self.media_file.ffmpeg_normalize.loudness_range_target
            ):
                _logger.debug(
                    "Setting loudness range target in second pass loudnorm filter"
                )
            else:
                # NOTE(review): unlike the branch above, the measured value is
                # not constrained to [1, 50] here - confirm this is intended.
                self.media_file.ffmpeg_normalize.loudness_range_target = (
                    self.loudness_statistics["ebu_pass1"]["input_lra"]
                )
                _logger.debug(
                    "Keeping target loudness range in second pass loudnorm filter"
                )

        # Warn when the measured loudness range exceeds the (possibly updated) target.
        if (
            self.media_file.ffmpeg_normalize.loudness_range_target
            < self.loudness_statistics["ebu_pass1"]["input_lra"]
            and not will_use_dynamic_mode
        ):
            _logger.warning(
                f"Input file had loudness range of {self.loudness_statistics['ebu_pass1']['input_lra']}. "
                f"This is larger than the loudness range target ({self.media_file.ffmpeg_normalize.loudness_range_target}). "
                "Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization. "
                "Alternatively, use the --keep-loudness-range-target or --keep-lra-above-loudness-range-target option to keep the target loudness range from "
                "the input."
            )
            will_use_dynamic_mode = True

        if will_use_dynamic_mode and not self.ffmpeg_normalize.sample_rate:
            _logger.warning(
                "In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. "
                "Specify -ar/--sample-rate to override it."
            )

        target_level = self.ffmpeg_normalize.target_level
        if self.ffmpeg_normalize.auto_lower_loudness_target:
            # Measured loudness shifted by the distance between the configured
            # true peak and the measured true peak, minus 0.1.
            # Presumably the 0.1 is a safety margin - TODO confirm.
            safe_target = (
                self.loudness_statistics["ebu_pass1"]["input_i"]
                - self.loudness_statistics["ebu_pass1"]["input_tp"]
                + self.ffmpeg_normalize.true_peak
                - 0.1
            )
            # The target is only ever lowered, never raised.
            if safe_target < self.ffmpeg_normalize.target_level:
                target_level = safe_target
                _logger.warning(
                    f"Using loudness target {target_level} because --auto-lower-loudness-target given.",
                )

        stats = self.loudness_statistics["ebu_pass1"]

        # Measured values are constrained to fixed ranges; _constrain logs a
        # warning whenever a value had to be changed.
        opts = {
            "i": target_level,
            "lra": self.media_file.ffmpeg_normalize.loudness_range_target,
            "tp": self.media_file.ffmpeg_normalize.true_peak,
            "offset": self._constrain(
                stats["target_offset"], -99, 99, name="target_offset"
            ),
            "measured_i": self._constrain(stats["input_i"], -99, 0, name="input_i"),
            "measured_lra": self._constrain(
                stats["input_lra"], 0, 99, name="input_lra"
            ),
            "measured_tp": self._constrain(stats["input_tp"], -99, 99, name="input_tp"),
            "measured_thresh": self._constrain(
                stats["input_thresh"], -99, 0, name="input_thresh"
            ),
            # Derived from the configured dynamic flag, not from will_use_dynamic_mode.
            "linear": "false" if self.media_file.ffmpeg_normalize.dynamic else "true",
            "print_format": "json",
        }

        if self.media_file.ffmpeg_normalize.dual_mono:
            opts["dual_mono"] = "true"

        return "loudnorm=" + dict_to_filter_opts(opts)
558
559    def get_second_pass_opts_peakrms(self) -> str:
560        """
561        Set the adjustment gain based on chosen option and mean/max volume,
562        return the matching ffmpeg volume filter.
563
564        Returns:
565            str: ffmpeg volume filter string
566        """
567        if (
568            self.loudness_statistics["max"] is None
569            or self.loudness_statistics["mean"] is None
570        ):
571            raise FFmpegNormalizeError(
572                "First pass not run, no mean/max volume to normalize to"
573            )
574
575        normalization_type = self.media_file.ffmpeg_normalize.normalization_type
576        target_level = self.media_file.ffmpeg_normalize.target_level
577
578        if normalization_type == "peak":
579            adjustment = 0 + target_level - self.loudness_statistics["max"]
580        elif normalization_type == "rms":
581            adjustment = target_level - self.loudness_statistics["mean"]
582        else:
583            raise FFmpegNormalizeError(
584                "Can only set adjustment for peak and RMS normalization"
585            )
586
587        _logger.info(
588            f"Adjusting stream {self.stream_id} by {adjustment} dB to reach {target_level}"
589        )
590
591        clip_amount = self.loudness_statistics["max"] + adjustment
592        if clip_amount > 0:
593            _logger.warning(f"Adjusting will lead to clipping of {clip_amount} dB")
594
595        return f"volume={adjustment}dB"
AudioStream(ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int, sample_rate: int | None, bit_depth: int | None, duration: float | None)
 95    def __init__(
 96        self,
 97        ffmpeg_normalize: FFmpegNormalize,
 98        media_file: MediaFile,
 99        stream_id: int,
100        sample_rate: int | None,
101        bit_depth: int | None,
102        duration: float | None,
103    ):
104        """
105        Create an AudioStream object.
106
107        Args:
108            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
109            media_file (MediaFile): The MediaFile object.
110            stream_id (int): The stream ID.
111            sample_rate (int): sample rate in Hz
112            bit_depth (int): bit depth in bits
113            duration (float): duration in seconds
114        """
115        super().__init__(ffmpeg_normalize, media_file, "audio", stream_id)
116
117        self.loudness_statistics: LoudnessStatistics = {
118            "ebu_pass1": None,
119            "ebu_pass2": None,
120            "mean": None,
121            "max": None,
122        }
123
124        self.sample_rate = sample_rate
125        self.bit_depth = bit_depth
126
127        self.duration = duration

Create an AudioStream object.

Arguments:
  • ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
  • media_file (MediaFile): The MediaFile object.
  • stream_id (int): The stream ID.
  • sample_rate (int): sample rate in Hz
  • bit_depth (int): bit depth in bits
  • duration (float): duration in seconds
loudness_statistics: ffmpeg_normalize._streams.LoudnessStatistics
sample_rate
bit_depth
duration
def get_stats(self) -> ffmpeg_normalize._streams.LoudnessStatisticsWithMetadata:
157    def get_stats(self) -> LoudnessStatisticsWithMetadata:
158        """
159        Return loudness statistics for the stream.
160
161        Returns:
162            dict: A dictionary containing the loudness statistics.
163        """
164        stats: LoudnessStatisticsWithMetadata = {
165            "input_file": self.media_file.input_file,
166            "output_file": self.media_file.output_file,
167            "stream_id": self.stream_id,
168            "ebu_pass1": self.loudness_statistics["ebu_pass1"],
169            "ebu_pass2": self.loudness_statistics["ebu_pass2"],
170            "mean": self.loudness_statistics["mean"],
171            "max": self.loudness_statistics["max"],
172        }
173        return stats

Return loudness statistics for the stream.

Returns:

dict: A dictionary containing the loudness statistics.

def set_second_pass_stats(self, stats: ffmpeg_normalize._streams.EbuLoudnessStatistics) -> None:
175    def set_second_pass_stats(self, stats: EbuLoudnessStatistics) -> None:
176        """
177        Set the EBU loudness statistics for the second pass.
178
179        Args:
180            stats (dict): The EBU loudness statistics.
181        """
182        _logger.debug(
183            f"Setting second pass stats for stream {self.stream_id} from {stats}"
184        )
185        self.loudness_statistics["ebu_pass2"] = stats

Set the EBU loudness statistics for the second pass.

Arguments:
  • stats (dict): The EBU loudness statistics.
def get_pcm_codec(self) -> str:
187    def get_pcm_codec(self) -> str:
188        """
189        Get the PCM codec string for the stream.
190
191        Returns:
192            str: The PCM codec string.
193        """
194        if not self.bit_depth:
195            return "pcm_s16le"
196        elif self.bit_depth <= 8:
197            return "pcm_s8"
198        elif self.bit_depth in [16, 24, 32, 64]:
199            return f"pcm_s{self.bit_depth}le"
200        else:
201            _logger.warning(
202                f"Unsupported bit depth {self.bit_depth}, falling back to pcm_s16le"
203            )
204            return "pcm_s16le"

Get the PCM codec string for the stream.

Returns:

str: The PCM codec string.

def parse_astats(self) -> Iterator[float]:
225    def parse_astats(self) -> Iterator[float]:
226        """
227        Use ffmpeg with astats filter to get the mean (RMS) and max (peak) volume of the input file.
228
229        Yields:
230            float: The progress of the command.
231        """
232        _logger.info(f"Running first pass astats filter for stream {self.stream_id}")
233
234        filter_str = self._get_filter_str_with_pre_filter(
235            "astats=measure_overall=Peak_level+RMS_level:measure_perchannel=0"
236        )
237
238        cmd = [
239            self.media_file.ffmpeg_normalize.ffmpeg_exe,
240            "-hide_banner",
241            "-y",
242            "-i",
243            self.media_file.input_file,
244            "-filter_complex",
245            filter_str,
246            "-vn",
247            "-sn",
248            "-f",
249            "null",
250            os.devnull,
251        ]
252
253        cmd_runner = CommandRunner()
254        yield from cmd_runner.run_ffmpeg_command(cmd)
255        output = cmd_runner.get_output()
256
257        _logger.debug(
258            f"astats command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
259        )
260
261        mean_volume_matches = re.findall(r"RMS level dB: ([\-\d\.]+)", output)
262        if mean_volume_matches:
263            if mean_volume_matches[0] == "-":
264                self.loudness_statistics["mean"] = float("-inf")
265            else:
266                self.loudness_statistics["mean"] = float(mean_volume_matches[0])
267        else:
268            raise FFmpegNormalizeError(
269                f"Could not get mean volume for {self.media_file.input_file}"
270            )
271
272        max_volume_matches = re.findall(r"Peak level dB: ([\-\d\.]+)", output)
273        if max_volume_matches:
274            if max_volume_matches[0] == "-":
275                self.loudness_statistics["max"] = float("-inf")
276            else:
277                self.loudness_statistics["max"] = float(max_volume_matches[0])
278        else:
279            raise FFmpegNormalizeError(
280                f"Could not get max volume for {self.media_file.input_file}"
281            )

Use ffmpeg with astats filter to get the mean (RMS) and max (peak) volume of the input file.

Yields:

float: The progress of the command.

def parse_loudnorm_stats(self) -> Iterator[float]:
283    def parse_loudnorm_stats(self) -> Iterator[float]:
284        """
285        Run a first pass loudnorm filter to get measured data.
286
287        Yields:
288            float: The progress of the command.
289        """
290        _logger.info(f"Running first pass loudnorm filter for stream {self.stream_id}")
291
292        opts = {
293            "i": self.media_file.ffmpeg_normalize.target_level,
294            "lra": self.media_file.ffmpeg_normalize.loudness_range_target,
295            "tp": self.media_file.ffmpeg_normalize.true_peak,
296            "offset": self.media_file.ffmpeg_normalize.offset,
297            "print_format": "json",
298        }
299
300        if self.media_file.ffmpeg_normalize.dual_mono:
301            opts["dual_mono"] = "true"
302
303        filter_str = self._get_filter_str_with_pre_filter(
304            "loudnorm=" + dict_to_filter_opts(opts)
305        )
306
307        cmd = [
308            self.media_file.ffmpeg_normalize.ffmpeg_exe,
309            "-hide_banner",
310            "-y",
311            "-i",
312            self.media_file.input_file,
313            "-map",
314            f"0:{self.stream_id}",
315            "-filter_complex",
316            filter_str,
317            "-vn",
318            "-sn",
319            "-f",
320            "null",
321            os.devnull,
322        ]
323
324        cmd_runner = CommandRunner()
325        yield from cmd_runner.run_ffmpeg_command(cmd)
326        output = cmd_runner.get_output()
327
328        _logger.debug(
329            f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
330        )
331
332        # only one stream
333        self.loudness_statistics["ebu_pass1"] = next(
334            iter(AudioStream.prune_and_parse_loudnorm_output(output).values())
335        )

Run a first pass loudnorm filter to get measured data.

Yields:

float: The progress of the command.

@staticmethod
def prune_and_parse_loudnorm_output(output: str) -> dict[int, ffmpeg_normalize._streams.EbuLoudnessStatistics]:
337    @staticmethod
338    def prune_and_parse_loudnorm_output(
339        output: str,
340    ) -> dict[int, EbuLoudnessStatistics]:
341        """
342        Prune ffmpeg progress lines from output and parse the loudnorm filter output.
343        There may be multiple outputs if multiple streams were processed.
344
345        Args:
346            output (str): The output from ffmpeg.
347
348        Returns:
349            dict[int, EbuLoudnessStatistics]: The EBU loudness statistics.
350        """
351        _logger.debug("Parsing loudnorm stats from output")
352        pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output)
353        output_lines = [line.strip() for line in pruned_output.split("\n")]
354        return AudioStream._parse_loudnorm_output(output_lines)

Prune ffmpeg progress lines from output and parse the loudnorm filter output. There may be multiple outputs if multiple streams were processed.

Arguments:
  • output (str): The output from ffmpeg.
Returns:

dict[int, EbuLoudnessStatistics]: The EBU loudness statistics.

def get_second_pass_opts_ebu(self) -> str:
def get_second_pass_opts_ebu(self) -> str:
    """
    Return second pass loudnorm filter options string for ffmpeg.

    In dynamic mode the string is built from the configured targets only,
    because no first pass statistics exist. Otherwise the measured values
    stored under ``self.loudness_statistics["ebu_pass1"]`` are added as
    ``measured_*`` options and linear normalization is requested.

    Side effects:
        * A measured ``input_i`` greater than zero is overwritten with 0 in
          the stored first pass statistics.
        * With ``keep_loudness_range_target`` or
          ``keep_lra_above_loudness_range_target`` set, ``loudness_range_target``
          on ``self.media_file.ffmpeg_normalize`` may be overwritten.

    Returns:
        str: ``"loudnorm="`` followed by the options rendered by
        ``dict_to_filter_opts``.

    Raises:
        FFmpegNormalizeError: If dynamic mode is off and the stored first
            pass statistics are empty.
    """
    # The original code reads some settings via the media file and others via
    # the stream's own reference; both access paths are kept as they were.
    file_config = self.media_file.ffmpeg_normalize
    stream_config = self.ffmpeg_normalize

    sample_rate_warning = (
        "In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. "
        "Specify -ar/--sample-rate to override it."
    )

    # In dynamic mode, we can do everything in one pass, and we do not have first pass stats
    if file_config.dynamic:
        if not stream_config.sample_rate:
            _logger.warning(sample_rate_warning)

        opts = {
            "i": file_config.target_level,
            "lra": file_config.loudness_range_target,
            "tp": file_config.true_peak,
            "offset": file_config.offset,
            "linear": "false",
            "print_format": "json",
        }

        if file_config.dual_mono:
            opts["dual_mono"] = "true"

        return "loudnorm=" + dict_to_filter_opts(opts)

    stats = self.loudness_statistics["ebu_pass1"]
    if not stats:
        raise FFmpegNormalizeError(
            "First pass not run, you must call parse_loudnorm_stats first"
        )

    if float(stats["input_i"]) > 0:
        _logger.warning(
            "Input file had measured input loudness greater than zero "
            f"({stats['input_i']}), capping at 0"
        )
        stats["input_i"] = 0

    input_lra = stats["input_lra"]
    lra_out_of_range_warning = (
        "Input file had measured loudness range outside of [1,50] "
        f"({input_lra}), capping to allowed range"
    )

    # NOTE(review): both branches below write to the FFmpegNormalize object,
    # which looks like configuration shared by every stream/file, so the new
    # target presumably carries over to later calls. Confirm this is intended.
    if file_config.keep_loudness_range_target:
        _logger.debug(
            "Keeping target loudness range in second pass loudnorm filter"
        )
        if input_lra < 1 or input_lra > 50:
            _logger.warning(lra_out_of_range_warning)

        file_config.loudness_range_target = self._constrain(input_lra, 1, 50)

    if file_config.keep_lra_above_loudness_range_target:
        if input_lra <= file_config.loudness_range_target:
            _logger.debug(
                "Setting loudness range target in second pass loudnorm filter"
            )
        else:
            # Fix: clamp to the same [1, 50] range the
            # keep_loudness_range_target branch enforces, so that a very
            # large measured LRA is not passed on as an out-of-range
            # "lra" filter option.
            if input_lra < 1 or input_lra > 50:
                _logger.warning(lra_out_of_range_warning)
            file_config.loudness_range_target = self._constrain(input_lra, 1, 50)
            _logger.debug(
                "Keeping target loudness range in second pass loudnorm filter"
            )

    # Dynamic mode already returned above, so the only way to end up with
    # dynamic processing here is a target LRA below the measured LRA.
    if file_config.loudness_range_target < input_lra:
        _logger.warning(
            f"Input file had loudness range of {input_lra}. "
            f"This is larger than the loudness range target ({file_config.loudness_range_target}). "
            "Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization. "
            "Alternatively, use the --keep-loudness-range-target or --keep-lra-above-loudness-range-target option to keep the target loudness range from "
            "the input."
        )
        if not stream_config.sample_rate:
            _logger.warning(sample_rate_warning)

    target_level = stream_config.target_level
    if stream_config.auto_lower_loudness_target:
        # Level that keeps the measured peak 0.1 dB under the true peak limit.
        safe_target = (
            stats["input_i"]
            - stats["input_tp"]
            + stream_config.true_peak
            - 0.1
        )
        if safe_target < stream_config.target_level:
            target_level = safe_target
            _logger.warning(
                f"Using loudness target {target_level} because --auto-lower-loudness-target given.",
            )

    opts = {
        "i": target_level,
        "lra": file_config.loudness_range_target,
        "tp": file_config.true_peak,
        "offset": self._constrain(
            stats["target_offset"], -99, 99, name="target_offset"
        ),
        "measured_i": self._constrain(stats["input_i"], -99, 0, name="input_i"),
        "measured_lra": self._constrain(
            stats["input_lra"], 0, 99, name="input_lra"
        ),
        "measured_tp": self._constrain(stats["input_tp"], -99, 99, name="input_tp"),
        "measured_thresh": self._constrain(
            stats["input_thresh"], -99, 0, name="input_thresh"
        ),
        # Always "true" on this path: the dynamic-mode case returned earlier.
        "linear": "true",
        "print_format": "json",
    }

    if file_config.dual_mono:
        opts["dual_mono"] = "true"

    return "loudnorm=" + dict_to_filter_opts(opts)

Return second pass loudnorm filter options string for ffmpeg

def get_second_pass_opts_peakrms(self) -> str:
def get_second_pass_opts_peakrms(self) -> str:
    """
    Build the ffmpeg volume filter for peak or RMS normalization.

    The gain is the configured target level minus the measured maximum
    volume (peak) or the measured mean volume (RMS). A warning is logged
    when the measured maximum plus that gain ends up above 0 dB.

    Returns:
        str: ffmpeg volume filter string

    Raises:
        FFmpegNormalizeError: If the mean or max volume is missing, or the
            normalization type is neither "peak" nor "rms".
    """
    measured = self.loudness_statistics
    if measured["max"] is None or measured["mean"] is None:
        raise FFmpegNormalizeError(
            "First pass not run, no mean/max volume to normalize to"
        )

    mode = self.media_file.ffmpeg_normalize.normalization_type
    target_level = self.media_file.ffmpeg_normalize.target_level

    if mode not in ("peak", "rms"):
        raise FFmpegNormalizeError(
            "Can only set adjustment for peak and RMS normalization"
        )

    if mode == "peak":
        adjustment = 0 + target_level - measured["max"]
    else:
        adjustment = target_level - measured["mean"]

    _logger.info(
        f"Adjusting stream {self.stream_id} by {adjustment} dB to reach {target_level}"
    )

    # Level of the measured maximum once the gain has been applied.
    clip_amount = measured["max"] + adjustment
    if clip_amount > 0:
        _logger.warning(f"Adjusting will lead to clipping of {clip_amount} dB")

    return f"volume={adjustment}dB"

Set the adjustment gain based on chosen option and mean/max volume, return the matching ffmpeg volume filter.

Returns:

str: ffmpeg volume filter string

class VideoStream(ffmpeg_normalize.MediaStream):
class VideoStream(MediaStream):
    """A MediaStream whose stream type is always "video"."""

    def __init__(
        self,
        ffmpeg_normalize: FFmpegNormalize,
        media_file: MediaFile,
        stream_id: int,
    ):
        """
        Create a VideoStream object.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_id (int): The stream ID.
        """
        super().__init__(
            ffmpeg_normalize=ffmpeg_normalize,
            media_file=media_file,
            stream_type="video",
            stream_id=stream_id,
        )
VideoStream( ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int)
def __init__(
    self,
    ffmpeg_normalize: FFmpegNormalize,
    media_file: MediaFile,
    stream_id: int,
):
    """
    Create a VideoStream object.

    Args:
        ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
        media_file (MediaFile): The MediaFile object.
        stream_id (int): The stream ID.
    """
    super().__init__(
        ffmpeg_normalize=ffmpeg_normalize,
        media_file=media_file,
        stream_type="video",
        stream_id=stream_id,
    )

Create a VideoStream object, i.e. a MediaStream whose stream type is always "video".

Arguments:
  • ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
  • media_file (MediaFile): The MediaFile object.
  • stream_id (int): The stream ID.

Note: stream_type (Literal["audio", "video", "subtitle"]) is not a constructor argument here; it is fixed to "video".
class SubtitleStream(ffmpeg_normalize.MediaStream):
class SubtitleStream(MediaStream):
    """A MediaStream whose stream type is always "subtitle"."""

    def __init__(
        self,
        ffmpeg_normalize: FFmpegNormalize,
        media_file: MediaFile,
        stream_id: int,
    ):
        """
        Create a SubtitleStream object.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_id (int): The stream ID.
        """
        super().__init__(
            ffmpeg_normalize=ffmpeg_normalize,
            media_file=media_file,
            stream_type="subtitle",
            stream_id=stream_id,
        )
SubtitleStream( ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int)
def __init__(
    self,
    ffmpeg_normalize: FFmpegNormalize,
    media_file: MediaFile,
    stream_id: int,
):
    """
    Create a SubtitleStream object.

    Args:
        ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
        media_file (MediaFile): The MediaFile object.
        stream_id (int): The stream ID.
    """
    super().__init__(
        ffmpeg_normalize=ffmpeg_normalize,
        media_file=media_file,
        stream_type="subtitle",
        stream_id=stream_id,
    )

Create a SubtitleStream object, i.e. a MediaStream whose stream type is always "subtitle".

Arguments:
  • ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
  • media_file (MediaFile): The MediaFile object.
  • stream_id (int): The stream ID.

Note: stream_type (Literal["audio", "video", "subtitle"]) is not a constructor argument here; it is fixed to "subtitle".
class MediaStream:
class MediaStream:
    """One stream of a media file, linked to its MediaFile and FFmpegNormalize objects."""

    def __init__(
        self,
        ffmpeg_normalize: FFmpegNormalize,
        media_file: MediaFile,
        stream_type: Literal["audio", "video", "subtitle"],
        stream_id: int,
    ):
        """
        Create a MediaStream object.

        The four arguments are stored unchanged as attributes of the same
        name, and a debug message naming the input file, stream type and
        stream ID is logged.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_type (Literal["audio", "video", "subtitle"]): The type of the stream.
            stream_id (int): The stream ID.
        """
        self.ffmpeg_normalize, self.media_file = ffmpeg_normalize, media_file
        self.stream_type, self.stream_id = stream_type, stream_id
        _logger.debug(
            f"Created MediaStream for {self.media_file.input_file}, {self.stream_type} stream {self.stream_id}"
        )

    def __repr__(self) -> str:
        """Return "<file name, stream type stream ID>" using the base name of the input file."""
        file_name = os.path.basename(self.media_file.input_file)
        return f"<{file_name}, {self.stream_type} stream {self.stream_id}>"
MediaStream( ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_type: Literal['audio', 'video', 'subtitle'], stream_id: int)
def __init__(
    self,
    ffmpeg_normalize: FFmpegNormalize,
    media_file: MediaFile,
    stream_type: Literal["audio", "video", "subtitle"],
    stream_id: int,
):
    """
    Create a MediaStream object.

    The four arguments are stored unchanged as attributes of the same name,
    and a debug message naming the input file, stream type and stream ID is
    logged.

    Args:
        ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
        media_file (MediaFile): The MediaFile object.
        stream_type (Literal["audio", "video", "subtitle"]): The type of the stream.
        stream_id (int): The stream ID.
    """
    self.ffmpeg_normalize, self.media_file = ffmpeg_normalize, media_file
    self.stream_type, self.stream_id = stream_type, stream_id
    _logger.debug(
        f"Created MediaStream for {self.media_file.input_file}, {self.stream_type} stream {self.stream_id}"
    )

Create a MediaStream object.

Arguments:
  • ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
  • media_file (MediaFile): The MediaFile object.
  • stream_type (Literal["audio", "video", "subtitle"]): The type of the stream.
  • stream_id (int): The stream ID.
ffmpeg_normalize
media_file
stream_type
stream_id
__version__ = '1.32.3'