ffmpeg_normalize
1from ._errors import FFmpegNormalizeError 2from ._ffmpeg_normalize import FFmpegNormalize 3from ._media_file import MediaFile 4from ._streams import AudioStream, MediaStream, SubtitleStream, VideoStream 5from ._version import __version__ 6 7__module_name__ = "ffmpeg_normalize" 8 9__all__ = [ 10 "FFmpegNormalize", 11 "FFmpegNormalizeError", 12 "MediaFile", 13 "AudioStream", 14 "VideoStream", 15 "SubtitleStream", 16 "MediaStream", 17 "__version__", 18]
class FFmpegNormalize:
    """
    Hold the normalization settings and the media files to be normalized.

    Args:
        normalization_type (str, optional): Normalization type. Defaults to "ebu".
        target_level (float, optional): Target level. Defaults to -23.0.
        print_stats (bool, optional): Print loudnorm stats. Defaults to False.
        loudness_range_target (float, optional): Loudness range target. Defaults to 7.0.
        keep_loudness_range_target (bool, optional): Keep loudness range target. Defaults to False.
        keep_lra_above_loudness_range_target (bool, optional): Keep input loudness range above loudness range target. Defaults to False.
        true_peak (float, optional): True peak. Defaults to -2.0.
        offset (float, optional): Offset. Defaults to 0.0.
        lower_only (bool, optional): Whether the audio should not increase in loudness. Defaults to False.
        auto_lower_loudness_target (bool, optional): Automatically lower EBU Integrated Loudness Target. Defaults to False.
        dual_mono (bool, optional): Dual mono. Defaults to False.
        dynamic (bool, optional): Use dynamic EBU R128 normalization. This is a one-pass algorithm and skips the initial media scan. Defaults to False.
        audio_codec (str, optional): Audio codec. Defaults to "pcm_s16le".
        audio_bitrate (float, optional): Audio bitrate. Defaults to None.
        sample_rate (float | int, optional): Sample rate; converted to int when given. Defaults to None.
        audio_channels (int | None, optional): Audio channels. Defaults to None.
        keep_original_audio (bool, optional): Keep original audio. Defaults to False.
        pre_filter (str, optional): Pre filter. Defaults to None.
        post_filter (str, optional): Post filter. Defaults to None.
        video_codec (str, optional): Video codec. Defaults to "copy".
        video_disable (bool, optional): Disable video. Defaults to False.
        subtitle_disable (bool, optional): Disable subtitles. Defaults to False.
        metadata_disable (bool, optional): Disable metadata. Defaults to False.
        chapters_disable (bool, optional): Disable chapters. Defaults to False.
        extra_input_options (list, optional): Extra input options. Defaults to None.
        extra_output_options (list, optional): Extra output options. Defaults to None.
        output_format (str, optional): Output format. Defaults to None.
        extension (str, optional): Output file extension to use for output files that were not explicitly specified. Defaults to "mkv".
        dry_run (bool, optional): Dry run. Defaults to False.
        debug (bool, optional): Debug. Defaults to False.
        progress (bool, optional): Progress. Defaults to False.
        replaygain (bool, optional): Write ReplayGain tags without normalizing. Defaults to False.

    Raises:
        FFmpegNormalizeError: If the normalization type is unknown, if "ebu" is
            requested but ffmpeg lacks the loudnorm filter, if both
            keep-loudness-range options are set, if a PCM codec is combined with
            an incompatible output format, or if ReplayGain is requested for a
            non-EBU normalization type. The helpers get_ffmpeg_exe() and
            check_range() presumably raise it as well (missing executable,
            out-of-range value) -- confirm against their definitions.
    """

    def __init__(
        self,
        normalization_type: Literal["ebu", "rms", "peak"] = "ebu",
        target_level: float = -23.0,
        print_stats: bool = False,
        loudness_range_target: float = 7.0,
        keep_loudness_range_target: bool = False,
        keep_lra_above_loudness_range_target: bool = False,
        true_peak: float = -2.0,
        offset: float = 0.0,
        lower_only: bool = False,
        auto_lower_loudness_target: bool = False,
        dual_mono: bool = False,
        dynamic: bool = False,
        audio_codec: str = "pcm_s16le",
        audio_bitrate: float | None = None,
        sample_rate: float | int | None = None,
        audio_channels: int | None = None,
        keep_original_audio: bool = False,
        pre_filter: str | None = None,
        post_filter: str | None = None,
        video_codec: str = "copy",
        video_disable: bool = False,
        subtitle_disable: bool = False,
        metadata_disable: bool = False,
        chapters_disable: bool = False,
        extra_input_options: list[str] | None = None,
        extra_output_options: list[str] | None = None,
        output_format: str | None = None,
        extension: str = "mkv",
        dry_run: bool = False,
        debug: bool = False,
        progress: bool = False,
        replaygain: bool = False,
    ):
        # Probe the ffmpeg installation first; both values are reused below.
        self.ffmpeg_exe = get_ffmpeg_exe()
        self.has_loudnorm_capabilities = ffmpeg_has_loudnorm()

        if normalization_type not in NORMALIZATION_TYPES:
            raise FFmpegNormalizeError(
                "Normalization type must be: 'ebu', 'rms', or 'peak'"
            )
        self.normalization_type = normalization_type

        if not self.has_loudnorm_capabilities and self.normalization_type == "ebu":
            raise FFmpegNormalizeError(
                "Your ffmpeg does not support the 'loudnorm' EBU R128 filter. "
                "Please install ffmpeg v4.2 or above, or choose another normalization type."
            )

        # EBU mode accepts a narrower target range than peak/RMS mode.
        if self.normalization_type == "ebu":
            self.target_level = check_range(target_level, -70, -5, name="target_level")
        else:
            self.target_level = check_range(target_level, -99, 0, name="target_level")

        self.print_stats = print_stats

        self.loudness_range_target = check_range(
            loudness_range_target, 1, 50, name="loudness_range_target"
        )

        self.keep_loudness_range_target = keep_loudness_range_target

        # 7.0 is the parameter default, so any other value was set by the caller.
        if self.keep_loudness_range_target and loudness_range_target != 7.0:
            _logger.warning(
                "Setting --keep-loudness-range-target will override your set loudness range target value! "
                "Remove --keep-loudness-range-target or remove the --lrt/--loudness-range-target option."
            )

        self.keep_lra_above_loudness_range_target = keep_lra_above_loudness_range_target

        if (
            self.keep_loudness_range_target
            and self.keep_lra_above_loudness_range_target
        ):
            raise FFmpegNormalizeError(
                "Options --keep-loudness-range-target and --keep-lra-above-loudness-range-target are mutually exclusive! "
                "Please choose just one of the two options."
            )

        self.true_peak = check_range(true_peak, -9, 0, name="true_peak")
        self.offset = check_range(offset, -99, 99, name="offset")
        self.lower_only = lower_only
        self.auto_lower_loudness_target = auto_lower_loudness_target

        # Ensure library user is passing correct types.
        # NOTE(review): assert statements are stripped under `python -O`; kept
        # as-is so the exception type seen by existing callers does not change.
        assert isinstance(dual_mono, bool), "dual_mono must be bool"
        assert isinstance(dynamic, bool), "dynamic must be bool"

        self.dual_mono = dual_mono
        self.dynamic = dynamic
        self.sample_rate = None if sample_rate is None else int(sample_rate)
        self.audio_channels = None if audio_channels is None else int(audio_channels)

        self.audio_codec = audio_codec
        self.audio_bitrate = audio_bitrate
        self.keep_original_audio = keep_original_audio
        self.video_codec = video_codec
        self.video_disable = video_disable
        self.subtitle_disable = subtitle_disable
        self.metadata_disable = metadata_disable
        self.chapters_disable = chapters_disable

        self.extra_input_options = extra_input_options
        self.extra_output_options = extra_output_options
        self.pre_filter = pre_filter
        self.post_filter = post_filter

        self.output_format = output_format
        self.extension = extension
        self.dry_run = dry_run
        self.debug = debug
        self.progress = progress
        self.replaygain = replaygain

        if (
            self.audio_codec is None or "pcm" in self.audio_codec
        ) and self.output_format in PCM_INCOMPATIBLE_FORMATS:
            raise FFmpegNormalizeError(
                f"Output format {self.output_format} does not support PCM audio. "
                "Please choose a suitable audio codec with the -c:a option."
            )

        # replaygain only works for EBU for now
        if self.replaygain and self.normalization_type != "ebu":
            raise FFmpegNormalizeError(
                "ReplayGain only works for EBU normalization type for now."
            )

        self.stats: list[LoudnessStatisticsWithMetadata] = []
        self.media_files: list[MediaFile] = []
        self.file_count = 0

    def add_media_file(self, input_file: str, output_file: str) -> None:
        """
        Add a media file to normalize

        Args:
            input_file (str): Path to input file
            output_file (str): Path to output file

        Raises:
            FFmpegNormalizeError: If the input file does not exist, or if a PCM
                audio codec is combined with an output extension listed in
                PCM_INCOMPATIBLE_EXTS.
        """
        if not os.path.exists(input_file):
            raise FFmpegNormalizeError(f"file {input_file} does not exist")

        ext = os.path.splitext(output_file)[1][1:]
        # Compare case-insensitively so that e.g. "OUT.MP3" is caught as well;
        # MediaFile lower-cases the output extension the same way before its
        # own format lookups.
        # NOTE(review): assumes PCM_INCOMPATIBLE_EXTS holds lower-case entries.
        if (
            self.audio_codec is None or "pcm" in self.audio_codec
        ) and ext.lower() in PCM_INCOMPATIBLE_EXTS:
            raise FFmpegNormalizeError(
                f"Output extension {ext} does not support PCM audio. "
                "Please choose a suitable audio codec with the -c:a option."
            )

        self.media_files.append(MediaFile(self, input_file, output_file))
        self.file_count += 1

    def run_normalization(self) -> None:
        """
        Run the normalization procedures

        Each added media file is normalized in turn. When more than one file
        was added, a failure is logged and the batch continues; with a single
        file the exception is re-raised. If print_stats is set, the statistics
        of all files are dumped to stdout as JSON afterwards.
        """
        for index, media_file in enumerate(
            tqdm(self.media_files, desc="File", disable=not self.progress, position=0)
        ):
            _logger.info(
                f"Normalizing file {media_file} ({index + 1} of {self.file_count})"
            )

            try:
                media_file.run_normalization()
            except Exception as e:
                if len(self.media_files) > 1:
                    # simply warn and do not die
                    _logger.error(
                        f"Error processing input file {media_file}, will "
                        f"continue batch-processing. Error was: {e}"
                    )
                else:
                    # re-raise unchanged so the program will exit
                    raise

        if self.print_stats:
            json.dump(
                list(
                    chain.from_iterable(
                        media_file.get_stats() for media_file in self.media_files
                    )
                ),
                sys.stdout,
                indent=4,
            )
            print()
ffmpeg-normalize class.
Arguments:
- normalization_type (str, optional): Normalization type. Defaults to "ebu".
- target_level (float, optional): Target level. Defaults to -23.0.
- print_stats (bool, optional): Print loudnorm stats. Defaults to False.
- loudness_range_target (float, optional): Loudness range target. Defaults to 7.0.
- keep_loudness_range_target (bool, optional): Keep loudness range target. Defaults to False.
- keep_lra_above_loudness_range_target (bool, optional): Keep input loudness range above loudness range target. Defaults to False.
- true_peak (float, optional): True peak. Defaults to -2.0.
- offset (float, optional): Offset. Defaults to 0.0.
- lower_only (bool, optional): Whether the audio should not increase in loudness. Defaults to False.
- auto_lower_loudness_target (bool, optional): Automatically lower EBU Integrated Loudness Target. Defaults to False.
- dual_mono (bool, optional): Dual mono. Defaults to False.
- dynamic (bool, optional): Use dynamic EBU R128 normalization. This is a one-pass algorithm and skips the initial media scan. Defaults to False.
- audio_codec (str, optional): Audio codec. Defaults to "pcm_s16le".
- audio_bitrate (float, optional): Audio bitrate. Defaults to None.
- sample_rate (float | int, optional): Sample rate; converted to int when given. Defaults to None.
- audio_channels (int | None, optional): Audio channels. Defaults to None.
- keep_original_audio (bool, optional): Keep original audio. Defaults to False.
- pre_filter (str, optional): Pre filter. Defaults to None.
- post_filter (str, optional): Post filter. Defaults to None.
- video_codec (str, optional): Video codec. Defaults to "copy".
- video_disable (bool, optional): Disable video. Defaults to False.
- subtitle_disable (bool, optional): Disable subtitles. Defaults to False.
- metadata_disable (bool, optional): Disable metadata. Defaults to False.
- chapters_disable (bool, optional): Disable chapters. Defaults to False.
- extra_input_options (list, optional): Extra input options. Defaults to None.
- extra_output_options (list, optional): Extra output options. Defaults to None.
- output_format (str, optional): Output format. Defaults to None.
- extension (str, optional): Output file extension to use for output files that were not explicitly specified. Defaults to "mkv".
- dry_run (bool, optional): Dry run. Defaults to False.
- debug (bool, optional): Debug. Defaults to False.
- progress (bool, optional): Progress. Defaults to False.
- replaygain (bool, optional): Write ReplayGain tags without normalizing. Defaults to False.
Raises:
- FFmpegNormalizeError: If the ffmpeg executable is not found or does not support the loudnorm filter.
def __init__(
    self,
    normalization_type: Literal["ebu", "rms", "peak"] = "ebu",
    target_level: float = -23.0,
    print_stats: bool = False,
    loudness_range_target: float = 7.0,
    keep_loudness_range_target: bool = False,
    keep_lra_above_loudness_range_target: bool = False,
    true_peak: float = -2.0,
    offset: float = 0.0,
    lower_only: bool = False,
    auto_lower_loudness_target: bool = False,
    dual_mono: bool = False,
    dynamic: bool = False,
    audio_codec: str = "pcm_s16le",
    audio_bitrate: float | None = None,
    sample_rate: float | int | None = None,
    audio_channels: int | None = None,
    keep_original_audio: bool = False,
    pre_filter: str | None = None,
    post_filter: str | None = None,
    video_codec: str = "copy",
    video_disable: bool = False,
    subtitle_disable: bool = False,
    metadata_disable: bool = False,
    chapters_disable: bool = False,
    extra_input_options: list[str] | None = None,
    extra_output_options: list[str] | None = None,
    output_format: str | None = None,
    extension: str = "mkv",
    dry_run: bool = False,
    debug: bool = False,
    progress: bool = False,
    replaygain: bool = False,
):
    """
    Validate the given options and store them on the instance.

    Raises:
        FFmpegNormalizeError: If the normalization type is unknown, if "ebu"
            is requested without loudnorm support, if both
            keep-loudness-range options are set, if a PCM codec is combined
            with an incompatible output format, or if ReplayGain is requested
            for a non-EBU normalization type.
    """
    # Probe the ffmpeg installation before looking at any option.
    self.ffmpeg_exe = get_ffmpeg_exe()
    self.has_loudnorm_capabilities = ffmpeg_has_loudnorm()

    if normalization_type not in NORMALIZATION_TYPES:
        raise FFmpegNormalizeError(
            "Normalization type must be: 'ebu', 'rms', or 'peak'"
        )
    self.normalization_type = normalization_type
    ebu_mode = self.normalization_type == "ebu"

    if ebu_mode and not self.has_loudnorm_capabilities:
        raise FFmpegNormalizeError(
            "Your ffmpeg does not support the 'loudnorm' EBU R128 filter. "
            "Please install ffmpeg v4.2 or above, or choose another normalization type."
        )

    # EBU mode accepts a narrower target range than peak/RMS mode.
    level_min, level_max = (-70, -5) if ebu_mode else (-99, 0)
    self.target_level = check_range(
        target_level, level_min, level_max, name="target_level"
    )

    self.print_stats = print_stats

    self.loudness_range_target = check_range(
        loudness_range_target, 1, 50, name="loudness_range_target"
    )

    self.keep_loudness_range_target = keep_loudness_range_target
    if self.keep_loudness_range_target:
        # 7.0 is the parameter default; anything else was set by the caller.
        if loudness_range_target != 7.0:
            _logger.warning(
                "Setting --keep-loudness-range-target will override your set loudness range target value! "
                "Remove --keep-loudness-range-target or remove the --lrt/--loudness-range-target option."
            )

    self.keep_lra_above_loudness_range_target = keep_lra_above_loudness_range_target
    both_keep_flags = (
        self.keep_loudness_range_target
        and self.keep_lra_above_loudness_range_target
    )
    if both_keep_flags:
        raise FFmpegNormalizeError(
            "Options --keep-loudness-range-target and --keep-lra-above-loudness-range-target are mutually exclusive! "
            "Please choose just one of the two options."
        )

    self.true_peak = check_range(true_peak, -9, 0, name="true_peak")
    self.offset = check_range(offset, -99, 99, name="offset")
    self.lower_only = lower_only
    self.auto_lower_loudness_target = auto_lower_loudness_target

    # Library users must pass real booleans for these two switches.
    for flag_name, flag_value in (("dual_mono", dual_mono), ("dynamic", dynamic)):
        assert isinstance(flag_value, bool), f"{flag_name} must be bool"

    self.dual_mono = dual_mono
    self.dynamic = dynamic
    self.sample_rate = int(sample_rate) if sample_rate is not None else None
    self.audio_channels = int(audio_channels) if audio_channels is not None else None

    # Audio / video / container handling options.
    self.audio_codec = audio_codec
    self.audio_bitrate = audio_bitrate
    self.keep_original_audio = keep_original_audio
    self.video_codec = video_codec
    self.video_disable = video_disable
    self.subtitle_disable = subtitle_disable
    self.metadata_disable = metadata_disable
    self.chapters_disable = chapters_disable

    # Pass-through ffmpeg options and filters.
    self.extra_input_options = extra_input_options
    self.extra_output_options = extra_output_options
    self.pre_filter = pre_filter
    self.post_filter = post_filter

    # Output and runtime behaviour.
    self.output_format = output_format
    self.extension = extension
    self.dry_run = dry_run
    self.debug = debug
    self.progress = progress
    self.replaygain = replaygain

    wants_pcm = self.audio_codec is None or "pcm" in self.audio_codec
    if wants_pcm and self.output_format in PCM_INCOMPATIBLE_FORMATS:
        raise FFmpegNormalizeError(
            f"Output format {self.output_format} does not support PCM audio. "
            "Please choose a suitable audio codec with the -c:a option."
        )

    # ReplayGain is only implemented for EBU normalization for now.
    if self.replaygain and not ebu_mode:
        raise FFmpegNormalizeError(
            "ReplayGain only works for EBU normalization type for now."
        )

    self.stats: list[LoudnessStatisticsWithMetadata] = []
    self.media_files: list[MediaFile] = []
    self.file_count = 0
def add_media_file(self, input_file: str, output_file: str) -> None:
    """
    Add a media file to normalize

    Args:
        input_file (str): Path to input file
        output_file (str): Path to output file

    Raises:
        FFmpegNormalizeError: If the input file does not exist, or if a PCM
            audio codec is combined with an output extension listed in
            PCM_INCOMPATIBLE_EXTS.
    """
    if not os.path.exists(input_file):
        raise FFmpegNormalizeError(f"file {input_file} does not exist")

    ext = os.path.splitext(output_file)[1][1:]
    # Compare case-insensitively so that e.g. "OUT.MP3" is caught as well;
    # MediaFile lower-cases the output extension the same way before its own
    # format lookups.
    # NOTE(review): assumes PCM_INCOMPATIBLE_EXTS holds lower-case entries.
    if (
        self.audio_codec is None or "pcm" in self.audio_codec
    ) and ext.lower() in PCM_INCOMPATIBLE_EXTS:
        raise FFmpegNormalizeError(
            f"Output extension {ext} does not support PCM audio. "
            "Please choose a suitable audio codec with the -c:a option."
        )

    self.media_files.append(MediaFile(self, input_file, output_file))
    self.file_count += 1
Add a media file to normalize
Arguments:
- input_file (str): Path to input file
- output_file (str): Path to output file
def run_normalization(self) -> None:
    """
    Run the normalization procedures

    Each added media file is normalized in turn. When more than one file was
    added, a failure is logged and the batch continues; with a single file
    the exception is re-raised. If print_stats is set, the statistics of all
    files are dumped to stdout as JSON afterwards.
    """
    for index, media_file in enumerate(
        tqdm(self.media_files, desc="File", disable=not self.progress, position=0)
    ):
        _logger.info(
            f"Normalizing file {media_file} ({index + 1} of {self.file_count})"
        )

        try:
            media_file.run_normalization()
        except Exception as e:
            if len(self.media_files) > 1:
                # simply warn and do not die
                _logger.error(
                    f"Error processing input file {media_file}, will "
                    f"continue batch-processing. Error was: {e}"
                )
            else:
                # re-raise unchanged so the program will exit
                raise

    if self.print_stats:
        json.dump(
            list(
                chain.from_iterable(
                    media_file.get_stats() for media_file in self.media_files
                )
            ),
            sys.stdout,
            indent=4,
        )
        print()
Run the normalization procedures
Common base class for all non-exit exceptions.
56class MediaFile: 57 """ 58 Class that holds a file, its streams and adjustments 59 """ 60 61 def __init__( 62 self, ffmpeg_normalize: FFmpegNormalize, input_file: str, output_file: str 63 ): 64 """ 65 Initialize a media file for later normalization by parsing the streams. 66 67 Args: 68 ffmpeg_normalize (FFmpegNormalize): reference to overall settings 69 input_file (str): Path to input file 70 output_file (str): Path to output file 71 """ 72 self.ffmpeg_normalize = ffmpeg_normalize 73 self.skip = False 74 self.input_file = input_file 75 self.output_file = output_file 76 current_ext = os.path.splitext(output_file)[1][1:] 77 # we need to check if it's empty, e.g. /dev/null or NUL 78 if current_ext == "" or self.output_file == os.devnull: 79 _logger.debug( 80 f"Current extension is unset, or output file is a null device, using extension: {self.ffmpeg_normalize.extension}" 81 ) 82 self.output_ext = self.ffmpeg_normalize.extension 83 else: 84 _logger.debug( 85 f"Current extension is set from output file, using extension: {current_ext}" 86 ) 87 self.output_ext = current_ext 88 self.streams: StreamDict = {"audio": {}, "video": {}, "subtitle": {}} 89 self.temp_file: Union[str, None] = None 90 91 self.parse_streams() 92 93 def _stream_ids(self) -> list[int]: 94 """ 95 Get all stream IDs of this file. 96 97 Returns: 98 list: List of stream IDs 99 """ 100 return ( 101 list(self.streams["audio"].keys()) 102 + list(self.streams["video"].keys()) 103 + list(self.streams["subtitle"].keys()) 104 ) 105 106 def __repr__(self) -> str: 107 return os.path.basename(self.input_file) 108 109 def parse_streams(self) -> None: 110 """ 111 Try to parse all input streams from file and set them in self.streams. 
112 113 Raises: 114 FFmpegNormalizeError: If no audio streams are found 115 """ 116 _logger.debug(f"Parsing streams of {self.input_file}") 117 118 cmd = [ 119 self.ffmpeg_normalize.ffmpeg_exe, 120 "-i", 121 self.input_file, 122 "-c", 123 "copy", 124 "-t", 125 "0", 126 "-map", 127 "0", 128 "-f", 129 "null", 130 os.devnull, 131 ] 132 133 output = CommandRunner().run_command(cmd).get_output() 134 135 _logger.debug("Stream parsing command output:") 136 _logger.debug(output) 137 138 output_lines = [line.strip() for line in output.split("\n")] 139 140 duration = None 141 for line in output_lines: 142 if "Duration" in line: 143 if duration_search := DUR_REGEX.search(line): 144 duration = _to_ms(**duration_search.groupdict()) / 1000 145 _logger.debug(f"Found duration: {duration} s") 146 else: 147 _logger.warning("Could not extract duration from input file!") 148 149 if not line.startswith("Stream"): 150 continue 151 152 if stream_id_match := re.search(r"#0:([\d]+)", line): 153 stream_id = int(stream_id_match.group(1)) 154 if stream_id in self._stream_ids(): 155 continue 156 else: 157 continue 158 159 if "Audio" in line: 160 _logger.debug(f"Found audio stream at index {stream_id}") 161 sample_rate_match = re.search(r"(\d+) Hz", line) 162 sample_rate = ( 163 int(sample_rate_match.group(1)) if sample_rate_match else None 164 ) 165 bit_depth_match = re.search(r"[sfu](\d+)(p|le|be)?", line) 166 bit_depth = int(bit_depth_match.group(1)) if bit_depth_match else None 167 self.streams["audio"][stream_id] = AudioStream( 168 self.ffmpeg_normalize, 169 self, 170 stream_id, 171 sample_rate, 172 bit_depth, 173 duration, 174 ) 175 176 elif "Video" in line: 177 _logger.debug(f"Found video stream at index {stream_id}") 178 self.streams["video"][stream_id] = VideoStream( 179 self.ffmpeg_normalize, self, stream_id 180 ) 181 182 elif "Subtitle" in line: 183 _logger.debug(f"Found subtitle stream at index {stream_id}") 184 self.streams["subtitle"][stream_id] = SubtitleStream( 185 
self.ffmpeg_normalize, self, stream_id 186 ) 187 188 if not self.streams["audio"]: 189 raise FFmpegNormalizeError( 190 f"Input file {self.input_file} does not contain any audio streams" 191 ) 192 193 if ( 194 self.output_ext.lower() in ONE_STREAM 195 and len(self.streams["audio"].values()) > 1 196 ): 197 _logger.warning( 198 "Output file only supports one stream. Keeping only first audio stream." 199 ) 200 first_stream = list(self.streams["audio"].values())[0] 201 self.streams["audio"] = {first_stream.stream_id: first_stream} 202 self.streams["video"] = {} 203 self.streams["subtitle"] = {} 204 205 def run_normalization(self) -> None: 206 """ 207 Run the normalization process for this file. 208 """ 209 _logger.debug(f"Running normalization for {self.input_file}") 210 211 # run the first pass to get loudness stats, unless in dynamic EBU mode 212 if not ( 213 self.ffmpeg_normalize.dynamic 214 and self.ffmpeg_normalize.normalization_type == "ebu" 215 ): 216 self._first_pass() 217 else: 218 _logger.debug( 219 "Dynamic EBU mode: First pass will not run, as it is not needed." 220 ) 221 222 # for second pass, create a temp file 223 temp_dir = mkdtemp() 224 self.temp_file = os.path.join(temp_dir, f"out.{self.output_ext}") 225 226 if self.ffmpeg_normalize.replaygain: 227 _logger.debug( 228 "ReplayGain mode: Second pass will run with temporary file to get stats." 229 ) 230 self.output_file = self.temp_file 231 232 # run the second pass as a whole. 233 if self.ffmpeg_normalize.progress: 234 with tqdm( 235 total=100, 236 position=1, 237 desc="Second Pass", 238 bar_format=TQDM_BAR_FORMAT, 239 ) as pbar: 240 for progress in self._second_pass(): 241 pbar.update(progress - pbar.n) 242 else: 243 for _ in self._second_pass(): 244 pass 245 246 # remove temp dir; this will remove the temp file as well if it has not been renamed (e.g. 
for replaygain) 247 if os.path.exists(temp_dir): 248 rmtree(temp_dir, ignore_errors=True) 249 250 # This will use stats from ebu_pass2 if available (from the main second pass), 251 # or fall back to ebu_pass1. 252 if self.ffmpeg_normalize.replaygain: 253 _logger.debug( 254 "ReplayGain tagging is enabled. Proceeding with tag calculation/application." 255 ) 256 self._run_replaygain() 257 258 if not self.ffmpeg_normalize.replaygain: 259 _logger.info(f"Normalized file written to {self.output_file}") 260 261 def _run_replaygain(self) -> None: 262 """ 263 Run the replaygain process for this file. 264 """ 265 _logger.debug(f"Running replaygain for {self.input_file}") 266 267 # get the audio streams 268 audio_streams = list(self.streams["audio"].values()) 269 270 # Attempt to use EBU pass 2 statistics, which account for pre-filters. 271 # These are populated by the main second pass if it runs (not a dry run) 272 # and normalization_type is 'ebu'. 273 loudness_stats_source = "ebu_pass2" 274 loudnorm_stats = audio_streams[0].loudness_statistics.get("ebu_pass2") 275 276 if loudnorm_stats is None: 277 _logger.warning( 278 "ReplayGain: Second pass EBU statistics (ebu_pass2) not found. " 279 "Falling back to first pass EBU statistics (ebu_pass1). " 280 "This may not account for pre-filters if any are used." 281 ) 282 loudness_stats_source = "ebu_pass1" 283 loudnorm_stats = audio_streams[0].loudness_statistics.get("ebu_pass1") 284 285 if loudnorm_stats is None: 286 _logger.error( 287 f"ReplayGain: No loudness statistics available from {loudness_stats_source} (and fallback) for stream 0. " 288 "Cannot calculate ReplayGain tags." 289 ) 290 return 291 292 _logger.debug( 293 f"Using statistics from {loudness_stats_source} for ReplayGain calculation." 294 ) 295 296 # apply the replaygain tag from the first audio stream (to all audio streams) 297 if len(audio_streams) > 1: 298 _logger.warning( 299 f"Your input file has {len(audio_streams)} audio streams. 
" 300 "Only the first audio stream's replaygain tag will be applied. " 301 "All audio streams will receive the same tag." 302 ) 303 304 target_level = self.ffmpeg_normalize.target_level 305 # Use 'input_i' and 'input_tp' from the chosen stats. 306 # For ebu_pass2, these are measurements *after* pre-filter but *before* loudnorm adjustment. 307 input_i = loudnorm_stats.get("input_i") 308 input_tp = loudnorm_stats.get("input_tp") 309 310 if input_i is None or input_tp is None: 311 _logger.error( 312 f"ReplayGain: 'input_i' or 'input_tp' missing from {loudness_stats_source} statistics. " 313 "Cannot calculate ReplayGain tags." 314 ) 315 return 316 317 track_gain = -(input_i - target_level) # dB 318 track_peak = 10 ** (input_tp / 20) # linear scale 319 320 _logger.debug(f"Calculated Track gain: {track_gain:.2f} dB") 321 _logger.debug(f"Calculated Track peak: {track_peak:.2f}") 322 323 if not self.ffmpeg_normalize.dry_run: # This uses the overall dry_run state 324 self._write_replaygain_tags(track_gain, track_peak) 325 else: 326 _logger.warning( 327 "Overall dry_run is enabled, not actually writing ReplayGain tags to the file. " 328 "Tag calculation based on available stats was performed." 329 ) 330 331 def _write_replaygain_tags(self, track_gain: float, track_peak: float) -> None: 332 """ 333 Write the replaygain tags to the input file. 334 335 This is based on the code from bohning/usdb_syncer, licensed under the MIT license. 
336 See: https://github.com/bohning/usdb_syncer/blob/2fa638c4f487dffe9f5364f91e156ba54cb20233/src/usdb_syncer/resource_dl.py 337 """ 338 _logger.debug(f"Writing ReplayGain tags to {self.input_file}") 339 340 input_file_ext = os.path.splitext(self.input_file)[1] 341 if input_file_ext == ".mp3": 342 mp3 = MP3(self.input_file, ID3=ID3) 343 if not mp3.tags: 344 return 345 mp3.tags.add( 346 TXXX(desc="REPLAYGAIN_TRACK_GAIN", text=[f"{track_gain:.2f} dB"]) 347 ) 348 mp3.tags.add(TXXX(desc="REPLAYGAIN_TRACK_PEAK", text=[f"{track_peak:.6f}"])) 349 mp3.save() 350 elif input_file_ext in [".mp4", ".m4a", ".m4v", ".mov"]: 351 mp4 = MP4(self.input_file) 352 if not mp4.tags: 353 mp4.add_tags() 354 if not mp4.tags: 355 return 356 mp4.tags["----:com.apple.iTunes:REPLAYGAIN_TRACK_GAIN"] = [ 357 f"{track_gain:.2f} dB".encode() 358 ] 359 mp4.tags["----:com.apple.iTunes:REPLAYGAIN_TRACK_PEAK"] = [ 360 f"{track_peak:.6f}".encode() 361 ] 362 mp4.save() 363 elif input_file_ext == ".ogg": 364 ogg = OggVorbis(self.input_file) 365 ogg["REPLAYGAIN_TRACK_GAIN"] = [f"{track_gain:.2f} dB"] 366 ogg["REPLAYGAIN_TRACK_PEAK"] = [f"{track_peak:.6f}"] 367 ogg.save() 368 elif input_file_ext == ".opus": 369 opus = OggOpus(self.input_file) 370 # See https://datatracker.ietf.org/doc/html/rfc7845#section-5.2.1 371 opus["R128_TRACK_GAIN"] = [str(round(256 * track_gain))] 372 opus.save() 373 else: 374 _logger.error( 375 f"Unsupported input file extension: {input_file_ext} for writing replaygain tags. " 376 "Only .mp3, .mp4/.m4a, .ogg, .opus are supported. " 377 "If you think this should support more formats, please let me know at " 378 "https://github.com/slhck/ffmpeg-normalize/issues" 379 ) 380 return 381 382 _logger.info( 383 f"Successfully wrote replaygain tags to input file {self.input_file}" 384 ) 385 386 def _can_write_output_video(self) -> bool: 387 """ 388 Determine whether the output file can contain video at all. 
389 390 Returns: 391 bool: True if the output file can contain video, False otherwise 392 """ 393 if self.output_ext.lower() in AUDIO_ONLY_FORMATS: 394 return False 395 396 return not self.ffmpeg_normalize.video_disable 397 398 def _first_pass(self) -> None: 399 """ 400 Run the first pass of the normalization process. 401 """ 402 _logger.debug(f"Parsing normalization info for {self.input_file}") 403 404 for index, audio_stream in enumerate(self.streams["audio"].values()): 405 if self.ffmpeg_normalize.normalization_type == "ebu": 406 fun = getattr(audio_stream, "parse_loudnorm_stats") 407 else: 408 fun = getattr(audio_stream, "parse_astats") 409 410 if self.ffmpeg_normalize.progress: 411 with tqdm( 412 total=100, 413 position=1, 414 desc=f"Stream {index + 1}/{len(self.streams['audio'].values())}", 415 bar_format=TQDM_BAR_FORMAT, 416 ) as pbar: 417 for progress in fun(): 418 pbar.update(progress - pbar.n) 419 else: 420 for _ in fun(): 421 pass 422 423 def _get_audio_filter_cmd(self) -> tuple[str, list[str]]: 424 """ 425 Return the audio filter command and output labels needed. 
426 427 Returns: 428 tuple[str, list[str]]: filter_complex command and the required output labels 429 """ 430 filter_chains = [] 431 output_labels = [] 432 433 for audio_stream in self.streams["audio"].values(): 434 skip_normalization = False 435 if self.ffmpeg_normalize.lower_only: 436 if self.ffmpeg_normalize.normalization_type == "ebu": 437 if ( 438 audio_stream.loudness_statistics["ebu_pass1"] is not None 439 and audio_stream.loudness_statistics["ebu_pass1"]["input_i"] 440 < self.ffmpeg_normalize.target_level 441 ): 442 skip_normalization = True 443 elif self.ffmpeg_normalize.normalization_type == "peak": 444 if ( 445 audio_stream.loudness_statistics["max"] is not None 446 and audio_stream.loudness_statistics["max"] 447 < self.ffmpeg_normalize.target_level 448 ): 449 skip_normalization = True 450 elif self.ffmpeg_normalize.normalization_type == "rms": 451 if ( 452 audio_stream.loudness_statistics["mean"] is not None 453 and audio_stream.loudness_statistics["mean"] 454 < self.ffmpeg_normalize.target_level 455 ): 456 skip_normalization = True 457 458 if skip_normalization: 459 _logger.warning( 460 f"Stream {audio_stream.stream_id} had measured input loudness lower than target, skipping normalization." 
461 ) 462 normalization_filter = "acopy" 463 else: 464 if self.ffmpeg_normalize.normalization_type == "ebu": 465 normalization_filter = audio_stream.get_second_pass_opts_ebu() 466 else: 467 normalization_filter = audio_stream.get_second_pass_opts_peakrms() 468 469 input_label = f"[0:{audio_stream.stream_id}]" 470 output_label = f"[norm{audio_stream.stream_id}]" 471 output_labels.append(output_label) 472 473 filter_chain = [] 474 475 if self.ffmpeg_normalize.pre_filter: 476 filter_chain.append(self.ffmpeg_normalize.pre_filter) 477 478 filter_chain.append(normalization_filter) 479 480 if self.ffmpeg_normalize.post_filter: 481 filter_chain.append(self.ffmpeg_normalize.post_filter) 482 483 filter_chains.append(input_label + ",".join(filter_chain) + output_label) 484 485 filter_complex_cmd = ";".join(filter_chains) 486 487 return filter_complex_cmd, output_labels 488 489 def _second_pass(self) -> Iterator[float]: 490 """ 491 Construct the second pass command and run it. 492 493 FIXME: make this method simpler 494 """ 495 _logger.info(f"Running second pass for {self.input_file}") 496 497 # get the target output stream types depending on the options 498 output_stream_types: list[Literal["audio", "video", "subtitle"]] = ["audio"] 499 if self._can_write_output_video(): 500 output_stream_types.append("video") 501 if not self.ffmpeg_normalize.subtitle_disable: 502 output_stream_types.append("subtitle") 503 504 # base command, here we will add all other options 505 cmd = [self.ffmpeg_normalize.ffmpeg_exe, "-hide_banner", "-y"] 506 507 # extra options (if any) 508 if self.ffmpeg_normalize.extra_input_options: 509 cmd.extend(self.ffmpeg_normalize.extra_input_options) 510 511 # get complex filter command 512 audio_filter_cmd, output_labels = self._get_audio_filter_cmd() 513 514 # add input file and basic filter 515 cmd.extend(["-i", self.input_file, "-filter_complex", audio_filter_cmd]) 516 517 # map metadata, only if needed 518 if self.ffmpeg_normalize.metadata_disable: 519 
cmd.extend(["-map_metadata", "-1"]) 520 else: 521 # map global metadata 522 cmd.extend(["-map_metadata", "0"]) 523 # map per-stream metadata (e.g. language tags) 524 for stream_type in output_stream_types: 525 stream_key = stream_type[0] 526 if stream_type not in self.streams: 527 continue 528 for idx, _ in enumerate(self.streams[stream_type].items()): 529 cmd.extend( 530 [ 531 f"-map_metadata:s:{stream_key}:{idx}", 532 f"0:s:{stream_key}:{idx}", 533 ] 534 ) 535 536 # map chapters if needed 537 if self.ffmpeg_normalize.chapters_disable: 538 cmd.extend(["-map_chapters", "-1"]) 539 else: 540 cmd.extend(["-map_chapters", "0"]) 541 542 # collect all '-map' and codecs needed for output video based on input video 543 if self.streams["video"]: 544 if self._can_write_output_video(): 545 for s in self.streams["video"].keys(): 546 cmd.extend(["-map", f"0:{s}"]) 547 # set codec (copy by default) 548 cmd.extend(["-c:v", self.ffmpeg_normalize.video_codec]) 549 else: 550 if not self.ffmpeg_normalize.video_disable: 551 _logger.warning( 552 f"The chosen output extension {self.output_ext} does not support video/cover art. It will be disabled." 553 ) 554 555 # ... 
and map the output of the normalization filters 556 for ol in output_labels: 557 cmd.extend(["-map", ol]) 558 559 # set audio codec (never copy) 560 if self.ffmpeg_normalize.audio_codec: 561 cmd.extend(["-c:a", self.ffmpeg_normalize.audio_codec]) 562 else: 563 for index, (_, audio_stream) in enumerate(self.streams["audio"].items()): 564 cmd.extend([f"-c:a:{index}", audio_stream.get_pcm_codec()]) 565 566 # other audio options (if any) 567 if self.ffmpeg_normalize.audio_bitrate: 568 if self.ffmpeg_normalize.audio_codec == "libvorbis": 569 # libvorbis takes just a "-b" option, for some reason 570 # https://github.com/slhck/ffmpeg-normalize/issues/277 571 cmd.extend(["-b", str(self.ffmpeg_normalize.audio_bitrate)]) 572 else: 573 cmd.extend(["-b:a", str(self.ffmpeg_normalize.audio_bitrate)]) 574 if self.ffmpeg_normalize.sample_rate: 575 cmd.extend(["-ar", str(self.ffmpeg_normalize.sample_rate)]) 576 if self.ffmpeg_normalize.audio_channels: 577 cmd.extend(["-ac", str(self.ffmpeg_normalize.audio_channels)]) 578 579 # ... 
and subtitles 580 if not self.ffmpeg_normalize.subtitle_disable: 581 for s in self.streams["subtitle"].keys(): 582 cmd.extend(["-map", f"0:{s}"]) 583 # copy subtitles 584 cmd.extend(["-c:s", "copy"]) 585 586 if self.ffmpeg_normalize.keep_original_audio: 587 highest_index = len(self.streams["audio"]) 588 for index, _ in enumerate(self.streams["audio"].items()): 589 cmd.extend(["-map", f"0:a:{index}"]) 590 cmd.extend([f"-c:a:{highest_index + index}", "copy"]) 591 592 # extra options (if any) 593 if self.ffmpeg_normalize.extra_output_options: 594 cmd.extend(self.ffmpeg_normalize.extra_output_options) 595 596 # output format (if any) 597 if self.ffmpeg_normalize.output_format: 598 cmd.extend(["-f", self.ffmpeg_normalize.output_format]) 599 600 # if dry run, only show sample command 601 if self.ffmpeg_normalize.dry_run: 602 cmd.append(self.output_file) 603 _logger.warning("Dry run used, not actually running second-pass command") 604 CommandRunner(dry=True).run_command(cmd) 605 yield 100 606 return 607 608 # track temp_dir for cleanup 609 temp_dir = None 610 temp_file = None 611 612 # special case: if output is a null device, write directly to it 613 if self.output_file == os.devnull: 614 cmd.append(self.output_file) 615 else: 616 temp_dir = mkdtemp() 617 temp_file = os.path.join(temp_dir, f"out.{self.output_ext}") 618 cmd.append(temp_file) 619 620 cmd_runner = CommandRunner() 621 try: 622 yield from cmd_runner.run_ffmpeg_command(cmd) 623 except Exception as e: 624 _logger.error(f"Error while running command {shlex.join(cmd)}! Error: {e}") 625 raise e 626 else: 627 # only move the temp file if it's not a null device and ReplayGain is not enabled! 
628 if self.output_file != os.devnull and temp_file and not self.ffmpeg_normalize.replaygain: 629 _logger.debug( 630 f"Moving temporary file from {temp_file} to {self.output_file}" 631 ) 632 move(temp_file, self.output_file) 633 finally: 634 # clean up temp directory if it was created 635 if temp_dir and os.path.exists(temp_dir): 636 rmtree(temp_dir, ignore_errors=True) 637 638 output = cmd_runner.get_output() 639 # in the second pass, we do not normalize stream-by-stream, so we set the stats based on the 640 # overall output (which includes multiple loudnorm stats) 641 if self.ffmpeg_normalize.normalization_type == "ebu": 642 ebu_pass_2_stats = list( 643 AudioStream.prune_and_parse_loudnorm_output(output).values() 644 ) 645 for idx, audio_stream in enumerate(self.streams["audio"].values()): 646 audio_stream.set_second_pass_stats(ebu_pass_2_stats[idx]) 647 648 # warn if self.media_file.ffmpeg_normalize.dynamic == False and any of the second pass stats contain "normalization_type" == "dynamic" 649 if self.ffmpeg_normalize.dynamic is False: 650 for audio_stream in self.streams["audio"].values(): 651 pass2_stats = audio_stream.get_stats()["ebu_pass2"] 652 if pass2_stats is None: 653 continue 654 if pass2_stats["normalization_type"] == "dynamic": 655 _logger.warning( 656 "You specified linear normalization, but the loudnorm filter reverted to dynamic normalization. " 657 "This may lead to unexpected results." 658 "Consider your input settings, e.g. choose a lower target level or higher target loudness range." 659 ) 660 661 _logger.debug("Normalization finished") 662 663 def get_stats(self) -> Iterable[LoudnessStatisticsWithMetadata]: 664 return ( 665 audio_stream.get_stats() for audio_stream in self.streams["audio"].values() 666 )
Class that holds a file, its streams and adjustments
def __init__(
    self, ffmpeg_normalize: FFmpegNormalize, input_file: str, output_file: str
):
    """
    Set up a media file for later normalization and parse its streams.

    The output extension is taken from ``output_file``. If that path has no
    extension or is the null device, the extension configured on
    ``ffmpeg_normalize`` is used instead.

    Args:
        ffmpeg_normalize (FFmpegNormalize): reference to overall settings
        input_file (str): Path to input file
        output_file (str): Path to output file
    """
    self.ffmpeg_normalize = ffmpeg_normalize
    self.skip = False
    self.input_file = input_file
    self.output_file = output_file

    # extension without the leading dot; empty for e.g. /dev/null or NUL
    ext_from_path = os.path.splitext(output_file)[1][1:]
    if ext_from_path and self.output_file != os.devnull:
        _logger.debug(
            f"Current extension is set from output file, using extension: {ext_from_path}"
        )
        self.output_ext = ext_from_path
    else:
        _logger.debug(
            f"Current extension is unset, or output file is a null device, using extension: {self.ffmpeg_normalize.extension}"
        )
        self.output_ext = self.ffmpeg_normalize.extension

    self.streams: StreamDict = {"audio": {}, "video": {}, "subtitle": {}}
    self.temp_file: Union[str, None] = None

    # fills self.streams
    self.parse_streams()
Initialize a media file for later normalization by parsing the streams.
Arguments:
- ffmpeg_normalize (FFmpegNormalize): reference to overall settings
- input_file (str): Path to input file
- output_file (str): Path to output file
def parse_streams(self) -> None:
    """
    Probe the input file with ffmpeg and register its streams in self.streams.

    The ffmpeg output is scanned line by line for the duration and for
    ``Stream`` lines, which are classified as audio, video or subtitle. If the
    output extension only supports a single stream, only the first audio
    stream is kept and all video and subtitle streams are dropped.

    Raises:
        FFmpegNormalizeError: If no audio streams are found
    """
    _logger.debug(f"Parsing streams of {self.input_file}")

    probe_cmd = [
        self.ffmpeg_normalize.ffmpeg_exe,
        "-i",
        self.input_file,
        "-c",
        "copy",
        "-t",
        "0",
        "-map",
        "0",
        "-f",
        "null",
        os.devnull,
    ]

    output = CommandRunner().run_command(probe_cmd).get_output()

    _logger.debug("Stream parsing command output:")
    _logger.debug(output)

    duration = None
    for raw_line in output.split("\n"):
        line = raw_line.strip()

        if "Duration" in line:
            duration_search = DUR_REGEX.search(line)
            if duration_search:
                duration = _to_ms(**duration_search.groupdict()) / 1000
                _logger.debug(f"Found duration: {duration} s")
            else:
                _logger.warning("Could not extract duration from input file!")

        if not line.startswith("Stream"):
            continue

        stream_id_match = re.search(r"#0:([\d]+)", line)
        if stream_id_match is None:
            continue
        stream_id = int(stream_id_match.group(1))
        # ignore stream indices that were already registered
        if stream_id in self._stream_ids():
            continue

        if "Audio" in line:
            _logger.debug(f"Found audio stream at index {stream_id}")
            sample_rate_match = re.search(r"(\d+) Hz", line)
            sample_rate = None
            if sample_rate_match:
                sample_rate = int(sample_rate_match.group(1))
            bit_depth_match = re.search(r"[sfu](\d+)(p|le|be)?", line)
            bit_depth = None
            if bit_depth_match:
                bit_depth = int(bit_depth_match.group(1))
            self.streams["audio"][stream_id] = AudioStream(
                self.ffmpeg_normalize,
                self,
                stream_id,
                sample_rate,
                bit_depth,
                duration,
            )
        elif "Video" in line:
            _logger.debug(f"Found video stream at index {stream_id}")
            self.streams["video"][stream_id] = VideoStream(
                self.ffmpeg_normalize, self, stream_id
            )
        elif "Subtitle" in line:
            _logger.debug(f"Found subtitle stream at index {stream_id}")
            self.streams["subtitle"][stream_id] = SubtitleStream(
                self.ffmpeg_normalize, self, stream_id
            )

    audio_streams = self.streams["audio"]
    if not audio_streams:
        raise FFmpegNormalizeError(
            f"Input file {self.input_file} does not contain any audio streams"
        )

    if self.output_ext.lower() in ONE_STREAM and len(audio_streams) > 1:
        _logger.warning(
            "Output file only supports one stream. Keeping only first audio stream."
        )
        first_stream = next(iter(audio_streams.values()))
        self.streams["audio"] = {first_stream.stream_id: first_stream}
        self.streams["video"] = {}
        self.streams["subtitle"] = {}
Try to parse all input streams from file and set them in self.streams.
Raises:
- FFmpegNormalizeError: If no audio streams are found
def run_normalization(self) -> None:
    """
    Run the normalization process for this file.

    Runs the first (measurement) pass unless dynamic EBU mode is used, then
    the second pass. In ReplayGain mode the second pass writes to a temporary
    file only, and the ReplayGain step is run afterwards.
    """
    _logger.debug(f"Running normalization for {self.input_file}")

    # run the first pass to get loudness stats, unless in dynamic EBU mode
    if not (
        self.ffmpeg_normalize.dynamic
        and self.ffmpeg_normalize.normalization_type == "ebu"
    ):
        self._first_pass()
    else:
        _logger.debug(
            "Dynamic EBU mode: First pass will not run, as it is not needed."
        )

    # for second pass, create a temp file
    temp_dir = mkdtemp()
    try:
        self.temp_file = os.path.join(temp_dir, f"out.{self.output_ext}")

        if self.ffmpeg_normalize.replaygain:
            _logger.debug(
                "ReplayGain mode: Second pass will run with temporary file to get stats."
            )
            self.output_file = self.temp_file

        # run the second pass as a whole.
        if self.ffmpeg_normalize.progress:
            with tqdm(
                total=100,
                position=1,
                desc="Second Pass",
                bar_format=TQDM_BAR_FORMAT,
            ) as pbar:
                for progress in self._second_pass():
                    pbar.update(progress - pbar.n)
        else:
            for _ in self._second_pass():
                pass
    finally:
        # Remove the temp dir even if the second pass raised, so failed runs do
        # not leave directories behind. This also removes the temp file if it
        # has not been renamed (e.g. for replaygain).
        if os.path.exists(temp_dir):
            rmtree(temp_dir, ignore_errors=True)

    if self.ffmpeg_normalize.replaygain:
        # This will use stats from ebu_pass2 if available (from the main second pass),
        # or fall back to ebu_pass1.
        _logger.debug(
            "ReplayGain tagging is enabled. Proceeding with tag calculation/application."
        )
        self._run_replaygain()
    else:
        _logger.info(f"Normalized file written to {self.output_file}")
Run the normalization process for this file.
class AudioStream(MediaStream):
    """
    An audio stream of a media file, together with its measured loudness statistics.
    """

    def __init__(
        self,
        ffmpeg_normalize: FFmpegNormalize,
        media_file: MediaFile,
        stream_id: int,
        sample_rate: int | None,
        bit_depth: int | None,
        duration: float | None,
    ):
        """
        Create an AudioStream object.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_id (int): The stream ID.
            sample_rate (int): sample rate in Hz
            bit_depth (int): bit depth in bits
            duration (float): duration in seconds
        """
        super().__init__(ffmpeg_normalize, media_file, "audio", stream_id)

        # filled in by the first pass (ebu_pass1 or mean/max) and the second pass (ebu_pass2)
        self.loudness_statistics: LoudnessStatistics = {
            "ebu_pass1": None,
            "ebu_pass2": None,
            "mean": None,
            "max": None,
        }

        self.sample_rate = sample_rate
        self.bit_depth = bit_depth

        self.duration = duration

    @staticmethod
    def _constrain(
        number: float, min_range: float, max_range: float, name: str | None = None
    ) -> float:
        """
        Constrain a number between two values.

        Args:
            number (float): The number to constrain.
            min_range (float): The minimum value.
            max_range (float): The maximum value.
            name (str): The name of the number (for logging). If given, a warning
                is logged whenever the number had to be changed.

        Returns:
            float: The constrained number.

        Raises:
            ValueError: If min_range is greater than max_range.
        """
        if min_range > max_range:
            raise ValueError("min must be smaller than max")
        result = max(min(number, max_range), min_range)
        if result != number and name is not None:
            _logger.warning(
                f"Constraining {name} to range of [{min_range}, {max_range}]: {number} -> {result}"
            )
        return result

    def get_stats(self) -> LoudnessStatisticsWithMetadata:
        """
        Return loudness statistics for the stream.

        Returns:
            dict: A dictionary containing the loudness statistics, plus the
                input file, output file and stream ID they belong to.
        """
        stats: LoudnessStatisticsWithMetadata = {
            "input_file": self.media_file.input_file,
            "output_file": self.media_file.output_file,
            "stream_id": self.stream_id,
            "ebu_pass1": self.loudness_statistics["ebu_pass1"],
            "ebu_pass2": self.loudness_statistics["ebu_pass2"],
            "mean": self.loudness_statistics["mean"],
            "max": self.loudness_statistics["max"],
        }
        return stats

    def set_second_pass_stats(self, stats: EbuLoudnessStatistics) -> None:
        """
        Set the EBU loudness statistics for the second pass.

        Args:
            stats (dict): The EBU loudness statistics.
        """
        _logger.debug(
            f"Setting second pass stats for stream {self.stream_id} from {stats}"
        )
        self.loudness_statistics["ebu_pass2"] = stats

    def get_pcm_codec(self) -> str:
        """
        Get the PCM codec string for the stream.

        Returns:
            str: The PCM codec string; "pcm_s16le" if the bit depth is unknown
                or not one of the handled values.
        """
        if not self.bit_depth:
            return "pcm_s16le"
        elif self.bit_depth <= 8:
            return "pcm_s8"
        elif self.bit_depth in [16, 24, 32, 64]:
            return f"pcm_s{self.bit_depth}le"
        else:
            _logger.warning(
                f"Unsupported bit depth {self.bit_depth}, falling back to pcm_s16le"
            )
            return "pcm_s16le"

    def _get_filter_str_with_pre_filter(self, current_filter: str) -> str:
        """
        Get a filter string for current_filter, with the pre-filter
        added before. Applies the input label before.

        Args:
            current_filter (str): The current filter.

        Returns:
            str: The filter string.
        """
        input_label = f"[0:{self.stream_id}]"
        filter_chain = []
        if self.media_file.ffmpeg_normalize.pre_filter:
            filter_chain.append(self.media_file.ffmpeg_normalize.pre_filter)
        filter_chain.append(current_filter)
        filter_str = input_label + ",".join(filter_chain)
        return filter_str

    def parse_astats(self) -> Iterator[float]:
        """
        Use ffmpeg with astats filter to get the mean (RMS) and max (peak) volume of the input file.

        The results are stored in ``loudness_statistics["mean"]`` and
        ``loudness_statistics["max"]`` once the generator is exhausted.

        Yields:
            float: The progress of the command.

        Raises:
            FFmpegNormalizeError: If the mean or max volume is missing from the output.
        """
        _logger.info(f"Running first pass astats filter for stream {self.stream_id}")

        filter_str = self._get_filter_str_with_pre_filter(
            "astats=measure_overall=Peak_level+RMS_level:measure_perchannel=0"
        )

        cmd = [
            self.media_file.ffmpeg_normalize.ffmpeg_exe,
            "-hide_banner",
            "-y",
            "-i",
            self.media_file.input_file,
            "-filter_complex",
            filter_str,
            "-vn",
            "-sn",
            "-f",
            "null",
            os.devnull,
        ]

        cmd_runner = CommandRunner()
        yield from cmd_runner.run_ffmpeg_command(cmd)
        output = cmd_runner.get_output()

        _logger.debug(
            f"astats command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
        )

        mean_volume_matches = re.findall(r"RMS level dB: ([\-\d\.]+)", output)
        if mean_volume_matches:
            # the pattern cannot match letters, so "-inf" is captured as a lone "-"
            if mean_volume_matches[0] == "-":
                self.loudness_statistics["mean"] = float("-inf")
            else:
                self.loudness_statistics["mean"] = float(mean_volume_matches[0])
        else:
            raise FFmpegNormalizeError(
                f"Could not get mean volume for {self.media_file.input_file}"
            )

        max_volume_matches = re.findall(r"Peak level dB: ([\-\d\.]+)", output)
        if max_volume_matches:
            if max_volume_matches[0] == "-":
                self.loudness_statistics["max"] = float("-inf")
            else:
                self.loudness_statistics["max"] = float(max_volume_matches[0])
        else:
            raise FFmpegNormalizeError(
                f"Could not get max volume for {self.media_file.input_file}"
            )

    def parse_loudnorm_stats(self) -> Iterator[float]:
        """
        Run a first pass loudnorm filter to get measured data.

        The result is stored in ``loudness_statistics["ebu_pass1"]`` once the
        generator is exhausted.

        Yields:
            float: The progress of the command.

        Raises:
            FFmpegNormalizeError: If the output contains no loudnorm stats or
                they could not be parsed.
        """
        _logger.info(f"Running first pass loudnorm filter for stream {self.stream_id}")

        opts = {
            "i": self.media_file.ffmpeg_normalize.target_level,
            "lra": self.media_file.ffmpeg_normalize.loudness_range_target,
            "tp": self.media_file.ffmpeg_normalize.true_peak,
            "offset": self.media_file.ffmpeg_normalize.offset,
            "print_format": "json",
        }

        if self.media_file.ffmpeg_normalize.dual_mono:
            opts["dual_mono"] = "true"

        filter_str = self._get_filter_str_with_pre_filter(
            "loudnorm=" + dict_to_filter_opts(opts)
        )

        cmd = [
            self.media_file.ffmpeg_normalize.ffmpeg_exe,
            "-hide_banner",
            "-y",
            "-i",
            self.media_file.input_file,
            "-map",
            f"0:{self.stream_id}",
            "-filter_complex",
            filter_str,
            "-vn",
            "-sn",
            "-f",
            "null",
            os.devnull,
        ]

        cmd_runner = CommandRunner()
        yield from cmd_runner.run_ffmpeg_command(cmd)
        output = cmd_runner.get_output()

        _logger.debug(
            f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
        )

        parsed_stats = AudioStream.prune_and_parse_loudnorm_output(output)
        # A bare next() on an empty result would raise StopIteration inside this
        # generator, which Python turns into an opaque RuntimeError.
        if not parsed_stats:
            raise FFmpegNormalizeError(
                f"Could not find loudnorm stats for stream {self.stream_id} "
                f"of {self.media_file.input_file}"
            )

        # only one stream
        self.loudness_statistics["ebu_pass1"] = next(iter(parsed_stats.values()))

    @staticmethod
    def prune_and_parse_loudnorm_output(
        output: str,
    ) -> dict[int, EbuLoudnessStatistics]:
        """
        Prune ffmpeg progress lines from output and parse the loudnorm filter output.
        There may be multiple outputs if multiple streams were processed.

        Args:
            output (str): The output from ffmpeg.

        Returns:
            dict[int, EbuLoudnessStatistics]: The EBU loudness statistics.
        """
        _logger.debug("Parsing loudnorm stats from output")
        pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output)
        output_lines = [line.strip() for line in pruned_output.split("\n")]
        return AudioStream._parse_loudnorm_output(output_lines)

    @staticmethod
    def _parse_loudnorm_output(
        output_lines: list[str],
    ) -> dict[int, EbuLoudnessStatistics]:
        """
        Parse the output of a loudnorm filter to get the EBU loudness statistics.

        Args:
            output_lines (list[str]): The output lines of the loudnorm filter.

        Raises:
            FFmpegNormalizeError: When the output could not be parsed.

        Returns:
            dict[int, EbuLoudnessStatistics]: stream index and the EBU loudness statistics, if found.
        """
        result = dict[int, EbuLoudnessStatistics]()
        # a negative stream_index means "not currently inside a loudnorm JSON block"
        stream_index = -1
        loudnorm_start = 0
        for index, line in enumerate(output_lines):
            if stream_index < 0:
                if m := _loudnorm_pattern.match(line):
                    # the JSON data starts on the line after the matched header
                    loudnorm_start = index + 1
                    stream_index = int(m.group(1))
            else:
                if line.startswith("}"):
                    loudnorm_end = index + 1
                    loudnorm_data = "\n".join(output_lines[loudnorm_start:loudnorm_end])

                    try:
                        loudnorm_stats = json.loads(loudnorm_data)

                        _logger.debug(
                            f"Loudnorm stats for stream {stream_index} parsed: {loudnorm_data}"
                        )

                        for key in [
                            "input_i",
                            "input_tp",
                            "input_lra",
                            "input_thresh",
                            "output_i",
                            "output_tp",
                            "output_lra",
                            "output_thresh",
                            "target_offset",
                            "normalization_type",
                        ]:
                            if key not in loudnorm_stats:
                                continue
                            if key == "normalization_type":
                                loudnorm_stats[key] = loudnorm_stats[key].lower()
                            # handle infinite values
                            elif float(loudnorm_stats[key]) == -float("inf"):
                                loudnorm_stats[key] = -99
                            elif float(loudnorm_stats[key]) == float("inf"):
                                loudnorm_stats[key] = 0
                            else:
                                # convert to floats
                                loudnorm_stats[key] = float(loudnorm_stats[key])

                        result[stream_index] = cast(
                            EbuLoudnessStatistics, loudnorm_stats
                        )
                        stream_index = -1
                    except Exception as e:
                        # chain the cause so the original traceback is kept
                        raise FFmpegNormalizeError(
                            f"Could not parse loudnorm stats; wrong JSON format in string: {e}"
                        ) from e
        return result

    def get_second_pass_opts_ebu(self) -> str:
        """
        Return second pass loudnorm filter options string for ffmpeg

        Returns:
            str: The loudnorm filter string.

        Raises:
            FFmpegNormalizeError: If not in dynamic mode and no first pass stats exist.
        """

        # In dynamic mode, we can do everything in one pass, and we do not have first pass stats
        if self.media_file.ffmpeg_normalize.dynamic:
            if not self.ffmpeg_normalize.sample_rate:
                _logger.warning(
                    "In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. "
                    "Specify -ar/--sample-rate to override it."
                )

            opts = {
                "i": self.media_file.ffmpeg_normalize.target_level,
                "lra": self.media_file.ffmpeg_normalize.loudness_range_target,
                "tp": self.media_file.ffmpeg_normalize.true_peak,
                "offset": self.media_file.ffmpeg_normalize.offset,
                "linear": "false",
                "print_format": "json",
            }

            if self.media_file.ffmpeg_normalize.dual_mono:
                opts["dual_mono"] = "true"

            return "loudnorm=" + dict_to_filter_opts(opts)

        if not self.loudness_statistics["ebu_pass1"]:
            raise FFmpegNormalizeError(
                "First pass not run, you must call parse_loudnorm_stats first"
            )

        if float(self.loudness_statistics["ebu_pass1"]["input_i"]) > 0:
            _logger.warning(
                "Input file had measured input loudness greater than zero "
                f"({self.loudness_statistics['ebu_pass1']['input_i']}), capping at 0"
            )
            self.loudness_statistics["ebu_pass1"]["input_i"] = 0

        will_use_dynamic_mode = self.media_file.ffmpeg_normalize.dynamic

        # NOTE(review): the two branches below write to the shared settings
        # object, so the changed loudness range target stays in effect for
        # whatever uses those settings afterwards - confirm this is intended.
        if self.media_file.ffmpeg_normalize.keep_loudness_range_target:
            _logger.debug(
                "Keeping target loudness range in second pass loudnorm filter"
            )
            input_lra = self.loudness_statistics["ebu_pass1"]["input_lra"]
            if input_lra < 1 or input_lra > 50:
                _logger.warning(
                    "Input file had measured loudness range outside of [1,50] "
                    f"({input_lra}), capping to allowed range"
                )

            self.media_file.ffmpeg_normalize.loudness_range_target = self._constrain(
                self.loudness_statistics["ebu_pass1"]["input_lra"], 1, 50
            )

        if self.media_file.ffmpeg_normalize.keep_lra_above_loudness_range_target:
            if (
                self.loudness_statistics["ebu_pass1"]["input_lra"]
                <= self.media_file.ffmpeg_normalize.loudness_range_target
            ):
                _logger.debug(
                    "Setting loudness range target in second pass loudnorm filter"
                )
            else:
                self.media_file.ffmpeg_normalize.loudness_range_target = (
                    self.loudness_statistics["ebu_pass1"]["input_lra"]
                )
                _logger.debug(
                    "Keeping target loudness range in second pass loudnorm filter"
                )

        if (
            self.media_file.ffmpeg_normalize.loudness_range_target
            < self.loudness_statistics["ebu_pass1"]["input_lra"]
            and not will_use_dynamic_mode
        ):
            _logger.warning(
                f"Input file had loudness range of {self.loudness_statistics['ebu_pass1']['input_lra']}. "
                f"This is larger than the loudness range target ({self.media_file.ffmpeg_normalize.loudness_range_target}). "
                "Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization. "
                "Alternatively, use the --keep-loudness-range-target or --keep-lra-above-loudness-range-target option to keep the target loudness range from "
                "the input."
            )
            will_use_dynamic_mode = True

        if will_use_dynamic_mode and not self.ffmpeg_normalize.sample_rate:
            _logger.warning(
                "In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. "
                "Specify -ar/--sample-rate to override it."
            )

        target_level = self.ffmpeg_normalize.target_level
        if self.ffmpeg_normalize.auto_lower_loudness_target:
            # target computed from measured loudness, measured true peak and the
            # true peak limit, minus 0.1
            safe_target = (
                self.loudness_statistics["ebu_pass1"]["input_i"]
                - self.loudness_statistics["ebu_pass1"]["input_tp"]
                + self.ffmpeg_normalize.true_peak
                - 0.1
            )
            if safe_target < self.ffmpeg_normalize.target_level:
                target_level = safe_target
                _logger.warning(
                    f"Using loudness target {target_level} because --auto-lower-loudness-target given.",
                )

        stats = self.loudness_statistics["ebu_pass1"]

        opts = {
            "i": target_level,
            "lra": self.media_file.ffmpeg_normalize.loudness_range_target,
            "tp": self.media_file.ffmpeg_normalize.true_peak,
            "offset": self._constrain(
                stats["target_offset"], -99, 99, name="target_offset"
            ),
            "measured_i": self._constrain(stats["input_i"], -99, 0, name="input_i"),
            "measured_lra": self._constrain(
                stats["input_lra"], 0, 99, name="input_lra"
            ),
            "measured_tp": self._constrain(stats["input_tp"], -99, 99, name="input_tp"),
            "measured_thresh": self._constrain(
                stats["input_thresh"], -99, 0, name="input_thresh"
            ),
            "linear": "false" if self.media_file.ffmpeg_normalize.dynamic else "true",
            "print_format": "json",
        }

        if self.media_file.ffmpeg_normalize.dual_mono:
            opts["dual_mono"] = "true"

        return "loudnorm=" + dict_to_filter_opts(opts)

    def get_second_pass_opts_peakrms(self) -> str:
        """
        Set the adjustment gain based on chosen option and mean/max volume,
        return the matching ffmpeg volume filter.

        Returns:
            str: ffmpeg volume filter string

        Raises:
            FFmpegNormalizeError: If the first pass has not been run, or the
                normalization type is neither "peak" nor "rms".
        """
        if (
            self.loudness_statistics["max"] is None
            or self.loudness_statistics["mean"] is None
        ):
            raise FFmpegNormalizeError(
                "First pass not run, no mean/max volume to normalize to"
            )

        normalization_type = self.media_file.ffmpeg_normalize.normalization_type
        target_level = self.media_file.ffmpeg_normalize.target_level

        if normalization_type == "peak":
            adjustment = 0 + target_level - self.loudness_statistics["max"]
        elif normalization_type == "rms":
            adjustment = target_level - self.loudness_statistics["mean"]
        else:
            raise FFmpegNormalizeError(
                "Can only set adjustment for peak and RMS normalization"
            )

        _logger.info(
            f"Adjusting stream {self.stream_id} by {adjustment} dB to reach {target_level}"
        )

        # a peak above 0 dB after the adjustment means the signal will clip
        clip_amount = self.loudness_statistics["max"] + adjustment
        if clip_amount > 0:
            _logger.warning(f"Adjusting will lead to clipping of {clip_amount} dB")

        return f"volume={adjustment}dB"
def __init__(
    self,
    ffmpeg_normalize: FFmpegNormalize,
    media_file: MediaFile,
    stream_id: int,
    sample_rate: int | None,
    bit_depth: int | None,
    duration: float | None,
):
    """
    Build an audio stream whose loudness statistics are all still unset.

    Args:
        ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
        media_file (MediaFile): The MediaFile object.
        stream_id (int): The stream ID.
        sample_rate (int): sample rate in Hz
        bit_depth (int): bit depth in bits
        duration (float): duration in seconds
    """
    super().__init__(ffmpeg_normalize, media_file, "audio", stream_id)

    # properties of the stream as passed in by the caller
    self.sample_rate = sample_rate
    self.bit_depth = bit_depth
    self.duration = duration

    # every entry starts out as None
    self.loudness_statistics: LoudnessStatistics = {
        "ebu_pass1": None,
        "ebu_pass2": None,
        "mean": None,
        "max": None,
    }
Create an AudioStream object.
Arguments:
- ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
- media_file (MediaFile): The MediaFile object.
- stream_id (int): The stream ID.
- sample_rate (int): sample rate in Hz
- bit_depth (int): bit depth in bits
- duration (float): duration in seconds
def get_stats(self) -> LoudnessStatisticsWithMetadata:
    """
    Collect the stream's loudness statistics together with identifying metadata.

    Returns:
        dict: The input file, output file and stream ID, followed by the
            "ebu_pass1", "ebu_pass2", "mean" and "max" statistics.
    """
    measured = self.loudness_statistics
    owner = self.media_file
    return {
        "input_file": owner.input_file,
        "output_file": owner.output_file,
        "stream_id": self.stream_id,
        "ebu_pass1": measured["ebu_pass1"],
        "ebu_pass2": measured["ebu_pass2"],
        "mean": measured["mean"],
        "max": measured["max"],
    }
Return loudness statistics for the stream.
Returns:
dict: A dictionary containing the loudness statistics.
def set_second_pass_stats(self, stats: EbuLoudnessStatistics) -> None:
    """
    Store the EBU loudness statistics measured during the second pass.

    Args:
        stats (dict): The EBU loudness statistics.
    """
    message = f"Setting second pass stats for stream {self.stream_id} from {stats}"
    _logger.debug(message)
    self.loudness_statistics["ebu_pass2"] = stats
Set the EBU loudness statistics for the second pass.
Arguments:
- stats (dict): The EBU loudness statistics.
def get_pcm_codec(self) -> str:
    """
    Pick the PCM codec name matching the stream's bit depth.

    Returns:
        str: "pcm_s8" for bit depths up to 8, "pcm_s<N>le" for 16, 24, 32 and
            64 bits, and "pcm_s16le" if the bit depth is unknown or any other value.
    """
    fallback_codec = "pcm_s16le"
    depth = self.bit_depth

    # unknown (None) or zero bit depth
    if not depth:
        return fallback_codec
    if depth <= 8:
        return "pcm_s8"
    if depth in (16, 24, 32, 64):
        return f"pcm_s{depth}le"

    _logger.warning(
        f"Unsupported bit depth {self.bit_depth}, falling back to pcm_s16le"
    )
    return fallback_codec
Get the PCM codec string for the stream.
Returns:
str: The PCM codec string.
def parse_astats(self) -> Iterator[float]:
    """
    Use ffmpeg with astats filter to get the mean (RMS) and max (peak) volume of the input file.

    The measured values are stored under the "mean" and "max" keys of
    ``self.loudness_statistics``.

    Yields:
        float: The progress of the command.

    Raises:
        FFmpegNormalizeError: If the mean or max volume cannot be found in the ffmpeg output.
    """
    _logger.info(f"Running first pass astats filter for stream {self.stream_id}")

    astats_filter = self._get_filter_str_with_pre_filter(
        "astats=measure_overall=Peak_level+RMS_level:measure_perchannel=0"
    )

    # Analysis only: video and subtitles are dropped and the result is
    # written to the null muxer.
    ffmpeg_args = [
        self.media_file.ffmpeg_normalize.ffmpeg_exe,
        "-hide_banner",
        "-y",
        "-i",
        self.media_file.input_file,
        "-filter_complex",
        astats_filter,
        "-vn",
        "-sn",
        "-f",
        "null",
        os.devnull,
    ]

    runner = CommandRunner()
    yield from runner.run_ffmpeg_command(ffmpeg_args)
    ffmpeg_output = runner.get_output()

    _logger.debug(
        f"astats command output: {CommandRunner.prune_ffmpeg_progress_from_output(ffmpeg_output)}"
    )

    def first_level(pattern):
        """Return the first level matched by ``pattern`` as a float, or None if absent."""
        found = re.search(pattern, ffmpeg_output)
        if found is None:
            return None
        captured = found.group(1)
        # A lone "-" is all the character class can capture from a
        # non-numeric value (presumably "-inf"); treat it as minus infinity.
        return float("-inf") if captured == "-" else float(captured)

    mean_volume = first_level(r"RMS level dB: ([\-\d\.]+)")
    if mean_volume is None:
        raise FFmpegNormalizeError(
            f"Could not get mean volume for {self.media_file.input_file}"
        )
    self.loudness_statistics["mean"] = mean_volume

    max_volume = first_level(r"Peak level dB: ([\-\d\.]+)")
    if max_volume is None:
        raise FFmpegNormalizeError(
            f"Could not get max volume for {self.media_file.input_file}"
        )
    self.loudness_statistics["max"] = max_volume
Use ffmpeg with astats filter to get the mean (RMS) and max (peak) volume of the input file.
Yields:
float: The progress of the command.
def parse_loudnorm_stats(self) -> Iterator[float]:
    """
    Run a first pass loudnorm filter to get measured data.

    The parsed measurements are stored under the "ebu_pass1" key of
    ``self.loudness_statistics``.

    Yields:
        float: The progress of the command.

    Raises:
        FFmpegNormalizeError: If no loudnorm statistics could be parsed
            from the ffmpeg output.
    """
    _logger.info(f"Running first pass loudnorm filter for stream {self.stream_id}")

    opts = {
        "i": self.media_file.ffmpeg_normalize.target_level,
        "lra": self.media_file.ffmpeg_normalize.loudness_range_target,
        "tp": self.media_file.ffmpeg_normalize.true_peak,
        "offset": self.media_file.ffmpeg_normalize.offset,
        "print_format": "json",
    }

    if self.media_file.ffmpeg_normalize.dual_mono:
        opts["dual_mono"] = "true"

    filter_str = self._get_filter_str_with_pre_filter(
        "loudnorm=" + dict_to_filter_opts(opts)
    )

    # Analysis only: video and subtitles are dropped and the result is
    # written to the null muxer.
    cmd = [
        self.media_file.ffmpeg_normalize.ffmpeg_exe,
        "-hide_banner",
        "-y",
        "-i",
        self.media_file.input_file,
        "-map",
        f"0:{self.stream_id}",
        "-filter_complex",
        filter_str,
        "-vn",
        "-sn",
        "-f",
        "null",
        os.devnull,
    ]

    cmd_runner = CommandRunner()
    yield from cmd_runner.run_ffmpeg_command(cmd)
    output = cmd_runner.get_output()

    _logger.debug(
        f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
    )

    # only one stream
    parsed_stats = AudioStream.prune_and_parse_loudnorm_output(output)

    # A bare next() on an empty iterator would raise StopIteration, which
    # inside a generator turns into an opaque RuntimeError. Report the
    # missing statistics explicitly instead.
    first_pass_stats = next(iter(parsed_stats.values()), None)
    if first_pass_stats is None:
        raise FFmpegNormalizeError(
            f"Could not parse loudnorm stats for {self.media_file.input_file}"
        )

    self.loudness_statistics["ebu_pass1"] = first_pass_stats
Run a first pass loudnorm filter to get measured data.
Yields:
float: The progress of the command.
@staticmethod
def prune_and_parse_loudnorm_output(
    output: str,
) -> dict[int, EbuLoudnessStatistics]:
    """
    Prune ffmpeg progress lines from output and parse the loudnorm filter output.
    There may be multiple outputs if multiple streams were processed.

    Args:
        output (str): The output from ffmpeg.

    Returns:
        dict[int, EbuLoudnessStatistics]: The EBU loudness statistics.
    """
    _logger.debug("Parsing loudnorm stats from output")
    without_progress = CommandRunner.prune_ffmpeg_progress_from_output(output)
    # The parser receives one whitespace-stripped entry per output line.
    stripped_lines = list(map(str.strip, without_progress.split("\n")))
    return AudioStream._parse_loudnorm_output(stripped_lines)
Prune ffmpeg progress lines from output and parse the loudnorm filter output. There may be multiple outputs if multiple streams were processed.
Arguments:
- output (str): The output from ffmpeg.
Returns:
dict[int, EbuLoudnessStatistics]: The EBU loudness statistics.
def get_second_pass_opts_ebu(self) -> str:
    """
    Return second pass loudnorm filter options string for ffmpeg.

    In dynamic mode the filter is built from the configured targets only,
    because no first pass statistics exist. Otherwise it is built from the
    first pass measurements, optionally adapting the loudness range target
    (``keep_loudness_range_target``, ``keep_lra_above_loudness_range_target``)
    and the integrated loudness target (``auto_lower_loudness_target``).

    The loudness range target derived from the input is kept local to this
    call. The shared FFmpegNormalize configuration is not modified, so the
    value chosen for one stream cannot influence other streams or files.

    Side effects:
        A measured ``input_i`` greater than zero is capped to 0 in the stored
        first pass statistics.

    Returns:
        str: The filter string, starting with "loudnorm=".

    Raises:
        FFmpegNormalizeError: If not in dynamic mode and the first pass
            statistics are missing.
    """
    settings = self.media_file.ffmpeg_normalize
    sample_rate_warning = (
        "In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. "
        "Specify -ar/--sample-rate to override it."
    )

    # In dynamic mode, we can do everything in one pass, and we do not have first pass stats
    if settings.dynamic:
        if not self.ffmpeg_normalize.sample_rate:
            _logger.warning(sample_rate_warning)

        opts = {
            "i": settings.target_level,
            "lra": settings.loudness_range_target,
            "tp": settings.true_peak,
            "offset": settings.offset,
            "linear": "false",
            "print_format": "json",
        }

        if settings.dual_mono:
            opts["dual_mono"] = "true"

        return "loudnorm=" + dict_to_filter_opts(opts)

    stats = self.loudness_statistics["ebu_pass1"]
    if not stats:
        raise FFmpegNormalizeError(
            "First pass not run, you must call parse_loudnorm_stats first"
        )

    if float(stats["input_i"]) > 0:
        _logger.warning(
            "Input file had measured input loudness greater than zero "
            f"({stats['input_i']}), capping at 0"
        )
        stats["input_i"] = 0

    input_lra = stats["input_lra"]
    # Work on a local copy of the target so that per-stream adjustments do
    # not leak into the shared configuration object.
    lra_target = settings.loudness_range_target

    if settings.keep_loudness_range_target:
        _logger.debug(
            "Keeping target loudness range in second pass loudnorm filter"
        )
        if input_lra < 1 or input_lra > 50:
            _logger.warning(
                "Input file had measured loudness range outside of [1,50] "
                f"({input_lra}), capping to allowed range"
            )
        lra_target = self._constrain(input_lra, 1, 50)

    if settings.keep_lra_above_loudness_range_target:
        if input_lra <= lra_target:
            _logger.debug(
                "Setting loudness range target in second pass loudnorm filter"
            )
        else:
            # Apply the same [1, 50] limit as the keep_loudness_range_target
            # branch above, so an out-of-range value is never sent to ffmpeg.
            lra_target = self._constrain(input_lra, 1, 50)
            _logger.debug(
                "Keeping target loudness range in second pass loudnorm filter"
            )

    # Dynamic mode was handled by the early return above, so the only way to
    # end up in dynamic mode here is a target below the measured range.
    if lra_target < input_lra:
        _logger.warning(
            f"Input file had loudness range of {input_lra}. "
            f"This is larger than the loudness range target ({lra_target}). "
            "Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization. "
            "Alternatively, use the --keep-loudness-range-target or --keep-lra-above-loudness-range-target option to keep the target loudness range from "
            "the input."
        )
        if not self.ffmpeg_normalize.sample_rate:
            _logger.warning(sample_rate_warning)

    target_level = self.ffmpeg_normalize.target_level
    if self.ffmpeg_normalize.auto_lower_loudness_target:
        # Loudness target computed from the measured loudness and true peak,
        # the configured true peak limit and a 0.1 margin.
        safe_target = (
            stats["input_i"]
            - stats["input_tp"]
            + self.ffmpeg_normalize.true_peak
            - 0.1
        )
        if safe_target < self.ffmpeg_normalize.target_level:
            target_level = safe_target
            _logger.warning(
                f"Using loudness target {target_level} because --auto-lower-loudness-target given.",
            )

    opts = {
        "i": target_level,
        "lra": lra_target,
        "tp": settings.true_peak,
        "offset": self._constrain(
            stats["target_offset"], -99, 99, name="target_offset"
        ),
        "measured_i": self._constrain(stats["input_i"], -99, 0, name="input_i"),
        "measured_lra": self._constrain(
            stats["input_lra"], 0, 99, name="input_lra"
        ),
        "measured_tp": self._constrain(stats["input_tp"], -99, 99, name="input_tp"),
        "measured_thresh": self._constrain(
            stats["input_thresh"], -99, 0, name="input_thresh"
        ),
        # Always "true" on this path: the dynamic case returned earlier.
        "linear": "true",
        "print_format": "json",
    }

    if settings.dual_mono:
        opts["dual_mono"] = "true"

    return "loudnorm=" + dict_to_filter_opts(opts)
Return second pass loudnorm filter options string for ffmpeg
def get_second_pass_opts_peakrms(self) -> str:
    """
    Set the adjustment gain based on chosen option and mean/max volume,
    return the matching ffmpeg volume filter.

    For "peak" normalization the gain is the difference between the target
    level and the measured max volume; for "rms" it is the difference
    between the target level and the measured mean volume. A warning is
    logged when the adjusted max volume ends up above 0.

    Returns:
        str: ffmpeg volume filter string

    Raises:
        FFmpegNormalizeError: If the mean/max volume has not been measured,
            or the normalization type is neither "peak" nor "rms".
    """
    peak_volume = self.loudness_statistics["max"]
    mean_volume = self.loudness_statistics["mean"]
    if peak_volume is None or mean_volume is None:
        raise FFmpegNormalizeError(
            "First pass not run, no mean/max volume to normalize to"
        )

    settings = self.media_file.ffmpeg_normalize
    normalization_type = settings.normalization_type
    target_level = settings.target_level

    if normalization_type not in ("peak", "rms"):
        raise FFmpegNormalizeError(
            "Can only set adjustment for peak and RMS normalization"
        )

    if normalization_type == "peak":
        adjustment = 0 + target_level - peak_volume
    else:
        adjustment = target_level - mean_volume

    _logger.info(
        f"Adjusting stream {self.stream_id} by {adjustment} dB to reach {target_level}"
    )

    # A max volume above 0 after the gain is applied is reported as clipping.
    clip_amount = peak_volume + adjustment
    if clip_amount > 0:
        _logger.warning(f"Adjusting will lead to clipping of {clip_amount} dB")

    return f"volume={adjustment}dB"
Set the adjustment gain based on chosen option and mean/max volume, return the matching ffmpeg volume filter.
Returns:
str: ffmpeg volume filter string
class VideoStream(MediaStream):
    """A MediaStream whose stream type is fixed to "video"."""

    def __init__(
        self, ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int
    ):
        """
        Create a VideoStream object.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_id (int): The stream ID.
        """
        super().__init__(
            ffmpeg_normalize=ffmpeg_normalize,
            media_file=media_file,
            stream_type="video",
            stream_id=stream_id,
        )
def __init__(
    self, ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int
):
    """
    Create a VideoStream object.

    Args:
        ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
        media_file (MediaFile): The MediaFile object.
        stream_id (int): The stream ID.
    """
    super().__init__(
        ffmpeg_normalize=ffmpeg_normalize,
        media_file=media_file,
        stream_type="video",
        stream_id=stream_id,
    )
Create a MediaStream object.
Arguments:
- ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
- media_file (MediaFile): The MediaFile object.
- stream_type (Literal["audio", "video", "subtitle"]): The type of the stream.
- stream_id (int): The stream ID.
class SubtitleStream(MediaStream):
    """A MediaStream whose stream type is fixed to "subtitle"."""

    def __init__(
        self, ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int
    ):
        """
        Create a SubtitleStream object.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_id (int): The stream ID.
        """
        super().__init__(
            ffmpeg_normalize=ffmpeg_normalize,
            media_file=media_file,
            stream_type="subtitle",
            stream_id=stream_id,
        )
def __init__(
    self, ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int
):
    """
    Create a SubtitleStream object.

    Args:
        ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
        media_file (MediaFile): The MediaFile object.
        stream_id (int): The stream ID.
    """
    super().__init__(
        ffmpeg_normalize=ffmpeg_normalize,
        media_file=media_file,
        stream_type="subtitle",
        stream_id=stream_id,
    )
Create a MediaStream object.
Arguments:
- ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
- media_file (MediaFile): The MediaFile object.
- stream_type (Literal["audio", "video", "subtitle"]): The type of the stream.
- stream_id (int): The stream ID.
class MediaStream:
    """Base class describing a single stream of a media file."""

    def __init__(
        self,
        ffmpeg_normalize: FFmpegNormalize,
        media_file: MediaFile,
        stream_type: Literal["audio", "video", "subtitle"],
        stream_id: int,
    ):
        """
        Create a MediaStream object.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_type (Literal["audio", "video", "subtitle"]): The type of the stream.
            stream_id (int): The stream ID.
        """
        self.ffmpeg_normalize, self.media_file = ffmpeg_normalize, media_file
        self.stream_type, self.stream_id = stream_type, stream_id
        _logger.debug(
            f"Created MediaStream for {self.media_file.input_file}, {self.stream_type} stream {self.stream_id}"
        )

    def __repr__(self) -> str:
        """Return "<file name, stream type stream ID>" using the base name of the input file."""
        return f"<{os.path.basename(self.media_file.input_file)}, {self.stream_type} stream {self.stream_id}>"
def __init__(
    self,
    ffmpeg_normalize: FFmpegNormalize,
    media_file: MediaFile,
    stream_type: Literal["audio", "video", "subtitle"],
    stream_id: int,
):
    """
    Create a MediaStream object.

    Args:
        ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
        media_file (MediaFile): The MediaFile object.
        stream_type (Literal["audio", "video", "subtitle"]): The type of the stream.
        stream_id (int): The stream ID.
    """
    self.ffmpeg_normalize, self.media_file = ffmpeg_normalize, media_file
    self.stream_type, self.stream_id = stream_type, stream_id
    _logger.debug(
        f"Created MediaStream for {self.media_file.input_file}, {self.stream_type} stream {self.stream_id}"
    )
Create a MediaStream object.
Arguments:
- ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
- media_file (MediaFile): The MediaFile object.
- stream_type (Literal["audio", "video", "subtitle"]): The type of the stream.
- stream_id (int): The stream ID.