Public functions
get_win_length
Return the window length used to estimate the RMS.
View Source
def get_win_length(self) -> float:
    """Give the length of the window on which each RMS value is estimated.

    :return: (float) the window length currently stored by this instance

    """
    window_length = self._win_len
    return window_length
set_win_length
Set a new length of window and perform estimation of volume values.
It cancels any previous estimation of volume and silence search.
Parameters
- w: (float) between 0.001 and 0.05.
Raises
- ValueError: if w is not a float
View Source
def set_win_length(self, w: float) -> None:
    """Change the window length, then redo the estimation of the volumes.

    The given value is converted with float() and forced into the range
    [ChannelSilences.MIN_WIN_LEN, ChannelSilences.MAX_WIN_LEN]: a value
    above the maximum is replaced by the maximum, any other value outside
    the range is replaced by the minimum.
    If a channel is already defined, it is assigned again with
    set_channel(), so former volume values and silences are discarded.

    :param w: (float) between 0.001 and 0.05.
    :raises: ValueError: if w can't be converted to a float

    """
    requested = float(w)

    # Test the upper bound first; everything that is neither above the
    # maximum nor at least the minimum (NaN included) gets the minimum.
    if requested > ChannelSilences.MAX_WIN_LEN:
        self._win_len = ChannelSilences.MAX_WIN_LEN
    elif requested >= ChannelSilences.MIN_WIN_LEN:
        self._win_len = requested
    else:
        self._win_len = ChannelSilences.MIN_WIN_LEN

    # Re-assigning the channel rebuilds the volume statistics with the
    # new window length.
    current_channel = self._channel
    if current_channel is not None:
        self.set_channel(current_channel)
get_vagueness
Return the vagueness value.
View Source
def get_vagueness(self) -> float:
    """Give the vagueness value currently stored by this instance.

    :return: (float) the vagueness

    """
    current_vagueness = self._vagueness
    return current_vagueness
set_vagueness
Set the window length used to estimate the boundaries.
Parameters
- vagueness: (float) Maximum value of vagueness is win_len.
View Source
def set_vagueness(self, vagueness: float) -> None:
    """Set the window length used to estimate the boundaries.

    The value is converted with float(). A strictly positive value is
    stored, limited to the current window length; any other value
    results in a vagueness of 0.

    :param vagueness: (float) Maximum value of vagueness is win_len.

    """
    value = float(vagueness)
    self._vagueness = min(value, self._win_len) if value > 0.0 else 0.0
get_channel
Return the channel.
View Source
def get_channel(self) -> Channel:
    """Give the channel currently stored by this instance.

    :return: (Channel) the channel, as it was assigned

    """
    current_channel = self._channel
    return current_channel
set_channel
Set a channel, then reset all previous results.
Parameters
- channel: (Channel) The channel to be used to search for silences
Raises
- TypeError: Given parameter is not a Channel
View Source
def set_channel(self, channel: Channel) -> None:
    """Assign the channel to work on and forget all previous results.

    The volume statistics are re-created from the given channel with the
    current window length, and the list of silences is emptied.

    :param channel: (Channel) The channel to be used to search for silences
    :raises: TypeError: Given parameter is not a Channel

    """
    if not isinstance(channel, Channel):
        given_type = str(type(channel))
        raise TypeError('Expected a Channel, got {:s} instead.'.format(given_type))

    self._channel = channel
    # Results of a former channel are no longer relevant.
    self.__volume_stats = ChannelVolume(channel, self._win_len)
    self.__silences = []
get_volstats
Return the RMS values estimated on the channel.
View Source
def get_volstats(self) -> ChannelVolume | None:
    """Give the RMS values estimated on the channel.

    :return: (ChannelVolume | None) the volume statistics stored by
        this instance

    """
    volume_stats = self.__volume_stats
    return volume_stats
set_silences
Manually set the silences; to be used carefully.
Assign manually the list of tuples (start, end) of each silence.
Parameters
- silences: (list of tuples (startpos, endpos))
Raises
- TypeError: Invalid given parameter
View Source
def set_silences(self, silences: list) -> None:
    """Manually set the silences; to be used carefully.

    Assign manually the list of tuples (start, end) of each silence.
    Every item must be a list or a tuple of exactly 2 values, each of
    them accepted by int(). The given object is stored as it is: it is
    neither copied nor converted.

    :param silences: (list of tuples (start_pos, end_pos))
    :raises: TypeError: Invalid given parameter

    """
    if not isinstance(silences, (list, tuple)):
        raise TypeError('Expected a list, got {:s} instead'.format(str(type(silences))))

    for item in silences:
        if not isinstance(item, (list, tuple)):
            # The type name is formatted, not the item itself: '{:s}' on a
            # non-string item (e.g. an int) raises its own ValueError.
            raise TypeError('Expected a list or tuple, got {:s} instead'.format(str(type(item))))

        invalid_msg = 'Expected a list of 2 int values, got {} instead'.format(item)
        if len(item) != 2:
            raise TypeError(invalid_msg)
        try:
            int(item[0])
            int(item[1])
        except (TypeError, ValueError, OverflowError) as e:
            # int(None) raises TypeError, int('x') raises ValueError and
            # int(float('inf')) raises OverflowError: report all of them
            # with the same documented exception and message.
            raise TypeError(invalid_msg) from e

    self.__silences = silences
reset_silences
Reset silences to an empty list.
View Source
def reset_silences(self) -> None:
    """Forget all the silences: the list of silences becomes empty."""
    self.__silences = []
refine
Improve the precision of the given position of a silence.
Parameters
- pos: (int) Initial position of the silence
- threshold: (int) rms threshold value for a silence
- win_length: (float) Windows duration to estimate the rms
- direction: (int) 1 to scan the windows forward, -1 to scan them backward
Returns
- (int) updated position
View Source
def refine(self, pos: int, threshold: int, win_length: float=0.005, direction: int=1):
    """Improve the precision of the given position of a silence.

    Frames are read around the given position: the reading starts one
    window of the volume statistics before pos (not before 0) and two
    such windows are requested. The rms is then estimated on this extract
    with windows of win_length.
    With direction 1, windows are scanned forward and the start of the
    first window with an rms above the threshold is returned.
    With direction -1, windows are scanned backward and the end of the
    first window found with an rms above the threshold is returned.
    In any other case, pos is returned unchanged.

    :param pos: (int) Initial position of the silence
    :param threshold: (int) rms threshold value for a silence
    :param win_length: (float) Windows duration to estimate the rms
    :param direction: (int) 1 to scan forward, -1 to scan backward
    :return: (int) updated position

    """
    channel = self._channel

    # Number of frames in one window of the already estimated volumes
    span = int(self.__volume_stats.get_winlen() * channel.get_framerate())
    start = max(pos - span, 0)

    # Extract the frames around the position into a temporary channel
    channel.seek(start)
    extract = channel.get_frames(span * 2)
    around = Channel(channel.get_framerate(), channel.get_sampwidth(), extract)
    local_rms = ChannelVolume(around, win_length)

    if direction == 1:
        for index, rms in enumerate(local_rms):
            if rms > threshold:
                step = int(win_length * channel.get_framerate())
                return start + index * step

    elif direction == -1:
        # The index starts after the last window: the position returned
        # is the end of the matching window.
        index = len(local_rms)
        for rms in reversed(local_rms):
            if rms > threshold:
                step = int(win_length * channel.get_framerate())
                return start + index * step
            index -= 1

    return pos
extract_tracks
Return the tracks, deduced from the silences and track constraints.
Parameters
- min_track_dur: (float) The minimum duration for a track
- shift_dur_start: (float) The time to subtract from the start boundary
- shift_dur_end: (float) The time to add to the end boundary
Returns
- list of tuples(frompos,topos)
Duration is in seconds.
View Source
def extract_tracks(self, min_track_dur: float, shift_dur_start: float, shift_dur_end: float) -> list:
    """Return the tracks, deduced from the silences and track constraints.

    A track is the span between the end of a silence and the start of the
    next one; the first track starts at position 0 and the last one ends
    at the last frame of the channel. A track is kept only if its length,
    before shifting, is at least min_track_dur. The bounds of every kept
    track, the last one included, are then shifted and limited to the
    range [0, number of frames of the channel].

    If no channel is defined an empty list is returned; if there are no
    silences, the whole channel is returned as a single track.

    :param min_track_dur: (float) The minimum duration for a track
    :param shift_dur_start: (float) The time to remove to the start bound
    :param shift_dur_end: (float) The time to add to the end boundary
    :return: list of tuples (from_pos,to_pos)

    Durations are in seconds; positions are frame numbers.

    """
    if self._channel is None:
        return []

    nframes = self._channel.get_nframes()
    if len(self.__silences) == 0:
        return [(0, nframes)]

    # Convert the durations into numbers of frames
    framerate = self._channel.get_framerate()
    min_frames = int(min_track_dur * framerate)
    shift_start = int(shift_dur_start * framerate)
    shift_end = int(shift_dur_end * framerate)

    tracks = list()
    from_pos = 0
    # Each silence is (start, end): a track ends where a silence starts
    # and the next track begins where this silence ends.
    for to_pos, next_from in self.__silences:
        if to_pos - from_pos >= min_frames:
            shifted_from = max(from_pos - shift_start, 0)
            shifted_to = min(to_pos + shift_end, nframes)
            tracks.append((int(shifted_from), int(shifted_to)))
        from_pos = next_from

    # Last track, after the last silence. Its start bound is shifted like
    # the one of any other track; its end is already the channel end.
    if nframes - from_pos >= min_frames:
        shifted_from = max(from_pos - shift_start, 0)
        tracks.append((int(shifted_from), int(nframes)))

    return tracks
fix_threshold_vol
Fix automatically the threshold for optimizing tracks/silences search.
This is an observation of the distribution of rms values.
Returns
- (int) volume value
View Source
def fix_threshold_vol(self) -> int:
    """Fix automatically the threshold for optimizing tracks/silences search.

    This is an observation of the distribution of rms values: minimum
    (not lower than 0), mean, median and coefficient of variation.
    If the median is higher than the mean, these values, except the
    minimum, are estimated again on normalized volumes.
    Then one of the following estimators is applied:

        - median > mean: the rms value at 55% of the sorted volumes;
        - 1.5 * coef. of variation > mean and median < 0.2 * mean:
          min + (mean - median);
        - 1.5 * coef. of variation > mean: min + 0.2 * mean;
        - otherwise: min + (mean - 1.5 * coef. of variation).

    :return: (int) volume value

    """
    stats = self.__volume_stats
    vmin = max(stats.min(), 0)
    vmean = stats.mean()
    vmedian = stats.median()
    vvar = stats.coefvariation()

    if vmedian > vmean:
        logging.warning(' ... Due to un-expected outlier values, the automatic threshold estimation requires the rms distribution to be normalized.')
        # Work on a new, normalized, estimation of the volumes. The ones
        # stored by this instance are left unchanged.
        stats = ChannelVolume(self._channel, self._win_len)
        stats.normalize_volumes()
        vmean = stats.mean()
        vmedian = stats.median()
        vvar = stats.coefvariation()

    logging.info('- rms min={:.2f}'.format(vmin))
    logging.info('- rms mean={:.2f}'.format(vmean))
    logging.info('- rms median={:.2f}'.format(vmedian))
    logging.info('- rms coef. var={:.2f}'.format(vvar))

    vcvar = 1.5 * vvar
    if vmedian > vmean:
        # The sorted volumes are needed by this estimator only, so they
        # are not sorted unless this branch is reached.
        volumes = sorted(stats.volumes())
        median_index = 0.55 * len(volumes)
        threshold = int(volumes[int(median_index)])
        logging.info(' ... Un-expected audio quality. Threshold with estimator exception 1 - median > mean: {:d}'.format(threshold))
    elif vcvar > vmean:
        if vmedian < vmean * 0.2:
            threshold = int(vmin) + int(vmean - vmedian)
            logging.info(' ... Un-expected audio quality. Threshold with estimator exception 2 - median < 0.2*mean: {:d}'.format(threshold))
        else:
            threshold = int(vmin) + int(0.2 * float(vmean))
            logging.info(' ... Un-expected audio quality. Threshold with estimator exception 3 - vcvar > mean: {:d}'.format(threshold))
    else:
        threshold = int(vmin) + int(vmean - vcvar)
        logging.info('Audio of expected quality. Threshold uses the normal estimator: {:d}'.format(threshold))

    return threshold
search_silences
Search windows with a volume lower than a given threshold.
This is then a search for silences. All windows with a volume
higher than the threshold are considered as tracks and not included
in the result. Blocks of silences shorter than min_sil_dur are
also considered tracks.
If threshold is set to 0, a value is automatically assigned.
Parameters
- threshold: (int) Expected minimum volume (rms value).
Returns
- (int) The actual threshold value
View Source
def search_silences(self, threshold: int=0) -> int:
    """Search windows with a volume lower than a given threshold.

    This is then a search for silences. Consecutive windows with a volume
    lower than the threshold are grouped into a silence, stored as a
    tuple (from_pos, to_pos) of frame numbers. All windows with a volume
    higher than or equal to the threshold are considered as tracks and
    not included in the result. Finally, the silences found are filtered
    with a duration of twice the window length.

    If threshold is set to 0, a value is automatically assigned.
    If no channel is defined, nothing is done and 0 is returned.

    :param threshold: (int) Expected minimum volume (rms value).
    :return: (int) The actual threshold value

    """
    if self._channel is None:
        return 0

    if threshold == 0:
        threshold = self.fix_threshold_vol()

    # Previous results are cancelled, whatever happens next
    silences = self.__silences = list()

    # Number of frames of one window of the volume statistics. This very
    # same product is used for all the positions, so that the start of a
    # trailing silence is rounded like the start of any other one.
    win_frames = self.__volume_stats.get_winlen() * self._channel.get_framerate()

    # Index of the first window of the silence being scanned, if any
    idx_begin = None
    for i, v in enumerate(self.__volume_stats):
        if v < threshold:
            if idx_begin is None:
                idx_begin = i
        elif idx_begin is not None:
            # Window i is not silent: the silence is closed at the start
            # of the previous window.
            from_pos = int(idx_begin * win_frames)
            to_pos = int((i - 1) * win_frames)
            silences.append((from_pos, to_pos))
            idx_begin = None

    # A silence still open lasts until the end of the channel
    if idx_begin is not None:
        start_pos = int(idx_begin * win_frames)
        end_pos = self._channel.get_nframes()
        silences.append((start_pos, end_pos))

    self.__filter_silences(2.0 * self._win_len)
    return threshold
filter_silences
Filter the current silences.
Parameters
- threshold: (int) Expected minimum volume (rms value)
- minsildur: (float) Minimum silence duration in seconds
Returns
- (int) Number of silences with the expected minimum duration
View Source
def filter_silences(self, threshold: int, min_sil_dur: float=0.2) -> int:
    """Adjust the bounds of the current silences, then filter them.

    Nothing is done if there are no silences. Otherwise, the start of
    each silence is adjusted with direction -1 and its end is adjusted
    with direction 1; the resulting silences are then filtered with the
    given minimum duration.
    If threshold is set to 0, a value is automatically assigned.

    :param threshold: (int) Expected minimum volume (rms value)
    :param min_sil_dur: (float) Minimum silence duration in seconds
    :return: (int) Number of silences with the expected minimum duration

    """
    nb_silences = len(self.__silences)
    if nb_silences == 0:
        return 0

    if threshold == 0:
        threshold = self.fix_threshold_vol()

    # For each silence the start is adjusted before the end. The new list
    # replaces the current one only when all silences were adjusted.
    self.__silences = [
        (self.__adjust_bound(begin, threshold, direction=-1),
         self.__adjust_bound(end, threshold, direction=1))
        for begin, end in self.__silences
    ]

    self.__filter_silences(min_sil_dur)
    return len(self.__silences)
filter_silences_from_tracks
Filter the given silences to remove very small tracks.
Parameters
- mintrackdur: (float) Minimum duration of a track
View Source
def filter_silences_from_tracks(self, min_track_dur: float=0.6) -> None:
    """Filter the given silences to remove very small tracks.

    The tracks are extracted from the current silences without shifting
    their bounds, and only the ones strictly longer than min_track_dur
    are kept. The silences are then re-created from the kept tracks, so
    that each track which was not kept becomes a part of a silence.

    Nothing is done if there are less than 3 silences. The silences are
    also left unchanged if no track is long enough to be kept.

    :param min_track_dur: (float) Minimum duration of a track

    """
    if len(self.__silences) < 3:
        return

    tracks = self.extract_tracks(min_track_dur, 0.0, 0.0)

    # Remove too short tracks
    framerate = float(self._channel.get_framerate())
    keep_tracks = [
        (from_track, to_track)
        for from_track, to_track in tracks
        if float(to_track - from_track) / framerate > min_track_dur
    ]
    if len(keep_tracks) == 0:
        # No track to re-create the silences from.
        return

    filtered_sil = list()

    # First silence: from the start of the first current silence up to
    # the first kept track. Tracks removed before this one are absorbed.
    first_sil_start = self.__silences[0][0]
    first_track_start = keep_tracks[0][0]
    if first_sil_start < first_track_start:
        filtered_sil.append((first_sil_start, first_track_start))

    # Silences between two consecutive kept tracks
    for (_, prev_track_end), (next_track_start, _) in zip(keep_tracks, keep_tracks[1:]):
        filtered_sil.append((int(prev_track_end), int(next_track_start)))

    # Last silence: from the end of the last kept track up to the end of
    # the channel, if the track does not already end there.
    to_pos = self._channel.get_nframes()
    last_track_end = keep_tracks[-1][1]
    if to_pos - last_track_end > 0:
        filtered_sil.append((int(last_track_end), int(to_pos)))

    self.__silences = filtered_sil