Module audioopy.ipus

Class ChannelSilences

Description

Silence search on a channel of an audio file.

Silences are stored in a list of (from_pos, to_pos) values, indicating the frames at which each silence starts and ends. The rms (root-mean-square) is estimated in windows of 10 ms by default. Each window is labelled as silence or as sounding depending on an rms threshold value. Consecutive silence windows, then consecutive sounding windows, are grouped into intervals which are compared to given minimum durations in order to obtain the tracks.

Constructor

Create a ChannelSilences instance.

The duration of a window (win_len) is relevant for the estimation of the rms values. The maximum value of vagueness is win_len.

Parameters

channel: (Channel) Input channel object
win_len: (float) Duration of a window for the estimation of the volume values
vagueness: (float) Window length to estimate the silence boundaries

View Source

def __init__(self, channel: Channel, win_len: float=0.01, vagueness: float=0.005):
    """Create a ChannelSilences instance.

    The duration of a window (win_len) is relevant for the estimation of the
    rms values. The vagueness is stored with the same rule as the one applied
    by set_vagueness(): a value that is not strictly positive becomes 0 and
    the maximum value is win_len.

    :param channel: (Channel) Input channel object; None leaves the instance without a channel
    :param win_len: (float) Duration of a window for the estimation of the volume values
    :param vagueness: (float) Window length to estimate the silence boundaries

    """
    self._channel = None
    self._win_len = 0.01
    # No channel is set at this point, so this call only stores the
    # (clamped) window length.
    self.set_win_length(win_len)
    # A negative value has no meaning as a window duration: it is turned
    # into 0, exactly like set_vagueness() does.
    requested = float(vagueness)
    self._vagueness = min(requested, self._win_len) if requested > 0.0 else 0.0
    self.__volume_stats = None
    self.__silences = list()
    if channel is not None:
        self.set_channel(channel)

Public functions

get_win_length

Return the window length used to estimate the RMS.

View Source

def get_win_length(self) -> float:
    """Return the duration of the window used for the RMS estimation."""
    window_duration = self._win_len
    return window_duration

set_win_length

Set a new length of window and perform estimation of volume values.

It cancels any previous estimation of volume and silence search.

Parameters

w: (float) between 0.001 and 0.05.

Raises

ValueError: if w cannot be converted to a float

View Source

def set_win_length(self, w: float) -> None:
    """Assign the window duration, then redo the estimation of the volumes.

    The given value is forced into the range delimited by MIN_WIN_LEN and
    MAX_WIN_LEN. If a channel was already set, it is assigned again, which
    cancels the previous volume estimation and the silences.

    :param w: (float) between 0.001 and 0.05.
    :raises: ValueError: if w is not a float

    """
    requested = float(w)
    lower = ChannelSilences.MIN_WIN_LEN
    upper = ChannelSilences.MAX_WIN_LEN
    if requested > upper:
        self._win_len = upper
    elif requested >= lower:
        self._win_len = requested
    else:
        # Too small values -- and values which are not comparable -- end here.
        self._win_len = lower
    if self._channel is not None:
        self.set_channel(self._channel)

get_vagueness

Return the vagueness value.

View Source

def get_vagueness(self) -> float:
    """Return the window duration used to estimate the silence boundaries."""
    current = self._vagueness
    return current

set_vagueness

Set the window length used to estimate the silence boundaries.

Parameters

vagueness: (float) Maximum value of vagueness is win_len.

View Source

def set_vagueness(self, vagueness: float) -> None:
    """Set the window length used to estimate the boundaries.

    A value which is not strictly positive is stored as 0.

    :param vagueness: (float) Maximum value of vagueness is win_len.

    """
    value = float(vagueness)
    self._vagueness = min(value, self._win_len) if value > 0.0 else 0.0

get_channel

Return the channel.

View Source

def get_channel(self) -> Channel:
    """Return the channel in which silences are searched for."""
    current_channel = self._channel
    return current_channel

set_channel

Set a channel, then reset all previous results.

Parameters

channel: (Channel) The channel to be used to search for silences

Raises

TypeError: Given parameter is not a Channel

View Source

def set_channel(self, channel: Channel) -> None:
    """Assign the channel and drop every result previously estimated.

    The volume values are estimated again on the new channel with the
    current window length, and the list of silences is emptied.

    :param channel: (Channel) The channel to be used to search for silences
    :raises: TypeError: Given parameter is not a Channel

    """
    if not isinstance(channel, Channel):
        given_type = str(type(channel))
        raise TypeError('Expected a Channel, got {:s} instead.'.format(given_type))
    self._channel = channel
    self.__volume_stats = ChannelVolume(channel, self._win_len)
    self.__silences = []

get_volstats

Return the RMS values estimated on the channel.

View Source

def get_volstats(self) -> ChannelVolume | None:
    """Return the volume (RMS) estimation of the channel, or None if there is none."""
    stats = self.__volume_stats
    return stats

set_silences

Manually set the silences; to be used carefully.

Assign manually the list of tuples (start, end) of each silence.

Parameters

silences: (list of tuples (startpos, endpos))

Raises

TypeError: Invalid given parameter

View Source

def set_silences(self, silences: list) -> None:
    """Manually set the silences; to be used carefully.

    Assign manually the list of tuples (start, end) of each silence.
    Each item must be a list or a tuple of exactly two values which can be
    converted to int. The given container is stored as it is, not copied.

    :param silences: (list of tuples (start_pos, end_pos))
    :raises: TypeError: Invalid given parameter

    """
    if not isinstance(silences, (list, tuple)):
        raise TypeError('Expected a list, got {:s} instead'.format(str(type(silences))))
    for v in silences:
        if not isinstance(v, (list, tuple)):
            # '{:s}' only accepts a string: formatting the item itself
            # raised a formatting error instead of this TypeError.
            raise TypeError('Expected a list or tuple, got {:s} instead'.format(str(type(v))))
        try:
            if len(v) != 2:
                raise ValueError
            int(v[0])
            int(v[1])
        except (ValueError, TypeError):
            # int() raises TypeError -- not ValueError -- for values such as
            # None; both cases are reported with the same message.
            raise TypeError('Expected a list of 2 int values, got {} instead'.format(v))
    self.__silences = silences

reset_silences

Reset silences to an empty list.

View Source

def reset_silences(self) -> None:
    """Forget all the silences: the list of silences becomes empty."""
    self.__silences = []

refine

Improve the precision of the given position of a silence.

Parameters

pos: (int) Initial position of the silence
threshold: (int) rms threshold value for a silence
win_length: (float) Windows duration to estimate the rms
direction: (int)

Returns

(int) updated position

View Source

def refine(self, pos: int, threshold: int, win_length: float=0.005, direction: int=1):
    """Improve the precision of the given position of a silence.

    The rms is estimated again, with windows of win_length, on an excerpt of
    the channel which starts one volume-window before the position and lasts
    two volume-windows. The excerpt is scanned forward (direction=1) or
    backward (direction=-1) until a window is louder than the threshold.

    :param pos: (int) Initial position of the silence
    :param threshold: (int) rms threshold value for a silence
    :param win_length: (float) Windows duration to estimate the rms
    :param direction: (int) 1 to scan forward, -1 to scan backward
    :return: (int) updated position, or the given one if no window matched

    """
    volume_win_dur = self.__volume_stats.get_winlen()
    half_span = int(volume_win_dur * self._channel.get_framerate())
    excerpt_start = max(pos - half_span, 0)

    # Read the frames around the position and wrap them into a channel.
    self._channel.seek(excerpt_start)
    excerpt_frames = self._channel.get_frames(half_span * 2)
    excerpt = Channel(self._channel.get_framerate(), self._channel.get_sampwidth(), excerpt_frames)
    local_stats = ChannelVolume(excerpt, win_length)

    if direction == 1:
        for win_idx, rms in enumerate(local_stats):
            if rms > threshold:
                return excerpt_start + win_idx * int(win_length * self._channel.get_framerate())
    elif direction == -1:
        # The index is the one of the end of the window when going backward.
        countdown = range(len(local_stats), 0, -1)
        for win_idx, rms in zip(countdown, reversed(local_stats)):
            if rms > threshold:
                return excerpt_start + win_idx * int(win_length * self._channel.get_framerate())
    return pos

extract_tracks

Return the tracks, deduced from the silences and track constraints.

Parameters

mintrackdur: (float) The minimum duration for a track
shiftdurstart: (float) The time to subtract from the start boundary
shiftdurend: (float) The time to add to the end boundary

Returns

list of tuples(frompos,topos)

Duration is in seconds.

View Source

def extract_tracks(self, min_track_dur: float, shift_dur_start: float, shift_dur_end: float):
    """Return the tracks, deduced from the silences and track constraints.

    A track is what is in-between two consecutive silences, plus what is
    before the first silence and what is after the last one. Only the tracks
    lasting at least min_track_dur are returned. The start of a returned
    track is moved backward by shift_dur_start and its end is moved forward
    by shift_dur_end, both limited to the bounds of the channel.

    Without any silence, the whole channel is the only track and no shift is
    applied to it.

    :param min_track_dur: (float) The minimum duration for a track
    :param shift_dur_start: (float) The time to remove to the start bound
    :param shift_dur_end: (float) The time to add to the end boundary
    :return: list of tuples (from_pos,to_pos); empty if no channel is set

    Duration is in seconds.

    """
    if self._channel is None:
        return []
    nframes = self._channel.get_nframes()
    if len(self.__silences) == 0:
        return [(0, nframes)]

    framerate = self._channel.get_framerate()
    delta = int(min_track_dur * framerate)
    shift_start = int(shift_dur_start * framerate)
    shift_end = int(shift_dur_end * framerate)

    tracks = list()
    from_pos = 0
    # Each silence is (start, end): its start is the end of the current
    # track and its end is the start of the next track.
    for to_pos, next_from in self.__silences:
        if to_pos - from_pos >= delta:
            shift_from_pos = max(from_pos - shift_start, 0)
            shift_to_pos = min(to_pos + shift_end, nframes)
            tracks.append((int(shift_from_pos), int(shift_to_pos)))
        from_pos = next_from

    # Track after the last silence. Its start is shifted like the start of
    # any other track; its end is already the end of the channel.
    if nframes - from_pos >= delta:
        shift_from_pos = max(from_pos - shift_start, 0)
        tracks.append((int(shift_from_pos), int(nframes)))
    return tracks

fix_threshold_vol

Fix automatically the threshold for optimizing tracks/silences search.

This is an observation of the distribution of rms values.

Returns

(int) volume value

View Source

def fix_threshold_vol(self) -> int:
    """Fix automatically the threshold for optimizing tracks/silences search.

    This is an observation of the distribution of rms values: the min, the
    mean, the median and the coefficient of variation. When the median is
    higher than the mean, the statistics are estimated again on normalized
    volumes. One of four estimators is then selected:

    - median > mean: the volume at 55% of the sorted (normalized) volumes;
    - 1.5 * coef. var. > mean and median < 0.2 * mean: min + (mean - median);
    - 1.5 * coef. var. > mean: min + 0.2 * mean;
    - otherwise: min + (mean - 1.5 * coef. var.).

    :return: (int) volume value

    """
    vmin = max(self.__volume_stats.min(), 0)
    vmean = self.__volume_stats.mean()
    vmedian = self.__volume_stats.median()
    vvar = self.__volume_stats.coefvariation()

    # The sorted volumes are only needed by the "median > mean" estimator,
    # and that estimator can only be selected after a normalization.
    volumes = None
    if vmedian > vmean:
        logging.warning(' ... Due to un-expected outlier values, the automatic threshold estimation requires the rms distribution to be normalized.')
        vol_stats = ChannelVolume(self._channel, self._win_len)
        vol_stats.normalize_volumes()
        vmean = vol_stats.mean()
        vmedian = vol_stats.median()
        vvar = vol_stats.coefvariation()
        volumes = sorted(vol_stats.volumes())

    logging.info('- rms min={:.2f}'.format(vmin))
    logging.info('- rms mean={:.2f}'.format(vmean))
    # Two decimals, like the two messages above ('{:2f}' was a minimal
    # width of 2 with the default precision of 6 digits).
    logging.info('- rms median={:.2f}'.format(vmedian))
    logging.info('- rms coef. var={:.2f}'.format(vvar))

    vcvar = 1.5 * vvar
    if vmedian > vmean:
        # Still true after the normalization: 'volumes' is then available.
        median_index = 0.55 * len(volumes)
        threshold = int(volumes[int(median_index)])
        logging.info(' ... Un-expected audio quality. Threshold with estimator exception 1 - median > mean: {:d}'.format(threshold))
    elif vcvar > vmean:
        if vmedian < vmean * 0.2:
            threshold = int(vmin) + int(vmean - vmedian)
            logging.info(' ... Un-expected audio quality. Threshold with estimator exception 2 - median < 0.2*mean: {:d}'.format(threshold))
        else:
            threshold = int(vmin) + int(0.2 * float(vmean))
            logging.info(' ... Un-expected audio quality. Threshold with estimator exception 3 - vcvar > mean: {:d}'.format(threshold))
    else:
        threshold = int(vmin) + int(vmean - vcvar)
        logging.info('Audio of expected quality. Threshold uses the normal estimator: {:d}'.format(threshold))
    return threshold

search_silences

Search windows with a volume lower than a given threshold.

This is then a search for silences. All windows with a volume higher than the threshold are considered as tracks and are not included in the result. Blocks of silence shorter than minsildur are also considered as tracks. If threshold is set to 0, a value is automatically assigned.

Parameters

threshold: (int) Expected minimum volume (rms value).

Returns

(int) The actual threshold value

View Source

def search_silences(self, threshold: int=0) -> int:
    """Search for the windows with a volume lower than a given threshold.

    The consecutive windows under the threshold are grouped into a silence,
    stored as (from_pos, to_pos) in frames. The previous silences are
    dropped. The silences are then filtered with a minimum duration of
    twice the window length.
    If threshold is set to 0, a value is automatically assigned.

    :param threshold: (int) Expected minimum volume (rms value).
    :return: (int) The actual threshold value; 0 if no channel is set

    """
    if self._channel is None:
        return 0
    if threshold == 0:
        threshold = self.fix_threshold_vol()

    self.__silences = found = []
    # Number of frames of one window of the volume estimation.
    frames_per_win = self.__volume_stats.get_winlen() * self._channel.get_framerate()

    # Index of the first window of the silence being scanned, if any.
    first_win = None
    for win_idx, rms in enumerate(self.__volume_stats):
        if rms < threshold:
            if first_win is None:
                first_win = win_idx
        elif first_win is not None:
            # A sounding window closes the current silence.
            found.append((int(first_win * frames_per_win), int((win_idx - 1) * frames_per_win)))
            first_win = None

    if first_win is not None:
        # The channel ends with a silence: it lasts until the last frame.
        start_pos = int(first_win * self.__volume_stats.get_winlen() * self._channel.get_framerate())
        end_pos = self._channel.get_nframes()
        found.append((start_pos, end_pos))

    self.__filter_silences(2.0 * self._win_len)
    return threshold

filter_silences

Filter the current silences.

Parameters

threshold: (int) Expected minimum volume (rms value)
minsildur: (float) Minimum silence duration in seconds

Returns

(int) Number of silences with the expected minimum duration

View Source

def filter_silences(self, threshold: int, min_sil_dur: float=0.2) -> int:
    """Adjust the bounds of the current silences then remove the short ones.

    The start of each silence is adjusted backward and its end forward.
    If threshold is 0, a value is automatically assigned.

    :param threshold: (int) Expected minimum volume (rms value)
    :param min_sil_dur: (float) Minimum silence duration in seconds
    :return: (int) Number of silences with the expected minimum duration

    """
    if len(self.__silences) == 0:
        return 0
    if threshold == 0:
        threshold = self.fix_threshold_vol()

    # For each silence: first its start, then its end.
    self.__silences = [
        (self.__adjust_bound(sil_start, threshold, direction=-1),
         self.__adjust_bound(sil_end, threshold, direction=1))
        for sil_start, sil_end in self.__silences
    ]
    self.__filter_silences(min_sil_dur)
    return len(self.__silences)

filter_silences_from_tracks

Filter the given silences to remove very small tracks.

Parameters

mintrackdur: (float) Minimum duration of a track

View Source

def filter_silences_from_tracks(self, min_track_dur: float=0.6) -> None:
    """Filter the given silences to remove very small tracks.

    The tracks lasting more than min_track_dur are kept; the silences are
    re-created from what is left around and in-between them, so that a
    removed track is merged with its neighbouring silences. Nothing is done
    if there are less than 3 silences.

    When no track is long enough to be kept, a single silence remains: from
    the start of the first silence to the end of the channel.

    :param min_track_dur: (float) Minimum duration of a track

    """
    if len(self.__silences) < 3:
        return
    tracks = self.extract_tracks(min_track_dur, 0.0, 0.0)
    framerate = float(self._channel.get_framerate())
    nframes = self._channel.get_nframes()
    keep_tracks = [
        (from_track, to_track)
        for from_track, to_track in tracks
        if float(to_track - from_track) / framerate > min_track_dur
    ]

    first_sil_start = self.__silences[0][0]
    if not keep_tracks:
        # Every track was removed: indexing keep_tracks would fail.
        self.__silences = [(first_sil_start, int(nframes))]
        return

    filtered_sil = list()
    # Leading silence. It ends where the first kept track starts, so that
    # it also covers the short tracks removed before that track.
    if first_sil_start < keep_tracks[0][0]:
        filtered_sil.append((first_sil_start, keep_tracks[0][0]))

    # One silence in-between each pair of consecutive kept tracks.
    for (_, prev_track_end), (next_track_start, _) in zip(keep_tracks, keep_tracks[1:]):
        filtered_sil.append((int(prev_track_end), int(next_track_start)))

    # Trailing silence, after the last *kept* track.
    last_track_end = keep_tracks[-1][1]
    if nframes - last_track_end > 0:
        filtered_sil.append((int(last_track_end), int(nframes)))
    self.__silences = filtered_sil

Protected functions

__filter_silences

Filter the given silences.

Parameters

minsildur: (float) Minimum silence duration in seconds

View Source

def __filter_silences(self, min_sil_dur: float=0.2) -> None:
    """Keep only the silences lasting more than a given duration.

    The duration of a silence is its number of frames divided by the
    framerate of the channel.

    :param min_sil_dur: (float) Minimum silence duration in seconds

    """
    self.__silences = [
        (sil_start, sil_end)
        for sil_start, sil_end in self.__silences
        if float(sil_end - sil_start) / float(self._channel.get_framerate()) > min_sil_dur
    ]

__adjust_bound

Adjust the position of a silence around a given position.

Here "around" the position means in a range of 18 windows, i.e. 6 before + 12 after the given position.

Parameters

pos: (int) Initial position of the silence
threshold: (int) RMS threshold value for a silence
direction: (int)

Returns

(int) estimated position

View Source

def __adjust_bound(self, pos: int, threshold: int, direction: int=0) -> int:
    """Adjust the position of a silence around a given position.

    The rms is estimated again, with windows of the vagueness duration, on
    an excerpt of the channel which starts 1.5 volume-windows before the
    position and is three times that long. The excerpt is scanned forward
    (direction=1) or backward (direction=-1).

    The position is returned unchanged if the vagueness is equal to the
    window length, if the direction is neither 1 nor -1, or if no window of
    the excerpt matched.

    :param pos: (int) Initial position of the silence
    :param threshold: (int) RMS threshold value for a silence
    :param direction: (int) 1 to scan forward, -1 to scan backward
    :return: (int) estimated position

    """
    if self._vagueness == self._win_len or direction not in (-1, 1):
        return pos

    span = int(1.5 * self.__volume_stats.get_winlen() * self._channel.get_framerate())
    start_pos = int(max(pos - span, 0))

    # Read the frames around the position and wrap them into a channel.
    self._channel.seek(start_pos)
    excerpt_frames = self._channel.get_frames(int(span * 3))
    excerpt = Channel(self._channel.get_framerate(), self._channel.get_sampwidth(), excerpt_frames)
    fine_stats = ChannelVolume(excerpt, self._vagueness)

    if direction == 1:
        # Forward: stop at the first window louder than the threshold.
        for win_idx, rms in enumerate(fine_stats):
            if rms > threshold:
                return start_pos + int(win_idx * int(self._vagueness * self._channel.get_framerate()))
        return pos

    # Backward: the index is the one of the end of the window.
    countdown = range(len(fine_stats), 0, -1)
    for win_idx, rms in zip(countdown, reversed(fine_stats)):
        if rms >= threshold:
            return start_pos + int(win_idx * int(self._vagueness * self._channel.get_framerate()))
    return pos

Overloads

len

View Source

def __len__(self):
    """Return the number of silences."""
    stored = self.__silences
    return len(stored)

iter

View Source

def __iter__(self):
    """Iterate over the silences, in their storage order."""
    yield from self.__silences

getitem

View Source

def __getitem__(self, i):
    """Return the silence(s) stored at the given index or slice."""
    stored = self.__silences
    return stored[i]