AudiooPy 0.5

https://sourceforge.net/projects/audioopy/

Module audioopy.ipus

Class ChannelSilences

Description

Silence search on a channel of an audio file.

Silences are stored as a list of (from_pos, to_pos) tuples, giving the frames at which each silence starts and ends. The rms (root-mean-square) is estimated in windows of 10 ms by default. Each window is labelled as silence or as sounding depending on an rms threshold value. Consecutive windows of the same kind are then grouped into intervals, and these intervals are compared to the given minimum durations in order to get the tracks.

Constructor

Create a ChannelSilences instance.

The duration of a window (win_len) is relevant for the estimation of the rms values. The maximum value of vagueness is win_len.

Parameters
  • channel: (Channel) Input channel object
  • win_len: (float) Duration of a window for the estimation of the volume values
  • vagueness: (float) Windows length to estimate the silence boundaries
View Source
def __init__(self, channel: Channel, win_len: float=0.01, vagueness: float=0.005):
    """Create a ChannelSilences instance.

    The duration of a window (win_len) is relevant for the estimation of
    the rms values. The vagueness is clamped exactly like set_vagueness()
    does: a value that is not positive becomes 0.0 and the maximum value
    is win_len.

    :param channel: (Channel) Input channel object, or None to set it later
    :param win_len: (float) Duration of a window for the estimation of the volume values
    :param vagueness: (float) Windows length to estimate the silence boundaries

    """
    # No channel is attached yet, so set_win_length() below only stores
    # the (clamped) window length and does not estimate any volume.
    self._channel = None
    self.__volume_stats = None
    self.__silences = list()
    self._win_len = 0.01
    self.set_win_length(win_len)
    # Same clamping rule as set_vagueness(): never negative, never larger
    # than the window length.
    vagueness = float(vagueness)
    self._vagueness = min(vagueness, self._win_len) if vagueness > 0.0 else 0.0
    if channel is not None:
        self.set_channel(channel)

Public functions

get_win_length

Return the window length used to estimate the RMS.

View Source
def get_win_length(self) -> float:
    """Return the window length used to estimate the RMS.

    :return: (float) The window duration currently stored by this instance

    """
    return self._win_len

set_win_length

Set a new length of window and perform estimation of volume values.

It cancels any previous estimation of volume and silence search.

Parameters
  • w: (float) between 0.001 and 0.05.
Raises
  • ValueError: if w is not a float
View Source
def set_win_length(self, w: float) -> None:
    """Set a new window length and redo the estimation of volume values.

    The given value is clamped to the range
    [ChannelSilences.MIN_WIN_LEN, ChannelSilences.MAX_WIN_LEN].
    If a channel is already attached, it is set again: this cancels any
    previous estimation of volume and any previous silence search.

    :param w: (float) between 0.001 and 0.05.
    :raises: ValueError: if w is not a float

    """
    requested = float(w)
    lower = ChannelSilences.MIN_WIN_LEN
    upper = ChannelSilences.MAX_WIN_LEN
    if requested > upper:
        self._win_len = upper
    elif requested >= lower:
        self._win_len = requested
    else:
        # Too small (or not comparable, e.g. NaN): use the minimum.
        self._win_len = lower
    # Re-attaching the channel rebuilds the volume statistics with the
    # new window length and empties the list of silences.
    if self._channel is not None:
        self.set_channel(self._channel)

get_vagueness

Return the vagueness value.

View Source
def get_vagueness(self) -> float:
    """Return the vagueness value.

    :return: (float) The window length used to estimate the silence boundaries

    """
    return self._vagueness

set_vagueness

Set the window length used to estimate the silence boundaries.

Parameters
  • vagueness: (float) Maximum value of vagueness is win_len.
View Source
def set_vagueness(self, vagueness: float) -> None:
    """Set the window length used to estimate the silence boundaries.

    A value that is not strictly positive is stored as 0.0; a value
    larger than the current window length is reduced to that length.

    :param vagueness: (float) Maximum value of vagueness is win_len.

    """
    value = float(vagueness)
    self._vagueness = min(value, self._win_len) if value > 0.0 else 0.0

get_channel

Return the channel.

View Source
def get_channel(self) -> Channel:
    """Return the channel.

    :return: (Channel) The channel attached to this instance, or None if
        no channel was set

    """
    return self._channel

set_channel

Set a channel, then reset all previous results.

Parameters
  • channel: (Channel) The channel to be used to search for silences
Raises
  • TypeError: Given parameter is not a Channel
View Source
def set_channel(self, channel: Channel) -> None:
    """Attach a channel and discard every previous result.

    The volume statistics are estimated again on the given channel with
    the current window length, and the list of silences is emptied.

    :param channel: (Channel) The channel to be used to search for silences
    :raises: TypeError: Given parameter is not a Channel

    """
    if not isinstance(channel, Channel):
        given_type = str(type(channel))
        raise TypeError('Expected a Channel, got {:s} instead.'.format(given_type))
    self._channel = channel
    self.__volume_stats = ChannelVolume(channel, self._win_len)
    self.__silences = []

get_volstats

Return the RMS values estimated on the channel.

View Source
def get_volstats(self) -> ChannelVolume | None:
    """Return the RMS values estimated on the channel.

    :return: (ChannelVolume | None) The volume statistics created when the
        channel was set, or None if no channel was set

    """
    return self.__volume_stats

set_silences

Set the silences manually; to be used carefully.

Assign manually the list of tuples (start, end) of each silence.

Parameters
  • silences: (list of tuples (startpos, endpos))
Raises
  • TypeError: Invalid given parameter
View Source
def set_silences(self, silences: list) -> None:
    """Set the silences manually; to be used carefully.

    Assign manually the list of tuples (start, end) of each silence.
    The given object is stored as it is (no copy is made).

    :param silences: (list of tuples (start_pos, end_pos))
    :raises: TypeError: Invalid given parameter

    """
    if not isinstance(silences, (list, tuple)):
        raise TypeError('Expected a list, got {:s} instead'.format(str(type(silences))))
    for v in silences:
        if not isinstance(v, (list, tuple)):
            # '{}' and not '{:s}': the 's' format code is rejected with a
            # ValueError for non-string objects such as int, which hid the
            # TypeError this method is documented to raise.
            raise TypeError('Expected a list or tuple, got {} instead'.format(v))
        try:
            if len(v) != 2:
                raise ValueError
            # Only checks that both bounds can be converted to int;
            # the values are stored unchanged.
            int(v[0])
            int(v[1])
        except (ValueError, TypeError):
            raise TypeError('Expected a list of 2 int values, got {} instead'.format(v))
    self.__silences = silences

reset_silences

Reset silences to an empty list.

View Source
def reset_silences(self) -> None:
    """Forget all the silences: the list of silences becomes empty."""
    self.__silences = []

refine

Improve the precision of the given position of a silence.

Parameters
  • pos: (int) Initial position of the silence
  • threshold: (int) rms threshold value for a silence
  • win_length: (float) Windows duration to estimate the rms
  • direction: (int)
Returns
  • (int) updated position
View Source
def refine(self, pos: int, threshold: int, win_length: float=0.005, direction: int=1):
    """Improve the precision of the given position of a silence.

    The frames from one analysis window before the position to one
    analysis window after it are extracted into a temporary channel.
    Its rms values are estimated with windows of win_length and the
    position of the first (direction=1) or of the last (direction=-1)
    window whose rms is higher than the threshold is returned.

    :param pos: (int) Initial position of the silence
    :param threshold: (int) rms threshold value for a silence
    :param win_length: (float) Windows duration to estimate the rms
    :param direction: (int) 1 to search forward, -1 to search backward
    :return: (int) updated position, or pos itself if no window exceeds
        the threshold or if direction is neither 1 nor -1

    """
    channel = self._channel
    half_span = int(self.__volume_stats.get_winlen() * channel.get_framerate())
    start = max(pos - half_span, 0)
    channel.seek(start)
    frames = channel.get_frames(half_span * 2)
    excerpt = Channel(channel.get_framerate(), channel.get_sampwidth(), frames)
    rms_values = ChannelVolume(excerpt, win_length)

    if direction == 1:
        # First window above the threshold: return the frame where it starts.
        for index, rms in enumerate(rms_values):
            if rms > threshold:
                return start + index * int(win_length * channel.get_framerate())
    elif direction == -1:
        # Last window above the threshold: the index counts down from the
        # number of windows, so the returned frame is where it ends.
        countdown = range(len(rms_values), 0, -1)
        for index, rms in zip(countdown, reversed(rms_values)):
            if rms > threshold:
                return start + index * int(win_length * channel.get_framerate())
    return pos

extract_tracks

Return the tracks, deduced from the silences and track constraints.

Parameters
  • min_track_dur: (float) The minimum duration for a track
  • shift_dur_start: (float) The time to subtract from the start boundary
  • shift_dur_end: (float) The time to add to the end boundary
Returns
  • list of tuples(frompos,topos)

Duration is in seconds.

View Source
def extract_tracks(self, min_track_dur: float, shift_dur_start: float, shift_dur_end: float):
    """Return the tracks, deduced from the silences and track constraints.

    A track is the interval between the end of a silence (or the
    beginning of the channel) and the start of the next silence (or the
    end of the channel). Only the intervals lasting at least
    min_track_dur are returned. Each returned track is enlarged by the
    given shifts, without going outside the channel.

    :param min_track_dur: (float) The minimum duration for a track
    :param shift_dur_start: (float) The time to subtract from the start bound
    :param shift_dur_end: (float) The time to add to the end boundary
    :return: list of tuples (from_pos, to_pos); an empty list if no
        channel is set; the whole channel if there are no silences

    Durations are in seconds.

    """
    if self._channel is None:
        return []
    nframes = self._channel.get_nframes()
    if len(self.__silences) == 0:
        return [(0, nframes)]

    # Convert the durations into numbers of frames.
    framerate = self._channel.get_framerate()
    delta = int(min_track_dur * framerate)
    shift_start = int(shift_dur_start * framerate)
    shift_end = int(shift_dur_end * framerate)

    tracks = list()
    from_pos = 0
    for to_pos, next_from in self.__silences:
        # The candidate track ends where this silence starts.
        if to_pos - from_pos >= delta:
            shift_from_pos = max(from_pos - shift_start, 0)
            shift_to_pos = min(to_pos + shift_end, nframes)
            tracks.append((int(shift_from_pos), int(shift_to_pos)))
        # The next candidate track starts where this silence ends.
        from_pos = next_from

    # Track after the last silence, up to the end of the channel. Its
    # start is shifted like the start of every other track; its end is
    # already the last frame, so no end shift can apply.
    if nframes - from_pos >= delta:
        shift_from_pos = max(from_pos - shift_start, 0)
        tracks.append((int(shift_from_pos), int(nframes)))
    return tracks

fix_threshold_vol

Fix automatically the threshold for optimizing tracks/silences search.

This is an observation of the distribution of rms values.

Returns
  • (int) volume value
View Source
def fix_threshold_vol(self) -> int:
    """Fix automatically the threshold for optimizing tracks/silences search.

    This is an observation of the distribution of rms values: the
    minimum, mean, median and coefficient of variation of the volume
    statistics select one of four estimators. When the median is higher
    than the mean, the statistics are estimated again on normalized
    volumes before choosing the estimator.

    :return: (int) volume value

    """
    vmin = max(self.__volume_stats.min(), 0)
    vmean = self.__volume_stats.mean()
    vmedian = self.__volume_stats.median()
    vvar = self.__volume_stats.coefvariation()
    if vmedian > vmean:
        logging.warning(' ... Due to un-expected outlier values, the automatic threshold estimation requires the rms distribution to be normalized.')
        # Work on a fresh estimation so that the stored statistics are
        # left untouched by the normalization.
        vol_stats = ChannelVolume(self._channel, self._win_len)
        vol_stats.normalize_volumes()
        vmean = vol_stats.mean()
        vmedian = vol_stats.median()
        vvar = vol_stats.coefvariation()
        volumes = sorted(vol_stats.volumes())
    else:
        volumes = sorted(self.__volume_stats.volumes())
    # All four values are logged with 2 decimals ('{:.2f}'); the last two
    # previously used '{:2f}', which is a minimum width, not a precision.
    logging.info('- rms min={:.2f}'.format(vmin))
    logging.info('- rms mean={:.2f}'.format(vmean))
    logging.info('- rms median={:.2f}'.format(vmedian))
    logging.info('- rms coef. var={:.2f}'.format(vvar))
    vcvar = 1.5 * vvar
    # This test uses the re-estimated values if a normalization occurred.
    if vmedian > vmean:
        median_index = 0.55 * len(volumes)
        threshold = int(volumes[int(median_index)])
        logging.info(' ... Un-expected audio quality. Threshold with estimator exception 1 - median > mean: {:d}'.format(threshold))
    elif vcvar > vmean:
        if vmedian < vmean * 0.2:
            threshold = int(vmin) + int(vmean - vmedian)
            logging.info(' ... Un-expected audio quality. Threshold with estimator exception 2 - median < 0.2*mean: {:d}'.format(threshold))
        else:
            threshold = int(vmin) + int(0.2 * float(vmean))
            logging.info(' ... Un-expected audio quality. Threshold with estimator exception 3 - vcvar > mean: {:d}'.format(threshold))
    else:
        threshold = int(vmin) + int(vmean - vcvar)
        logging.info('Audio of expected quality. Threshold uses the normal estimator: {:d}'.format(threshold))
    return threshold

search_silences

Search windows with a volume lesser than a given threshold.

This is then a search for silences. All windows with a volume higher than the threshold are considered as tracks and are not included in the result. Blocks of silence shorter than min_sil_dur are also considered as tracks. If threshold is set to 0, a value is automatically assigned.

Parameters
  • threshold: (int) Expected minimum volume (rms value).
Returns
  • (int) The actual threshold value
View Source
def search_silences(self, threshold: int=0) -> int:
    """Search windows with a volume lower than a given threshold.

    This is then a search for silences. Consecutive windows with a
    volume lower than the threshold are grouped into one silence; all
    the other windows are considered as tracks and are not included in
    the result. The silences found are then filtered with a minimum
    duration of twice the window length.
    If threshold is set to 0, a value is automatically assigned.

    :param threshold: (int) Expected minimum volume (rms value).
    :return: (int) The actual threshold value, or 0 if no channel is set

    """
    if self._channel is None:
        return 0
    if threshold == 0:
        threshold = self.fix_threshold_vol()

    self.__silences = list()
    # Number of frames in one analysis window. Every position below is
    # computed as int(window_index * win_frames), so that all the bounds
    # are rounded in the same way.
    win_frames = self.__volume_stats.get_winlen() * self._channel.get_framerate()
    # Index of the first window of the silence being read, or None when
    # the current window is not inside a silence.
    idx_begin = None
    for i, v in enumerate(self.__volume_stats):
        if v < threshold:
            if idx_begin is None:
                idx_begin = i
        elif idx_begin is not None:
            # Window i is the first sounding one after a silence.
            # NOTE(review): the end is the first frame of the last silent
            # window (i - 1), not its last frame - kept as it was.
            from_pos = int(idx_begin * win_frames)
            to_pos = int((i - 1) * win_frames)
            self.__silences.append((from_pos, to_pos))
            idx_begin = None

    # The channel ends inside a silence: close it at the last frame.
    if idx_begin is not None:
        start_pos = int(idx_begin * win_frames)
        end_pos = self._channel.get_nframes()
        self.__silences.append((start_pos, end_pos))

    self.__filter_silences(2.0 * self._win_len)
    return threshold

filter_silences

Filter the current silences.

Parameters
  • threshold: (int) Expected minimum volume (rms value)
  • minsildur: (float) Minimum silence duration in seconds
Returns
  • (int) Number of silences with the expected minimum duration
View Source
def filter_silences(self, threshold: int, min_sil_dur: float=0.2) -> int:
    """Adjust the bounds of the current silences then filter them.

    The start of each silence is adjusted backward and its end is
    adjusted forward, then the silences are filtered with the given
    minimum duration. If threshold is 0, a value is automatically
    assigned.

    :param threshold: (int) Expected minimum volume (rms value)
    :param min_sil_dur: (float) Minimum silence duration in seconds
    :return: (int) Number of silences with the expected minimum duration

    """
    if not self.__silences:
        return 0
    if threshold == 0:
        threshold = self.fix_threshold_vol()

    # For each silence, the start is adjusted before the end.
    self.__silences = [
        (
            self.__adjust_bound(begin, threshold, direction=-1),
            self.__adjust_bound(end, threshold, direction=1),
        )
        for begin, end in self.__silences
    ]
    self.__filter_silences(min_sil_dur)
    return len(self.__silences)

filter_silences_from_tracks

Filter the given silences to remove very small tracks.

Parameters
  • mintrackdur: (float) Minimum duration of a track
View Source
def filter_silences_from_tracks(self, min_track_dur: float=0.6) -> None:
    """Filter the given silences to remove very small tracks.

    The tracks are extracted from the current silences and only the ones
    lasting more than min_track_dur are kept. The silences are then
    rebuilt as the gaps between the kept tracks.
    Nothing is changed if there are fewer than 3 silences, or if no
    track is long enough to be kept.

    :param min_track_dur: (float) Minimum duration of a track

    """
    if len(self.__silences) < 3:
        return
    tracks = self.extract_tracks(min_track_dur, 0.0, 0.0)
    framerate = float(self._channel.get_framerate())
    keep_tracks = [
        (from_track, to_track)
        for from_track, to_track in tracks
        if float(to_track - from_track) / framerate > min_track_dur
    ]
    if not keep_tracks:
        # No track to rebuild the silences from: the silences are left
        # unchanged (this situation used to raise an IndexError).
        return

    filtered_sil = list()
    # Keep the first silence if it starts before the first kept track.
    first_sil = self.__silences[0]
    if first_sil[0] < keep_tracks[0][0]:
        filtered_sil.append((first_sil[0], first_sil[1]))
    # A silence lies between each pair of consecutive kept tracks.
    for (_, prev_track_end), (next_track_start, _) in zip(keep_tracks, keep_tracks[1:]):
        filtered_sil.append((int(prev_track_end), int(next_track_start)))
    # Last silence: from the end of the last KEPT track to the end of the
    # channel, so that a discarded last track is included in the silence.
    to_pos = self._channel.get_nframes()
    last_track_end = keep_tracks[-1][1]
    if to_pos - last_track_end > 0:
        filtered_sil.append((int(last_track_end), int(to_pos)))
    self.__silences = filtered_sil

Protected functions

__filter_silences

Filter the given silences.

Parameters
  • minsildur: (float) Minimum silence duration in seconds
View Source
def __filter_silences(self, min_sil_dur: float=0.2) -> None:
    """Keep only the silences lasting more than a minimum duration.

    The duration of a silence is its number of frames divided by the
    framerate of the channel.

    :param min_sil_dur: (float) Minimum silence duration in seconds

    """
    self.__silences = [
        (begin, end)
        for begin, end in self.__silences
        if float(end - begin) / float(self._channel.get_framerate()) > min_sil_dur
    ]

__adjust_bound

Adjust the position of a silence around a given position.

Here "around" the position means, as implemented in the source below, the frames from 1.5 analysis windows before the given position to 3 analysis windows after it.

Parameters
  • pos: (int) Initial position of the silence
  • threshold: (int) RMS threshold value for a silence
  • direction: (int)
Returns
  • (int) estimated position
View Source
def __adjust_bound(self, pos: int, threshold: int, direction: int=0) -> int:
    """Adjust the position of a silence around a given position.

    Here "around" the position means the frames from 1.5 analysis
    windows before the position to 3 analysis windows after it. These
    frames are extracted into a temporary channel and their rms values
    are estimated with windows of the vagueness duration.

    Nothing is adjusted if the vagueness is equal to the window length
    or if the direction is neither -1 nor 1.

    :param pos: (int) Initial position of the silence
    :param threshold: (int) RMS threshold value for a silence
    :param direction: (int) 1 to return the start of the first window with
        rms > threshold; -1 to return the end of the last window with
        rms >= threshold
    :return: (int) estimated position, or pos if no window matches

    """
    if self._vagueness == self._win_len or direction not in (-1, 1):
        return pos

    channel = self._channel
    delta = int(1.5 * self.__volume_stats.get_winlen() * channel.get_framerate())
    start_pos = int(max(pos - delta, 0))
    channel.seek(start_pos)
    frames = channel.get_frames(int(delta * 3))
    excerpt = Channel(channel.get_framerate(), channel.get_sampwidth(), frames)
    rms_values = ChannelVolume(excerpt, self._vagueness)

    if direction == 1:
        for idx, rms in enumerate(rms_values):
            if rms > threshold:
                step = int(self._vagueness * channel.get_framerate())
                return start_pos + int(idx * step)
        return pos

    # direction == -1: read the windows backward; idx counts down from the
    # number of windows, so idx * step is the end of the window being read.
    countdown = range(len(rms_values), 0, -1)
    for idx, rms in zip(countdown, reversed(rms_values)):
        if rms >= threshold:
            step = int(self._vagueness * channel.get_framerate())
            return start_pos + int(idx * step)
    return pos

Overloads

__len__

View Source
def __len__(self) -> int:
    """Return the number of silences currently stored."""
    return len(self.__silences)

__iter__

View Source
def __iter__(self):
    """Iterate over the stored silences, in their stored order."""
    yield from self.__silences

__getitem__

View Source
def __getitem__(self, i):
    """Return the stored silence(s) at the given index.

    :param i: Index forwarded as it is to the stored collection of silences

    """
    return self.__silences[i]