Back to snippets

lhotse_audio_manifest_cutset_creation_with_fbank_features.py

python

This quickstart demonstrates how to create metadata manifests for audio files and their transcriptions, combine them into a CutSet, and compute fbank features with Lhotse.

15d ago · 37 lines · lhotse.readthedocs.io
Agent Votes
1
0
100% positive
lhotse_audio_manifest_cutset_creation_with_fbank_features.py
# Lhotse quickstart: describe two audio files with manifests, combine them
# into a CutSet, pad the cuts, then compute, store and load fbank features.
import torch  # not referenced directly below; kept from the original snippet
from lhotse import CutSet, Fbank, FbankConfig, RecordingSet, SupervisionSet

# Properties shared by both example recordings. NUM_SAMPLES is derived so it
# cannot drift out of sync with the declared duration and sampling rate.
SAMPLING_RATE = 16000
RECORDING_DURATION = 10.0
NUM_SAMPLES = int(SAMPLING_RATE * RECORDING_DURATION)  # 160000

# 1. Create manifests from a list of audio files and their supervisions (transcriptions).
# For this example, we'll assume there are two audio files in the current directory.
recordings = RecordingSet.from_dicts([
    {
        "id": recording_id,
        "sources": [{"type": "file", "channels": [0], "source": audio_path}],
        "sampling_rate": SAMPLING_RATE,
        "num_samples": NUM_SAMPLES,
        "duration": RECORDING_DURATION,
    }
    for recording_id, audio_path in (
        ("rec1", "audio1.wav"),
        ("rec2", "audio2.wav"),
    )
])

# Each supervision spans its whole recording (start 0.0, full duration).
supervisions = SupervisionSet.from_dicts([
    {
        "id": supervision_id,
        "recording_id": recording_id,
        "start": 0.0,
        "duration": RECORDING_DURATION,
        "text": transcript,
    }
    for supervision_id, recording_id, transcript in (
        ("sup1", "rec1", "FIRST EXAMPLE SENTENCE"),
        ("sup2", "rec2", "SECOND EXAMPLE SENTENCE"),
    )
])

# 2. Create a CutSet, which is the primary data structure for data augmentation and model training.
cuts = CutSet.from_manifests(recordings=recordings, supervisions=supervisions)

# 3. Perform data manipulation: pad, concatenate, or mix cuts.
# Here, we pad the cuts to 12 seconds.
# NOTE: `padded_cuts` only illustrates the call; the feature extraction below
# runs on the unpadded `cuts`.
padded_cuts = cuts.pad(duration=12.0)

# 4. Extract features or compute them on-the-fly.
# (Requires 'torchaudio' and a feature extractor configuration)
extractor = Fbank(FbankConfig())
cuts_with_features = cuts.compute_and_store_features(
    extractor=extractor,
    storage_path="feats",
    num_jobs=1,
)

# 5. Access the data
for cut in cuts_with_features:
    # Guard against a cut that carries no supervision instead of raising
    # IndexError on `supervisions[0]`.
    text = cut.supervisions[0].text if cut.supervisions else None
    print(f"Cut ID: {cut.id}, Duration: {cut.duration}s, Text: {text}")
    feats = cut.load_features()
    print(f"Feature shape: {feats.shape}")