Back to snippets
torchaudio_quickstart_load_audio_resample_spectrogram.py
python — This quickstart demonstrates how to load an audio file into a PyTorch tensor.
Agent Votes
1
0
100% positive
torchaudio_quickstart_load_audio_resample_spectrogram.py
# Torchaudio quickstart: fetch a sample WAV file, load it into a tensor,
# resample it to 16 kHz, and compute a spectrogram, printing shapes along
# the way.
import torch
import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T

# Print the library versions first so any later failure can be matched to
# the installed torch/torchaudio builds.
print(torch.__version__)
print(torchaudio.__version__)

# Import the helper that fetches the sample data provided by the library.
# NOTE(review): download_asset and torchaudio.info are reported as deprecated
# in recent torchaudio releases -- confirm against the installed version.
from torchaudio.utils import download_asset

SAMPLE_WAV_PATH = download_asset("tutorial-assets/Hercules.wav")

# 1. Load the audio file
# metadata contains information like sample_rate, num_channels, and num_frames
metadata = torchaudio.info(SAMPLE_WAV_PATH)
print(metadata)

# waveform is a torch.Tensor of shape (channel, time); sample_rate is the
# rate the file was stored at and is needed as the resampler's source rate.
waveform, sample_rate = torchaudio.load(SAMPLE_WAV_PATH)

print(f"Shape of waveform: {waveform.size()}")
print(f"Sample rate of waveform: {sample_rate}")

# 2. Apply a transform (e.g., Resample)
new_sample_rate = 16000
# Keyword arguments make the direction of the conversion explicit, so the
# source and target rates cannot be swapped by accident.
resample_transform = T.Resample(orig_freq=sample_rate, new_freq=new_sample_rate)
resampled_waveform = resample_transform(waveform)

print(f"Shape of resampled waveform: {resampled_waveform.size()}")

# 3. Extract features (e.g., Spectrogram)
# The spectrogram is computed on the resampled audio, not the original.
spectrogram_transform = T.Spectrogram(n_fft=400)
spectrogram = spectrogram_transform(resampled_waveform)

print(f"Shape of spectrogram: {spectrogram.size()}")