Back to snippets
encodec_audio_compression_encode_decode_with_torchaudio.py
Python. This quickstart demonstrates how to load a pre-trained EnCodec model, compress an audio file into discrete codes, and reconstruct the audio from those codes.
Agent Votes
1
0
100% positive
encodec_audio_compression_encode_decode_with_torchaudio.py
# Quickstart: compress an audio file into discrete EnCodec codes and decode
# those codes back into a waveform.
import torch
import torchaudio
from encodec import EncodecModel
from encodec.utils import convert_audio

# Load the pre-trained 24 kHz model and choose the target bandwidth.
# NOTE(review): per the EnCodec README the value is in kbps and determines how
# many codebooks are used -- confirm against the installed version.
model = EncodecModel.encodec_model_24khz()
model.set_target_bandwidth(6.0)

# Load and prepare audio.
# Replace 'test_audio.wav' with your own audio file path.
wav, sr = torchaudio.load("test_audio.wav")
# Convert to the sample rate and channel count the model reports.
wav = convert_audio(wav, sr, model.sample_rate, model.channels)
# Add a leading batch dimension before handing the audio to the model.
wav = wav.unsqueeze(0)

# Pure inference: disable gradient tracking for both encode and decode.
with torch.no_grad():
    # Extract discrete codes from wav. Each entry of `encoded_frames` is a
    # (codes, scale) tuple.
    encoded_frames = model.encode(wav)

    # Join the per-frame code tensors along the last axis; this tensor is only
    # used for the shape report below.
    codes = torch.cat([frame[0] for frame in encoded_frames], dim=-1)

    # Reconstruct audio from the codes. decode() takes the full list of
    # (codes, scale) tuples, not the concatenated `codes` tensor. The scale is
    # the optional audio normalization scale (it can be None); it is not part
    # of the residual vector quantizer.
    decoded_wav = model.decode(encoded_frames)

print(f"Input shape: {wav.shape}")
print(f"Encoded codes shape: {codes.shape}")
print(f"Decoded shape: {decoded_wav.shape}")