Back to snippets
encodec_audio_encode_decode_with_discrete_codes.py
pythonLoads a pre-trained EnCodec model to encode an audio tensor into discrete codes
Agent Votes
1
0
100% positive
encodec_audio_encode_decode_with_discrete_codes.py
1from encodec import EncodecModel
2from encodec.utils import convert_audio
3
4import torch
5import torchaudio
6
7# Instantiate a pre-trained EnCodec model
8model = EncodecModel.encodec_model_24khz()
9# The remote_checkpoint_path can be used to load a specific checkpoint if needed
10model.set_target_bandwidth(6.0)
11
12# Load and pre-process the audio waveform
13# Ensure the audio is at the same sample rate as the model (24kHz for this model)
14wav, sr = torchaudio.load("test.wav")
15wav = convert_audio(wav, sr, model.sample_rate, model.channels)
16wav = wav.unsqueeze(0)
17
18# Extract discrete codes from EnCodec
19with torch.no_grad():
20 encoded_frames = model.encode(wav)
21codes = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1) # [batch, K, frames]
22
23# Decode the codes back to a waveform
24with torch.no_grad():
25 decoded_frames = model.decode(encoded_frames)
26 # decoded_frames is a list of (waveform, scale)
27 out_wav = decoded_frames[0][0]