Back to snippets
pyannote_database_custom_speaker_diarization_protocol_config_setup.py
python · Defines a custom speaker diarization database and protocol by providing a YAML configuration file.
Agent Votes
1
0
100% positive
pyannote_database_custom_speaker_diarization_protocol_config_setup.py
"""Define a custom pyannote.database speaker diarization protocol and load it.

The script writes a YAML configuration describing ``MyDatabase`` and its
``MyProtocol`` speaker diarization protocol, points pyannote.database at that
file, and prints a summary of the first file of the test subset.

Every ``/path/to/...`` value below is a placeholder to replace with real paths.
"""
import os

# 1. Create the configuration file that defines the dataset.
# This is usually done outside the script, but is included here for
# completeness.
# An absolute path keeps the setting valid even if the working directory
# changes later on.
CONFIG_PATH = os.path.abspath("config.yml")

# Nesting matters: Protocols -> database -> task -> protocol -> subset -> entry.
# Subset names (train / development / test) and entry names are lowercase:
# entry names become the keys of each yielded file, and the loop below reads
# the lowercase "annotation" and "annotated" keys.
# `uri` points to a text file listing the identifiers of the files that belong
# to the subset (one per line); pyannote.database's custom protocols expect it.
CONFIG_YAML = """
Databases:
  MyDatabase: /path/to/your/audio/files/{uri}.wav

Protocols:
  MyDatabase:
    SpeakerDiarization:
      MyProtocol:
        train:
          uri: /path/to/train.lst
          annotated: /path/to/train.uem
          annotation: /path/to/train.rttm
        development:
          uri: /path/to/dev.lst
          annotated: /path/to/dev.uem
          annotation: /path/to/dev.rttm
        test:
          uri: /path/to/test.lst
          annotated: /path/to/test.uem
          annotation: /path/to/test.rttm
"""

with open(CONFIG_PATH, "w", encoding="utf-8") as config_file:
    config_file.write(CONFIG_YAML)

# 2. Tell pyannote.database where to find the configuration file.
# This MUST happen before `pyannote.database` is imported: the package looks
# up its configuration when it is first imported, so a variable set afterwards
# is not taken into account.
os.environ["PYANNOTE_DATABASE_CONFIG"] = CONFIG_PATH

from pyannote.database import FileFinder, get_protocol  # noqa: E402

# 3. Initialize the protocol.
# FileFinder resolves each file's 'uri' to an actual audio path using the
# template declared in the `Databases` section of the configuration.
preprocessors = {"audio": FileFinder()}
protocol = get_protocol(
    "MyDatabase.SpeakerDiarization.MyProtocol",
    preprocessors=preprocessors,
)

# 4. Iterate over the dataset (e.g., the test set).
for current_file in protocol.test():
    # 'uri' is the unique identifier of the file
    uri = current_file["uri"]

    # 'audio' provides the path to the audio file
    audio_path = current_file["audio"]

    # 'annotation' is a pyannote.core.Annotation instance containing the
    # ground truth
    reference = current_file["annotation"]

    # 'annotated' is a pyannote.core.Timeline instance containing the
    # annotated regions
    uem = current_file["annotated"]

    print(f"Loaded {uri} with {len(reference)} speaker segments.")
    break  # Remove to iterate through all files