Refactor segment extraction for improved compatibility with pyannote versions

This commit is contained in:
2025-11-30 21:50:50 +01:00
parent acf733fb3d
commit 4c1dd4f15e

35
main.py
View File

@@ -117,12 +117,41 @@ with Progress(TextColumn("Diarization fut…"), BarColumn(), TimeElapsedColumn()
speaker_segments = []
speakers = set()
for segment, _, speaker in diarization.itertracks(yield_label=True):
def extract_segments(diar_result):
    """Yield ``(speaker, start, end)`` tuples from a diarization result.

    The result shape differs between pyannote versions (3.x returns an
    Annotation with ``itertracks``; 4.x returns a DiarizeOutput wrapper), so
    the following shapes are probed in order:

    1. An object that itself exposes ``itertracks``.
    2. An object or dict carrying such an annotation under ``annotation`` or
       ``speaker_diarization``.
    3. An object or dict carrying an iterable of dict-like items under
       ``tracks``, each with ``speaker``/``label``, ``start`` and ``end``.

    Args:
        diar_result: The value returned by the diarization pipeline.

    Yields:
        Tuples of ``(speaker, start, end)`` with ``start``/``end`` as floats.

    Raises:
        SystemExit: With status 1, after printing an error, when none of the
            shapes above match. Because this is a generator, that only
            happens once the result is iterated.
    """

    def _lookup(name):
        # Attribute access first, then dict key access for plain dicts.
        value = getattr(diar_result, name, None)
        if value is None and isinstance(diar_result, dict):
            value = diar_result.get(name)
        return value

    def _from_annotation(annotation):
        for segment, _, speaker in annotation.itertracks(yield_label=True):
            yield speaker, float(segment.start), float(segment.end)

    if hasattr(diar_result, "itertracks"):
        yield from _from_annotation(diar_result)
        return

    # "annotation" stays first so previously working inputs behave the same;
    # "speaker_diarization" is the field pyannote.audio 4.x is documented to
    # use on its pipeline output.
    for field in ("annotation", "speaker_diarization"):
        candidate = _lookup(field)
        if candidate is not None and hasattr(candidate, "itertracks"):
            yield from _from_annotation(candidate)
            return

    tracks = _lookup("tracks")
    # An empty track list is a valid "nothing detected" result, not an
    # unknown format, so test for presence rather than truthiness.
    if tracks is not None:
        for item in tracks:
            speaker = item.get("speaker") or item.get("label") or "UNKNOWN"
            start = float(item.get("start", 0.0))
            # A missing end collapses the segment to zero length at start.
            end = float(item.get("end", start))
            yield speaker, start, end
        return

    console.print(f"❌ Ismeretlen diarization output: {type(diar_result)}")
    sys.exit(1)
for speaker, start, end in extract_segments(diarization):
speakers.add(speaker)
speaker_segments.append({
"speaker": speaker,
"start": float(segment.start),
"end": float(segment.end)
"start": start,
"end": end
})
with open("diar.json","w",encoding="utf-8") as f: