From b1c8ac7de62c5969343352f2d10a31ebe5a107fd Mon Sep 17 00:00:00 2001 From: Nguyen Binh Date: Thu, 11 Apr 2024 16:01:20 +0200 Subject: [PATCH] Change alignment model for Vietnamese language Since the current model is a wav2vec2 pre-trained model for Vietnamese audio, it won't work with alignment tasks. To make it work as expected, I recommend changing to a fine-tuned ASR version. --- whisperx/alignment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisperx/alignment.py b/whisperx/alignment.py index ae9f997..765cb82 100644 --- a/whisperx/alignment.py +++ b/whisperx/alignment.py @@ -52,7 +52,7 @@ DEFAULT_ALIGN_MODELS_HF = { "tr": "mpoyraz/wav2vec2-xls-r-300m-cv7-turkish", "da": "saattrupdan/wav2vec2-xls-r-300m-ftspeech", "he": "imvladikon/wav2vec2-xls-r-300m-hebrew", - "vi": 'nguyenvulebinh/wav2vec2-base-vi', + "vi": 'nguyenvulebinh/wav2vec2-base-vi-vlsp2020', "ko": "kresnik/wav2vec2-large-xlsr-korean", "ur": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu", "te": "anuragshas/wav2vec2-large-xlsr-53-telugu",