Add 'jr', 'sr', and 'ph.d' to the Punkt sentence-tokenizer abbreviations

This commit is contained in:
Alex Cannan
2025-02-18 12:25:57 -05:00
committed by Barabazs
parent 83afb81ac7
commit c7d31883bc

View File

@@ -24,7 +24,7 @@ from whisperx.types import (
)
from nltk.tokenize.punkt import PunktSentenceTokenizer, PunktParameters
# NOTE(review): this is a rendered diff hunk whose +/- markers were stripped.
# The next line appears to be the pre-change side (it lacks the entries the
# commit title says were added) -- confirm against the raw diff.
PUNKT_ABBREVIATIONS = ['dr', 'vs', 'mr', 'mrs', 'prof']
# Post-change side: the same list with 'jr', 'sr' and 'ph.d' appended.
# Entries are lowercase and carry no trailing period. Presumably they are
# registered with PunktParameters as abbreviation types so a period after
# them is not treated as a sentence end -- usage is not shown in this hunk.
PUNKT_ABBREVIATIONS = ['dr', 'vs', 'mr', 'mrs', 'prof', 'jr', 'sr', 'ph.d']
# Two language codes; presumably languages whose text is written without
# spaces between words -- how the list is consumed is not visible here.
LANGUAGES_WITHOUT_SPACES = ["ja", "zh"]