From 5b0d04a2405d58d484d19ad2856613c60d124dfb Mon Sep 17 00:00:00 2001 From: vladislav Date: Tue, 15 Jul 2025 17:30:37 +0300 Subject: [PATCH 1/2] fix: model load on request and keep in mem rename: model_name to model --- app.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/app.py b/app.py index cb47a23..b902c31 100644 --- a/app.py +++ b/app.py @@ -18,7 +18,6 @@ logger = logging.getLogger(__name__) app = FastAPI() - # API key header api_key_header = APIKeyHeader(name="x-api-key") @@ -108,7 +107,7 @@ def get_audio_duration(file_path: str) -> float: async def transcribe_audio( file: UploadFile = File(...), token: str = Depends(api_key_header), - model_name: str = "turbo", + model: str = "turbo", verbose: Optional[bool] = None, temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0), compression_ratio_threshold: Optional[float] = 2.4, @@ -127,7 +126,9 @@ async def transcribe_audio( logger.warning(f"Invalid token attempt: {token}") raise HTTPException(status_code=403, detail="Forbidden") - logger.info(f"Processing file: {file.filename} with model: {model_name}") + model = whisper.load_model(model) # Load the Whisper model + + logger.info(f"Processing file: {file.filename} with model: {model}") metrics = TranscriptionMetrics() # Save uploaded file From 9eb026b220595aa7423fff2c97d44b580b218329 Mon Sep 17 00:00:00 2001 From: red Date: Sun, 17 Aug 2025 23:24:24 +0900 Subject: [PATCH 2/2] - deleted metrics --- app.py | 38 ++++---------------------------------- 1 file changed, 4 insertions(+), 34 deletions(-) diff --git a/app.py b/app.py index b902c31..f5cf989 100644 --- a/app.py +++ b/app.py @@ -42,9 +42,9 @@ def get_keys(): # не бейте меня за это return keys -def convert_audio(input_path: str, output_path: str, speed: float = 1.25): +def convert_audio(input_path: str, output_path: str, speed: float = 1.0): """ - Convert audio to compatible format and speed up + Convert audio to compatible format and speed up if needed. """ try: command = [ @@ -64,29 +64,6 @@ def convert_audio(input_path: str, output_path: str, speed: float = 1.25): return False -class TranscriptionMetrics: - def __init__(self): - self.start_time = time.time() - self.end_time = None - self.text_length = 0 - self.audio_duration = 0 - - def stop(self, text: str, audio_duration: float): - self.end_time = time.time() - self.text_length = len(text) - self.audio_duration = audio_duration - - def get_metrics(self) -> Dict[str, float]: - processing_time = self.end_time - self.start_time - return { - "processing_time_seconds": round(processing_time, 2), - "characters_per_second": round(self.text_length / processing_time, 2), - "audio_realtime_ratio": round(self.audio_duration / processing_time, 2), - "audio_duration": round(self.audio_duration, 2), - "text_length": self.text_length - } - - def get_audio_duration(file_path: str) -> float: """Get audio duration using ffprobe""" cmd = [ @@ -111,6 +88,7 @@ async def transcribe_audio( verbose: Optional[bool] = None, temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0), compression_ratio_threshold: Optional[float] = 2.4, + speed_up: Optional[float] = 1.25, logprob_threshold: Optional[float] = -1.0, no_speech_threshold: Optional[float] = 0.6, condition_on_previous_text: bool = True, @@ -129,7 +107,6 @@ async def transcribe_audio( model = whisper.load_model(model) # Load the Whisper model logger.info(f"Processing file: {file.filename} with model: {model}") - metrics = TranscriptionMetrics() # Save uploaded file temp_input_path = f"/tmp/input_{file.filename}" @@ -141,7 +118,7 @@ async def transcribe_audio( # Convert audio if needed logger.debug("Converting audio file") - if not convert_audio(temp_input_path, temp_output_path): + if not convert_audio(temp_input_path, temp_output_path, speed_up): raise HTTPException(status_code=400, detail="Audio conversion failed") # Get audio duration before speed up @@ -165,13 +142,6 @@ async def transcribe_audio( hallucination_silence_threshold=hallucination_silence_threshold ) - # Calculate metrics - metrics.stop(result["text"], original_duration) - logger.info(f"Transcription metrics: {metrics.get_metrics()}") - - # Add metrics to result - result["metrics"] = metrics.get_metrics() - return result except Exception as e: