From 5b0d04a2405d58d484d19ad2856613c60d124dfb Mon Sep 17 00:00:00 2001
From: vladislav <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 17:30:37 +0300
Subject: [PATCH 1/2] fix: load model on request and keep it in memory;
 rename model_name to model

---
 app.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/app.py b/app.py
index cb47a23..b902c31 100644
--- a/app.py
+++ b/app.py
@@ -18,7 +18,6 @@ logger = logging.getLogger(__name__)
 
 app = FastAPI()
 
-
 # API key header
 api_key_header = APIKeyHeader(name="x-api-key")
 
@@ -108,7 +107,7 @@ def get_audio_duration(file_path: str) -> float:
 async def transcribe_audio(
         file: UploadFile = File(...),
         token: str = Depends(api_key_header),
-        model_name: str = "turbo",
+        model: str = "turbo",
         verbose: Optional[bool] = None,
         temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
         compression_ratio_threshold: Optional[float] = 2.4,
@@ -127,7 +126,9 @@ async def transcribe_audio(
         logger.warning(f"Invalid token attempt: {token}")
         raise HTTPException(status_code=403, detail="Forbidden")
 
-    logger.info(f"Processing file: {file.filename} with model: {model_name}")
+    model = whisper.load_model(model)  # Load the Whisper model
+
+    logger.info(f"Processing file: {file.filename} with model: {model}")
     metrics = TranscriptionMetrics()
 
     # Save uploaded file

From 9eb026b220595aa7423fff2c97d44b580b218329 Mon Sep 17 00:00:00 2001
From: red <red@itmo.ru>
Date: Sun, 17 Aug 2025 23:24:24 +0900
Subject: [PATCH 2/2] Remove transcription metrics; add speed_up parameter

---
 app.py | 38 ++++----------------------------------
 1 file changed, 4 insertions(+), 34 deletions(-)

diff --git a/app.py b/app.py
index b902c31..f5cf989 100644
--- a/app.py
+++ b/app.py
@@ -42,9 +42,9 @@ def get_keys():  # не бейте меня за это
         return keys
 
 
-def convert_audio(input_path: str, output_path: str, speed: float = 1.25):
+def convert_audio(input_path: str, output_path: str, speed: float = 1.0):
     """
-    Convert audio to compatible format and speed up
+    Convert audio to compatible format and speed up if needed.
     """
     try:
         command = [
@@ -64,29 +64,6 @@ def convert_audio(input_path: str, output_path: str, speed: float = 1.25):
         return False
 
 
-class TranscriptionMetrics:
-    def __init__(self):
-        self.start_time = time.time()
-        self.end_time = None
-        self.text_length = 0
-        self.audio_duration = 0
-
-    def stop(self, text: str, audio_duration: float):
-        self.end_time = time.time()
-        self.text_length = len(text)
-        self.audio_duration = audio_duration
-
-    def get_metrics(self) -> Dict[str, float]:
-        processing_time = self.end_time - self.start_time
-        return {
-            "processing_time_seconds": round(processing_time, 2),
-            "characters_per_second": round(self.text_length / processing_time, 2),
-            "audio_realtime_ratio": round(self.audio_duration / processing_time, 2),
-            "audio_duration": round(self.audio_duration, 2),
-            "text_length": self.text_length
-        }
-
-
 def get_audio_duration(file_path: str) -> float:
     """Get audio duration using ffprobe"""
     cmd = [
@@ -111,6 +88,7 @@ async def transcribe_audio(
         verbose: Optional[bool] = None,
         temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
         compression_ratio_threshold: Optional[float] = 2.4,
+        speed_up: Optional[float] = 1.25,
         logprob_threshold: Optional[float] = -1.0,
         no_speech_threshold: Optional[float] = 0.6,
         condition_on_previous_text: bool = True,
@@ -129,7 +107,6 @@ async def transcribe_audio(
     model = whisper.load_model(model)  # Load the Whisper model
 
     logger.info(f"Processing file: {file.filename} with model: {model}")
-    metrics = TranscriptionMetrics()
 
     # Save uploaded file
     temp_input_path = f"/tmp/input_{file.filename}"
@@ -141,7 +118,7 @@ async def transcribe_audio(
 
         # Convert audio if needed
         logger.debug("Converting audio file")
-        if not convert_audio(temp_input_path, temp_output_path):
+        if not convert_audio(temp_input_path, temp_output_path, speed_up):
             raise HTTPException(status_code=400, detail="Audio conversion failed")
 
         # Get audio duration before speed up
@@ -165,13 +142,6 @@ async def transcribe_audio(
             hallucination_silence_threshold=hallucination_silence_threshold
         )
 
-        # Calculate metrics
-        metrics.stop(result["text"], original_duration)
-        logger.info(f"Transcription metrics: {metrics.get_metrics()}")
-
-        # Add metrics to result
-        result["metrics"] = metrics.get_metrics()
-
         return result
 
     except Exception as e: