From d8815b55848bf640ccc90394c1c3f98a79655f3e Mon Sep 17 00:00:00 2001
From: vladislav <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 00:17:20 +0300
Subject: [PATCH 01/11] =?UTF-8?q?=D0=97=D0=B0=D0=BC=D0=B5=D0=BD=D0=B0=20?=
 =?UTF-8?q?=D0=BC=D0=BE=D0=B4=D0=B5=D0=BB=D0=B8=20ASR=20=D0=BD=D0=B0=20Gig?=
 =?UTF-8?q?aAM=20(CTC=20v2)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app.py             | 54 +++++++++++++++++++++-------------------------
 docker-compose.yml | 12 ++++++-----
 requirements.txt   |  1 +
 3 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/app.py b/app.py
index cb47a23..b984f52 100644
--- a/app.py
+++ b/app.py
@@ -4,8 +4,8 @@ import subprocess
 import time
 from typing import Dict
 from typing import Optional, Union, List, Tuple
+import gigaam
 
-import whisper
 from fastapi import FastAPI, Depends, HTTPException, UploadFile, File
 from fastapi.security import APIKeyHeader
 
@@ -18,6 +18,7 @@ logger = logging.getLogger(__name__)
 
 app = FastAPI()
 
+model = gigaam.load_model("v2_ctc", device="cuda", download_root="./model")
 
 # API key header
 api_key_header = APIKeyHeader(name="x-api-key")
@@ -108,19 +109,7 @@ def get_audio_duration(file_path: str) -> float:
 async def transcribe_audio(
         file: UploadFile = File(...),
         token: str = Depends(api_key_header),
-        model_name: str = "turbo",
-        verbose: Optional[bool] = None,
-        temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
-        compression_ratio_threshold: Optional[float] = 2.4,
-        logprob_threshold: Optional[float] = -1.0,
-        no_speech_threshold: Optional[float] = 0.6,
-        condition_on_previous_text: bool = True,
-        initial_prompt: Optional[str] = None,
-        word_timestamps: bool = False,
-        prepend_punctuations: str = "\"'\"¿([{-",
-        append_punctuations: str = "\"\'.。,，!！?？:：\")]}、",
-        clip_timestamps: Union[str, List[float]] = "0",
-        hallucination_silence_threshold: Optional[float] = None
+        model_name: str = "turbo"
 ):
     # Token validation
     if token not in get_keys():
@@ -148,24 +137,29 @@ async def transcribe_audio(
 
         # Transcribe
         logger.info("Starting transcription")
-        result = model.transcribe(
-            temp_output_path,
-            verbose=verbose,
-            temperature=temperature,
-            compression_ratio_threshold=compression_ratio_threshold,
-            logprob_threshold=logprob_threshold,
-            no_speech_threshold=no_speech_threshold,
-            condition_on_previous_text=condition_on_previous_text,
-            initial_prompt=initial_prompt,
-            word_timestamps=word_timestamps,
-            prepend_punctuations=prepend_punctuations,
-            append_punctuations=append_punctuations,
-            clip_timestamps=clip_timestamps,
-            hallucination_silence_threshold=hallucination_silence_threshold
-        )
+        if original_duration > 30:
+            logger.info("Audio duration > 30 seconds, using transcribe_longform")
+            transcription_result = model.transcribe_longform(temp_output_path)
+        else:
+            logger.info("Audio duration <= 30 seconds, using transcribe")
+            # Unlike transcribe_longform(), transcribe() returns a bare string;
+            # wrap it so both branches yield a list of segment dicts.
+            transcription_result = [
+                {"transcription": model.transcribe(temp_output_path)}
+            ]
+
+        full_text = ""
+        for part in transcription_result:
+            if part["transcription"].strip() != "":
+                full_text += part["transcription"].strip() + " "
+
+        result = {
+            "transcription": transcription_result,
+            "text": full_text
+        }
 
         # Calculate metrics
-        metrics.stop(result["text"], original_duration)
+        metrics.stop(full_text, original_duration)
         logger.info(f"Transcription metrics: {metrics.get_metrics()}")
 
         # Add metrics to result
diff --git a/docker-compose.yml b/docker-compose.yml
index 9841f63..31db88c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,10 +1,12 @@
+version: '3.8'
+
 services:
   whisper-app:
     build: .
     ports:
       - "9854:9854"
-    devices:
-      - "/dev/kfd:/dev/kfd"
-      - "/dev/dri:/dev/dri"
-    group_add:
-      - video
+    volumes:
+      - ./keys.txt:/app/keys.txt
+      - /tmp:/tmp
+    command: ["python", "app.py"]
+
diff --git a/requirements.txt b/requirements.txt
index a35e823..7492a97 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ python-multipart
 openai-whisper
 ffmpeg-python
 PyYAML
+numpy<2.0.0
\ No newline at end of file

From d02b1b46d2b5f24819e120ef17f48e9dd98ea317 Mon Sep 17 00:00:00 2001
From: SlavaVlad <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 00:26:37 +0300
Subject: [PATCH 02/11] Create README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d343b76
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+Not whisper anymore :)

From fede1fcf45098637d7d9d63afae739f475abdfc9 Mon Sep 17 00:00:00 2001
From: vladislav <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 00:32:45 +0300
Subject: [PATCH 03/11] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8?=
 =?UTF-8?q?=D0=BB=20=D0=BF=D0=BE=D0=B4=D1=80=D0=BE=D0=B1=D0=BD=D1=8B=D0=B9?=
 =?UTF-8?q?=20Readme.md=20=D1=81=20=D0=B8=D0=BD=D1=81=D1=82=D1=80=D1=83?=
 =?UTF-8?q?=D0=BA=D1=86=D0=B8=D1=8F=D0=BC=D0=B8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 102 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d343b76..95fea9e 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,102 @@
-Not whisper anymore :)
+# Whisper Transcription API
+
+This project provides a RESTful API for audio transcription using a Whisper model. The API is built with FastAPI and runs in a Docker container.
+
+## Prerequisites
+
+Before you begin, ensure you have the following installed:
+
+*   [Docker](https://docs.docker.com/get-docker/)
+*   [Docker Compose](https://docs.docker.com/compose/install/)
+
+## Project Structure
+
+```
+.
+├── app.py              # Main application file with FastAPI endpoint
+├── docker-compose.yml  # Docker Compose configuration
+├── Dockerfile          # Dockerfile for building the application image
+├── model/              # Directory for downloaded GigaAM model files
+└── requirements.txt    # Python dependencies
+```
+
+## Setup
+
+1.  **Clone the repository:**
+
+    ```bash
+    git clone https://github.com/SlavaVlad/faster-whisper-api
+    cd faster-whisper-api
+    ```
+3.  **Add API keys:**
+
+    Create a `keys.txt` file in the root of the project and add your API keys, one per line.
+
+## Building and Running the Project
+
+You can build and run the project using Docker Compose.
+
+1.  **Build the Docker image:**
+
+    ```bash
+    docker-compose build
+    ```
+
+2.  **Run the container:**
+
+    ```bash
+    docker-compose up
+    ```
+
+    The application will be available at `http://0.0.0.0:9854`.
+
+## API Endpoint
+
+### POST /transcribe
+
+This endpoint accepts an audio file and returns the transcription.
+
+*   **URL:** `/transcribe`
+*   **Method:** `POST`
+*   **Headers:**
+    *   `X-API-Key`: Your API key.
+*   **Form Data:**
+    *   `file`: The audio file to be transcribed.
+
+**Example using `curl`:**
+
+```bash
+curl -X POST "http://localhost:9854/transcribe" \
+     -H "X-API-Key: YOUR_API_KEY" \
+     -F "file=@/path/to/your/audio.wav"
+```
+
+**Successful Response (200 OK):**
+
+```json
+{
+  "transcription": [
+    {
+      "start_time": 0.0,
+      "end_time": 2.5,
+      "transcription": "Hello world."
+    }
+  ],
+  "text": "Hello world. ",
+  "metrics": {
+    "processing_time": 5.2,
+    "rtf": 0.5,
+    "word_rate": 2.0
+  }
+}
+```
+
+**Error Response (401 Unauthorized):**
+
+If the API key is missing or invalid.
+
+```json
+{
+  "detail": "Invalid API Key"
+}
+```

From b75f83130a36792a549558e8a7701e227dec1574 Mon Sep 17 00:00:00 2001
From: vladislav <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 00:34:02 +0300
Subject: [PATCH 04/11] =?UTF-8?q?=D0=9E=D0=B1=D0=BD=D0=BE=D0=B2=D0=B8?=
 =?UTF-8?q?=D0=BB=20=D1=81=D1=81=D1=8B=D0=BB=D0=BA=D1=83=20=D0=BD=D0=B0=20?=
 =?UTF-8?q?=D1=80=D0=B5=D0=BF=D0=BE=D0=B7=D0=B8=D1=82=D0=BE=D1=80=D0=B8?=
 =?UTF-8?q?=D0=B9=20=D0=B2=20README.md?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 95fea9e..54338a1 100644
--- a/README.md
+++ b/README.md
@@ -25,8 +25,8 @@ Before you begin, ensure you have the following installed:
 1.  **Clone the repository:**
 
     ```bash
-    git clone https://github.com/SlavaVlad/faster-whisper-api
-    cd faster-whisper-api
+    git clone https://github.com/SlavaVlad/simple-asr-server
+    cd simple-asr-server
     ```
 3.  **Add API keys:**
 

From 91495aac0f455806e350eafafb8dcda1f279c9d0 Mon Sep 17 00:00:00 2001
From: SlavaVlad <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 00:38:19 +0300
Subject: [PATCH 05/11] Create LICENSE

---
 LICENSE | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d9e5e5d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Vladimirov Vladislav
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

From e47e8ec40265dab8ddb500d6c0de86ac5ebfe0a8 Mon Sep 17 00:00:00 2001
From: vladislav <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 01:13:07 +0300
Subject: [PATCH 06/11] fix: requirements.txt updated fix: removed version
 attribute from docker-compose.yml fix: Dockerfile more correct

---
 Dockerfile         | 2 +-
 docker-compose.yml | 6 ------
 requirements.txt   | 2 +-
 3 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index d78ae68..2f7749a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,7 +5,7 @@ WORKDIR /app
 RUN apt-get update && apt-get install -y \
     ffmpeg \
     python3-pip \
-    && rm -rf /var/lib/apt/lists/*
+    python3-venv
 
 COPY requirements.txt .
 RUN pip install --no-cache-dir --default-timeout=100 -r requirements.txt
diff --git a/docker-compose.yml b/docker-compose.yml
index 31db88c..cbc6f67 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,12 +1,6 @@
-version: '3.8'
-
 services:
   whisper-app:
     build: .
     ports:
       - "9854:9854"
-    volumes:
-      - ./keys.txt:/app/keys.txt
-      - /tmp:/tmp
     command: ["python", "app.py"]
-
diff --git a/requirements.txt b/requirements.txt
index 7492a97..dda53b3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 fastapi
 uvicorn[standard]
 python-multipart
-openai-whisper
+gigaam
 ffmpeg-python
 PyYAML
 numpy<2.0.0
\ No newline at end of file

From c2b060c8a69583833b28cd55dd2399ee790d91f9 Mon Sep 17 00:00:00 2001
From: vladislav <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 01:14:22 +0300
Subject: [PATCH 07/11] Added GigaAM attribution to README.md

---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 54338a1..46e9195 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,8 @@
-# Whisper Transcription API
+BASED ON https://github.com/salute-developers/GigaAM
+
+# Simple ASR Server
+
+![GitHub Repo stars](https://img.shields.io/github/stars/SlavaVlad/simple-asr-server?style=social)
 
 This project provides a RESTful API for audio transcription using a Whisper model. The API is built with FastAPI and runs in a Docker container.
 

From 5f9e6bafaa238adc5052deed3ca19a25e1bdce16 Mon Sep 17 00:00:00 2001
From: vladislav <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 01:14:22 +0300
Subject: [PATCH 08/11] Added GigaAM attribution to README.md

---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 54338a1..46e9195 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,8 @@
-# Whisper Transcription API
+BASED ON https://github.com/salute-developers/GigaAM
+
+# Simple ASR Server
+
+![GitHub Repo stars](https://img.shields.io/github/stars/SlavaVlad/simple-asr-server?style=social)
 
 This project provides a RESTful API for audio transcription using a Whisper model. The API is built with FastAPI and runs in a Docker container.
 

From aa1d21f9bc8f33cd258d9c6622e1b6411413b4ff Mon Sep 17 00:00:00 2001
From: vladislav <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 01:15:30 +0300
Subject: [PATCH 09/11] Added GigaAM attribution to README.md

---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index 46e9195..72464ed 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,6 @@ BASED ON https://github.com/salute-developers/GigaAM
 
 # Simple ASR Server
 
-![GitHub Repo stars](https://img.shields.io/github/stars/SlavaVlad/simple-asr-server?style=social)
-
 This project provides a RESTful API for audio transcription using a Whisper model. The API is built with FastAPI and runs in a Docker container.
 
 ## Prerequisites

From e7f7120f27815b4121efe66be002753725104b5c Mon Sep 17 00:00:00 2001
From: vladislav <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 02:13:35 +0300
Subject: [PATCH 10/11] =?UTF-8?q?=D0=94=D0=B5=D0=BB=D0=B0=D0=B5=D0=BC=20HI?=
 =?UTF-8?q?P=20=D1=81=D0=BE=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=B8=D0=BC?=
 =?UTF-8?q?=D0=BE=D1=81=D1=82=D1=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app.py b/app.py
index b984f52..781a047 100644
--- a/app.py
+++ b/app.py
@@ -2,10 +2,10 @@ import logging
 import os
 import subprocess
 import time
+from os import getenv
 from typing import Dict
-from typing import Optional, Union, List, Tuple
-import gigaam
 
+import gigaam
 from fastapi import FastAPI, Depends, HTTPException, UploadFile, File
 from fastapi.security import APIKeyHeader
 
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 
 app = FastAPI()
 
-model = gigaam.load_model("v2_ctc", device="cuda", download_root="./model")
+model = gigaam.load_model("v2_ctc", device=getenv("ASR_DEVICE"), download_root=getenv("ASR_MODELS_ROOT"))
 
 # API key header
 api_key_header = APIKeyHeader(name="x-api-key")

From f718da13d6af706391b1f4f4f4fde62005a21071 Mon Sep 17 00:00:00 2001
From: vladislav <nauka.2.0.vs@gmail.com>
Date: Tue, 15 Jul 2025 02:35:58 +0300
Subject: [PATCH 11/11] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8?=
 =?UTF-8?q?=D0=BB=20longform=20=D0=B7=D0=B0=D0=B2=D0=B8=D1=81=D0=B8=D0=BC?=
 =?UTF-8?q?=D0=BE=D1=81=D1=82=D1=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index dda53b3..6a0fedf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,6 +2,7 @@ fastapi
 uvicorn[standard]
 python-multipart
 gigaam
+gigaam[longform]
 ffmpeg-python
 PyYAML
 numpy<2.0.0
\ No newline at end of file