Compare commits
10 Commits
Whisper-Ba
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| b212d83161 | |||
| 50fcb2d025 | |||
| 1f13bc4f86 | |||
| fcae47cad1 | |||
| 3f97810f89 | |||
| d8c27b1cbb | |||
|
|
d70f2e7089 | ||
|
|
ce41cf4a09 | ||
|
|
228f67d07f | ||
| 4fd0f18dd1 |
25
.env.example
Normal file
25
.env.example
Normal file
@@ -0,0 +1,25 @@
|
||||
# Simple ASR Server Configuration
|
||||
# Этот файл содержит переменные окружения для сервера ASR
|
||||
|
||||
# Сервер
|
||||
HOST=0.0.0.0
|
||||
PORT=9854
|
||||
|
||||
# Модель Whisper
|
||||
DEFAULT_MODEL=turbo
|
||||
MODEL_DEVICE=cuda
|
||||
MODEL_DOWNLOAD_ROOT=./models
|
||||
|
||||
# Файлы и директории
|
||||
KEYS_FILE=./data/keys.txt
|
||||
LOG_LEVEL=info
|
||||
|
||||
# AMD GPU настройки (для ROCm)
|
||||
HSA_OVERRIDE_GFX_VERSION=10.3.0
|
||||
|
||||
# Дополнительные настройки
|
||||
AUDIO_SPEEDUP=1.25
|
||||
|
||||
# Настройки для GPU (по умолчанию CUDA)
|
||||
# Для использования CPU установите MODEL_DEVICE=cpu
|
||||
# Для AMD GPU убедитесь что установлен ROCm и используется подходящая версия PyTorch
|
||||
5
.idea/.gitignore
generated
vendored
5
.idea/.gitignore
generated
vendored
@@ -1,5 +0,0 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
7
.idea/dictionaries/project.xml
generated
Normal file
7
.idea/dictionaries/project.xml
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<component name="ProjectDictionaryState">
|
||||
<dictionary name="project">
|
||||
<words>
|
||||
<w>конфигурироемость</w>
|
||||
</words>
|
||||
</dictionary>
|
||||
</component>
|
||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
12
.idea/material_theme_project_new.xml
generated
Normal file
12
.idea/material_theme_project_new.xml
generated
Normal file
@@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="MaterialThemeProjectNewConfig">
|
||||
<option name="metadata">
|
||||
<MTProjectMetadataState>
|
||||
<option name="migrated" value="true" />
|
||||
<option name="pristineConfig" value="false" />
|
||||
<option name="userId" value="-6077f146:198b84bb7ea:-7ffe" />
|
||||
</MTProjectMetadataState>
|
||||
</option>
|
||||
</component>
|
||||
</project>
|
||||
7
.idea/misc.xml
generated
Normal file
7
.idea/misc.xml
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.13 (simple-asr-server)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (whisper)" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
7
.idea/simple-asr-server.iml
generated
Normal file
7
.idea/simple-asr-server.iml
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module version="4">
|
||||
<component name="PyDocumentationSettings">
|
||||
<option name="format" value="PLAIN" />
|
||||
<option name="myDocStringFormat" value="Plain" />
|
||||
</component>
|
||||
</module>
|
||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
7
.idea/whisper.iml
generated
Normal file
7
.idea/whisper.iml
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module version="4">
|
||||
<component name="PyDocumentationSettings">
|
||||
<option name="format" value="PLAIN" />
|
||||
<option name="myDocStringFormat" value="Plain" />
|
||||
</component>
|
||||
</module>
|
||||
47
Dockerfile
47
Dockerfile
@@ -1,22 +1,51 @@
|
||||
FROM rocm/pytorch:rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.6.0
|
||||
# NVIDIA
|
||||
#FROM python:3.10-slim
|
||||
|
||||
# AMD
|
||||
FROM rocm/pytorch:latest
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies including openssl
|
||||
RUN apt-get update && apt-get install -y \
|
||||
ffmpeg \
|
||||
git \
|
||||
curl \
|
||||
openssl \
|
||||
python3-pip \
|
||||
python3-venv \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Update pip
|
||||
RUN pip install --upgrade pip
|
||||
|
||||
# Copy requirements first for better caching
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir --default-timeout=100 -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
# Install Python dependencies
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy application code
|
||||
COPY app.py .
|
||||
|
||||
# Copy startup script for key generation
|
||||
COPY docker-entrypoint.sh .
|
||||
RUN chmod +x docker-entrypoint.sh
|
||||
|
||||
# Create directory for models and data
|
||||
RUN mkdir -p /app/models /app/data
|
||||
|
||||
# Set environment variables
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV HOST=0.0.0.0
|
||||
ENV PORT=9854
|
||||
|
||||
# Expose port
|
||||
EXPOSE 9854
|
||||
|
||||
# Устанавливаем переменные окружения для ROCm
|
||||
ENV HSA_OVERRIDE_GFX_VERSION=10.3.0
|
||||
ENV PYTORCH_ROCM_ARCH=gfx1030
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD curl -f http://localhost:9854/health || exit 1
|
||||
|
||||
# Команда для запуска приложения
|
||||
CMD ["python3", "app.py"]
|
||||
# Run the application
|
||||
ENTRYPOINT ["./docker-entrypoint.sh"]
|
||||
|
||||
282
README.MD
Normal file
282
README.MD
Normal file
@@ -0,0 +1,282 @@
|
||||
# Simple ASR Service на базе Whisper
|
||||
|
||||
Простой сервис для распознавания речи с использованием OpenAI Whisper. Поддерживает различные форматы ответов, управление API-ключами без перезапуска и все параметры модели Whisper.
|
||||
|
||||
## Особенности
|
||||
|
||||
- 🎯 Эндпоинт `/transcribe` для распознавания речи
|
||||
- 🔑 Управление API-ключами без перезапуска сервиса
|
||||
- 📊 Три формата ответа: `json`, `simple`, `text/plain`
|
||||
- ⚙️ Поддержка всех параметров `whisper.transcribe()`
|
||||
- 🐳 Docker и native запуск
|
||||
- 🏥 Health check эндпоинт
|
||||
- 🔄 Горячая перезагрузка API-ключей
|
||||
- 🚀 GPU ускорение по умолчанию (NVIDIA/AMD)
|
||||
- ⚙️ Централизованная конфигурация через .env файл
|
||||
|
||||
## Требования
|
||||
|
||||
- Python 3.8+
|
||||
- FFmpeg (для обработки аудио)
|
||||
- Минимум 4GB RAM
|
||||
- Свободное место для моделей (turbo ~1GB, large ~3GB)
|
||||
- **GPU с поддержкой CUDA или ROCm (рекомендуется)**
|
||||
|
||||
Для Docker дополнительно:
|
||||
- Docker + Docker Compose
|
||||
- NVIDIA Docker runtime (для NVIDIA GPU)
|
||||
- ROCm (для AMD GPU)
|
||||
|
||||
## Быстрый старт
|
||||
|
||||
### Запуск через Docker
|
||||
|
||||
```bash
|
||||
git clone https://github.com/SlavaVlad/simple-asr-server.git ./asr
|
||||
cd asr
|
||||
|
||||
# Для AMD GPU оставьте как есть
|
||||
# Для NVIDIA GPU раскомментируйте соответствующую секцию в docker-compose.yml
|
||||
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Нативный запуск
|
||||
|
||||
```bash
|
||||
git clone https://github.com/SlavaVlad/simple-asr-server.git ./asr
|
||||
cd asr
|
||||
|
||||
chmod +x start_server.sh
|
||||
./start_server.sh
|
||||
```
|
||||
|
||||
### Переменные окружения
|
||||
|
||||
| Переменная | По умолчанию | Описание |
|
||||
|------------|--------------|----------|
|
||||
| `HOST` | `0.0.0.0` | IP адрес для привязки |
|
||||
| `PORT` | `9854` | Порт сервера |
|
||||
| `DEFAULT_MODEL` | `turbo` | Модель Whisper для загрузки |
|
||||
| `MODEL_DEVICE` | `cuda` | Устройство: `cuda`, `cpu`, или `auto` |
|
||||
| `MODEL_DOWNLOAD_ROOT` | `./models` | Директория для моделей |
|
||||
| `KEYS_FILE` | `./data/keys.txt` | Файл с API ключами |
|
||||
| `LOG_LEVEL` | `info` | Уровень логирования |
|
||||
| `HSA_OVERRIDE_GFX_VERSION` | `10.3.0` | Версия GPU для AMD ROCm |
|
||||
| `AUDIO_SPEEDUP` | `1.25` | Ускорение обработки аудио |
|
||||
|
||||
### Настройка GPU
|
||||
|
||||
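**По умолчанию сервис использует GPU (`MODEL_DEVICE=cuda`). Docker-конфигурация из репозитория рассчитана на AMD GPU (ROCm); для NVIDIA GPU раскомментируйте соответствующую секцию в `docker-compose.yml`.**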
|
||||
|
||||
**Для использования CPU:**
|
||||
```env
|
||||
MODEL_DEVICE=cpu
|
||||
```
|
||||
|
||||
### Доступные модели Whisper
|
||||
|
||||
- `tiny` - самая быстрая, наименее точная (~40MB)
|
||||
- `base` - баланс скорости и качества (~150MB)
|
||||
- `small` - хорошее качество (~500MB)
|
||||
- `medium` - лучшее качество (~1.5GB)
|
||||
- `large` - максимальное качество (~3GB)
|
||||
- `turbo` - оптимизированная версия (~800MB, рекомендуется)
|
||||
|
||||
## Управление API-ключами
|
||||
|
||||
### Автоматическое создание ключей
|
||||
|
||||
При первом запуске:
|
||||
1. Если установлен `openssl` - генерируется безопасный 64-символьный ключ
|
||||
2. Если `openssl` отсутствует - создается пустой файл ключей
|
||||
|
||||
### Добавление/удаление ключей
|
||||
|
||||
1. Отредактируйте файл `data/keys.txt` (один ключ на строку, 64 hex символа)
|
||||
2. Вызовите эндпоинт перезагрузки:
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:9854/keys/reload" \
|
||||
-H "X-API-Key: your-api-key"
|
||||
```
|
||||
|
||||
Пример `data/keys.txt`:
|
||||
```
|
||||
key1
|
||||
key2
|
||||
```
|
||||
|
||||
### Генерация новых ключей
|
||||
|
||||
```bash
|
||||
# Генерация нового ключа
|
||||
openssl rand -hex 32
|
||||
|
||||
# Добавление в файл ключей
|
||||
echo "$(openssl rand -hex 32)" >> data/keys.txt
|
||||
```
|
||||
|
||||
## API Документация
|
||||
|
||||
### POST /transcribe
|
||||
|
||||
Основной эндпоинт для распознавания речи.
|
||||
|
||||
#### Параметры
|
||||
|
||||
**Обязательные:**
|
||||
- `audio_file` - аудиофайл (form-data)
|
||||
|
||||
**Опциональные:**
|
||||
- `format` - формат ответа: `json` (по умолчанию), `simple`, `text`
|
||||
- Все параметры `whisper.transcribe()`:
|
||||
- `language` - язык аудио (auto-detect по умолчанию)
|
||||
- `task` - `transcribe` или `translate`
|
||||
- `temperature` - температура для генерации (0.0-1.0)
|
||||
- `beam_size` - размер луча для поиска
|
||||
- `best_of` - количество кандидатов для выбора лучшего
|
||||
- `compression_ratio_threshold` - порог сжатия для фильтрации
|
||||
- `logprob_threshold` - порог логарифмической вероятности
|
||||
- `no_speech_threshold` - порог отсутствия речи
|
||||
- `condition_on_previous_text` - использовать предыдущий текст как контекст
|
||||
- `initial_prompt` - начальная подсказка для модели
|
||||
- `word_timestamps` - временные метки слов (true/false)
|
||||
- `prepend_punctuations` - знаки препинания для добавления в начало
|
||||
- `append_punctuations` - знаки препинания для добавления в конец
|
||||
- `clip_timestamps` - временные метки для обрезки аудио
|
||||
- `hallucination_silence_threshold` - порог тишины для отрезания галлюцинаций
|
||||
|
||||
#### Примеры запросов
|
||||
|
||||
**Простая транскрибация:**
|
||||
```bash
|
||||
curl -X POST "http://localhost:9854/transcribe" \
|
||||
-H "X-API-Key: your-api-key" \
|
||||
--form "audio_file=@audio.wav"
|
||||
```
|
||||
|
||||
**С параметрами:**
|
||||
```bash
|
||||
curl -X POST "http://localhost:9854/transcribe?language=ru&format=simple&word_timestamps=true&temperature=0.2" \
|
||||
-H "X-API-Key: your-api-key" \
|
||||
--form "audio_file=@audio.wav"
|
||||
```
|
||||
|
||||
**Только текст:**
|
||||
```bash
|
||||
curl -X POST "http://localhost:9854/transcribe?format=text" \
|
||||
-H "X-API-Key: your-api-key" \
|
||||
--form "audio_file=@audio.wav"
|
||||
```
|
||||
|
||||
**Расширенные параметры:**
|
||||
```bash
|
||||
curl -X POST "http://localhost:9854/transcribe?language=en&task=translate&temperature=0.1&beam_size=5&word_timestamps=true&initial_prompt=This%20is%20a%20technical%20presentation%20about%20AI&format=json" \
|
||||
-H "X-API-Key: your-api-key" \
|
||||
--form "audio_file=@audio.wav"
|
||||
```
|
||||
|
||||
#### Форматы ответов
|
||||
|
||||
**json (полный ответ от Whisper):**
|
||||
```json
|
||||
{
|
||||
"text": "Привет, как дела?",
|
||||
"segments": [
|
||||
{
|
||||
"start": 0.0,
|
||||
"end": 2.5,
|
||||
"text": "Привет, как дела?",
|
||||
"words": [...]
|
||||
}
|
||||
],
|
||||
"language": "ru"
|
||||
}
|
||||
```
|
||||
|
||||
**simple (только текст):**
|
||||
```json
|
||||
{
|
||||
"text": "Привет, как дела?"
|
||||
}
|
||||
```
|
||||
|
||||
**text (plain text):**
|
||||
```
|
||||
Привет, как дела?
|
||||
```
|
||||
|
||||
### GET /health
|
||||
|
||||
Проверка состояния сервиса:
|
||||
|
||||
```bash
|
||||
curl "http://localhost:9854/health"
|
||||
```
|
||||
|
||||
Ответ:
|
||||
```json
|
||||
{
|
||||
"status": "healthy",
|
||||
"model_loaded": true,
|
||||
"model_name": "turbo"
|
||||
}
|
||||
```
|
||||
|
||||
## Коды ошибок
|
||||
|
||||
- `401` - API ключ не предоставлен
|
||||
- `403` - Неверный API ключ
|
||||
- `422` - Неверные параметры запроса
|
||||
- `500` - Ошибка сервера/модели
|
||||
|
||||
## Systemd сервис
|
||||
|
||||
Для автоматического запуска создайте systemd сервис:
|
||||
|
||||
```bash
|
||||
sudo cp asr.service /etc/systemd/system/
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable asr
|
||||
sudo systemctl start asr
|
||||
```
|
||||
|
||||
## Поддерживаемые форматы аудио
|
||||
|
||||
Все форматы, поддерживаемые FFmpeg:
|
||||
- WAV, MP3, FLAC, M4A, OGG
|
||||
- Видео форматы (извлекается аудио): MP4, AVI, MKV
|
||||
|
||||
## Производительность
|
||||
|
||||
Время обработки зависит от:
|
||||
- Выбранной модели
|
||||
- Длительности аудио
|
||||
- Доступных ресурсов (CPU/GPU)
|
||||
|
||||
Примерные времена для 1 минуты аудио:
|
||||
- `tiny`: ~2-5 секунд
|
||||
- `turbo`: ~5-10 секунд
|
||||
- `large`: ~15-30 секунд
|
||||
104
README.md
104
README.md
@@ -1,104 +0,0 @@
|
||||
BASED ON https://github.com/salute-developers/GigaAM
|
||||
|
||||
# Simple ASR Server
|
||||
|
||||
This project provides a RESTful API for audio transcription using a Whisper model. The API is built with FastAPI and runs in a Docker container.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before you begin, ensure you have the following installed:
|
||||
|
||||
* [Docker](https://docs.docker.com/get-docker/)
|
||||
* [Docker Compose](https://docs.docker.com/compose/install/)
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
.
|
||||
├── app.py # Main application file with FastAPI endpoint
|
||||
├── docker-compose.yml # Docker Compose configuration
|
||||
├── Dockerfile # Dockerfile for building the application image
|
||||
├── model/ # Directory for Whisper model files
|
||||
└── requirements.txt # Python dependencies
|
||||
```
|
||||
|
||||
## Setup
|
||||
|
||||
1. **Clone the repository:**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/SlavaVlad/simple-asr-server
|
||||
cd simple-asr-server
|
||||
```
|
||||
3. **Add API keys:**
|
||||
|
||||
Create a `keys.txt` file in the root of the project and add your API keys, one per line.
|
||||
|
||||
## Building and Running the Project
|
||||
|
||||
You can build and run the project using Docker Compose.
|
||||
|
||||
1. **Build the Docker image:**
|
||||
|
||||
```bash
|
||||
docker-compose build
|
||||
```
|
||||
|
||||
2. **Run the container:**
|
||||
|
||||
```bash
|
||||
docker-compose up
|
||||
```
|
||||
|
||||
The application will be available at `http://0.0.0.0:9854`.
|
||||
|
||||
## API Endpoint
|
||||
|
||||
### POST /transcribe
|
||||
|
||||
This endpoint accepts an audio file and returns the transcription.
|
||||
|
||||
* **URL:** `/transcribe`
|
||||
* **Method:** `POST`
|
||||
* **Headers:**
|
||||
* `X-API-Key`: Your API key.
|
||||
* **Form Data:**
|
||||
* `file`: The audio file to be transcribed.
|
||||
|
||||
**Example using `curl`:**
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:9854/transcribe" \
|
||||
-H "X-API-Key: YOUR_API_KEY" \
|
||||
-F "file=@/path/to/your/audio.wav"
|
||||
```
|
||||
|
||||
**Successful Response (200 OK):**
|
||||
|
||||
```json
|
||||
{
|
||||
"transcription": [
|
||||
{
|
||||
"start_time": 0.0,
|
||||
"end_time": 2.5,
|
||||
"transcription": "Hello world."
|
||||
}
|
||||
],
|
||||
"text": "Hello world. ",
|
||||
"metrics": {
|
||||
"processing_time": 5.2,
|
||||
"rtf": 0.5,
|
||||
"word_rate": 2.0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Error Response (401 Unauthorized):**
|
||||
|
||||
If the API key is missing or invalid.
|
||||
|
||||
```json
|
||||
{
|
||||
"detail": "Invalid API Key"
|
||||
}
|
||||
```
|
||||
276
app.py
276
app.py
@@ -1,170 +1,176 @@
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from os import getenv
|
||||
from typing import Dict
|
||||
import json
|
||||
import whisper
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Set, Literal, List, Union
|
||||
from threading import Lock
|
||||
|
||||
import gigaam
|
||||
from fastapi import FastAPI, Depends, HTTPException, UploadFile, File
|
||||
from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, Request, Form
|
||||
from fastapi.security import APIKeyHeader
|
||||
from fastapi.responses import PlainTextResponse
|
||||
from pydantic import BaseModel, Field
|
||||
import uvicorn
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
# Настройка логирования
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = FastAPI()
|
||||
# Pydantic model for the optional transcription parameters.
# The /transcribe handler receives it through Depends(); it then forwards every
# field except `format` whose value is not None to model.transcribe().
class TranscribeParams(BaseModel):
    # Decoding options. Fields that default to None are dropped by the handler's
    # exclude_none=True, so the model's own defaults apply for them.
    language: Optional[str] = Field(None, description="Язык аудио (auto-detect по умолчанию)")
    task: Optional[str] = Field("transcribe", description="transcribe или translate")
    temperature: Optional[float] = Field(0.0, description="Температура для генерации (0.0-1.0)")
    beam_size: Optional[int] = Field(None, description="Размер луча для поиска")
    best_of: Optional[int] = Field(None, description="Количество кандидатов для выбора лучшего")
    # Filtering thresholds.
    compression_ratio_threshold: Optional[float] = Field(None, description="Порог сжатия для фильтрации")
    logprob_threshold: Optional[float] = Field(None, description="Порог логарифмической вероятности")
    no_speech_threshold: Optional[float] = Field(None, description="Порог детекции отсутствия речи")
    # Prompting / context.
    condition_on_previous_text: Optional[bool] = Field(True, description="Использовать предыдущий текст как контекст")
    initial_prompt: Optional[str] = Field(None, description="Начальная подсказка для модели")
    # Timestamp-related options.
    word_timestamps: Optional[bool] = Field(False, description="Временные метки слов")
    prepend_punctuations: Optional[str] = Field(None, description="Знаки препинания для добавления в начало")
    append_punctuations: Optional[str] = Field(None, description="Знаки препинания для добавления в конец")
    clip_timestamps: Optional[List[float]] = Field(None, description="Временные метки для обрезки аудио")
    hallucination_silence_threshold: Optional[float] = Field(None, description="Порог тишины для детекции галлюцинаций")
    # Response format selector; consumed by the handler itself and excluded from
    # the keyword arguments passed to the model.
    format: Optional[Literal["json", "simple", "text"]] = Field("json", description="Формат ответа")
|
||||
|
||||
# API key header
|
||||
api_key_header = APIKeyHeader(name="x-api-key")
|
||||
# Глобальные переменные для модели и ключей
|
||||
model = None
|
||||
api_keys: Set[str] = set()
|
||||
keys_lock = Lock()
|
||||
keys_file_path = os.getenv("KEYS_FILE", "keys.txt")
|
||||
|
||||
# Схема безопасности
|
||||
api_key_header = APIKeyHeader(name="X-API-Key")
|
||||
|
||||
def get_keys(): # не бейте меня за это
|
||||
keys_file = "keys.txt"
|
||||
if not os.path.exists(keys_file):
|
||||
# Create a new keys file with a default key
|
||||
default_key = os.urandom(32).hex()
|
||||
with open(keys_file, "w") as f:
|
||||
f.write(default_key + "\n")
|
||||
logger.info(f"Created new keys file with default key: {default_key}")
|
||||
return [default_key]
|
||||
app = FastAPI(title="Whisper ASR Service", version="1.0.0")
|
||||
|
||||
def load_api_keys():
|
||||
"""Загружает API ключи из файла"""
|
||||
global api_keys
|
||||
try:
|
||||
if os.path.exists(keys_file_path):
|
||||
with open(keys_file_path, 'r') as f:
|
||||
keys = [line.strip() for line in f.readlines() if line.strip()]
|
||||
with keys_lock:
|
||||
api_keys = set(keys)
|
||||
logger.info(f"Загружено {len(api_keys)} API ключей")
|
||||
else:
|
||||
# Read keys from the existing file
|
||||
with open(keys_file, "r") as f:
|
||||
keys = [line.strip() for line in f if line.strip()]
|
||||
logger.info(f"Loaded {len(keys)} keys from file")
|
||||
logger.debug(f"Keys: {keys}")
|
||||
if not keys:
|
||||
raise ValueError("No keys found in keys.txt")
|
||||
return keys
|
||||
logger.warning(f"Файл ключей {keys_file_path} не найден")
|
||||
except Exception as e:
|
||||
logger.error(f"Ошибка загрузки ключей: {e}")
|
||||
|
||||
def load_model():
|
||||
"""Загружает модель Whisper"""
|
||||
global model
|
||||
model_name = os.getenv("DEFAULT_MODEL", "turbo")
|
||||
download_root = os.getenv("MODEL_DOWNLOAD_ROOT", "./models")
|
||||
device = os.getenv("MODEL_DEVICE", "cpu")
|
||||
|
||||
def convert_audio(input_path: str, output_path: str, speed: float = 1.0):
|
||||
"""
|
||||
Convert audio to compatible format and speed up if needed.
|
||||
"""
|
||||
try:
|
||||
command = [
|
||||
'ffmpeg', '-i', input_path,
|
||||
'-filter:a', f'atempo={speed}',
|
||||
'-ar', '16000',
|
||||
'-ac', '1',
|
||||
'-c:a', 'pcm_s16le',
|
||||
output_path,
|
||||
'-y'
|
||||
]
|
||||
logger.debug(f"Running FFmpeg command: {' '.join(command)}")
|
||||
subprocess.run(command, check=True, capture_output=True)
|
||||
return True
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"FFmpeg conversion failed: {e.stderr.decode()}")
|
||||
return False
|
||||
logger.info(f"Загрузка модели Whisper: {model_name}")
|
||||
model = whisper.load_model(model_name, device=device, download_root=download_root)
|
||||
logger.info("Модель успешно загружена")
|
||||
except Exception as e:
|
||||
logger.error(f"Ошибка загрузки модели: {e}")
|
||||
raise
|
||||
|
||||
def verify_api_key(api_key: str = Depends(api_key_header)) -> str:
|
||||
"""Проверяет API ключ"""
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=401, detail="API ключ не предоставлен")
|
||||
|
||||
def get_audio_duration(file_path: str) -> float:
|
||||
"""Get audio duration using ffprobe"""
|
||||
cmd = [
|
||||
'ffprobe',
|
||||
'-v', 'quiet',
|
||||
'-show_entries', 'format=duration',
|
||||
'-of', 'default=noprint_wrappers=1:nokey=1',
|
||||
file_path
|
||||
]
|
||||
try:
|
||||
output = subprocess.check_output(cmd).decode().strip()
|
||||
return float(output)
|
||||
except:
|
||||
return 0.0
|
||||
# Перезагружаем ключи для проверки обновлений
|
||||
load_api_keys()
|
||||
|
||||
with keys_lock:
|
||||
if api_key not in api_keys:
|
||||
raise HTTPException(status_code=403, detail="Неверный API ключ")
|
||||
|
||||
return api_key
|
||||
|
||||
@app.on_event("startup")
async def startup_event():
    """Initialise the service at application startup.

    Loads the API keys first and then the Whisper model, in that order.
    """
    load_api_keys()
    load_model()
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Проверка здоровья сервиса"""
    # Health probe: reports whether the global model has been loaded and, if so,
    # which model name the service was configured with.
    # The docstring above is left untranslated on purpose: FastAPI publishes
    # route docstrings as the endpoint description.
    model_loaded = model is not None
    # str(model) on a loaded torch module yields its full multi-line architecture
    # repr, not a name. Report the configured name instead, resolved the same way
    # load_model() resolves it.
    current_model = os.getenv("DEFAULT_MODEL", "turbo") if model_loaded else None
    return {
        "status": "healthy",
        "model_loaded": model_loaded,
        "current_model": current_model,
    }
|
||||
|
||||
@app.post("/transcribe")
|
||||
async def transcribe_audio(
|
||||
file: UploadFile = File(...),
|
||||
token: str = Depends(api_key_header),
|
||||
model: str = "turbo",
|
||||
verbose: Optional[bool] = None,
|
||||
temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
|
||||
compression_ratio_threshold: Optional[float] = 2.4,
|
||||
speed_up: Optional[float] = 1.25,
|
||||
logprob_threshold: Optional[float] = -1.0,
|
||||
no_speech_threshold: Optional[float] = 0.6,
|
||||
condition_on_previous_text: bool = True,
|
||||
initial_prompt: Optional[str] = None,
|
||||
word_timestamps: bool = False,
|
||||
prepend_punctuations: str = "\"'\"¿([{-",
|
||||
append_punctuations: str = "\"\'.。,,!!??::\")]}、",
|
||||
clip_timestamps: Union[str, List[float]] = "0",
|
||||
hallucination_silence_threshold: Optional[float] = None
|
||||
audio_file: UploadFile = File(...),
|
||||
params: TranscribeParams = Depends(),
|
||||
api_key: str = Depends(verify_api_key)
|
||||
):
|
||||
# Token validation
|
||||
if token not in get_keys():
|
||||
logger.warning(f"Invalid token attempt: {token}")
|
||||
raise HTTPException(status_code=403, detail="Forbidden")
|
||||
"""Транскрибирует аудиофайл"""
|
||||
if model is None:
|
||||
raise HTTPException(status_code=500, detail="Модель не загружена")
|
||||
|
||||
model = whisper.load_model(model) # Load the Whisper model
|
||||
# Готовим параметры для whisper.transcribe()
|
||||
whisper_params = {}
|
||||
for field_name, field_value in params.dict(exclude_none=True, exclude={'format'}).items():
|
||||
whisper_params[field_name] = field_value
|
||||
|
||||
logger.info(f"Processing file: {file.filename} with model: {model}")
|
||||
|
||||
# Save uploaded file
|
||||
temp_input_path = f"/tmp/input_{file.filename}"
|
||||
temp_output_path = f"/tmp/converted_{file.filename}.wav"
|
||||
# Формат ответа
|
||||
response_format = params.format
|
||||
|
||||
temp_file_path = None
|
||||
try:
|
||||
with open(temp_input_path, "wb") as f:
|
||||
f.write(await file.read())
|
||||
# Сохраняем временный файл
|
||||
temp_file_path = f"/tmp/{audio_file.filename}"
|
||||
with open(temp_file_path, "wb") as temp_file:
|
||||
content = await audio_file.read()
|
||||
temp_file.write(content)
|
||||
|
||||
# Convert audio if needed
|
||||
logger.debug("Converting audio file")
|
||||
if not convert_audio(temp_input_path, temp_output_path, speed_up):
|
||||
raise HTTPException(status_code=400, detail="Audio conversion failed")
|
||||
# Транскрибируем
|
||||
logger.info(f"Транскрибация файла: {audio_file.filename} с параметрами: {whisper_params}")
|
||||
result = model.transcribe(temp_file_path, **whisper_params)
|
||||
|
||||
# Get audio duration before speed up
|
||||
original_duration = get_audio_duration(temp_input_path)
|
||||
|
||||
# Transcribe
|
||||
logger.info("Starting transcription")
|
||||
if original_duration > 30:
|
||||
logger.info("Audio duration > 30 seconds, using transcribe_longform")
|
||||
transcription_result = model.transcribe_longform(
|
||||
temp_output_path
|
||||
)
|
||||
else:
|
||||
logger.info("Audio duration <= 30 seconds, using transcribe")
|
||||
transcription_result = model.transcribe(
|
||||
temp_output_path
|
||||
)
|
||||
|
||||
full_text = ""
|
||||
for part in transcription_result:
|
||||
if part["transcription"].strip() != "":
|
||||
full_text += part["transcription"].strip() + " "
|
||||
|
||||
result = {
|
||||
"transcription": transcription_result,
|
||||
"text": full_text
|
||||
}
|
||||
# Удаляем временный файл
|
||||
os.unlink(temp_file_path)
|
||||
|
||||
# Возвращаем результат в нужном формате
|
||||
if response_format == 'text':
|
||||
return PlainTextResponse(content=result['text'])
|
||||
elif response_format == 'simple':
|
||||
return {"text": result['text']}
|
||||
else: # json - полный ответ по умолчанию
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Transcription failed: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
logger.error(f"Ошибка транскрибации: {e}")
|
||||
# Удаляем временный файл в случае ошибки
|
||||
if temp_file_path and os.path.exists(temp_file_path):
|
||||
os.unlink(temp_file_path)
|
||||
raise HTTPException(status_code=500, detail=f"Ошибка транскрибации: {str(e)}")
|
||||
|
||||
finally:
|
||||
# Cleanup temporary files
|
||||
if os.path.exists(temp_input_path):
|
||||
os.remove(temp_input_path)
|
||||
if os.path.exists(temp_output_path):
|
||||
os.remove(temp_output_path)
|
||||
@app.post("/keys/reload")
async def reload_keys(api_key: str = Depends(verify_api_key)):
    """Перезагружает ключи из файла"""
    # Re-reads the keys file and reports how many keys are now active.
    # Requires a valid API key (enforced by the verify_api_key dependency).
    # The docstring is kept verbatim because FastAPI exposes it as the
    # endpoint description.
    load_api_keys()
    # Read the size under the lock, build the response outside of it.
    with keys_lock:
        reloaded_total = len(api_keys)
    return {"message": f"Перезагружено {reloaded_total} ключей"}
|
||||
|
||||
|
||||
def main():
|
||||
import uvicorn
|
||||
get_keys()
|
||||
uvicorn.run(app, host="0.0.0.0", port=9854, log_level="debug")
|
||||
@app.get("/keys/count")
async def get_keys_count(api_key: str = Depends(verify_api_key)):
    """Возвращает количество активных ключей"""
    # Returns the number of API keys currently held in memory.
    # Requires a valid API key (enforced by the verify_api_key dependency).
    # The docstring is kept verbatim because FastAPI exposes it as the
    # endpoint description.
    with keys_lock:
        active_total = len(api_keys)
    return {"count": active_total}
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
host = os.getenv("HOST", "0.0.0.0")
|
||||
port = int(os.getenv("PORT", "9854"))
|
||||
log_level = os.getenv("LOG_LEVEL", "info")
|
||||
|
||||
uvicorn.run(
|
||||
"app:app",
|
||||
host=host,
|
||||
port=port,
|
||||
log_level=log_level,
|
||||
reload=False
|
||||
)
|
||||
|
||||
20
asr.service
Normal file
20
asr.service
Normal file
@@ -0,0 +1,20 @@
|
||||
[Unit]
|
||||
Description=Whisper ASR Server (ROCM)
|
||||
After=network.target
|
||||
Wants=network.target
|
||||
|
||||
[Service]
|
||||
Type=exec
|
||||
User=asr
|
||||
Group=asr
|
||||
WorkingDirectory=/opt/asr
|
||||
ExecStart=/opt/asr/start_server.sh
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
SyslogIdentifier=asr
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,6 +1,40 @@
|
||||
services:
|
||||
whisper-app:
|
||||
asr-server:
|
||||
build: .
|
||||
ports:
|
||||
- "9854:9854"
|
||||
command: ["python", "app.py"]
|
||||
- "${PORT:-9854}:${PORT:-9854}"
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
- HOST=${HOST}
|
||||
- PORT=${PORT}
|
||||
- DEFAULT_MODEL=${DEFAULT_MODEL}
|
||||
- MODEL_DEVICE=${MODEL_DEVICE}
|
||||
- MODEL_DOWNLOAD_ROOT=/app/models
|
||||
- KEYS_FILE=/app/data/keys.txt
|
||||
- HSA_OVERRIDE_GFX_VERSION=${HSA_OVERRIDE_GFX_VERSION}
|
||||
- LOG_LEVEL=${LOG_LEVEL}
|
||||
- AUDIO_SPEEDUP=${AUDIO_SPEEDUP}
|
||||
volumes:
|
||||
- ./models:/app/models
|
||||
- ./data:/app/data
|
||||
# GPU support - раскомментируйте нужную секцию
|
||||
# Для NVIDIA GPU:
|
||||
# deploy:
|
||||
# resources:
|
||||
# reservations:
|
||||
# devices:
|
||||
# - driver: nvidia
|
||||
# count: all
|
||||
# capabilities: [gpu]
|
||||
# Для AMD GPU (ROCm):
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri:/dev/dri
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:${PORT:-9854}/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
59
docker-entrypoint.sh
Normal file
59
docker-entrypoint.sh
Normal file
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Docker entrypoint script for ASR service
|
||||
# Generates API keys if needed and starts the server
|
||||
|
||||
set -e
|
||||
|
||||
#######################################
# Generate a random API key.
# Uses `openssl rand -hex 32` when openssl is available.
# Globals:   none
# Arguments: none
# Outputs:   the key followed by a newline on stdout; an empty line when
#            openssl is missing or fails, so callers can test the result
#            with -n / -z
# Returns:   always 0. The script runs under `set -e`, where a non-zero status
#            from a command substitution in a plain assignment aborts the
#            script; "no key" is therefore signalled by empty output only.
#######################################
generate_api_key() {
  local key=""
  if command -v openssl >/dev/null 2>&1; then
    # A failing openssl must not propagate its status: fall back to "no key",
    # exactly as if openssl were not installed.
    key=$(openssl rand -hex 32) || key=""
  fi
  printf '%s\n' "${key}"
}
|
||||
|
||||
# Default every setting this script and the server use. ":-" also replaces
# empty values, which matters because docker-compose.yml forwards entries such
# as MODEL_DEVICE=${MODEL_DEVICE} even when the variable is not set.
export HOST="${HOST:-0.0.0.0}"
export PORT="${PORT:-9854}"
export DEFAULT_MODEL="${DEFAULT_MODEL:-turbo}"
# Same default as start_server.sh and .env.example, so the container and the
# systemd deployment agree on the device when nothing is configured.
export MODEL_DEVICE="${MODEL_DEVICE:-cuda}"
export MODEL_DOWNLOAD_ROOT="${MODEL_DOWNLOAD_ROOT:-/app/models}"
export KEYS_FILE="${KEYS_FILE:-/app/data/keys.txt}"
export LOG_LEVEL="${LOG_LEVEL:-info}"
|
||||
|
||||
# Make sure the model directory and the directory holding the keys file exist.
mkdir -p "${MODEL_DOWNLOAD_ROOT}"
mkdir -p "$(dirname "${KEYS_FILE}")"

# First start only: seed the keys file. An existing file is never touched.
if [ ! -f "${KEYS_FILE}" ]; then
  echo "Creating default keys file..."

  # Empty when generate_api_key could not produce a key.
  GENERATED_KEY=$(generate_api_key)

  if [ -n "${GENERATED_KEY}" ]; then
    # The file holds credentials, so create it readable by the owner only.
    # The subshell keeps the tightened umask from leaking into the server
    # process started at the end of this script.
    (umask 077 && echo "${GENERATED_KEY}" > "${KEYS_FILE}")
    # NOTE(review): the key is also written to the log here; consider
    # pointing at the file instead if logs are widely readable.
    echo "Generated secure API key using openssl: ${GENERATED_KEY}"
    echo "Created keys file at: ${KEYS_FILE}"
  else
    echo "WARNING: openssl not found! Cannot generate secure API key."
    echo "Please manually add API keys to ${KEYS_FILE}"
    echo "Each key should be 64 hex characters (32 bytes) on a separate line."
    echo ""
    echo "Creating empty keys file - you must add keys manually before starting the server."
    (umask 077 && touch "${KEYS_FILE}")
  fi
fi
|
||||
|
||||
# Summarise the effective configuration in the container log, one setting
# per line.
printf '%s\n' \
  "Starting Simple ASR Server in Docker..." \
  "Host: ${HOST}" \
  "Port: ${PORT}" \
  "Default Model: ${DEFAULT_MODEL}" \
  "Model Download Root: ${MODEL_DOWNLOAD_ROOT}" \
  "Keys File: ${KEYS_FILE}" \
  "Log Level: ${LOG_LEVEL}"

# exec replaces this shell with the server process instead of leaving the
# shell running as its parent.
exec python app.py
|
||||
@@ -1,8 +1,12 @@
|
||||
# PyTorch с поддержкой ROCm
|
||||
--index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch
|
||||
torchaudio
|
||||
|
||||
# Остальные зависимости
|
||||
fastapi
|
||||
uvicorn[standard]
|
||||
python-multipart
|
||||
gigaam
|
||||
gigaam[longform]
|
||||
ffmpeg-python
|
||||
PyYAML
|
||||
numpy<2.0.0
|
||||
openai-whisper
|
||||
python-dotenv
|
||||
pydantic
|
||||
|
||||
106
start_server.sh
Executable file
106
start_server.sh
Executable file
@@ -0,0 +1,106 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Simple ASR Server startup script for systemd
|
||||
# This script loads environment variables from .env file and starts the server
|
||||
|
||||
set -e
|
||||
|
||||
# Resolve the absolute directory this script lives in; the application files
# (.env, requirements.txt, venv, app.py) are looked up relative to it.
APP_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SCRIPT_DIR="${APP_DIR}"

# When a ROCm installation is present under /opt/rocm, put its tools and
# libraries on the search paths. Values already present in the environment
# are kept.
if [[ -d /opt/rocm ]]; then
  : "${ROCM_PATH:=/opt/rocm}"
  # Defaults to device 0 only; set HIP_VISIBLE_DEVICES beforehand to choose
  # other GPUs.
  : "${HIP_VISIBLE_DEVICES:=0}"
  PATH="${ROCM_PATH}/bin:${PATH}"
  LD_LIBRARY_PATH="${ROCM_PATH}/lib:${LD_LIBRARY_PATH:-}"
  export ROCM_PATH PATH LD_LIBRARY_PATH HIP_VISIBLE_DEVICES
  echo "ROCm detected, configured environment variables"
fi
|
||||
|
||||
#######################################
# Generate a random API key.
# Prefers `openssl rand`. When the openssl CLI is not installed, falls back
# to reading 32 bytes from /dev/urandom with od, so a host without openssl
# still gets a usable key instead of an empty keys file.
# Arguments: none
# Outputs:   64 lowercase hex characters on stdout, or an empty line when no
#            random source is usable; a note on stderr when the fallback ran
# Returns:   the exit status of openssl when it is used, 0 otherwise
#######################################
generate_api_key() {
  local key=""

  if command -v openssl >/dev/null 2>&1; then
    openssl rand -hex 32
    # Bare return propagates the status of openssl.
    return
  fi

  if [ -r /dev/urandom ] && command -v od >/dev/null 2>&1; then
    # od prints the bytes separated by blanks and newlines; keep only the
    # hex digits so the result is one contiguous token.
    key="$(od -An -v -tx1 -N 32 /dev/urandom | tr -dc '0-9a-f')"
    # Discard a short read rather than hand back a weak key.
    if [ "${#key}" -ne 64 ]; then
      key=""
    fi
  fi

  if [ -n "${key}" ]; then
    echo "openssl not found; generated API key from /dev/urandom instead" >&2
  fi
  echo "${key}"
}
|
||||
|
||||
# Read settings from the .env file next to the script when there is one;
# otherwise continue with whatever the environment already provides.
if [ ! -f "${APP_DIR}/.env" ]; then
  echo "Warning: .env file not found at ${APP_DIR}/.env"
  echo "Using default environment variables"
else
  echo "Loading environment variables from ${APP_DIR}/.env"
  set -a  # export every variable the file assigns
  . "${APP_DIR}/.env"
  set +a
fi

# Fill in anything still unset or empty, then export the whole set so the
# server process inherits it.
: "${HOST:=0.0.0.0}"
: "${PORT:=9854}"
: "${DEFAULT_MODEL:=turbo}"
: "${MODEL_DEVICE:=cuda}"
: "${MODEL_DOWNLOAD_ROOT:=${APP_DIR}/models}"
: "${KEYS_FILE:=${APP_DIR}/data/keys.txt}"
: "${LOG_LEVEL:=info}"
export HOST PORT DEFAULT_MODEL MODEL_DEVICE MODEL_DOWNLOAD_ROOT KEYS_FILE LOG_LEVEL
|
||||
|
||||
# Make sure the model directory and the directory holding the keys file exist.
mkdir -p "${MODEL_DOWNLOAD_ROOT}"
mkdir -p "$(dirname "${KEYS_FILE}")"

# First start only: seed the keys file. An existing file is never touched.
if [ ! -f "${KEYS_FILE}" ]; then
  echo "Creating default keys file..."

  # Empty when generate_api_key could not produce a key.
  GENERATED_KEY=$(generate_api_key)

  if [ -n "${GENERATED_KEY}" ]; then
    # The file holds credentials, so create it readable by the owner only.
    # The subshell keeps the tightened umask from leaking into the rest of
    # the script (venv creation, pip, the server process).
    (umask 077 && echo "${GENERATED_KEY}" > "${KEYS_FILE}")
    # NOTE(review): the key is also written to the log here; consider
    # pointing at the file instead if the journal is widely readable.
    echo "Generated secure API key using openssl: ${GENERATED_KEY}"
    echo "Created keys file at: ${KEYS_FILE}"
  else
    echo "WARNING: openssl not found! Cannot generate secure API key."
    echo "Please manually add API keys to ${KEYS_FILE}"
    echo "Each key should be 64 hex characters (32 bytes) on a separate line."
    echo "Example key format: 0000000000000000000000000000000000000000000000000000000000000000"
    echo ""
    echo "Creating empty keys file - you must add keys manually before starting the server."
    (umask 077 && touch "${KEYS_FILE}")
  fi
fi
|
||||
|
||||
# The server runs from a virtual environment kept inside the app directory;
# build it the first time the script runs.
VENV_DIR="${APP_DIR}/venv"
if ! [ -d "${VENV_DIR}" ]; then
  echo "Creating virtual environment..."
  python3 -m venv "${VENV_DIR}"
fi

# From here on, python and pip resolve to the virtual environment.
echo "Activating virtual environment..."
. "${VENV_DIR}/bin/activate"

# Runs on every start, not only after the environment was created.
echo "Installing/upgrading dependencies..."
pip install --upgrade pip
pip install -r "${APP_DIR}/requirements.txt"
|
||||
|
||||
# app.py is started by relative path, so run from the app directory.
cd "${APP_DIR}"

# Summarise the effective configuration in the log, one setting per line.
printf '%s\n' \
  "Starting Simple ASR Server..." \
  "Host: ${HOST}" \
  "Port: ${PORT}" \
  "Default Model: ${DEFAULT_MODEL}" \
  "Model Device: ${MODEL_DEVICE}" \
  "Model Download Root: ${MODEL_DOWNLOAD_ROOT}" \
  "Keys File: ${KEYS_FILE}" \
  "Log Level: ${LOG_LEVEL}"

# exec replaces this shell with the server process instead of leaving the
# shell running as its parent.
exec python app.py
|
||||
Reference in New Issue
Block a user