Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Git
.git
.github
.gitignore
.gitattributes

# Docker (the image never needs its own build tooling)
.dockerignore
Dockerfile
docker-compose.yml
docker_builder.sh

# CI/CD and development files
.circleci/
.travis.yml
.env
# Exclude markdown, but keep README.md because the Dockerfile COPYs it.
# NOTE: negation patterns (!) only take effect when they follow the
# wildcard they override, so keep these lines after *.md.
*.md
!README.md
!LICENSE
docs/
tests/

# Virtual environments
venv/
env/
.venv/
.env/
.python-version

# Python cache
__pycache__/
*.py[cod]
*$py.class
*.so
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/
.hypothesis/
.eggs/
*.egg-info/
*.egg

# IDE specific files
.idea/
.vscode/
*.swp
*.swo
.DS_Store

# Temporary files
temp/
tmp/
*.tmp
*.log

# API outputs (these should be created at runtime)
api/outputs/

# Local model directories (only include if specified)
# Uncomment if you never want to include models
# pretrained_models/

# Jupyter Notebooks
.ipynb_checkpoints
*.ipynb

# Large unnecessary files
# NOTE(review): example/prompt_audio.wav is referenced by api/.env.example
# as the default prompt — confirm *.wav here does not exclude a file the
# image needs (the Dockerfile COPYs example/ explicitly, but .dockerignore
# filters the build context first).
*.wav
*.wav.zip
*.mp3
*.mp4
*.tar.gz
output/
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,7 @@ cython_debug/

# PyPI configuration file
.pypirc


api/.env
api/outputs/
119 changes: 119 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Usage Instructions
# 1. Recommended way to build all images at once:
# ./docker_builder.sh
# This creates: spark-tts:latest-lite, spark-tts:latest (alias of latest-lite), and spark-tts:latest-full
#
# 2. Manual build without models:
# docker build -t spark-tts:latest-lite .
# docker tag spark-tts:latest-lite spark-tts:latest
#
# 3. Manual build with models:
# docker build --build-arg INCLUDE_MODELS=true -t spark-tts:latest-full .
#
# 4. Run container without models (needs to mount models):
# docker run -p 7860:7860 --gpus all -v /local/path/pretrained_models:/app/pretrained_models spark-tts:latest-lite
#
# 5. Run container with models:
# docker run -p 7860:7860 --gpus all spark-tts:latest-full
#
# 6. Run with API (default):
# docker run -p 7860:7860 --gpus all -e SERVICE_TYPE=api spark-tts:latest-full
#
# 7. Run with WebUI:
# docker run -p 7860:7860 --gpus all -e SERVICE_TYPE=webui spark-tts:latest-full
#
# 8. Use docker-compose for more advanced configurations:
# docker-compose up api # Run API service
# docker-compose up webui # Run WebUI service
#
# Note:
# - NVIDIA Container Toolkit must be installed on the host to support GPU
# - If using an image without models, you can provide models in the following ways:
# a) Mount the model directory from the host: docker run -p 7860:7860 --gpus all -v /local/path/pretrained_models:/app/pretrained_models spark-tts:latest-lite
# b) Download models inside the container: python -c "from huggingface_hub import snapshot_download; snapshot_download('SparkAudio/Spark-TTS-0.5B', local_dir='pretrained_models/Spark-TTS-0.5B')"

# Build argument deciding whether model weights are baked into the image.
# Declared before FROM so it is visible to both stages; each stage that
# uses it must redeclare it (ARG scoping rule).
ARG INCLUDE_MODELS=false

# --- Stage 1: model staging --------------------------------------------------
# The whole build context is copied ONLY into this throwaway stage.
# In the previous single-stage layout, `COPY . /tmp/context/` followed by a
# later `RUN rm -rf /tmp/context` still shipped the full context inside an
# image layer: deleting files in a later layer never shrinks earlier layers.
# Staging the context here keeps that blob out of the final image entirely.
FROM python:3.12-slim AS model-staging
ARG INCLUDE_MODELS

COPY . /tmp/context/

# Always produce /staging/pretrained_models (possibly empty) so the runtime
# stage can COPY --from it unconditionally.
RUN mkdir -p /staging/pretrained_models && \
    echo "INCLUDE_MODELS=${INCLUDE_MODELS}" && \
    if [ "${INCLUDE_MODELS}" = "true" ]; then \
        echo "Including models in the image"; \
        if [ -d /tmp/context/pretrained_models ]; then \
            cp -r /tmp/context/pretrained_models/* /staging/pretrained_models/ \
                || echo "No model files to copy"; \
        else \
            echo "Warning: pretrained_models directory not found in build context"; \
        fi; \
    else \
        echo "Models will need to be mounted at runtime"; \
    fi

# --- Stage 2: runtime image --------------------------------------------------
FROM python:3.12-slim

# Set working directory (created automatically if missing)
WORKDIR /app

# System dependencies: git + git-lfs for in-container model downloads,
# ffmpeg and libsndfile1 for audio I/O. apt cache is removed in the SAME
# layer so it never ships in the image; `git lfs install` is folded in to
# avoid an extra layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    git \
    git-lfs \
    libsndfile1 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install

# Install Python dependencies before copying the source tree so this layer
# stays cached until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy project files (layered copying to optimize caching)
COPY cli/ ./cli/
COPY sparktts/ ./sparktts/
COPY src/ ./src/
COPY example/ ./example/
COPY api/ ./api/
COPY webui.py .
COPY LICENSE README.md ./

# Models (or an empty directory when INCLUDE_MODELS=false) from stage 1.
COPY --from=model-staging /staging/pretrained_models/ /app/pretrained_models/

# Outputs directory for the API. 777 because the runtime UID is not fixed;
# make the launch script executable in the same layer.
RUN mkdir -p /app/api/outputs && chmod 777 /app/api/outputs \
    && chmod +x /app/api/run_api.sh

# Runtime configuration: SERVICE_TYPE selects the API (default) or the WebUI.
ENV PYTHONPATH=/app
ENV SERVICE_TYPE=api

# Single port shared by both WebUI and API. EXPOSE is documentation only;
# publishing still requires -p at `docker run` time.
EXPOSE 7860

# Exec-form CMD wrapping a shell so $SERVICE_TYPE is expanded at runtime;
# `exec` replaces the shell so the chosen service runs as PID 1 and
# receives SIGTERM from `docker stop`.
CMD ["/bin/sh", "-c", "if [ \"$SERVICE_TYPE\" = \"webui\" ]; then exec python webui.py --device 0; else exec ./api/run_api.sh; fi"]
63 changes: 63 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,69 @@ For additional CLI and Web UI methods, including alternative implementations and

- [CLI and UI by AcTePuKc](https://github.com/SparkAudio/Spark-TTS/issues/10)

**API Service**

Spark-TTS provides a FastAPI-based web API service for seamless integration with other applications.

1. **Running the API service in a conda environment**:
```sh
# Make sure you're in the Spark-TTS conda environment
conda activate sparktts

# Execute from the project root directory
./api/run_api.sh
```
The API will be available at http://localhost:7860 by default.

2. **Docker support**:
You can build and run the Spark-TTS API using the provided build script:
```sh
# Build Docker images (both full and lite versions)
chmod +x docker_builder.sh
./docker_builder.sh

# Run the API service in the background
docker compose up -d api
# OR for the lite version with mounted models
docker compose up -d api-lite

# Run the WebUI service in the background
docker compose up -d webui
# OR for the lite version with mounted models
docker compose up -d webui-lite

# To check running containers
docker compose ps

# To stop services
docker compose down
```

> **Note**: If you encounter YAML errors like `mapping key "<<" already defined`, it might be due to compatibility issues with YAML merge keys in your Docker Compose version. You can either:
> 1. Update Docker to the latest version
> 2. Modify the docker-compose.yml file to use a different syntax for environment variable inheritance
> 3. Use the Docker CLI directly: `docker run -p 7860:7860 --gpus all spark-tts:latest-full`

For more customization options, see the environment variables in the docker-compose.yml file.

3. **Client Example**:
The repository includes an example client script that demonstrates how to interact with the API:
```sh
# Note: The example client requires librosa, which is not in requirements.txt
pip install librosa

# Basic usage
python api/example_client.py --text "Text to synthesize"

# Voice cloning with reference audio
python api/example_client.py --text "This is voice cloning" --prompt_audio example/prompt_audio.wav

# Voice creation with parameters
python api/example_client.py --text "This is voice creation" --gender female --pitch high --speed moderate
```

For more detailed information about the API service, including all available endpoints and parameters, please refer to the [API README](api/README.md).


## Runtime

Expand Down
31 changes: 31 additions & 0 deletions api/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Spark-TTS API Environment Variable Configuration Example
# Copy this file to .env and modify the configuration as needed

# === Service Configuration ===
# Port/host the FastAPI server binds to; 0.0.0.0 listens on all interfaces
SPARK_TTS_API_PORT=7860
SPARK_TTS_API_HOST=0.0.0.0
SPARK_TTS_API_DEBUG=False

# === Security Configuration ===
# Name of the HTTP header carrying the API key. Leave SPARK_TTS_API_KEY
# empty to disable authentication (presumably — verify against api code).
SPARK_TTS_API_KEY_NAME=X-SPARKTTS-API-KEY
SPARK_TTS_API_KEY=

# === TTS Model Configuration ===
# Path is resolved relative to the working directory the API is started from
SPARK_TTS_MODEL_DIR=pretrained_models/Spark-TTS-0.5B
# Device configuration:
# - cpu: Use CPU for inference
# - gpu: Use default GPU for inference
# - gpu:N: Use specific GPU (N is device ID) for inference
SPARK_TTS_DEVICE=gpu:0

# === Default Prompt Configuration ===
# Default reference text/audio pair used for voice cloning when the caller
# supplies none. The text below is Chinese sample ad copy matching the
# bundled example/prompt_audio.wav; the two must stay in sync.
SPARK_TTS_DEFAULT_PROMPT_TEXT=吃燕窝就选燕之屋,本节目由26年专注高品质燕窝的燕之屋冠名播出。
SPARK_TTS_DEFAULT_PROMPT_SPEECH=example/prompt_audio.wav

# === Output Configuration ===
# Directory where generated audio is written, and the URL path prefix
# under which those files are served back to clients
SPARK_TTS_OUTPUT_DIR=api/outputs
SPARK_TTS_OUTPUT_URL_PREFIX=/outputs

# === Cleanup Configuration ===
# Seconds between cleanup sweeps, and file age in seconds before deletion
SPARK_TTS_CLEANUP_INTERVAL=3600
SPARK_TTS_FILE_EXPIRY_TIME=86400
Loading