Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Git
.git
.github
.gitignore
.gitattributes

# Docker (the image never needs its own build tooling)
.dockerignore
Dockerfile
docker-compose.yml
docker_builder.sh

# CI/CD and development files
.circleci/
.travis.yml
.env
# Exclude markdown, but keep README.md because the Dockerfile COPYs it.
# NOTE: negation patterns (!) only take effect when they follow the
# wildcard they override, so keep these lines after *.md.
*.md
!README.md
!LICENSE
docs/
tests/

# Virtual environments
venv/
env/
.venv/
.env/
.python-version

# Python cache
__pycache__/
*.py[cod]
*$py.class
*.so
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/
.hypothesis/
.eggs/
*.egg-info/
*.egg

# IDE specific files
.idea/
.vscode/
*.swp
*.swo
.DS_Store

# Temporary files
temp/
tmp/
*.tmp
*.log

# API outputs (these should be created at runtime)
api/outputs/

# Local model directories (only include if specified)
# Uncomment if you never want to include models
# pretrained_models/

# Jupyter Notebooks
.ipynb_checkpoints
*.ipynb

# Large unnecessary files
# NOTE(review): example/prompt_audio.wav is referenced by api/.env.example
# as the default prompt — confirm *.wav here does not exclude a file the
# image needs (the Dockerfile COPYs example/ explicitly, but .dockerignore
# filters the build context first).
*.wav
*.wav.zip
*.mp3
*.mp4
*.tar.gz
output/
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,7 @@ cython_debug/

# PyPI configuration file
.pypirc


api/.env
api/outputs/
119 changes: 119 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Usage Instructions
# 1. Recommended way to build all images at once:
# ./docker_builder.sh
# This creates: spark-tts:latest-lite, spark-tts:latest (alias of latest-lite), and spark-tts:latest-full
#
# 2. Manual build without models:
# docker build -t spark-tts:latest-lite .
# docker tag spark-tts:latest-lite spark-tts:latest
#
# 3. Manual build with models:
# docker build --build-arg INCLUDE_MODELS=true -t spark-tts:latest-full .
#
# 4. Run container without models (needs to mount models):
# docker run -p 7860:7860 --gpus all -v /local/path/pretrained_models:/app/pretrained_models spark-tts:latest-lite
#
# 5. Run container with models:
# docker run -p 7860:7860 --gpus all spark-tts:latest-full
#
# 6. Run with API (default):
# docker run -p 7860:7860 --gpus all -e SERVICE_TYPE=api spark-tts:latest-full
#
# 7. Run with WebUI:
# docker run -p 7860:7860 --gpus all -e SERVICE_TYPE=webui spark-tts:latest-full
#
# 8. Use docker-compose for more advanced configurations:
# docker-compose up api # Run API service
# docker-compose up webui # Run WebUI service
#
# Note:
# - NVIDIA Container Toolkit must be installed on the host to support GPU
# - If using an image without models, you can provide models in the following ways:
# a) Mount the model directory from the host: docker run -p 7860:7860 --gpus all -v /local/path/pretrained_models:/app/pretrained_models spark-tts:latest-lite
# b) Download models inside the container: python -c "from huggingface_hub import snapshot_download; snapshot_download('SparkAudio/Spark-TTS-0.5B', local_dir='pretrained_models/Spark-TTS-0.5B')"

# Build argument deciding whether model weights are baked into the image.
# Declared before FROM so it is visible to both stages; each stage that
# uses it must redeclare it (ARG scoping rule).
ARG INCLUDE_MODELS=false

# --- Stage 1: model staging --------------------------------------------------
# The whole build context is copied ONLY into this throwaway stage.
# In the previous single-stage layout, `COPY . /tmp/context/` followed by a
# later `RUN rm -rf /tmp/context` still shipped the full context inside an
# image layer: deleting files in a later layer never shrinks earlier layers.
# Staging the context here keeps that blob out of the final image entirely.
FROM python:3.12-slim AS model-staging
ARG INCLUDE_MODELS

COPY . /tmp/context/

# Always produce /staging/pretrained_models (possibly empty) so the runtime
# stage can COPY --from it unconditionally.
RUN mkdir -p /staging/pretrained_models && \
    echo "INCLUDE_MODELS=${INCLUDE_MODELS}" && \
    if [ "${INCLUDE_MODELS}" = "true" ]; then \
        echo "Including models in the image"; \
        if [ -d /tmp/context/pretrained_models ]; then \
            cp -r /tmp/context/pretrained_models/* /staging/pretrained_models/ \
                || echo "No model files to copy"; \
        else \
            echo "Warning: pretrained_models directory not found in build context"; \
        fi; \
    else \
        echo "Models will need to be mounted at runtime"; \
    fi

# --- Stage 2: runtime image --------------------------------------------------
FROM python:3.12-slim

# Set working directory (created automatically if missing)
WORKDIR /app

# System dependencies: git + git-lfs for in-container model downloads,
# ffmpeg and libsndfile1 for audio I/O. apt cache is removed in the SAME
# layer so it never ships in the image; `git lfs install` is folded in to
# avoid an extra layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    git \
    git-lfs \
    libsndfile1 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install

# Install Python dependencies before copying the source tree so this layer
# stays cached until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy project files (layered copying to optimize caching)
COPY cli/ ./cli/
COPY sparktts/ ./sparktts/
COPY src/ ./src/
COPY example/ ./example/
COPY api/ ./api/
COPY webui.py .
COPY LICENSE README.md ./

# Models (or an empty directory when INCLUDE_MODELS=false) from stage 1.
COPY --from=model-staging /staging/pretrained_models/ /app/pretrained_models/

# Outputs directory for the API. 777 because the runtime UID is not fixed;
# make the launch script executable in the same layer.
RUN mkdir -p /app/api/outputs && chmod 777 /app/api/outputs \
    && chmod +x /app/api/run_api.sh

# Runtime configuration: SERVICE_TYPE selects the API (default) or the WebUI.
ENV PYTHONPATH=/app
ENV SERVICE_TYPE=api

# Single port shared by both WebUI and API. EXPOSE is documentation only;
# publishing still requires -p at `docker run` time.
EXPOSE 7860

# Exec-form CMD wrapping a shell so $SERVICE_TYPE is expanded at runtime;
# `exec` replaces the shell so the chosen service runs as PID 1 and
# receives SIGTERM from `docker stop`.
CMD ["/bin/sh", "-c", "if [ \"$SERVICE_TYPE\" = \"webui\" ]; then exec python webui.py --device 0; else exec ./api/run_api.sh; fi"]
63 changes: 63 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,69 @@ For additional CLI and Web UI methods, including alternative implementations and

- [CLI and UI by AcTePuKc](https://github.com/SparkAudio/Spark-TTS/issues/10)

**API Service**

Spark-TTS provides a FastAPI-based web API service for seamless integration with other applications.

1. **Running the API service in a conda environment**:
```sh
# Make sure you're in the Spark-TTS conda environment
conda activate sparktts

# Execute from the project root directory
./api/run_api.sh
```
The API will be available at http://localhost:7860 by default.

2. **Docker support**:
You can build and run the Spark-TTS API using the provided build script:
```sh
# Build Docker images (both full and lite versions)
chmod +x docker_builder.sh
./docker_builder.sh

# Run the API service in the background
docker compose up -d api
# OR for the lite version with mounted models
docker compose up -d api-lite

# Run the WebUI service in the background
docker compose up -d webui
# OR for the lite version with mounted models
docker compose up -d webui-lite

# To check running containers
docker compose ps

# To stop services
docker compose down
```

> **Note**: If you encounter YAML errors like `mapping key "<<" already defined`, it might be due to compatibility issues with YAML merge keys in your Docker Compose version. You can either:
> 1. Update Docker to the latest version
> 2. Modify the docker-compose.yml file to use a different syntax for environment variable inheritance
> 3. Use the Docker CLI directly: `docker run -p 7860:7860 --gpus all spark-tts:latest-full`

For more customization options, see the environment variables in the docker-compose.yml file.

3. **Client Example**:
The repository includes an example client script that demonstrates how to interact with the API:
```sh
# Note: The example client requires librosa, which is not in requirements.txt
pip install librosa

# Basic usage
python api/example_client.py --text "Text to synthesize"

# Voice cloning with reference audio
python api/example_client.py --text "This is voice cloning" --prompt_audio example/prompt_audio.wav

# Voice creation with parameters
python api/example_client.py --text "This is voice creation" --gender female --pitch high --speed moderate
```

For more detailed information about the API service, including all available endpoints and parameters, please refer to the [API README](api/README.md).


## Runtime

Expand Down
31 changes: 31 additions & 0 deletions api/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Spark-TTS API Environment Variable Configuration Example
# Copy this file to .env and modify the configuration as needed

# === Service Configuration ===
# Port/host the FastAPI server binds to; 0.0.0.0 listens on all interfaces
SPARK_TTS_API_PORT=7860
SPARK_TTS_API_HOST=0.0.0.0
SPARK_TTS_API_DEBUG=False

# === Security Configuration ===
# Name of the HTTP header carrying the API key. Leave SPARK_TTS_API_KEY
# empty to disable authentication (presumably — verify against api code).
SPARK_TTS_API_KEY_NAME=X-SPARKTTS-API-KEY
SPARK_TTS_API_KEY=

# === TTS Model Configuration ===
# Path is resolved relative to the working directory the API is started from
SPARK_TTS_MODEL_DIR=pretrained_models/Spark-TTS-0.5B
# Device configuration:
# - cpu: Use CPU for inference
# - gpu: Use default GPU for inference
# - gpu:N: Use specific GPU (N is device ID) for inference
SPARK_TTS_DEVICE=gpu:0

# === Default Prompt Configuration ===
# Default reference text/audio pair used for voice cloning when the caller
# supplies none. The text below is Chinese sample ad copy matching the
# bundled example/prompt_audio.wav; the two must stay in sync.
SPARK_TTS_DEFAULT_PROMPT_TEXT=吃燕窝就选燕之屋,本节目由26年专注高品质燕窝的燕之屋冠名播出。
SPARK_TTS_DEFAULT_PROMPT_SPEECH=example/prompt_audio.wav

# === Output Configuration ===
# Directory where generated audio is written, and the URL path prefix
# under which those files are served back to clients
SPARK_TTS_OUTPUT_DIR=api/outputs
SPARK_TTS_OUTPUT_URL_PREFIX=/outputs

# === Cleanup Configuration ===
# Seconds between cleanup sweeps, and file age in seconds before deletion
SPARK_TTS_CLEANUP_INTERVAL=3600
SPARK_TTS_FILE_EXPIRY_TIME=86400
Loading