diff --git a/builds/mineru-vllm/README.md b/builds/mineru-vllm/README.md
index 50ef73a..61ea106 100644
--- a/builds/mineru-vllm/README.md
+++ b/builds/mineru-vllm/README.md
@@ -39,7 +39,7 @@ mineru -p demo.pdf -o ./output -b vlm-http-client -u http://localhost:30000
 
 ## Configuration
 
-- `MINERU_DOCKER_IMAGE`: The Docker image for MinerU, default is `alexsuntop/mineru:2.6.4`.
+- `MINERU_VERSION`: The Docker image version for MinerU, default is `2.6.4`.
 - `MINERU_PORT_OVERRIDE_VLLM`: The host port for the VLLM server, default is `30000`.
 - `MINERU_PORT_OVERRIDE_API`: The host port for the API service, default is `8000`.
 - `MINERU_PORT_OVERRIDE_GRADIO`: The host port for the Gradio WebUI, default is `7860`.
diff --git a/builds/mineru-vllm/README.zh.md b/builds/mineru-vllm/README.zh.md
index dbd593c..ea0c934 100644
--- a/builds/mineru-vllm/README.zh.md
+++ b/builds/mineru-vllm/README.zh.md
@@ -39,7 +39,7 @@ mineru -p demo.pdf -o ./output -b vlm-http-client -u http://localhost:30000
 
 ## 配置
 
-- `MINERU_DOCKER_IMAGE`: MinerU 的 Docker 镜像,默认为 `alexsuntop/mineru:2.6.4`。
+- `MINERU_VERSION`: MinerU 的 Docker 镜像版本,默认为 `2.6.4`。
 - `MINERU_PORT_OVERRIDE_VLLM`: VLLM 服务器的主机端口,默认为 `30000`。
 - `MINERU_PORT_OVERRIDE_API`: API 服务的主机端口,默认为 `8000`。
 - `MINERU_PORT_OVERRIDE_GRADIO`: Gradio WebUI 的主机端口,默认为 `7860`。
diff --git a/builds/mineru-vllm/docker-compose.yaml b/builds/mineru-vllm/docker-compose.yaml
index 8b45518..102649c 100644
--- a/builds/mineru-vllm/docker-compose.yaml
+++ b/builds/mineru-vllm/docker-compose.yaml
@@ -8,7 +8,7 @@ x-defaults: &defaults
 
 x-mineru-vllm: &mineru-vllm
   <<: *defaults
-  image: ${MINERU_DOCKER_IMAGE:-alexsuntop/mineru:2.6.4}
+  image: ${GLOBAL_REGISTRY:-}alexsuntop/mineru:${MINERU_VERSION:-2.6.4}
   build:
     context: .
     dockerfile: Dockerfile
diff --git a/builds/nexa-sdk/.env.example b/builds/nexa-sdk/.env.example
new file mode 100644
index 0000000..63e7057
--- /dev/null
+++ b/builds/nexa-sdk/.env.example
@@ -0,0 +1,41 @@
+# Global registry for container images (optional)
+# GLOBAL_REGISTRY=
+
+# Nexa SDK image versions
+NEXA_SDK_CPU_VERSION=0.2.57
+NEXA_SDK_CUDA_VERSION=0.2.57-cuda
+
+# Timezone configuration
+TZ=UTC
+
+# Port override for host binding
+NEXA_SDK_PORT_OVERRIDE=8080
+
+# Server configuration
+NEXA_HOST=0.0.0.0:8080
+NEXA_KEEPALIVE=300
+NEXA_ORIGINS=*
+
+# HuggingFace token for accessing private models (optional)
+NEXA_HFTOKEN=
+
+# Logging level (none, debug, info, warn, error)
+NEXA_LOG=none
+
+# Model to run (can be any Nexa-compatible model)
+# Examples: gemma-2-2b-instruct, qwen3-4b, llama-3-8b, mistral-7b
+NEXA_MODEL=gemma-2-2b-instruct
+
+# GPU configuration (for gpu profile only)
+# Number of GPU layers to offload (-1 for all layers)
+NEXA_GPU_LAYERS=-1
+
+# Shared memory size
+NEXA_SHM_SIZE=2g
+
+# Resource limits
+NEXA_SDK_CPU_LIMIT=4.0
+NEXA_SDK_MEMORY_LIMIT=8G
+
+# Resource reservations
+NEXA_SDK_CPU_RESERVATION=2.0
+NEXA_SDK_MEMORY_RESERVATION=4G
diff --git a/builds/nexa-sdk/Dockerfile b/builds/nexa-sdk/Dockerfile
index c7332e4..6e2ca15 100644
--- a/builds/nexa-sdk/Dockerfile
+++ b/builds/nexa-sdk/Dockerfile
@@ -1,6 +1,8 @@
 # https://github.com/NexaAI/nexa-sdk/issues/684
-FROM ubuntu:24.04
-#FROM nvidia/cuda:12.4.1-base-ubuntu22.04
+FROM ubuntu:22.04
 
 RUN apt update && apt install -y libgomp1 curl ffmpeg sox
 RUN curl -fsSL https://github.com/NexaAI/nexa-sdk/releases/latest/download/nexa-cli_linux_x86_64.sh | sh
+
+EXPOSE 8080
+CMD [ "nexa", "serve", "--host", "0.0.0.0:8080" ]
diff --git a/builds/nexa-sdk/Dockerfile.cuda b/builds/nexa-sdk/Dockerfile.cuda
new file mode 100644
index 0000000..8fe5df7
--- /dev/null
+++ b/builds/nexa-sdk/Dockerfile.cuda
@@ -0,0 +1,8 @@
+# https://github.com/NexaAI/nexa-sdk/issues/684
+FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04
+
+RUN apt update && apt install -y libgomp1 curl ffmpeg sox
+RUN curl -fsSL https://github.com/NexaAI/nexa-sdk/releases/latest/download/nexa-cli_linux_x86_64.sh | sh
+
+EXPOSE 8080
+CMD [ "nexa", "serve", "--host", "0.0.0.0:8080" ]
diff --git a/builds/nexa-sdk/README.md b/builds/nexa-sdk/README.md
new file mode 100644
index 0000000..687c657
--- /dev/null
+++ b/builds/nexa-sdk/README.md
@@ -0,0 +1,233 @@
+# Nexa SDK
+
+Nexa SDK is a comprehensive toolkit for running AI models locally. It provides inference for various model types including LLM, VLM (Vision Language Models), TTS (Text-to-Speech), ASR (Automatic Speech Recognition), and more. Built with performance in mind, it supports both CPU and GPU acceleration.
+
+## Features
+
+- **Multi-Model Support**: Run LLM, VLM, TTS, ASR, embedding, reranking, and image generation models
+- **OpenAI-Compatible API**: Provides standard OpenAI API endpoints for easy integration
+- **GPU Acceleration**: Optional GPU support via NVIDIA CUDA for faster inference
+- **Resource Management**: Configurable CPU/memory limits and GPU layer offloading
+- **Model Caching**: Persistent model storage for faster startup
+- **Profile Support**: Easy switching between CPU-only and GPU-accelerated modes
+
+## Quick Start
+
+### Prerequisites
+
+- Docker and Docker Compose
+- For GPU support: the NVIDIA Container Toolkit and a compatible GPU
+
+### Basic Usage (CPU)
+
+```bash
+# Copy environment file
+cp .env.example .env
+
+# Edit .env to configure your model and settings
+# NEXA_MODEL=gemma-2-2b-instruct
+
+# Start the service with the CPU profile
+docker compose --profile cpu up -d
+```
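+
+To verify the service came up and is passing its healthcheck, standard Compose commands are enough (service name `nexa-sdk`, as defined in `docker-compose.yaml`):
+
+```bash
+# Container status; the healthcheck result shows in the STATUS column
+docker compose --profile cpu ps
+
+# Follow the server logs while the first model downloads
+docker compose --profile cpu logs -f nexa-sdk
+```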
+
+### GPU-Accelerated Usage
+
+```bash
+# Copy environment file
+cp .env.example .env
+
+# Configure for GPU usage
+# NEXA_MODEL=gemma-2-2b-instruct
+# NEXA_GPU_LAYERS=-1 # -1 means all layers on GPU
+
+# Start the service with the GPU profile
+docker compose --profile gpu up -d
+```
+
+## Configuration
+
+### Environment Variables
+
+| Variable                 | Default               | Description                                            |
+| ------------------------ | --------------------- | ------------------------------------------------------ |
+| `NEXA_SDK_CPU_VERSION`   | `0.2.57`              | Docker image tag for the CPU service                   |
+| `NEXA_SDK_CUDA_VERSION`  | `0.2.57-cuda`         | Docker image tag for the CUDA service                  |
+| `NEXA_SDK_PORT_OVERRIDE` | `8080`                | Host port for API access                               |
+| `NEXA_MODEL`             | `gemma-2-2b-instruct` | Model to load (e.g., qwen3-4b, llama-3-8b, mistral-7b) |
+| `NEXA_HOST`              | `0.0.0.0:8080`        | Server bind address                                    |
+| `NEXA_KEEPALIVE`         | `300`                 | Model keepalive timeout in seconds                     |
+| `NEXA_ORIGINS`           | `*`                   | CORS allowed origins                                   |
+| `NEXA_HFTOKEN`           | -                     | HuggingFace token for private models                   |
+| `NEXA_LOG`               | `none`                | Logging level (none, debug, info, warn, error)         |
+| `NEXA_GPU_LAYERS`        | `-1`                  | GPU layers to offload (-1 = all, 0 = CPU only)         |
+| `NEXA_SHM_SIZE`          | `2g`                  | Shared memory size                                     |
+| `TZ`                     | `UTC`                 | Container timezone                                     |
+
+### Resource Limits
+
+| Variable                      | Default | Description        |
+| ----------------------------- | ------- | ------------------ |
+| `NEXA_SDK_CPU_LIMIT`          | `4.0`   | Maximum CPU cores  |
+| `NEXA_SDK_MEMORY_LIMIT`       | `8G`    | Maximum memory     |
+| `NEXA_SDK_CPU_RESERVATION`    | `2.0`   | Reserved CPU cores |
+| `NEXA_SDK_MEMORY_RESERVATION` | `4G`    | Reserved memory    |
+
+### Profiles
+
+- `cpu`: Run with CPU-only inference
+- `gpu`: Run with GPU acceleration (requires an NVIDIA GPU)
+
+Both services are gated behind a profile, so a bare `docker compose up` starts nothing; always pass `--profile cpu` or `--profile gpu`.
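+
+To double-check which service a given profile enables, Compose can print the resolved service list (plain `docker compose` functionality, nothing specific to this project):
+
+```bash
+# Lists `nexa-sdk` for the cpu profile, `nexa-sdk-cuda` for the gpu profile
+docker compose --profile cpu config --services
+docker compose --profile gpu config --services
+```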
+
+## Usage Examples
+
+### Test the API
+
+```bash
+# Check available models
+curl http://localhost:8080/v1/models
+
+# Chat completion
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemma-2-2b-instruct",
+    "messages": [
+      {"role": "user", "content": "Hello!"}
+    ]
+  }'
+```
+
+### Using Different Models
+
+Edit `.env` to change the model:
+
+```bash
+# Small models for limited resources
+NEXA_MODEL=gemma-2-2b-instruct
+# or
+NEXA_MODEL=qwen3-4b
+
+# Larger models for better quality
+NEXA_MODEL=llama-3-8b
+# or
+NEXA_MODEL=mistral-7b
+```
+
+### GPU Configuration
+
+For GPU acceleration, adjust the number of offloaded layers:
+
+```bash
+# Offload all layers to GPU (fastest)
+NEXA_GPU_LAYERS=-1
+
+# Offload 30 layers (hybrid mode)
+NEXA_GPU_LAYERS=30
+
+# CPU only
+NEXA_GPU_LAYERS=0
+```
+
+## Model Management
+
+Models are automatically downloaded on first run and cached in the `nexa_models` volume. The default cache location inside the container is `/root/.cache/nexa`.
+
+To use a different model:
+
+1. Update `NEXA_MODEL` in `.env`
+2. Restart the service: `docker compose --profile cpu restart` (use `--profile gpu` for the CUDA service)
+
+## API Endpoints
+
+Nexa SDK provides OpenAI-compatible API endpoints:
+
+- `GET /v1/models` - List available models
+- `POST /v1/chat/completions` - Chat completions
+- `POST /v1/completions` - Text completions
+- `POST /v1/embeddings` - Text embeddings
+- `GET /health` - Health check
+- `GET /docs` - API documentation (Swagger UI)
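+
+As a quick sketch of the embeddings endpoint (the request shape is assumed to follow the OpenAI convention; use a model that supports embeddings):
+
+```bash
+curl http://localhost:8080/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemma-2-2b-instruct",
+    "input": "Docker makes local inference easy."
+  }'
+```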
+
+## Troubleshooting
+
+### Out of Memory
+
+Increase the memory limits or use a smaller model:
+
+```bash
+NEXA_SDK_MEMORY_LIMIT=16G
+NEXA_SDK_MEMORY_RESERVATION=8G
+# Or switch to a smaller model
+NEXA_MODEL=gemma-2-2b-instruct
+```
+
+### GPU Not Detected
+
+Ensure the NVIDIA Container Toolkit is installed:
+
+```bash
+# Check GPU availability
+docker run --rm --gpus all nvidia/cuda:12.8.1-base-ubuntu22.04 nvidia-smi
+```
+
+### Model Download Issues
+
+Set a HuggingFace token if accessing private models:
+
+```bash
+NEXA_HFTOKEN=your_hf_token_here
+```
+
+### Slow Performance
+
+- Use the GPU profile for better performance
+- Increase `NEXA_GPU_LAYERS` to offload more computation to the GPU
+- Allocate more resources or use a smaller model
+
+## Advanced Configuration
+
+### Custom Model Path
+
+If you want to use local model files, mount them as a volume:
+
+```yaml
+volumes:
+  - ./models:/models
+  - nexa_models:/root/.cache/nexa
+```
+
+Then reference the model by its path in the compose `command`.
+
+### HTTPS Configuration
+
+Set the environment variable for HTTPS:
+
+```bash
+NEXA_ENABLEHTTPS=true
+```
+
+Mount certificate files:
+
+```yaml
+volumes:
+  - ./certs/cert.pem:/app/cert.pem:ro
+  - ./certs/key.pem:/app/key.pem:ro
+```
+
+## Health Check
+
+The service includes a health check that verifies the API is responding:
+
+```bash
+curl http://localhost:8080/v1/models
+```
+
+## License
+
+Nexa SDK is developed by Nexa AI. Please refer to the [official repository](https://github.com/NexaAI/nexa-sdk) for license information.
+
+## Links
+
+- [Official Repository](https://github.com/NexaAI/nexa-sdk)
+- [Nexa AI Website](https://nexa.ai)
+- [Documentation](https://docs.nexa.ai)
+- [Model Hub](https://sdk.nexa.ai)
diff --git a/builds/nexa-sdk/README.zh.md b/builds/nexa-sdk/README.zh.md
new file mode 100644
index 0000000..18b1bb3
--- /dev/null
+++ b/builds/nexa-sdk/README.zh.md
@@ -0,0 +1,233 @@
+# Nexa SDK
+
+Nexa SDK 是一个功能全面的本地 AI 模型运行工具包。它支持多种模型类型的推理,包括 LLM、VLM(视觉语言模型)、TTS(文本转语音)、ASR(自动语音识别)等。该工具专注于性能优化,支持 CPU 和 GPU 加速。
+
+## 特性
+
+- **多模型支持**:运行 LLM、VLM、TTS、ASR、嵌入、重排序和图像生成模型
+- **OpenAI 兼容 API**:提供标准的 OpenAI API 端点,便于集成
+- **GPU 加速**:通过 NVIDIA CUDA 提供可选的 GPU 支持,实现更快的推理速度
+- **资源管理**:可配置的 CPU/内存限制和 GPU 层卸载
+- **模型缓存**:持久化模型存储,加快启动速度
+- **配置文件支持**:轻松在 CPU 模式和 GPU 加速模式之间切换
+
+## 快速开始
+
+### 前置要求
+
+- Docker 和 Docker Compose
+- GPU 支持需要:NVIDIA Container Toolkit 和兼容的 GPU
+
+### 基本使用(CPU)
+
+```bash
+# 复制环境配置文件
+cp .env.example .env
+
+# 编辑 .env 配置模型和设置
+# NEXA_MODEL=gemma-2-2b-instruct
+
+# 使用 CPU 配置文件启动服务
+docker compose --profile cpu up -d
+```
+
+### GPU 加速使用
+
+```bash
+# 复制环境配置文件
+cp .env.example .env
+
+# 配置 GPU 使用
+# NEXA_MODEL=gemma-2-2b-instruct
+# NEXA_GPU_LAYERS=-1 # -1 表示所有层都在 GPU 上
+
+# 使用 GPU 配置文件启动服务
+docker compose --profile gpu up -d
+```
+
+## 配置
+
+### 环境变量
+
+| 变量                     | 默认值                | 说明                                                 |
+| ------------------------ | --------------------- | --------------------------------------------------- |
+| `NEXA_SDK_CPU_VERSION`   | `0.2.57`              | CPU 服务的 Docker 镜像标签                           |
+| `NEXA_SDK_CUDA_VERSION`  | `0.2.57-cuda`         | CUDA 服务的 Docker 镜像标签                          |
+| `NEXA_SDK_PORT_OVERRIDE` | `8080`                | API 访问的主机端口                                   |
+| `NEXA_MODEL`             | `gemma-2-2b-instruct` | 要加载的模型(如 qwen3-4b、llama-3-8b、mistral-7b) |
+| `NEXA_HOST`              | `0.0.0.0:8080`        | 服务器绑定地址                                       |
+| `NEXA_KEEPALIVE`         | `300`                 | 模型保活超时时间(秒)                               |
+| `NEXA_ORIGINS`           | `*`                   | CORS 允许的源                                        |
+| `NEXA_HFTOKEN`           | -                     | 用于私有模型的 HuggingFace 令牌                      |
+| `NEXA_LOG`               | `none`                | 日志级别(none、debug、info、warn、error)          |
+| `NEXA_GPU_LAYERS`        | `-1`                  | 卸载到 GPU 的层数(-1 = 全部,0 = 仅 CPU)          |
+| `NEXA_SHM_SIZE`          | `2g`                  | 共享内存大小                                         |
+| `TZ`                     | `UTC`                 | 容器时区                                             |
+
+### 资源限制
+
+| 变量                          | 默认值 | 说明            |
+| ----------------------------- | ------ | --------------- |
+| `NEXA_SDK_CPU_LIMIT`          | `4.0`  | 最大 CPU 核心数 |
+| `NEXA_SDK_MEMORY_LIMIT`       | `8G`   | 最大内存        |
+| `NEXA_SDK_CPU_RESERVATION`    | `2.0`  | 预留 CPU 核心数 |
+| `NEXA_SDK_MEMORY_RESERVATION` | `4G`   | 预留内存        |
+
+### 配置文件
+
+- `cpu`:使用 CPU 推理运行
+- `gpu`:使用 GPU 加速运行(需要 NVIDIA GPU)
+
+两个服务都必须通过 profile 启用,直接运行 `docker compose up` 不会启动任何服务;请务必加上 `--profile cpu` 或 `--profile gpu`。
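+
+要确认某个 profile 会启用哪个服务,可以让 Compose 直接输出解析后的服务列表(这是 `docker compose` 的标准功能,与本项目无关):
+
+```bash
+# cpu profile 对应 `nexa-sdk`,gpu profile 对应 `nexa-sdk-cuda`
+docker compose --profile cpu config --services
+docker compose --profile gpu config --services
+```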
+
+## 使用示例
+
+### 测试 API
+
+```bash
+# 检查可用模型
+curl http://localhost:8080/v1/models
+
+# 聊天完成
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemma-2-2b-instruct",
+    "messages": [
+      {"role": "user", "content": "你好!"}
+    ]
+  }'
+```
+
+### 使用不同的模型
+
+编辑 `.env` 更改模型:
+
+```bash
+# 资源受限时使用小模型
+NEXA_MODEL=gemma-2-2b-instruct
+# 或
+NEXA_MODEL=qwen3-4b
+
+# 追求更好质量时使用大模型
+NEXA_MODEL=llama-3-8b
+# 或
+NEXA_MODEL=mistral-7b
+```
+
+### GPU 配置
+
+对于 GPU 加速,调整卸载的层数:
+
+```bash
+# 将所有层卸载到 GPU(最快)
+NEXA_GPU_LAYERS=-1
+
+# 卸载 30 层(混合模式)
+NEXA_GPU_LAYERS=30
+
+# 仅 CPU
+NEXA_GPU_LAYERS=0
+```
+
+## 模型管理
+
+模型会在首次运行时自动下载,并缓存在 `nexa_models` 卷中。容器内的默认缓存位置是 `/root/.cache/nexa`。
+
+要使用不同的模型:
+
+1. 在 `.env` 中更新 `NEXA_MODEL`
+2. 重启服务:`docker compose --profile cpu restart`(CUDA 服务使用 `--profile gpu`)
+
+## API 端点
+
+Nexa SDK 提供 OpenAI 兼容的 API 端点:
+
+- `GET /v1/models` - 列出可用模型
+- `POST /v1/chat/completions` - 聊天完成
+- `POST /v1/completions` - 文本完成
+- `POST /v1/embeddings` - 文本嵌入
+- `GET /health` - 健康检查
+- `GET /docs` - API 文档(Swagger UI)
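+
+下面是嵌入端点的一个简单示例(请求格式按 OpenAI 约定假定;请使用支持嵌入的模型):
+
+```bash
+curl http://localhost:8080/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemma-2-2b-instruct",
+    "input": "Docker 让本地推理变得简单。"
+  }'
+```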
+
+## 故障排除
+
+### 内存不足
+
+增加内存限制或使用更小的模型:
+
+```bash
+NEXA_SDK_MEMORY_LIMIT=16G
+NEXA_SDK_MEMORY_RESERVATION=8G
+# 或切换到更小的模型
+NEXA_MODEL=gemma-2-2b-instruct
+```
+
+### GPU 未检测到
+
+确保已安装 NVIDIA Container Toolkit:
+
+```bash
+# 检查 GPU 可用性
+docker run --rm --gpus all nvidia/cuda:12.8.1-base-ubuntu22.04 nvidia-smi
+```
+
+### 模型下载问题
+
+如果访问私有模型,设置 HuggingFace 令牌:
+
+```bash
+NEXA_HFTOKEN=your_hf_token_here
+```
+
+### 性能缓慢
+
+- 使用 GPU 配置文件以获得更好的性能
+- 增加 `NEXA_GPU_LAYERS` 以将更多计算卸载到 GPU
+- 分配更多资源或使用更小的模型
+
+## 高级配置
+
+### 自定义模型路径
+
+如果要使用本地模型文件,将它们挂载为卷:
+
+```yaml
+volumes:
+  - ./models:/models
+  - nexa_models:/root/.cache/nexa
+```
+
+然后在 compose 的 `command` 中通过路径引用模型。
+
+### HTTPS 配置
+
+设置 HTTPS 的环境变量:
+
+```bash
+NEXA_ENABLEHTTPS=true
+```
+
+挂载证书文件:
+
+```yaml
+volumes:
+  - ./certs/cert.pem:/app/cert.pem:ro
+  - ./certs/key.pem:/app/key.pem:ro
+```
+
+## 健康检查
+
+服务包含验证 API 是否响应的健康检查:
+
+```bash
+curl http://localhost:8080/v1/models
+```
+
+## 许可证
+
+Nexa SDK 由 Nexa AI 开发。许可证信息请参考[官方仓库](https://github.com/NexaAI/nexa-sdk)。
+
+## 链接
+
+- [官方仓库](https://github.com/NexaAI/nexa-sdk)
+- [Nexa AI 网站](https://nexa.ai)
+- [文档](https://docs.nexa.ai)
+- [模型中心](https://sdk.nexa.ai)
diff --git a/builds/nexa-sdk/docker-compose.yaml b/builds/nexa-sdk/docker-compose.yaml
new file mode 100644
index 0000000..a9ad3f1
--- /dev/null
+++ b/builds/nexa-sdk/docker-compose.yaml
@@ -0,0 +1,94 @@
+x-defaults: &defaults
+  restart: unless-stopped
+  logging:
+    driver: json-file
+    options:
+      max-size: 100m
+      max-file: "3"
+
+services:
+  nexa-sdk:
+    <<: *defaults
+    build:
+      context: .
+      dockerfile: Dockerfile
+    image: ${GLOBAL_REGISTRY:-}alexsuntop/nexa-sdk:${NEXA_SDK_CPU_VERSION:-0.2.57}
+    ports:
+      - "${NEXA_SDK_PORT_OVERRIDE:-8080}:8080"
+    volumes:
+      - nexa_models:/root/.cache/nexa
+    environment:
+      - TZ=${TZ:-UTC}
+      - NEXA_HOST=${NEXA_HOST:-0.0.0.0:8080}
+      - NEXA_KEEPALIVE=${NEXA_KEEPALIVE:-300}
+      - NEXA_ORIGINS=${NEXA_ORIGINS:-*}
+      - NEXA_HFTOKEN=${NEXA_HFTOKEN:-}
+      - NEXA_LOG=${NEXA_LOG:-none}
+    command: >
+      nexa serve
+      ${NEXA_MODEL:-gemma-2-2b-instruct}
+    ipc: host
+    shm_size: ${NEXA_SHM_SIZE:-2g}
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/v1/models"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+    deploy:
+      resources:
+        limits:
+          cpus: ${NEXA_SDK_CPU_LIMIT:-4.0}
+          memory: ${NEXA_SDK_MEMORY_LIMIT:-8G}
+        reservations:
+          cpus: ${NEXA_SDK_CPU_RESERVATION:-2.0}
+          memory: ${NEXA_SDK_MEMORY_RESERVATION:-4G}
+    profiles:
+      - cpu
+
+  nexa-sdk-cuda:
+    <<: *defaults
+    build:
+      context: .
+      dockerfile: Dockerfile.cuda
+    image: ${GLOBAL_REGISTRY:-}alexsuntop/nexa-sdk:${NEXA_SDK_CUDA_VERSION:-0.2.57-cuda}
+    ports:
+      - "${NEXA_SDK_PORT_OVERRIDE:-8080}:8080"
+    volumes:
+      - nexa_models:/root/.cache/nexa
+    environment:
+      - TZ=${TZ:-UTC}
+      - NEXA_HOST=${NEXA_HOST:-0.0.0.0:8080}
+      - NEXA_KEEPALIVE=${NEXA_KEEPALIVE:-300}
+      - NEXA_ORIGINS=${NEXA_ORIGINS:-*}
+      - NEXA_HFTOKEN=${NEXA_HFTOKEN:-}
+      - NEXA_LOG=${NEXA_LOG:-none}
+    command: >
+      nexa serve
+      ${NEXA_MODEL:-gemma-2-2b-instruct}
+      -ngl ${NEXA_GPU_LAYERS:--1}
+    ipc: host
+    shm_size: ${NEXA_SHM_SIZE:-2g}
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/v1/models"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+    deploy:
+      resources:
+        limits:
+          cpus: ${NEXA_SDK_CPU_LIMIT:-4.0}
+          memory: ${NEXA_SDK_MEMORY_LIMIT:-8G}
+        reservations:
+          cpus: ${NEXA_SDK_CPU_RESERVATION:-2.0}
+          memory: ${NEXA_SDK_MEMORY_RESERVATION:-4G}
+          devices:
+            - driver: nvidia
+              device_ids: ['0']
+              capabilities: [gpu]
+    profiles:
+      - gpu
+
+volumes:
+  nexa_models: