feat: add nexa-sdk
20
src/nexa-sdk/.env.example
Normal file
@@ -0,0 +1,20 @@
# NexaSDK Docker Configuration

# Image version (e.g., v0.2.62, v0.2.62-cuda, latest, latest-cuda)
NEXA_SDK_VERSION=v0.2.62

# Host port for NexaSDK REST API
NEXA_SDK_PORT_OVERRIDE=18181

# Nexa API token (required for model access)
# Obtain from https://sdk.nexa.ai -> Deployment -> Create Token
NEXA_TOKEN=

# Timezone
TZ=UTC

# Resource limits
NEXA_SDK_CPU_LIMIT=4.0
NEXA_SDK_MEMORY_LIMIT=8G
NEXA_SDK_CPU_RESERVATION=1.0
NEXA_SDK_MEMORY_RESERVATION=2G
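The file above is plain `KEY=VALUE` lines with `#` comments, which `docker compose` reads directly. As an illustration only (not part of NexaSDK), a minimal Python sketch of parsing that format:

```python
def parse_env(text: str) -> dict[str, str]:
    """Parse KEY=VALUE lines, skipping blank lines and # comments."""
    env = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        key, _, value = line.partition("=")
        env[key.strip()] = value.strip()
    return env

example = """\
# NexaSDK Docker Configuration
NEXA_SDK_VERSION=v0.2.62
NEXA_SDK_PORT_OVERRIDE=18181
NEXA_TOKEN=
"""
print(parse_env(example)["NEXA_SDK_VERSION"])  # v0.2.62
```

Note that values may themselves contain `=` (hence `partition` rather than `split`), and an empty value such as `NEXA_TOKEN=` parses to an empty string.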
105
src/nexa-sdk/README.md
Normal file
@@ -0,0 +1,105 @@
# NexaSDK

[English](./README.md) | [中文](./README.zh.md)

This service deploys NexaSDK Docker to run AI models behind an OpenAI-compatible REST API. It supports LLM, Embedding, Reranking, Computer Vision, and ASR models.

## Features

- **OpenAI-compatible API**: Drop-in replacement for OpenAI API endpoints
- **Multiple Model Types**: LLM, VLM, Embeddings, Reranking, CV, ASR
- **GPU Acceleration**: CUDA support for NVIDIA GPUs
- **NPU Support**: Optimized for Qualcomm NPU on ARM64

## Supported Models

| Modality      | Models                                                  |
| ------------- | ------------------------------------------------------- |
| **LLM**       | `NexaAI/LFM2-1.2B-npu`, `NexaAI/Granite-4.0-h-350M-NPU` |
| **VLM**       | `NexaAI/OmniNeural-4B`                                  |
| **Embedding** | `NexaAI/embeddinggemma-300m-npu`, `NexaAI/EmbedNeural`  |
| **Rerank**    | `NexaAI/jina-v2-rerank-npu`                             |
| **CV**        | `NexaAI/yolov12-npu`, `NexaAI/convnext-tiny-npu-IoT`    |
| **ASR**       | `NexaAI/parakeet-tdt-0.6b-v3-npu`                       |

## Usage

### CPU Mode

```bash
docker compose up -d
```

### GPU Mode (CUDA)

```bash
docker compose --profile gpu up -d nexa-sdk-cuda
```

### Pull a Model

```bash
docker exec -it nexa-sdk nexa pull NexaAI/Granite-4.0-h-350M-NPU
```

### Interactive CLI

```bash
docker exec -it nexa-sdk nexa infer NexaAI/Granite-4.0-h-350M-NPU
```

### API Examples

- Chat completions:

  ```bash
  curl -X POST http://localhost:18181/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
      "model": "NexaAI/Granite-4.0-h-350M-NPU",
      "messages": [{"role": "user", "content": "Hello!"}]
    }'
  ```

- Embeddings:

  ```bash
  curl -X POST http://localhost:18181/v1/embeddings \
    -H "Content-Type: application/json" \
    -d '{
      "model": "NexaAI/EmbedNeural",
      "input": "Hello, world!"
    }'
  ```

- Swagger UI: Visit `http://localhost:18181/docs/ui`
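The same requests can be made from code. A minimal Python sketch using only the standard library; the endpoint and payload shapes follow the curl examples above, and the response-field access in the comment assumes the standard OpenAI chat-completion response shape:

```python
import json
import urllib.request

BASE_URL = "http://localhost:18181"  # matches NEXA_SDK_PORT_OVERRIDE

def build_chat_payload(model: str, user_message: str) -> bytes:
    """Build the JSON body for /v1/chat/completions."""
    return json.dumps({
        "model": model,
        "messages": [{"role": "user", "content": user_message}],
    }).encode("utf-8")

def chat(model: str, user_message: str) -> dict:
    """POST to the chat-completions endpoint (requires the service to be running)."""
    req = urllib.request.Request(
        f"{BASE_URL}/v1/chat/completions",
        data=build_chat_payload(model, user_message),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)

# With the container up:
#   reply = chat("NexaAI/Granite-4.0-h-350M-NPU", "Hello!")
#   print(reply["choices"][0]["message"]["content"])
```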
## Services

- `nexa-sdk`: CPU-based NexaSDK service (default)
- `nexa-sdk-cuda`: GPU-accelerated service with CUDA support (profile: `gpu`)

## Configuration

| Variable                      | Description                      | Default   |
| ----------------------------- | -------------------------------- | --------- |
| `NEXA_SDK_VERSION`            | NexaSDK image version            | `v0.2.62` |
| `NEXA_SDK_PORT_OVERRIDE`      | Host port for REST API           | `18181`   |
| `NEXA_TOKEN`                  | Nexa API token (required)        | -         |
| `TZ`                          | Timezone                         | `UTC`     |
| `NEXA_SDK_CPU_LIMIT`          | CPU limit (CPU service)          | `4.0`     |
| `NEXA_SDK_MEMORY_LIMIT`       | Memory limit (CPU service)       | `8G`      |
| `NEXA_SDK_CPU_RESERVATION`    | CPU reservation (CPU service)    | `1.0`     |
| `NEXA_SDK_MEMORY_RESERVATION` | Memory reservation (CPU service) | `2G`      |

The resource-limit variables fall back to higher defaults (`8.0` / `16G` / `2.0` / `4G`) for the CUDA service.

## Volumes

- `nexa_data`: Volume for storing downloaded models and data

## Getting a Token

1. Create an account at [sdk.nexa.ai](https://sdk.nexa.ai)
2. Go to **Deployment → Create Token**
3. Copy the token into your `.env` file
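Step 3 amounts to setting `NEXA_TOKEN=<your token>` in `.env`. As an illustration (a hypothetical helper, not part of the project), a small Python sketch that updates the line in place or appends it:

```python
def set_env_var(text: str, key: str, value: str) -> str:
    """Return .env text with key=value updated in place, or appended if absent."""
    lines = text.splitlines()
    for i, line in enumerate(lines):
        # Ignore trailing comments, then match lines that assign this key.
        if line.split("#", 1)[0].strip().startswith(f"{key}="):
            lines[i] = f"{key}={value}"
            break
    else:
        lines.append(f"{key}={value}")
    return "\n".join(lines) + "\n"

updated = set_env_var("TZ=UTC\nNEXA_TOKEN=\n", "NEXA_TOKEN", "YOUR_TOKEN")
print(updated)  # NEXA_TOKEN= line now carries the token
```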
## References

- [NexaSDK Documentation](https://docs.nexa.ai/nexa-sdk-docker/overview)
- [Docker Hub](https://hub.docker.com/r/nexa4ai/nexasdk)
- [Supported Models](https://docs.nexa.ai/nexa-sdk-docker/overview#supported-models)
105
src/nexa-sdk/README.zh.md
Normal file
@@ -0,0 +1,105 @@
# NexaSDK

[English](./README.md) | [中文](./README.zh.md)

此服务用于部署 NexaSDK Docker,通过兼容 OpenAI 的 REST API 运行 AI 模型。支持 LLM、Embeddings、Reranking、计算机视觉和 ASR 模型。

## 特性

- **OpenAI 兼容 API**:可直接替换 OpenAI API 端点
- **多种模型类型**:LLM、VLM、Embeddings、Reranking、CV、ASR
- **GPU 加速**:支持 NVIDIA GPU 的 CUDA 加速
- **NPU 支持**:针对 ARM64 上的 Qualcomm NPU 优化

## 支持的模型

| 类型          | 模型                                                    |
| ------------- | ------------------------------------------------------- |
| **LLM**       | `NexaAI/LFM2-1.2B-npu`、`NexaAI/Granite-4.0-h-350M-NPU` |
| **VLM**       | `NexaAI/OmniNeural-4B`                                  |
| **Embedding** | `NexaAI/embeddinggemma-300m-npu`、`NexaAI/EmbedNeural`  |
| **Rerank**    | `NexaAI/jina-v2-rerank-npu`                             |
| **CV**        | `NexaAI/yolov12-npu`、`NexaAI/convnext-tiny-npu-IoT`    |
| **ASR**       | `NexaAI/parakeet-tdt-0.6b-v3-npu`                       |

## 用法

### CPU 模式

```bash
docker compose up -d
```

### GPU 模式(CUDA)

```bash
docker compose --profile gpu up -d nexa-sdk-cuda
```

### 拉取模型

```bash
docker exec -it nexa-sdk nexa pull NexaAI/Granite-4.0-h-350M-NPU
```

### 交互式 CLI

```bash
docker exec -it nexa-sdk nexa infer NexaAI/Granite-4.0-h-350M-NPU
```

### API 示例

- 聊天补全:

  ```bash
  curl -X POST http://localhost:18181/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
      "model": "NexaAI/Granite-4.0-h-350M-NPU",
      "messages": [{"role": "user", "content": "Hello!"}]
    }'
  ```

- Embeddings:

  ```bash
  curl -X POST http://localhost:18181/v1/embeddings \
    -H "Content-Type: application/json" \
    -d '{
      "model": "NexaAI/EmbedNeural",
      "input": "Hello, world!"
    }'
  ```

- Swagger UI:访问 `http://localhost:18181/docs/ui`

## 服务

- `nexa-sdk`:基于 CPU 的 NexaSDK 服务(默认)
- `nexa-sdk-cuda`:支持 CUDA 的 GPU 加速服务(profile:`gpu`)

## 配置

| 变量                          | 描述                  | 默认值    |
| ----------------------------- | --------------------- | --------- |
| `NEXA_SDK_VERSION`            | NexaSDK 镜像版本      | `v0.2.62` |
| `NEXA_SDK_PORT_OVERRIDE`      | REST API 的主机端口   | `18181`   |
| `NEXA_TOKEN`                  | Nexa API 令牌(必需) | -         |
| `TZ`                          | 时区                  | `UTC`     |
| `NEXA_SDK_CPU_LIMIT`          | CPU 限制(CPU 服务)  | `4.0`     |
| `NEXA_SDK_MEMORY_LIMIT`       | 内存限制(CPU 服务)  | `8G`      |
| `NEXA_SDK_CPU_RESERVATION`    | CPU 预留(CPU 服务)  | `1.0`     |
| `NEXA_SDK_MEMORY_RESERVATION` | 内存预留(CPU 服务)  | `2G`      |

CUDA 服务的资源限制变量使用更高的默认值(`8.0`/`16G`/`2.0`/`4G`)。

## 卷

- `nexa_data`:用于存储下载的模型和数据的卷

## 获取令牌

1. 在 [sdk.nexa.ai](https://sdk.nexa.ai) 创建账户
2. 进入 **Deployment → Create Token**
3. 将令牌复制到 `.env` 文件中

## 参考资料

- [NexaSDK 文档](https://docs.nexa.ai/nexa-sdk-docker/overview)
- [Docker Hub](https://hub.docker.com/r/nexa4ai/nexasdk)
- [支持的模型](https://docs.nexa.ai/nexa-sdk-docker/overview#supported-models)
74
src/nexa-sdk/docker-compose.yaml
Normal file
@@ -0,0 +1,74 @@
# NexaSDK Docker Compose Configuration
# OpenAI-compatible API for LLM, Embeddings, Reranking, and more
# Supports both CPU and GPU (CUDA/NPU) acceleration

x-defaults: &defaults
  restart: unless-stopped
  logging:
    driver: json-file
    options:
      max-size: 100m
      max-file: "3"

services:
  nexa-sdk:
    <<: *defaults
    image: ${GLOBAL_REGISTRY:-}nexa4ai/nexasdk:${NEXA_SDK_VERSION:-v0.2.62}
    ports:
      - "${NEXA_SDK_PORT_OVERRIDE:-18181}:18181"
    volumes:
      - nexa_data:/data
    environment:
      - TZ=${TZ:-UTC}
      - NEXA_TOKEN=${NEXA_TOKEN:-}
    command: serve
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:18181/docs/ui"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    deploy:
      resources:
        limits:
          cpus: ${NEXA_SDK_CPU_LIMIT:-4.0}
          memory: ${NEXA_SDK_MEMORY_LIMIT:-8G}
        reservations:
          cpus: ${NEXA_SDK_CPU_RESERVATION:-1.0}
          memory: ${NEXA_SDK_MEMORY_RESERVATION:-2G}

  # GPU-accelerated service with CUDA support
  nexa-sdk-cuda:
    <<: *defaults
    profiles:
      - gpu
    image: ${GLOBAL_REGISTRY:-}nexa4ai/nexasdk:${NEXA_SDK_VERSION:-v0.2.62}-cuda
    ports:
      - "${NEXA_SDK_PORT_OVERRIDE:-18181}:18181"
    volumes:
      - nexa_data:/data
    environment:
      - TZ=${TZ:-UTC}
      - NEXA_TOKEN=${NEXA_TOKEN:-}
    command: serve
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:18181/docs/ui"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        limits:
          cpus: ${NEXA_SDK_CPU_LIMIT:-8.0}
          memory: ${NEXA_SDK_MEMORY_LIMIT:-16G}
        reservations:
          cpus: ${NEXA_SDK_CPU_RESERVATION:-2.0}
          memory: ${NEXA_SDK_MEMORY_RESERVATION:-4G}
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

volumes:
  nexa_data:
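The compose file leans on `${VAR:-default}` interpolation (e.g. `NEXA_SDK_VERSION`), where `:-` falls back to the default when the variable is unset *or* empty. A small Python sketch of how that substitution resolves, for illustration only:

```python
import re

def interpolate(template: str, env: dict[str, str]) -> str:
    """Resolve ${VAR:-default} the way compose does for unset or empty vars."""
    def sub(m: re.Match) -> str:
        name, default = m.group(1), m.group(2)
        value = env.get(name, "")
        return value if value else default  # :- falls back when unset OR empty
    return re.sub(r"\$\{(\w+):-([^}]*)\}", sub, template)

image = interpolate(
    "${GLOBAL_REGISTRY:-}nexa4ai/nexasdk:${NEXA_SDK_VERSION:-v0.2.62}",
    {},
)
print(image)  # nexa4ai/nexasdk:v0.2.62
```

This is why an empty `GLOBAL_REGISTRY` simply disappears from the image reference, while an unset `NEXA_SDK_VERSION` still pins the tag to `v0.2.62`.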