feat: add nexa-sdk

2025-11-16 00:12:14 +08:00
parent 5f9820e7db
commit 1c42cb2800
9 changed files with 616 additions and 5 deletions
--- a/builds/nexa-sdk/docker-compose.yaml
+++ b/builds/nexa-sdk/docker-compose.yaml
@@ -0,0 +1,94 @@
+x-defaults: &defaults  # shared settings; each service pulls them in with `<<: *defaults`
+  logging:
+    driver: "json-file"
+    options:
+      max-file: "3"
+      max-size: "100m"
+  restart: "unless-stopped"
+
+services:
+  nexa-sdk:  # CPU variant of the Nexa SDK server; only started with the `cpu` profile
+    <<: *defaults
+    build:
+      context: .
+      dockerfile: Dockerfile
+    image: ${GLOBAL_REGISTRY:-}alexsuntop/nexa-sdk:${NEXA_SDK_CPU_VERSION:-0.2.57}
+    ports:
+      - "${NEXA_SDK_PORT_OVERRIDE:-8080}:8080"  # only the host side is overridable
+    volumes:
+      - nexa_models:/root/.cache/nexa  # named volume from the top-level `volumes:` key
+    environment:  # every value falls back to the default after `:-` when unset or empty
+      - TZ=${TZ:-UTC}
+      - NEXA_HOST=${NEXA_HOST:-0.0.0.0:8080}  # presumably the listen address; keep port 8080 to match ports/healthcheck
+      - NEXA_KEEPALIVE=${NEXA_KEEPALIVE:-300}
+      - NEXA_ORIGINS=${NEXA_ORIGINS:-*}
+      - NEXA_HFTOKEN=${NEXA_HFTOKEN:-}
+      - NEXA_LOG=${NEXA_LOG:-none}
+    command: >-
+      nexa server
+      ${NEXA_MODEL:-gemma-2-2b-instruct}
+    # `ipc: host` dropped: it shared the host IPC namespace and made shm_size a no-op.
+    shm_size: ${NEXA_SHM_SIZE:-2g}
+    healthcheck:  # curls the /v1/models endpoint on port 8080 from inside the container
+      test: ["CMD", "curl", "-f", "http://localhost:8080/v1/models"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+    deploy:
+      resources:
+        limits:
+          cpus: ${NEXA_SDK_CPU_LIMIT:-4.0}
+          memory: ${NEXA_SDK_MEMORY_LIMIT:-8G}
+        reservations:
+          cpus: ${NEXA_SDK_CPU_RESERVATION:-2.0}
+          memory: ${NEXA_SDK_MEMORY_RESERVATION:-4G}
+    profiles:
+      - cpu
+
+  nexa-sdk-cuda:  # CUDA variant of the Nexa SDK server; only started with the `cuda` profile
+    <<: *defaults
+    build:
+      context: .
+      dockerfile: Dockerfile.cuda
+    image: ${GLOBAL_REGISTRY:-}alexsuntop/nexa-sdk:${NEXA_SDK_CUDA_VERSION:-0.2.57-cuda}
+    ports:
+      - "${NEXA_SDK_PORT_OVERRIDE:-8080}:8080"  # same default host port as nexa-sdk; the two sit in different profiles
+    volumes:
+      - nexa_models:/root/.cache/nexa  # named volume from the top-level `volumes:` key
+    environment:  # every value falls back to the default after `:-` when unset or empty
+      - TZ=${TZ:-UTC}
+      - NEXA_HOST=${NEXA_HOST:-0.0.0.0:8080}  # presumably the listen address; keep port 8080 to match ports/healthcheck
+      - NEXA_KEEPALIVE=${NEXA_KEEPALIVE:-300}
+      - NEXA_ORIGINS=${NEXA_ORIGINS:-*}
+      - NEXA_HFTOKEN=${NEXA_HFTOKEN:-}
+      - NEXA_LOG=${NEXA_LOG:-none}
+    command: >-  # `-ngl` receives NEXA_GPU_LAYERS, defaulting to -1
+      nexa server
+      ${NEXA_MODEL:-gemma-2-2b-instruct}
+      -ngl ${NEXA_GPU_LAYERS:--1}
+    # `ipc: host` dropped: it shared the host IPC namespace and made shm_size a no-op.
+    shm_size: ${NEXA_SHM_SIZE:-2g}
+    healthcheck:  # curls the /v1/models endpoint on port 8080 from inside the container
+      test: ["CMD", "curl", "-f", "http://localhost:8080/v1/models"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+    deploy:
+      resources:
+        limits:
+          cpus: ${NEXA_SDK_CPU_LIMIT:-4.0}
+          memory: ${NEXA_SDK_MEMORY_LIMIT:-8G}
+        reservations:
+          cpus: ${NEXA_SDK_CPU_RESERVATION:-2.0}
+          memory: ${NEXA_SDK_MEMORY_RESERVATION:-4G}
+          devices:
+            - driver: nvidia
+              device_ids: ['0']  # reserves the NVIDIA device with id 0 only
+              capabilities: [gpu]
+    profiles:
+      - cuda
+
+volumes:
+  nexa_models: {}  # default-configured named volume, mounted at /root/.cache/nexa by both services