# Files
# compose-anything/builds/nexa-sdk/docker-compose.yaml
# 2025-11-16 00:12:14 +08:00
#
# 95 lines
# 2.5 KiB
# YAML

# Shared service settings. The `x-` prefix marks this as a Compose extension
# field; each service pulls these keys in with `<<: *defaults`.
x-defaults: &defaults
  # Restart policy applied to every service that merges these defaults.
  restart: unless-stopped
  logging:
    driver: json-file
    options:
      # Log rotation limits for the json-file driver. Both values are quoted
      # because logging driver options are passed as strings.
      max-size: "100m"
      max-file: "3"
services:
  # CPU variant of the Nexa SDK server. Only started when the `cpu` profile is
  # enabled.
  # NOTE(review): this service and `nexa-sdk-cuda` publish the same host port
  # and share the `nexa_models` volume, so the two profiles look mutually
  # exclusive -- confirm.
  nexa-sdk:
    <<: *defaults
    build:
      context: .
      dockerfile: Dockerfile
    # GLOBAL_REGISTRY is prepended verbatim, so it must carry its own trailing
    # slash when set.
    image: ${GLOBAL_REGISTRY:-}alexsuntop/nexa-sdk:${NEXA_SDK_CPU_VERSION:-0.2.57}
    ports:
      # Host port is overridable; the container port is fixed at 8080.
      - "${NEXA_SDK_PORT_OVERRIDE:-8080}:8080"
    volumes:
      - nexa_models:/root/.cache/nexa
    environment:
      - TZ=${TZ:-UTC}
      # NOTE(review): the port mapping and healthcheck hard-code 8080, so
      # overriding the port part of NEXA_HOST would presumably break both --
      # confirm.
      - NEXA_HOST=${NEXA_HOST:-0.0.0.0:8080}
      - NEXA_KEEPALIVE=${NEXA_KEEPALIVE:-300}
      # NOTE(review): default `*` presumably allows any origin -- confirm this
      # is acceptable for the deployment.
      - NEXA_ORIGINS=${NEXA_ORIGINS:-*}
      - NEXA_HFTOKEN=${NEXA_HFTOKEN:-}
      - NEXA_LOG=${NEXA_LOG:-none}
    # Folded into a single line; `>-` strips the trailing newline.
    command: >-
      nexa server
      ${NEXA_MODEL:-gemma-2-2b-instruct}
    ipc: host
    # NOTE(review): with `ipc: host` the container likely uses the host's
    # /dev/shm, in which case shm_size has no effect -- confirm.
    shm_size: ${NEXA_SHM_SIZE:-2g}
    healthcheck:
      # NOTE(review): assumes `curl` is installed in the image -- confirm
      # against the Dockerfile.
      test: ["CMD", "curl", "-f", "http://localhost:8080/v1/models"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        limits:
          cpus: ${NEXA_SDK_CPU_LIMIT:-4.0}
          memory: ${NEXA_SDK_MEMORY_LIMIT:-8G}
        reservations:
          cpus: ${NEXA_SDK_CPU_RESERVATION:-2.0}
          memory: ${NEXA_SDK_MEMORY_RESERVATION:-4G}
    profiles:
      - cpu

  # CUDA variant of the Nexa SDK server. Only started when the `cuda` profile
  # is enabled. Built from Dockerfile.cuda and given an NVIDIA GPU reservation.
  nexa-sdk-cuda:
    <<: *defaults
    build:
      context: .
      dockerfile: Dockerfile.cuda
    # GLOBAL_REGISTRY is prepended verbatim, so it must carry its own trailing
    # slash when set.
    image: ${GLOBAL_REGISTRY:-}alexsuntop/nexa-sdk:${NEXA_SDK_CUDA_VERSION:-0.2.57-cuda}
    ports:
      # Host port is overridable; the container port is fixed at 8080.
      - "${NEXA_SDK_PORT_OVERRIDE:-8080}:8080"
    volumes:
      - nexa_models:/root/.cache/nexa
    environment:
      - TZ=${TZ:-UTC}
      # NOTE(review): the port mapping and healthcheck hard-code 8080, so
      # overriding the port part of NEXA_HOST would presumably break both --
      # confirm.
      - NEXA_HOST=${NEXA_HOST:-0.0.0.0:8080}
      - NEXA_KEEPALIVE=${NEXA_KEEPALIVE:-300}
      # NOTE(review): default `*` presumably allows any origin -- confirm this
      # is acceptable for the deployment.
      - NEXA_ORIGINS=${NEXA_ORIGINS:-*}
      - NEXA_HFTOKEN=${NEXA_HFTOKEN:-}
      - NEXA_LOG=${NEXA_LOG:-none}
    # Folded into a single line; `>-` strips the trailing newline.
    # `-ngl` receives NEXA_GPU_LAYERS, defaulting to -1.
    # NOTE(review): -1 presumably means "offload all layers" -- confirm.
    command: >-
      nexa server
      ${NEXA_MODEL:-gemma-2-2b-instruct}
      -ngl ${NEXA_GPU_LAYERS:--1}
    ipc: host
    # NOTE(review): with `ipc: host` the container likely uses the host's
    # /dev/shm, in which case shm_size has no effect -- confirm.
    shm_size: ${NEXA_SHM_SIZE:-2g}
    healthcheck:
      # NOTE(review): assumes `curl` is installed in the image -- confirm
      # against Dockerfile.cuda.
      test: ["CMD", "curl", "-f", "http://localhost:8080/v1/models"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        limits:
          cpus: ${NEXA_SDK_CPU_LIMIT:-4.0}
          memory: ${NEXA_SDK_MEMORY_LIMIT:-8G}
        reservations:
          cpus: ${NEXA_SDK_CPU_RESERVATION:-2.0}
          memory: ${NEXA_SDK_MEMORY_RESERVATION:-4G}
          devices:
            # Reserves the NVIDIA device with id '0' and the `gpu` capability.
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
    profiles:
      - cuda
# Named volume mounted at /root/.cache/nexa by the services in this file.
# Declared with no options, so Compose applies its defaults.
volumes:
  nexa_models: