---
# Docker Compose definition for the Nexa SDK server.
#
# Two service variants are declared, each behind its own profile:
#   - nexa-sdk       (profile "cpu")  built from Dockerfile
#   - nexa-sdk-cuda  (profile "cuda") built from Dockerfile.cuda, reserves GPU 0
#
# Both variants publish the same host port (NEXA_SDK_PORT_OVERRIDE, default
# 8080) and mount the same named volume, so enabling both profiles at once
# would make them compete for that host port.

# Settings merged into every service via `<<: *defaults`.
# Note: `<<` is a shallow merge; keys set on a service replace merged ones.
x-defaults: &defaults
  restart: unless-stopped
  logging:
    driver: json-file
    options:
      max-size: 100m
      # Quoted so the value stays a string rather than a YAML integer.
      max-file: "3"

services:
  # CPU variant.
  nexa-sdk:
    <<: *defaults
    build:
      context: .
      dockerfile: Dockerfile
    image: ${GLOBAL_REGISTRY:-}alexsuntop/nexa-sdk:${NEXA_SDK_CPU_VERSION:-0.2.57}
    ports:
      - "${NEXA_SDK_PORT_OVERRIDE:-8080}:8080"
    volumes:
      - nexa_models:/root/.cache/nexa
    environment:
      - TZ=${TZ:-UTC}
      - NEXA_HOST=${NEXA_HOST:-0.0.0.0:8080}
      - NEXA_KEEPALIVE=${NEXA_KEEPALIVE:-300}
      # NOTE(review): the default "*" looks like an allow-all origins
      # setting; confirm this is acceptable for the deployment.
      - NEXA_ORIGINS=${NEXA_ORIGINS:-*}
      - NEXA_HFTOKEN=${NEXA_HFTOKEN:-}
      - NEXA_LOG=${NEXA_LOG:-none}
    # Folded scalar with strip chomping: a single line, no trailing newline.
    command: >-
      nexa server ${NEXA_MODEL:-gemma-2-2b-instruct}
    # NOTE(review): with `ipc: host` the container presumably uses the host's
    # shared memory, in which case `shm_size` has no effect; confirm which of
    # the two settings is actually wanted.
    ipc: host
    shm_size: ${NEXA_SHM_SIZE:-2g}
    healthcheck:
      # Probes the container-internal port 8080 regardless of the host port
      # override. Assumes `curl` exists inside the image; TODO confirm.
      test: ["CMD", "curl", "-f", "http://localhost:8080/v1/models"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        limits:
          cpus: ${NEXA_SDK_CPU_LIMIT:-4.0}
          memory: ${NEXA_SDK_MEMORY_LIMIT:-8G}
        reservations:
          cpus: ${NEXA_SDK_CPU_RESERVATION:-2.0}
          memory: ${NEXA_SDK_MEMORY_RESERVATION:-4G}
    profiles:
      - cpu

  # CUDA variant: same settings as the CPU service plus a GPU reservation
  # and the `-ngl` flag on the server command.
  nexa-sdk-cuda:
    <<: *defaults
    build:
      context: .
      dockerfile: Dockerfile.cuda
    image: ${GLOBAL_REGISTRY:-}alexsuntop/nexa-sdk:${NEXA_SDK_CUDA_VERSION:-0.2.57-cuda}
    ports:
      - "${NEXA_SDK_PORT_OVERRIDE:-8080}:8080"
    volumes:
      - nexa_models:/root/.cache/nexa
    environment:
      - TZ=${TZ:-UTC}
      - NEXA_HOST=${NEXA_HOST:-0.0.0.0:8080}
      - NEXA_KEEPALIVE=${NEXA_KEEPALIVE:-300}
      # NOTE(review): the default "*" looks like an allow-all origins
      # setting; confirm this is acceptable for the deployment.
      - NEXA_ORIGINS=${NEXA_ORIGINS:-*}
      - NEXA_HFTOKEN=${NEXA_HFTOKEN:-}
      - NEXA_LOG=${NEXA_LOG:-none}
    # NEXA_GPU_LAYERS defaults to -1 (presumably "all layers"; confirm
    # against the nexa CLI documentation).
    command: >-
      nexa server ${NEXA_MODEL:-gemma-2-2b-instruct}
      -ngl ${NEXA_GPU_LAYERS:--1}
    # NOTE(review): with `ipc: host` the container presumably uses the host's
    # shared memory, in which case `shm_size` has no effect; confirm which of
    # the two settings is actually wanted.
    ipc: host
    shm_size: ${NEXA_SHM_SIZE:-2g}
    healthcheck:
      # Probes the container-internal port 8080 regardless of the host port
      # override. Assumes `curl` exists inside the image; TODO confirm.
      test: ["CMD", "curl", "-f", "http://localhost:8080/v1/models"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        limits:
          cpus: ${NEXA_SDK_CPU_LIMIT:-4.0}
          memory: ${NEXA_SDK_MEMORY_LIMIT:-8G}
        reservations:
          cpus: ${NEXA_SDK_CPU_RESERVATION:-2.0}
          memory: ${NEXA_SDK_MEMORY_RESERVATION:-4G}
          devices:
            # Reserves the device with id "0" through the nvidia driver.
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
    profiles:
      - cuda

volumes:
  # Named volume mounted by both services at /root/.cache/nexa.
  nexa_models: {}