# Shared service settings. Each service below pulls these in with
# `<<: *defaults`; the merge is shallow, so a service that declares its own
# `logging` or `restart` key replaces the value from here entirely.
x-defaults: &defaults
  restart: unless-stopped
  logging:
    driver: json-file
    options:
      # json-file log rotation: cap each log file at 100m and keep 3 files.
      max-size: "100m"
      max-file: "3"

services:
  # CPU variant of the Nexa SDK server. It is only started when the `cpu`
  # profile is enabled (see `profiles` at the end of this service).
  nexa-sdk:
    <<: *defaults
    build:
      context: .
      dockerfile: Dockerfile
    image: ${GLOBAL_REGISTRY:-}alexsuntop/nexa-sdk:${NEXA_SDK_CPU_VERSION:-0.2.57}
    ports:
      # Host port is overridable; the container side is fixed at 8080.
      - "${NEXA_SDK_PORT_OVERRIDE:-8080}:8080"
    volumes:
      # Named volume mounted at /root/.cache/nexa (presumably the model
      # cache; confirm against the image).
      - nexa_models:/root/.cache/nexa
    environment:
      - TZ=${TZ:-UTC}
      # NOTE(review): the port mapping and healthcheck below both assume
      # port 8080; overriding NEXA_HOST with another port will not update
      # them.
      - NEXA_HOST=${NEXA_HOST:-0.0.0.0:8080}
      - NEXA_KEEPALIVE=${NEXA_KEEPALIVE:-300}
      - NEXA_ORIGINS=${NEXA_ORIGINS:-*}
      - NEXA_HFTOKEN=${NEXA_HFTOKEN:-}
      - NEXA_LOG=${NEXA_LOG:-none}
    # Folded scalar with strip chomping (`>-`): the lines are joined with
    # spaces into one command string without a trailing newline.
    command: >-
      nexa server
      ${NEXA_MODEL:-gemma-2-2b-instruct}
    # The container keeps its own IPC namespace so that `shm_size` takes
    # effect. With `ipc: host` the host's /dev/shm is used instead and the
    # NEXA_SHM_SIZE setting is silently ignored.
    shm_size: ${NEXA_SHM_SIZE:-2g}
    healthcheck:
      # Requests /v1/models on container port 8080; `curl -f` fails on HTTP
      # error responses.
      # NOTE(review): this requires `curl` inside the image; confirm it is
      # installed by the Dockerfile.
      test:
        - CMD
        - curl
        - -f
        - http://localhost:8080/v1/models
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        # CPU values are quoted so they stay strings regardless of what the
        # interpolated value looks like.
        limits:
          cpus: "${NEXA_SDK_CPU_LIMIT:-4.0}"
          memory: ${NEXA_SDK_MEMORY_LIMIT:-8G}
        reservations:
          cpus: "${NEXA_SDK_CPU_RESERVATION:-2.0}"
          memory: ${NEXA_SDK_MEMORY_RESERVATION:-4G}
    profiles:
      - cpu

  # CUDA variant of the Nexa SDK server. It is only started when the `cuda`
  # profile is enabled (see `profiles` at the end of this service). It
  # publishes the same host port as the CPU variant, so enabling both
  # profiles at once needs distinct port settings.
  nexa-sdk-cuda:
    <<: *defaults
    build:
      context: .
      dockerfile: Dockerfile.cuda
    image: ${GLOBAL_REGISTRY:-}alexsuntop/nexa-sdk:${NEXA_SDK_CUDA_VERSION:-0.2.57-cuda}
    ports:
      # Host port is overridable; the container side is fixed at 8080.
      - "${NEXA_SDK_PORT_OVERRIDE:-8080}:8080"
    volumes:
      # Named volume mounted at /root/.cache/nexa (presumably the model
      # cache; confirm against the image).
      - nexa_models:/root/.cache/nexa
    environment:
      - TZ=${TZ:-UTC}
      # NOTE(review): the port mapping and healthcheck below both assume
      # port 8080; overriding NEXA_HOST with another port will not update
      # them.
      - NEXA_HOST=${NEXA_HOST:-0.0.0.0:8080}
      - NEXA_KEEPALIVE=${NEXA_KEEPALIVE:-300}
      - NEXA_ORIGINS=${NEXA_ORIGINS:-*}
      - NEXA_HFTOKEN=${NEXA_HFTOKEN:-}
      - NEXA_LOG=${NEXA_LOG:-none}
    # Folded scalar with strip chomping (`>-`): the lines are joined with
    # spaces into one command string without a trailing newline.
    # `-ngl` defaults to -1 (presumably "offload all layers to the GPU";
    # confirm against the nexa CLI).
    command: >-
      nexa server
      ${NEXA_MODEL:-gemma-2-2b-instruct}
      -ngl ${NEXA_GPU_LAYERS:--1}
    # The container keeps its own IPC namespace so that `shm_size` takes
    # effect. With `ipc: host` the host's /dev/shm is used instead and the
    # NEXA_SHM_SIZE setting is silently ignored.
    shm_size: ${NEXA_SHM_SIZE:-2g}
    healthcheck:
      # Requests /v1/models on container port 8080; `curl -f` fails on HTTP
      # error responses.
      # NOTE(review): this requires `curl` inside the image; confirm it is
      # installed by Dockerfile.cuda.
      test:
        - CMD
        - curl
        - -f
        - http://localhost:8080/v1/models
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        # CPU values are quoted so they stay strings regardless of what the
        # interpolated value looks like.
        limits:
          cpus: "${NEXA_SDK_CPU_LIMIT:-4.0}"
          memory: ${NEXA_SDK_MEMORY_LIMIT:-8G}
        reservations:
          cpus: "${NEXA_SDK_CPU_RESERVATION:-2.0}"
          memory: ${NEXA_SDK_MEMORY_RESERVATION:-4G}
          devices:
            # Reserves the NVIDIA GPU with device ID "0" only; the ID is
            # quoted because device_ids is a list of strings.
            - driver: nvidia
              device_ids:
                - "0"
              capabilities:
                - gpu
    profiles:
      - cuda

# Named volume mounted by both services at /root/.cache/nexa. The empty
# mapping means no volume options are set here (default settings).
volumes:
  nexa_models: {}