# Global registry for container images (optional; commented out by default)
# GLOBAL_REGISTRY=

# Nexa SDK version
NEXA_SDK_VERSION=latest

# Timezone configuration
TZ=UTC

# Port override for host binding
NEXA_SDK_PORT_OVERRIDE=8080

# Server configuration
# NEXA_HOST is given in host:port form.
NEXA_HOST=0.0.0.0:8080
# NOTE(review): unit is not stated here (presumably seconds) - confirm.
NEXA_KEEPALIVE=300
# NOTE(review): presumably the allowed request origins; "*" looks like a
# wildcard - confirm, and consider restricting it outside local use.
NEXA_ORIGINS=*

# HuggingFace token for accessing private models (optional)
# Empty by default; do not commit a real token to version control.
NEXA_HFTOKEN=

# Logging level, one of: none, debug, info, warn, error
NEXA_LOG=none

# Model to run (can be any Nexa-compatible model)
# Examples: gemma-2-2b-instruct, qwen3-4b, llama-3-8b, mistral-7b
NEXA_MODEL=gemma-2-2b-instruct

# GPU configuration (for gpu profile only)
# Number of GPU layers to offload (-1 for all layers)
NEXA_GPU_LAYERS=-1

# Shared memory size
NEXA_SHM_SIZE=2g

# Resource limits (CPU and memory)
NEXA_SDK_CPU_LIMIT=4.0
NEXA_SDK_MEMORY_LIMIT=8G

# Resource reservations (CPU and memory)
NEXA_SDK_CPU_RESERVATION=2.0
NEXA_SDK_MEMORY_RESERVATION=4G