# Environment settings for a vLLM deployment (dotenv-style KEY=value file).
# NOTE(review): the consumer of these variables is not visible from this file
# (presumably a Compose file) — confirm there before renaming any key.

# vLLM version
VLLM_VERSION="v0.8.0"

# Model configuration
# Model identifier; the value shown uses the org/name form.
VLLM_MODEL="facebook/opt-125m"
# NOTE(review): presumably the maximum model context length, in tokens — confirm.
VLLM_MAX_MODEL_LEN=2048
# NOTE(review): presumably the fraction (0-1) of GPU memory vLLM may use — confirm.
VLLM_GPU_MEMORY_UTIL=0.9

# Hugging Face token for model downloads
# Intentionally empty here; set a real token only in a local, uncommitted copy.
HF_TOKEN=""

# Port to bind to on the host machine
VLLM_PORT_OVERRIDE=8000