chore: update versions
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
# vLLM version
|
||||
-VLLM_VERSION="v0.12.0"
|
||||
+VLLM_VERSION="v0.13.0"
|
||||
|
||||
# Model configuration
|
||||
VLLM_MODEL="facebook/opt-125m"
|
||||
|
||||
@@ -12,7 +12,7 @@ This service deploys vLLM, a high-throughput and memory-efficient inference and
|
||||
|
||||
| Variable Name | Description | Default Value |
|
||||
| -------------------- | -------------------------------------- | ------------------- |
|
||||
-| VLLM_VERSION | vLLM image version | `v0.12.0` |
|
||||
+| VLLM_VERSION | vLLM image version | `v0.13.0` |
|
||||
| VLLM_MODEL | Model name or path | `facebook/opt-125m` |
|
||||
| VLLM_MAX_MODEL_LEN | Maximum context length | `2048` |
|
||||
| VLLM_GPU_MEMORY_UTIL | GPU memory utilization (0.0-1.0) | `0.9` |
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
|
||||
| 变量名 | 说明 | 默认值 |
|
||||
| ---------------------- | -------------------------------- | ------------------- |
|
||||
-| `VLLM_VERSION` | vLLM 镜像版本 | `v0.12.0` |
|
||||
+| `VLLM_VERSION` | vLLM 镜像版本 | `v0.13.0` |
|
||||
| `VLLM_MODEL` | 模型名称或路径 | `facebook/opt-125m` |
|
||||
| `VLLM_MAX_MODEL_LEN` | 最大上下文长度 | `2048` |
|
||||
| `VLLM_GPU_MEMORY_UTIL` | GPU 内存利用率(0.0-1.0) | `0.9` |
|
||||
|
||||
@@ -9,7 +9,7 @@ x-defaults: &defaults
|
||||
services:
|
||||
vllm:
|
||||
<<: *defaults
|
||||
-image: ${GLOBAL_REGISTRY:-}vllm/vllm-openai:${VLLM_VERSION:-v0.12.0}
|
||||
+image: ${GLOBAL_REGISTRY:-}vllm/vllm-openai:${VLLM_VERSION:-v0.13.0}
|
||||
ports:
|
||||
- "${VLLM_PORT_OVERRIDE:-8000}:8000"
|
||||
volumes:
|
||||
@@ -42,7 +42,7 @@ services:
|
||||
capabilities: [gpu]
|
||||
shm_size: 4g
|
||||
healthcheck:
|
||||
-test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8000/health"]
|
||||
+test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
||||
Reference in New Issue
Block a user