chore: update versions

2025-12-30 11:25:14 +08:00
parent cdc76a8ee5
commit b8cb5eeea1
10 changed files with 21 additions and 13 deletions
@@ -1,5 +1,5 @@
 # vLLM version
-VLLM_VERSION="v0.12.0"
+VLLM_VERSION="v0.13.0"

 # Model configuration
 VLLM_MODEL="facebook/opt-125m"
@@ -12,7 +12,7 @@ This service deploys vLLM, a high-throughput and memory-efficient inference and

 | Variable Name        | Description                            | Default Value       |
 | -------------------- | -------------------------------------- | ------------------- |
-| VLLM_VERSION         | vLLM image version                     | `v0.12.0`           |
+| VLLM_VERSION         | vLLM image version                     | `v0.13.0`           |
 | VLLM_MODEL           | Model name or path                     | `facebook/opt-125m` |
 | VLLM_MAX_MODEL_LEN   | Maximum context length                 | `2048`              |
 | VLLM_GPU_MEMORY_UTIL | GPU memory utilization (0.0-1.0)       | `0.9`               |
@@ -12,7 +12,7 @@

 | 变量名                 | 说明                             | 默认值              |
 | ---------------------- | -------------------------------- | ------------------- |
-| `VLLM_VERSION`         | vLLM 镜像版本                    | `v0.12.0`           |
+| `VLLM_VERSION`         | vLLM 镜像版本                    | `v0.13.0`           |
 | `VLLM_MODEL`           | 模型名称或路径                   | `facebook/opt-125m` |
 | `VLLM_MAX_MODEL_LEN`   | 最大上下文长度                   | `2048`              |
 | `VLLM_GPU_MEMORY_UTIL` | GPU 内存利用率（0.0-1.0）        | `0.9`               |
@@ -9,7 +9,7 @@ x-defaults: &defaults
 services:
  vllm:
    <<: *defaults
-    image: ${GLOBAL_REGISTRY:-}vllm/vllm-openai:${VLLM_VERSION:-v0.12.0}
+    image: ${GLOBAL_REGISTRY:-}vllm/vllm-openai:${VLLM_VERSION:-v0.13.0}
    ports:
      - "${VLLM_PORT_OVERRIDE:-8000}:8000"
    volumes:
@@ -42,7 +42,7 @@ services:
              capabilities: [gpu]
    shm_size: 4g
    healthcheck:
-      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8000/health"]
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3