style: lint code
This commit is contained in:
@@ -30,14 +30,14 @@ Please modify the `.env` file as needed for your use case.
|
||||
This service requires an NVIDIA GPU to run properly. Uncomment the GPU configuration in `docker-compose.yaml`:
|
||||
|
||||
```yaml
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
runtime: nvidia
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
runtime: nvidia
|
||||
```
|
||||
|
||||
## Usage
|
||||
@@ -117,7 +117,7 @@ VLLM_MAX_MODEL_LEN=4096 # Support up to 4K tokens
|
||||
For larger models, increase shared memory:
|
||||
|
||||
```yaml
|
||||
shm_size: 8g # Increase to 8GB
|
||||
shm_size: 8g # Increase to 8GB
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
@@ -30,14 +30,14 @@
|
||||
此服务需要 NVIDIA GPU 才能正常运行。在 `docker-compose.yaml` 中取消注释 GPU 配置:
|
||||
|
||||
```yaml
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
runtime: nvidia
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
runtime: nvidia
|
||||
```
|
||||
|
||||
## 使用方法
|
||||
@@ -117,7 +117,7 @@ VLLM_MAX_MODEL_LEN=4096 # 支持最多 4K tokens
|
||||
对于更大的模型,增加共享内存:
|
||||
|
||||
```yaml
|
||||
shm_size: 8g # 增加到 8GB
|
||||
shm_size: 8g # 增加到 8GB
|
||||
```
|
||||
|
||||
## 注意事项
|
||||
|
||||
@@ -4,14 +4,14 @@ x-defaults: &defaults
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: 100m
|
||||
max-file: "3"
|
||||
max-file: '3'
|
||||
|
||||
services:
|
||||
vllm:
|
||||
<<: *defaults
|
||||
image: ${GLOBAL_REGISTRY:-}vllm/vllm-openai:${VLLM_VERSION:-v0.13.0}
|
||||
ports:
|
||||
- "${VLLM_PORT_OVERRIDE:-8000}:8000"
|
||||
- '${VLLM_PORT_OVERRIDE:-8000}:8000'
|
||||
volumes:
|
||||
- vllm_models:/root/.cache/huggingface
|
||||
environment:
|
||||
@@ -21,13 +21,13 @@ services:
|
||||
- --model
|
||||
- ${VLLM_MODEL:-facebook/opt-125m}
|
||||
- --host
|
||||
- "0.0.0.0"
|
||||
- 0.0.0.0
|
||||
- --port
|
||||
- "8000"
|
||||
- '8000'
|
||||
- --max-model-len
|
||||
- "${VLLM_MAX_MODEL_LEN:-2048}"
|
||||
- '${VLLM_MAX_MODEL_LEN:-2048}'
|
||||
- --gpu-memory-utilization
|
||||
- "${VLLM_GPU_MEMORY_UTIL:-0.9}"
|
||||
- '${VLLM_GPU_MEMORY_UTIL:-0.9}'
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
@@ -42,7 +42,7 @@ services:
|
||||
capabilities: [gpu]
|
||||
shm_size: 4g
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
||||
test: [CMD, curl, -f, 'http://localhost:8000/health']
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
||||
Reference in New Issue
Block a user