feat: add portkey-gateway/libreoffice/jodconverter/bolt-diy
This commit is contained in:
@@ -1,25 +1,72 @@
|
||||
# Firecrawl version
|
||||
FIRECRAWL_VERSION="v1.16.0"
|
||||
FIRECRAWL_VERSION="latest"
|
||||
|
||||
# Redis version
|
||||
REDIS_VERSION="7.4.2-alpine"
|
||||
REDIS_VERSION="alpine"
|
||||
|
||||
# Playwright version
|
||||
PLAYWRIGHT_VERSION="latest"
|
||||
|
||||
# PostgreSQL version (official Firecrawl nuq-postgres image)
|
||||
NUQ_POSTGRES_VERSION="latest"
|
||||
|
||||
# PostgreSQL configuration
|
||||
POSTGRES_USER="postgres"
|
||||
POSTGRES_PASSWORD="postgres"
|
||||
POSTGRES_DB="postgres"
|
||||
POSTGRES_PORT_OVERRIDE=5432
|
||||
|
||||
# Redis configuration
|
||||
REDIS_PASSWORD="firecrawl"
|
||||
# REDIS_URL is auto-configured by docker-compose
|
||||
# REDIS_URL="redis://redis:6379"
|
||||
|
||||
# Firecrawl configuration
|
||||
NUM_WORKERS_PER_QUEUE=8
|
||||
SCRAPE_RATE_LIMIT_TOKEN_BUCKET_SIZE=20
|
||||
SCRAPE_RATE_LIMIT_TOKEN_BUCKET_REFILL=1
|
||||
# Firecrawl API configuration
|
||||
INTERNAL_PORT=3002
|
||||
FIRECRAWL_PORT_OVERRIDE=3002
|
||||
EXTRACT_WORKER_PORT=3004
|
||||
WORKER_PORT=3005
|
||||
|
||||
# Playwright configuration (optional)
|
||||
# Database authentication
|
||||
USE_DB_AUTHENTICATION="false"
|
||||
|
||||
# AI features (Optional)
|
||||
# OPENAI_API_KEY=""
|
||||
# OPENAI_BASE_URL=""
|
||||
# MODEL_NAME=""
|
||||
# MODEL_EMBEDDING_NAME=""
|
||||
# OLLAMA_BASE_URL=""
|
||||
|
||||
# Admin and security
|
||||
BULL_AUTH_KEY="@"
|
||||
# TEST_API_KEY=""
|
||||
|
||||
# Monitoring (Optional)
|
||||
# SLACK_WEBHOOK_URL=""
|
||||
# POSTHOG_API_KEY=""
|
||||
# POSTHOG_HOST=""
|
||||
|
||||
# Supabase authentication (Optional)
|
||||
# SUPABASE_ANON_TOKEN=""
|
||||
# SUPABASE_URL=""
|
||||
# SUPABASE_SERVICE_TOKEN=""
|
||||
|
||||
# Webhook configuration (Optional)
|
||||
# SELF_HOSTED_WEBHOOK_URL=""
|
||||
|
||||
# Search API keys (Optional)
|
||||
# SERPER_API_KEY=""
|
||||
# SEARCHAPI_API_KEY=""
|
||||
|
||||
# Logging
|
||||
LOGGING_LEVEL="info"
|
||||
|
||||
# Playwright proxy configuration (Optional)
|
||||
PROXY_SERVER=""
|
||||
PROXY_USERNAME=""
|
||||
PROXY_PASSWORD=""
|
||||
BLOCK_MEDIA="true"
|
||||
|
||||
# Port overrides
|
||||
FIRECRAWL_PORT_OVERRIDE=3002
|
||||
# SearXNG configuration (Optional)
|
||||
# SEARXNG_ENDPOINT=""
|
||||
# SEARXNG_ENGINES=""
|
||||
# SEARXNG_CATEGORIES=""
|
||||
|
||||
@@ -6,39 +6,66 @@ This service deploys Firecrawl, a web scraping and crawling API powered by Playw
|
||||
|
||||
## Services
|
||||
|
||||
- `firecrawl`: The main Firecrawl API server.
|
||||
- `redis`: Redis for job queue and caching.
|
||||
- `playwright`: Playwright service for browser automation.
|
||||
- `api`: The main Firecrawl API server with integrated workers
|
||||
- `redis`: Redis for job queue and caching
|
||||
- `playwright-service`: Playwright service for browser automation
|
||||
- `nuq-postgres`: PostgreSQL database for queue management and data storage
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable Name | Description | Default Value |
|
||||
| ------------------------------------- | ----------------------------------- | -------------- |
|
||||
| FIRECRAWL_VERSION | Firecrawl image version | `v1.16.0` |
|
||||
| REDIS_VERSION | Redis image version | `7.4.2-alpine` |
|
||||
| PLAYWRIGHT_VERSION | Playwright service version | `latest` |
|
||||
| REDIS_PASSWORD | Redis password | `firecrawl` |
|
||||
| NUM_WORKERS_PER_QUEUE | Number of workers per queue | `8` |
|
||||
| SCRAPE_RATE_LIMIT_TOKEN_BUCKET_SIZE | Token bucket size for rate limiting | `20` |
|
||||
| SCRAPE_RATE_LIMIT_TOKEN_BUCKET_REFILL | Token refill rate per second | `1` |
|
||||
| PROXY_SERVER | Proxy server URL (optional) | `""` |
|
||||
| PROXY_USERNAME | Proxy username (optional) | `""` |
|
||||
| PROXY_PASSWORD | Proxy password (optional) | `""` |
|
||||
| BLOCK_MEDIA | Block media content | `true` |
|
||||
| FIRECRAWL_PORT_OVERRIDE | Firecrawl API port | `3002` |
|
||||
| Variable Name | Description | Default Value |
|
||||
| ----------------------- | ------------------------------------------ | ------------- |
|
||||
| FIRECRAWL_VERSION | Firecrawl image version | `latest` |
|
||||
| REDIS_VERSION | Redis image version | `alpine` |
|
||||
| PLAYWRIGHT_VERSION | Playwright service version | `latest` |
|
||||
| NUQ_POSTGRES_VERSION | NUQ PostgreSQL image version | `latest` |
|
||||
| POSTGRES_USER | PostgreSQL username | `postgres` |
|
||||
| POSTGRES_PASSWORD | PostgreSQL password | `postgres` |
|
||||
| POSTGRES_DB | PostgreSQL database name | `postgres` |
|
||||
| POSTGRES_PORT_OVERRIDE | PostgreSQL port mapping | `5432` |
|
||||
| INTERNAL_PORT | Internal API port | `3002` |
|
||||
| FIRECRAWL_PORT_OVERRIDE | External API port mapping | `3002` |
|
||||
| EXTRACT_WORKER_PORT | Extract worker port | `3004` |
|
||||
| WORKER_PORT | Worker port | `3005` |
|
||||
| USE_DB_AUTHENTICATION | Enable database authentication | `false` |
|
||||
| OPENAI_API_KEY | OpenAI API key for AI features (optional) | `""` |
|
||||
| OPENAI_BASE_URL | OpenAI API base URL (optional) | `""` |
|
||||
| MODEL_NAME | AI model name (optional) | `""` |
|
||||
| MODEL_EMBEDDING_NAME | Embedding model name (optional) | `""` |
|
||||
| OLLAMA_BASE_URL | Ollama base URL (optional) | `""` |
|
||||
| BULL_AUTH_KEY | Bull queue admin panel authentication key | `@` |
|
||||
| TEST_API_KEY | Test API key (optional) | `""` |
|
||||
| SLACK_WEBHOOK_URL | Slack webhook for notifications (optional) | `""` |
|
||||
| POSTHOG_API_KEY | PostHog API key (optional) | `""` |
|
||||
| POSTHOG_HOST | PostHog host (optional) | `""` |
|
||||
| SUPABASE_ANON_TOKEN | Supabase anonymous token (optional) | `""` |
|
||||
| SUPABASE_URL | Supabase URL (optional) | `""` |
|
||||
| SUPABASE_SERVICE_TOKEN | Supabase service token (optional) | `""` |
|
||||
| SELF_HOSTED_WEBHOOK_URL | Self-hosted webhook URL (optional) | `""` |
|
||||
| SERPER_API_KEY | Serper API key for search (optional) | `""` |
|
||||
| SEARCHAPI_API_KEY | SearchAPI key (optional) | `""` |
|
||||
| LOGGING_LEVEL | Logging level | `info` |
|
||||
| PROXY_SERVER | Proxy server URL (optional) | `""` |
|
||||
| PROXY_USERNAME | Proxy username (optional) | `""` |
|
||||
| PROXY_PASSWORD | Proxy password (optional) | `""` |
|
||||
| BLOCK_MEDIA | Block media content | `true` |
|
||||
| SEARXNG_ENDPOINT | SearXNG endpoint (optional) | `""` |
|
||||
| SEARXNG_ENGINES | SearXNG engines (optional) | `""` |
|
||||
| SEARXNG_CATEGORIES | SearXNG categories (optional) | `""` |
|
||||
|
||||
Please modify the `.env` file as needed for your use case.
|
||||
|
||||
## Volumes
|
||||
|
||||
- `redis_data`: Redis data storage for job queues and caching.
|
||||
- `redis_data`: Redis data storage for job queues and caching
|
||||
- `postgres_data`: PostgreSQL data storage for queue management and metadata
|
||||
|
||||
## Usage
|
||||
|
||||
### Start the Services
|
||||
|
||||
```bash
|
||||
docker-compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Access the API
|
||||
@@ -49,12 +76,22 @@ The Firecrawl API will be available at:
|
||||
http://localhost:3002
|
||||
```
|
||||
|
||||
### Admin Panel
|
||||
|
||||
Access the Bull queue admin panel at:
|
||||
|
||||
```text
|
||||
http://localhost:3002/admin/@/queues
|
||||
```
|
||||
|
||||
If you changed `BULL_AUTH_KEY`, replace `@` in the URL above with your value.
|
||||
|
||||
### Example API Calls
|
||||
|
||||
**Scrape a Single Page:**
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3002/v0/scrape \
|
||||
curl -X POST http://localhost:3002/v1/scrape \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com"
|
||||
@@ -64,12 +101,27 @@ curl -X POST http://localhost:3002/v0/scrape \
|
||||
**Crawl a Website:**
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3002/v0/crawl \
|
||||
curl -X POST http://localhost:3002/v1/crawl \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com",
|
||||
"crawlerOptions": {
|
||||
"limit": 100
|
||||
"limit": 100
|
||||
}'
|
||||
```
|
||||
|
||||
**Extract Structured Data:**
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3002/v1/extract \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"urls": ["https://example.com"],
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {"type": "string"},
|
||||
"description": {"type": "string"}
|
||||
}
|
||||
}
|
||||
}'
|
||||
```
|
||||
@@ -80,16 +132,31 @@ curl -X POST http://localhost:3002/v0/crawl \
|
||||
- **Web Crawling**: Recursively crawl entire websites
|
||||
- **JavaScript Rendering**: Full support for dynamic JavaScript-rendered pages
|
||||
- **Markdown Output**: Clean markdown conversion of web content
|
||||
- **Rate Limiting**: Built-in rate limiting to prevent abuse
|
||||
- **Structured Data Extraction**: Extract data using JSON schemas
|
||||
- **Queue Management**: Built-in job queue with Bull
|
||||
- **Rate Limiting**: Configurable rate limiting
|
||||
- **Proxy Support**: Optional proxy configuration for all requests
|
||||
- **AI-Powered Features**: Optional OpenAI integration for advanced extraction
|
||||
|
||||
## Architecture
|
||||
|
||||
This deployment uses the official Firecrawl architecture:
|
||||
|
||||
- **API Server**: Handles HTTP requests and manages the job queue
|
||||
- **Workers**: Built into the main container, processes scraping jobs
|
||||
- **PostgreSQL**: Stores queue metadata and job information
|
||||
- **Redis**: Handles job queue and caching
|
||||
- **Playwright Service**: Provides browser automation capabilities
|
||||
|
||||
## Notes
|
||||
|
||||
- The service uses Playwright for browser automation, supporting complex web pages
|
||||
- Redis is used for job queuing and caching
|
||||
- Rate limiting is configurable via environment variables
|
||||
- For production use, consider scaling the number of workers
|
||||
- BLOCK_MEDIA can reduce memory usage by blocking images/videos
|
||||
- The service uses the official `ghcr.io/firecrawl/firecrawl` image
|
||||
- PostgreSQL uses the official `ghcr.io/firecrawl/nuq-postgres` image for queue management (NUQ - Not Quite Bull)
|
||||
- Redis is used for job queuing and runs without a password by default (it is reachable only on the private network)
|
||||
- For production use, enable `USE_DB_AUTHENTICATION` and configure Supabase
|
||||
- Change `BULL_AUTH_KEY` from its default value (`@`) in production deployments, since it protects the Bull queue admin panel
|
||||
- AI features require an `OPENAI_API_KEY` or `OLLAMA_BASE_URL`
|
||||
- All workers run inside the single API container using harness mode
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@@ -6,39 +6,66 @@
|
||||
|
||||
## 服务
|
||||
|
||||
- `firecrawl`: Firecrawl API 主服务器。
|
||||
- `redis`: 用于作业队列和缓存的 Redis。
|
||||
- `playwright`: 用于浏览器自动化的 Playwright 服务。
|
||||
- `api`: Firecrawl API 主服务器,集成了工作进程
|
||||
- `redis`: 用于作业队列和缓存的 Redis
|
||||
- `playwright-service`: 用于浏览器自动化的 Playwright 服务
|
||||
- `nuq-postgres`: 用于队列管理和数据存储的 PostgreSQL 数据库
|
||||
|
||||
## 环境变量
|
||||
|
||||
| 变量名 | 说明 | 默认值 |
|
||||
| ------------------------------------- | ---------------------- | -------------- |
|
||||
| FIRECRAWL_VERSION | Firecrawl 镜像版本 | `v1.16.0` |
|
||||
| REDIS_VERSION | Redis 镜像版本 | `7.4.2-alpine` |
|
||||
| PLAYWRIGHT_VERSION | Playwright 服务版本 | `latest` |
|
||||
| REDIS_PASSWORD | Redis 密码 | `firecrawl` |
|
||||
| NUM_WORKERS_PER_QUEUE | 每个队列的工作进程数 | `8` |
|
||||
| SCRAPE_RATE_LIMIT_TOKEN_BUCKET_SIZE | 速率限制的令牌桶大小 | `20` |
|
||||
| SCRAPE_RATE_LIMIT_TOKEN_BUCKET_REFILL | 每秒令牌填充速率 | `1` |
|
||||
| PROXY_SERVER | 代理服务器 URL(可选) | `""` |
|
||||
| PROXY_USERNAME | 代理用户名(可选) | `""` |
|
||||
| PROXY_PASSWORD | 代理密码(可选) | `""` |
|
||||
| BLOCK_MEDIA | 阻止媒体内容 | `true` |
|
||||
| FIRECRAWL_PORT_OVERRIDE | Firecrawl API 端口 | `3002` |
|
||||
| 变量名 | 说明 | 默认值 |
|
||||
| ----------------------- | ----------------------------- | ---------- |
|
||||
| FIRECRAWL_VERSION | Firecrawl 镜像版本 | `latest` |
|
||||
| REDIS_VERSION | Redis 镜像版本 | `alpine` |
|
||||
| PLAYWRIGHT_VERSION | Playwright 服务版本 | `latest` |
|
||||
| NUQ_POSTGRES_VERSION | NUQ PostgreSQL 镜像版本 | `latest` |
|
||||
| POSTGRES_USER | PostgreSQL 用户名 | `postgres` |
|
||||
| POSTGRES_PASSWORD | PostgreSQL 密码 | `postgres` |
|
||||
| POSTGRES_DB | PostgreSQL 数据库名称 | `postgres` |
|
||||
| POSTGRES_PORT_OVERRIDE | PostgreSQL 端口映射 | `5432` |
|
||||
| INTERNAL_PORT | 内部 API 端口 | `3002` |
|
||||
| FIRECRAWL_PORT_OVERRIDE | 外部 API 端口映射 | `3002` |
|
||||
| EXTRACT_WORKER_PORT | 提取工作进程端口 | `3004` |
|
||||
| WORKER_PORT | 工作进程端口 | `3005` |
|
||||
| USE_DB_AUTHENTICATION | 启用数据库身份验证 | `false` |
|
||||
| OPENAI_API_KEY | OpenAI API 密钥(可选) | `""` |
|
||||
| OPENAI_BASE_URL | OpenAI API 基础 URL(可选) | `""` |
|
||||
| MODEL_NAME | AI 模型名称(可选) | `""` |
|
||||
| MODEL_EMBEDDING_NAME | 嵌入模型名称(可选) | `""` |
|
||||
| OLLAMA_BASE_URL | Ollama 基础 URL(可选) | `""` |
|
||||
| BULL_AUTH_KEY | Bull 队列管理面板身份验证密钥 | `@` |
|
||||
| TEST_API_KEY | 测试 API 密钥(可选) | `""` |
|
||||
| SLACK_WEBHOOK_URL | Slack Webhook 通知(可选) | `""` |
|
||||
| POSTHOG_API_KEY | PostHog API 密钥(可选) | `""` |
|
||||
| POSTHOG_HOST | PostHog 主机(可选) | `""` |
|
||||
| SUPABASE_ANON_TOKEN | Supabase 匿名令牌(可选) | `""` |
|
||||
| SUPABASE_URL | Supabase URL(可选) | `""` |
|
||||
| SUPABASE_SERVICE_TOKEN | Supabase 服务令牌(可选) | `""` |
|
||||
| SELF_HOSTED_WEBHOOK_URL | 自托管 Webhook URL(可选) | `""` |
|
||||
| SERPER_API_KEY | Serper 搜索 API 密钥(可选) | `""` |
|
||||
| SEARCHAPI_API_KEY | SearchAPI 密钥(可选) | `""` |
|
||||
| LOGGING_LEVEL | 日志级别 | `info` |
|
||||
| PROXY_SERVER | 代理服务器 URL(可选) | `""` |
|
||||
| PROXY_USERNAME | 代理用户名(可选) | `""` |
|
||||
| PROXY_PASSWORD | 代理密码(可选) | `""` |
|
||||
| BLOCK_MEDIA | 阻止媒体内容 | `true` |
|
||||
| SEARXNG_ENDPOINT | SearXNG 端点(可选) | `""` |
|
||||
| SEARXNG_ENGINES | SearXNG 引擎(可选) | `""` |
|
||||
| SEARXNG_CATEGORIES | SearXNG 分类(可选) | `""` |
|
||||
|
||||
请根据实际需求修改 `.env` 文件。
|
||||
|
||||
## 卷
|
||||
|
||||
- `redis_data`: 用于作业队列和缓存的 Redis 数据存储。
|
||||
- `redis_data`: 用于作业队列和缓存的 Redis 数据存储
|
||||
- `postgres_data`: 用于队列管理和元数据的 PostgreSQL 数据存储
|
||||
|
||||
## 使用方法
|
||||
|
||||
### 启动服务
|
||||
|
||||
```bash
|
||||
docker-compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### 访问 API
|
||||
@@ -49,12 +76,22 @@ Firecrawl API 可在以下地址访问:
|
||||
http://localhost:3002
|
||||
```
|
||||
|
||||
### 管理面板
|
||||
|
||||
访问 Bull 队列管理面板:
|
||||
|
||||
```text
|
||||
http://localhost:3002/admin/@/queues
|
||||
```
|
||||
|
||||
如果修改了 `BULL_AUTH_KEY`,请将 `@` 替换为您的值。
|
||||
|
||||
### API 调用示例
|
||||
|
||||
**抓取单个页面:**
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3002/v0/scrape \
|
||||
curl -X POST http://localhost:3002/v1/scrape \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com"
|
||||
@@ -64,12 +101,27 @@ curl -X POST http://localhost:3002/v0/scrape \
|
||||
**爬取网站:**
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3002/v0/crawl \
|
||||
curl -X POST http://localhost:3002/v1/crawl \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com",
|
||||
"crawlerOptions": {
|
||||
"limit": 100
|
||||
"limit": 100
|
||||
}'
|
||||
```
|
||||
|
||||
**提取结构化数据:**
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3002/v1/extract \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"urls": ["https://example.com"],
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {"type": "string"},
|
||||
"description": {"type": "string"}
|
||||
}
|
||||
}
|
||||
}'
|
||||
```
|
||||
@@ -80,16 +132,31 @@ curl -X POST http://localhost:3002/v0/crawl \
|
||||
- **网站爬取**: 递归爬取整个网站
|
||||
- **JavaScript 渲染**: 完全支持动态 JavaScript 渲染的页面
|
||||
- **Markdown 输出**: 将网页内容清晰地转换为 markdown
|
||||
- **速率限制**: 内置速率限制以防止滥用
|
||||
- **结构化数据提取**: 使用 JSON Schema 提取数据
|
||||
- **队列管理**: 内置 Bull 作业队列
|
||||
- **速率限制**: 可配置的速率限制
|
||||
- **代理支持**: 所有请求的可选代理配置
|
||||
- **AI 驱动功能**: 可选的 OpenAI 集成以进行高级提取
|
||||
|
||||
## 架构
|
||||
|
||||
此部署使用官方 Firecrawl 架构:
|
||||
|
||||
- **API 服务器**: 处理 HTTP 请求并管理作业队列
|
||||
- **工作进程**: 内置于主容器中,处理抓取作业
|
||||
- **PostgreSQL**: 存储队列元数据和作业信息
|
||||
- **Redis**: 处理作业队列和缓存
|
||||
- **Playwright 服务**: 提供浏览器自动化功能
|
||||
|
||||
## 注意事项
|
||||
|
||||
- 该服务使用 Playwright 进行浏览器自动化,支持复杂的网页
|
||||
- Redis 用于作业队列和缓存
|
||||
- 速率限制可通过环境变量配置
|
||||
- 对于生产环境,考虑扩展工作进程数量
|
||||
- BLOCK_MEDIA 可以通过阻止图像/视频来减少内存使用
|
||||
- 该服务使用官方的 `ghcr.io/firecrawl/firecrawl` 镜像
|
||||
- PostgreSQL 使用官方的 `ghcr.io/firecrawl/nuq-postgres` 镜像进行队列管理(NUQ - Not Quite Bull)
|
||||
- Redis 默认不使用密码(运行在私有网络上)
|
||||
- 对于生产环境,启用 `USE_DB_AUTHENTICATION` 并配置 Supabase
|
||||
- 在生产部署中应更改 `BULL_AUTH_KEY`
|
||||
- AI 功能需要 `OPENAI_API_KEY` 或 `OLLAMA_BASE_URL`
|
||||
- 所有工作进程都在单个 API 容器中使用 harness 模式运行
|
||||
|
||||
## 许可证
|
||||
|
||||
|
||||
@@ -6,68 +6,41 @@ x-default: &default
|
||||
max-size: 100m
|
||||
max-file: "3"
|
||||
|
||||
x-common-env: &common-env
|
||||
REDIS_URL: ${REDIS_URL:-redis://redis:6379}
|
||||
REDIS_RATE_LIMIT_URL: ${REDIS_URL:-redis://redis:6379}
|
||||
PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}
|
||||
NUQ_DATABASE_URL: ${NUQ_DATABASE_URL:-postgres://postgres:postgres@nuq-postgres:5432/postgres}
|
||||
USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION:-false}
|
||||
OPENAI_API_KEY: ${OPENAI_API_KEY:-}
|
||||
OPENAI_BASE_URL: ${OPENAI_BASE_URL:-}
|
||||
MODEL_NAME: ${MODEL_NAME:-}
|
||||
MODEL_EMBEDDING_NAME: ${MODEL_EMBEDDING_NAME:-}
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
|
||||
SLACK_WEBHOOK_URL: ${SLACK_WEBHOOK_URL:-}
|
||||
BULL_AUTH_KEY: ${BULL_AUTH_KEY:-@}
|
||||
TEST_API_KEY: ${TEST_API_KEY:-}
|
||||
POSTHOG_API_KEY: ${POSTHOG_API_KEY:-}
|
||||
POSTHOG_HOST: ${POSTHOG_HOST:-}
|
||||
SUPABASE_ANON_TOKEN: ${SUPABASE_ANON_TOKEN:-}
|
||||
SUPABASE_URL: ${SUPABASE_URL:-}
|
||||
SUPABASE_SERVICE_TOKEN: ${SUPABASE_SERVICE_TOKEN:-}
|
||||
SELF_HOSTED_WEBHOOK_URL: ${SELF_HOSTED_WEBHOOK_URL:-}
|
||||
SERPER_API_KEY: ${SERPER_API_KEY:-}
|
||||
SEARCHAPI_API_KEY: ${SEARCHAPI_API_KEY:-}
|
||||
LOGGING_LEVEL: ${LOGGING_LEVEL:-info}
|
||||
PROXY_SERVER: ${PROXY_SERVER:-}
|
||||
PROXY_USERNAME: ${PROXY_USERNAME:-}
|
||||
PROXY_PASSWORD: ${PROXY_PASSWORD:-}
|
||||
SEARXNG_ENDPOINT: ${SEARXNG_ENDPOINT:-}
|
||||
SEARXNG_ENGINES: ${SEARXNG_ENGINES:-}
|
||||
SEARXNG_CATEGORIES: ${SEARXNG_CATEGORIES:-}
|
||||
|
||||
services:
|
||||
firecrawl:
|
||||
playwright-service:
|
||||
<<: *default
|
||||
image: mendableai/firecrawl:${FIRECRAWL_VERSION:-v1.16.0}
|
||||
ports:
|
||||
- "${FIRECRAWL_PORT_OVERRIDE:-3002}:3002"
|
||||
image: ghcr.io/firecrawl/playwright-service:${PLAYWRIGHT_VERSION:-latest}
|
||||
environment:
|
||||
TZ: ${TZ:-UTC}
|
||||
REDIS_URL: redis://:${REDIS_PASSWORD:-firecrawl}@redis:6379
|
||||
PLAYWRIGHT_MICROSERVICE_URL: http://playwright:3000
|
||||
PORT: 3002
|
||||
NUM_WORKERS_PER_QUEUE: ${NUM_WORKERS_PER_QUEUE:-8}
|
||||
SCRAPE_RATE_LIMIT_TOKEN_BUCKET_SIZE: ${SCRAPE_RATE_LIMIT_TOKEN_BUCKET_SIZE:-20}
|
||||
SCRAPE_RATE_LIMIT_TOKEN_BUCKET_REFILL: ${SCRAPE_RATE_LIMIT_TOKEN_BUCKET_REFILL:-1}
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
playwright:
|
||||
condition: service_started
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2.0'
|
||||
memory: 4G
|
||||
reservations:
|
||||
cpus: '1.0'
|
||||
memory: 2G
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3002/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
redis:
|
||||
<<: *default
|
||||
image: redis:${REDIS_VERSION:-7.4.2-alpine}
|
||||
command: redis-server --requirepass ${REDIS_PASSWORD:-firecrawl} --appendonly yes
|
||||
environment:
|
||||
- TZ=${TZ:-UTC}
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1.0'
|
||||
memory: 512M
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 256M
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 3s
|
||||
retries: 3
|
||||
start_period: 5s
|
||||
|
||||
playwright:
|
||||
<<: *default
|
||||
image: mendableai/firecrawl-playwright:${PLAYWRIGHT_VERSION:-latest}
|
||||
environment:
|
||||
TZ: ${TZ:-UTC}
|
||||
PORT: 3000
|
||||
PROXY_SERVER: ${PROXY_SERVER:-}
|
||||
PROXY_USERNAME: ${PROXY_USERNAME:-}
|
||||
@@ -76,11 +49,102 @@ services:
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2.0'
|
||||
memory: 2G
|
||||
reservations:
|
||||
cpus: '1.0'
|
||||
cpus: "1.0"
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: "0.5"
|
||||
memory: 512M
|
||||
|
||||
api:
|
||||
<<: *default
|
||||
image: ghcr.io/firecrawl/firecrawl:${FIRECRAWL_VERSION:-latest}
|
||||
environment:
|
||||
<<: *common-env
|
||||
HOST: 0.0.0.0
|
||||
PORT: ${INTERNAL_PORT:-3002}
|
||||
EXTRACT_WORKER_PORT: ${EXTRACT_WORKER_PORT:-3004}
|
||||
WORKER_PORT: ${WORKER_PORT:-3005}
|
||||
ENV: local
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
playwright-service:
|
||||
condition: service_started
|
||||
nuq-postgres:
|
||||
condition: service_started
|
||||
ports:
|
||||
- "${FIRECRAWL_PORT_OVERRIDE:-3002}:${INTERNAL_PORT:-3002}"
|
||||
command: node dist/src/harness.js --start-docker
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: "2.0"
|
||||
memory: 4G
|
||||
reservations:
|
||||
cpus: "1.0"
|
||||
memory: 2G
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD",
|
||||
"wget",
|
||||
"--no-verbose",
|
||||
"--tries=1",
|
||||
"--spider",
|
||||
"http://localhost:3002/health",
|
||||
]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
redis:
|
||||
<<: *default
|
||||
image: redis:${REDIS_VERSION:-alpine}
|
||||
command: redis-server --bind 0.0.0.0
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: "0.5"
|
||||
memory: 512M
|
||||
reservations:
|
||||
cpus: "0.25"
|
||||
memory: 256M
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 3s
|
||||
retries: 3
|
||||
start_period: 5s
|
||||
|
||||
nuq-postgres:
|
||||
<<: *default
|
||||
image: ghcr.io/firecrawl/nuq-postgres:${NUQ_POSTGRES_VERSION:-latest}
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER:-postgres}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
|
||||
POSTGRES_DB: ${POSTGRES_DB:-postgres}
|
||||
ports:
|
||||
- "${POSTGRES_PORT_OVERRIDE:-5432}:5432"
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: "1.0"
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: "0.5"
|
||||
memory: 512M
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 10s
|
||||
|
||||
volumes:
|
||||
redis_data:
|
||||
postgres_data:
|
||||
|
||||
Reference in New Issue
Block a user