packages/shared:
- Zod v4 schemas for TopicConfig, ProxyConfig, CrawlJob, SearchQuery
- Config loader with defaults
- Utility functions (createId, formatBytes, normalizeUrl)

packages/core:
- WebProxyServer: HTTP forward proxy using http-proxy-3
- CacheStore: LRU-based in-memory + disk cache for proxied responses
- WarcWriter: WARC file archiving for all proxied content
- HTTPS CONNECT tunneling for SSL passthrough
- Admin API with /api/status, /api/cache/stats, /api/config

packages/indexer:
- TopicCrawler: Crawlee CheerioCrawler for topic-based web crawling
- ContentExtractor: @mozilla/readability + turndown for clean text/markdown
- SearchClient: MeiliSearch integration for full-text search
- CrawlScheduler: Interval-based crawl job scheduling

apps/proxy:
- Main entry point orchestrating all components
- Graceful shutdown handling
- Proxy-only mode when no topics configured

All packages type-check clean. Next.js build passes.

Co-Authored-By: UnicornDev <noreply@unicorndev.wtf>
51 lines
1.1 KiB
JSON
51 lines
1.1 KiB
JSON
{
  "server": {
    "host": "0.0.0.0",
    "port": 8080,
    "adminPort": 8081
  },
  "cache": {
    "dir": "./data/cache",
    "maxSizeBytes": 10737418240,
    "maxAge": 86400000,
    "cleanupIntervalMs": 3600000
  },
  "warc": {
    "dir": "./data/warc",
    "maxFileSize": 1073741824,
    "compress": false
  },
  "search": {
    "host": "http://localhost:7700",
    "apiKey": "",
    "indexName": "webproxy-pages"
  },
  "topics": [
    {
      "id": "topic_example",
      "name": "Example Topic",
      "keywords": ["typescript", "node.js"],
      "seedUrls": ["https://nodejs.org/en", "https://www.typescriptlang.org/docs"],
      "allowedDomains": ["nodejs.org", "typescriptlang.org"],
      "blockedDomains": [],
      "schedule": {
        "intervalMinutes": 360,
        "maxPagesPerCrawl": 50,
        "maxDepth": 2,
        "respectRobotsTxt": true,
        "userAgent": "WebProxy/0.1"
      },
      "enabled": false
    }
  ],
  "proxy": {
    "timeout": 30000,
    "followRedirects": true,
    "maxRedirects": 5,
    "allowedPorts": [80, 443, 8080, 8443]
  },
  "logging": {
    "level": "info"
  }
}