packages/shared:
- Zod v4 schemas for TopicConfig, ProxyConfig, CrawlJob, SearchQuery
- Config loader with defaults
- Utility functions (createId, formatBytes, normalizeUrl)

packages/core:
- WebProxyServer: HTTP forward proxy using http-proxy-3
- CacheStore: LRU-based in-memory + disk cache for proxied responses
- WarcWriter: WARC file archiving for all proxied content
- HTTPS CONNECT tunneling for SSL passthrough
- Admin API with /api/status, /api/cache/stats, /api/config

packages/indexer:
- TopicCrawler: Crawlee CheerioCrawler for topic-based web crawling
- ContentExtractor: @mozilla/readability + turndown for clean text/markdown
- SearchClient: MeiliSearch integration for full-text search
- CrawlScheduler: Interval-based crawl job scheduling

apps/proxy:
- Main entry point orchestrating all components
- Graceful shutdown handling
- Proxy-only mode when no topics configured

All packages type-check clean. Next.js build passes.

Co-Authored-By: UnicornDev <noreply@unicorndev.wtf>
29 lines
650 B
JSON
29 lines
650 B
JSON
{
|
|
"name": "@webproxy/indexer",
|
|
"version": "0.1.0",
|
|
"private": true,
|
|
"main": "./src/index.ts",
|
|
"types": "./src/index.ts",
|
|
"dependencies": {
|
|
"@crawlee/cheerio": "^3.16.0",
|
|
"@crawlee/http": "^3.16.0",
|
|
"@mozilla/readability": "^0.6.0",
|
|
"@webproxy/shared": "workspace:*",
|
|
"cheerio": "^1.2.0",
|
|
"crawlee": "^3.16.0",
|
|
"meilisearch": "^0.55.0",
|
|
"turndown": "^7.2.2",
|
|
"zod": "^4.3.6"
|
|
},
|
|
"scripts": {
|
|
"typecheck": "tsc --noEmit",
|
|
"clean": "rm -rf dist"
|
|
},
|
|
"devDependencies": {
|
|
"@types/node": "^25.3.2",
|
|
"@types/turndown": "^5.0.6",
|
|
"jsdom": "^28.1.0",
|
|
"typescript": "^5.9.3"
|
|
}
|
|
}
|