diff --git a/.claude/launch.json b/.claude/launch.json new file mode 100644 index 0000000..d1246c9 --- /dev/null +++ b/.claude/launch.json @@ -0,0 +1,11 @@ +{ + "version": "0.0.1", + "configurations": [ + { + "name": "nextjs-dev", + "runtimeExecutable": "cmd.exe", + "runtimeArgs": ["/c", "npx next dev --port 3001"], + "port": 3001 + } + ] +} diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 3dfee79..796d7d6 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -43,7 +43,47 @@ "Bash(gh run:*)", "Bash(npx next lint:*)", "Bash(npx eslint .)", - "Bash(echo:*)" + "Bash(echo:*)", + "Bash(npx next build:*)", + "Bash(npx eslint:*)", + "Bash(git add:*)", + "Bash(git commit -m \"$\\(cat <<''EOF''\nfix: suppress remaining ESLint warnings blocking CI\n\n- Disable react-hooks/incompatible-library warnings for RHF watch\\(\\)\n and TanStack useReactTable\\(\\) — these are false positives from the\n React Compiler plugin\n- Remove unused useDebounce import and variable from vendor-table\n\nCo-Authored-By: Claude Opus 4.6 \nEOF\n\\)\")", + "Bash(git push:*)", + "Bash(git fetch:*)", + "Bash(git commit -m \"$\\(cat <<''EOF''\nfix: track migration_lock.toml required by prisma migrate deploy\n\nThe migration_lock.toml was excluded by a gitignore pattern\n\\(prisma/migrations/**/migration_lock.toml\\) which caused\n`prisma migrate deploy` to fail in CI with no lock file present.\nPrisma requires this file to be version-controlled.\n\nCo-Authored-By: Claude Opus 4.6 \nEOF\n\\)\")", + "Bash(git commit -m \"$\\(cat <<''EOF''\nfix: make DATABASE_URL available to all CI steps and add prisma verify\n\n- Move DATABASE_URL to workflow-level env so all steps can access it\n- Add verification step to confirm prisma generate creates output files\n- This should fix TS2307 ''Cannot find module @/generated/prisma'' in CI\n\nCo-Authored-By: Claude Opus 4.6 \nEOF\n\\)\")", + "Bash(git commit:*)", + "Bash(cd:*)", + "Bash(find:*)", + "Bash(curl.exe:*)", + "Bash(powershell -Command \"\\(Invoke-WebRequest -Uri ''http://localhost:3000/telegram'' -UseBasicParsing\\).Content.Length\")", + "Bash(powershell -Command \"\\(Invoke-WebRequest -Uri ''http://localhost:3000/telegram'' -UseBasicParsing\\).Content\")", + "Bash(powershell -Command \"\\(\\(Invoke-WebRequest -Uri ''http://localhost:3000/telegram'' -UseBasicParsing\\).Content\\) -match ''Telegram''\")", + "Bash(powershell -Command \"$c = \\(Invoke-WebRequest -Uri ''http://localhost:3000/telegram'' -UseBasicParsing\\).Content; @\\(''Telegram'',''Manage Telegram'',''Accounts'',''Channels'',''Add Account'',''Sync All'',''Add Channel'',''No accounts configured'',''No channels configured''\\) | ForEach-Object { if \\($c -match [regex]::Escape\\($_\\) { Write-Host \"\"FOUND: $_\" } else { Write-Host \"\"MISSING: $_\" } }\")", + "Bash(node test-ux.mjs:*)", + "Bash(npx prisma db execute:*)", + "Bash(docker.exe ps:*)", + "Bash(node test-content.mjs:*)", + "Bash(node:*)", + "Bash(tee:*)", + "Bash(git ls-remote:*)", + "Bash(npm info:*)", + "Bash(npm view:*)", + "WebFetch(domain:core.telegram.org)", + "Bash(Remove-Item -Recurse -Force \"C:\\\\Users\\\\A00963355\\\\OneDrive - Amaris Zorggroep\\\\Documents\\\\VScodeProjects\\\\DragonsStash\\\\.next\")", + "Bash(Write-Output \"Done\")", + "Bash(powershell -Command \"Remove-Item -Recurse -Force ''C:\\\\Users\\\\A00963355\\\\OneDrive - Amaris Zorggroep\\\\Documents\\\\VScodeProjects\\\\DragonsStash\\\\.next''\")", + "Bash(1:*)", + "Bash(findstr:*)", + "Bash(git reset:*)", + "Bash(git rm:*)", + 
"Bash(taskkill /F /FI \"WINDOWTITLE eq npm*\")", + "Bash(taskkill:*)", + "Bash(wmic process where \"name=''node.exe''\" get ProcessId,CommandLine)", + "Bash(git -C /mnt/c/Users/A00963355/OneDrive - Amaris Zorggroep/Documents/VScodeProjects/DragonsStash log --oneline -10)", + "Bash(git -C \"C:/Users/A00963355/OneDrive - Amaris Zorggroep/Documents/VScodeProjects/DragonsStash\" status --short)", + "Bash(timeout:*)", + "mcp__Claude_Preview__preview_start" ] } } diff --git a/README.md b/README.md index c85770d..19c982f 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,11 @@ # Dragon's Stash -A self-hosted inventory management system for 3D printing filament, SLA resin, and miniature paints. Built with a dark, data-dense UI inspired by [Spoolman](https://github.com/Donkie/Spoolman). +A self-hosted inventory management system for 3D printing filament, SLA resin, and miniature paints — with an integrated Telegram archive worker that ingests, indexes, and redistributes archive files. Built with a dark, data-dense UI inspired by [Spoolman](https://github.com/Donkie/Spoolman). ## Features +### Inventory Management + - **Filament tracking** with spool weight, material type, color swatches, and usage logging - **SLA resin management** with bottle sizes, resin types, and remaining volume tracking - **Miniature paint inventory** with product lines, finishes, and volume tracking @@ -13,7 +15,18 @@ A self-hosted inventory management system for 3D printing filament, SLA resin, a - **Low-stock alerts** with configurable threshold percentage - **Dark theme** optimized for workshop environments - **Role-based auth** with admin and user roles -- **Docker-ready** for easy self-hosting + +### Telegram Archive Worker + +- **Channel scanning** — monitors configured Telegram channels (including forum topics) for archive files (ZIP, RAR, 7z) +- **Multipart detection** — automatically groups related multipart archives (`.part01.rar`, `.z01`, `.001`, etc.) +- **Content indexing** — extracts file listings from archives and stores them in the database +- **Destination upload** — re-uploads processed archives to a configured destination channel +- **Byte-level splitting** — splits files exceeding Telegram's 2GB limit into uploadable chunks +- **Full repack** — concatenates and re-splits multipart sets where any single part exceeds 2GB +- **Progress tracking** — resumes from the last successfully processed message on each run +- **Upload verification** — confirms files reached the destination before marking them complete +- **Preview matching** — associates photo messages with their corresponding archive sets ## Tech Stack @@ -24,6 +37,8 @@ A self-hosted inventory management system for 3D printing filament, SLA resin, a - **UI**: Tailwind CSS, shadcn/ui, Lucide icons - **Tables**: TanStack Table v8 with server-side pagination - **Validation**: Zod v4 + React Hook Form +- **Worker**: Node.js + TDLib (via tdl) +- **Archive handling**: unrar, zlib ## Quick Start @@ -31,6 +46,7 @@ A self-hosted inventory management system for 3D printing filament, SLA resin, a - Node.js 20+ - PostgreSQL 16+ (or Docker) +- Telegram API credentials (for the worker — get from [my.telegram.org/apps](https://my.telegram.org/apps)) ### Development Setup @@ -50,7 +66,7 @@ npm install 3. Start a PostgreSQL database (using Docker): ```bash -docker compose -f docker-compose.dev.yml up -d +docker compose -f docker-compose.dev.yml up -d db ``` 4. Copy the environment file and update values: @@ -62,8 +78,8 @@ cp .env.example .env.local 5. 
Run database migrations and seed: ```bash -npx prisma migrate dev -npx prisma db seed +npx prisma migrate dev # Run migrations +npx prisma db seed # Seed with sample data (admin/user accounts + inventory) ``` 6. Start the development server: @@ -76,20 +92,75 @@ npm run dev - **Admin**: admin@dragonsstash.local / password123 - **User**: user@dragonsstash.local / password123 -### Docker Deployment +### Running the Worker in Development + +To also run the Telegram worker alongside the dev database: ```bash +docker compose -f docker-compose.dev.yml up -d +``` + +This starts both the PostgreSQL database and the worker container. The worker reads `TELEGRAM_API_ID` and `TELEGRAM_API_HASH` from your `.env.local` file. + +## Docker Deployment + +### Full Stack (App + Worker + Database) + +Run the entire application from Docker: + +```bash +cp .env.example .env +# Edit .env — set TELEGRAM_API_ID, TELEGRAM_API_HASH, and a secure AUTH_SECRET docker compose up -d ``` -This starts both the application and PostgreSQL database. The app will be available at `http://localhost:3000`. +The app will be available at [http://localhost:3000](http://localhost:3000). -To seed the database on first run: +### Seeding the Database + +To seed the database with sample data on first run: ```bash SEED_DATABASE=true docker compose up -d ``` +This creates default admin/user accounts and sample inventory data. The seed runs once during the app container's entrypoint (before the Next.js server starts). On subsequent runs without `SEED_DATABASE=true`, seeding is skipped automatically. + +You can also seed manually at any time: + +```bash +npx prisma db seed +``` + +### Development Mode (DB + Worker Only) + +If you prefer to run the Next.js app locally with hot reload: + +```bash +docker compose -f docker-compose.dev.yml up -d # Start DB + worker +npm run dev # Start Next.js locally +``` + +### Rebuilding After Code Changes + +```bash +docker compose build && docker compose up -d --force-recreate +``` + +To rebuild only the worker: + +```bash +docker compose build worker && docker compose up -d worker --force-recreate +``` + +### Viewing Logs + +```bash +docker compose logs -f worker # Worker logs +docker compose logs -f app # App logs +docker compose logs -f db # Database logs +``` + ## Project Structure ``` @@ -116,6 +187,16 @@ src/ lib/ # Auth config, Prisma client, constants schemas/ # Zod validation schemas types/ # TypeScript type definitions +worker/ + src/ + archive/ # Archive detection, multipart grouping, byte-level splitting + db/ # Prisma queries for packages, progress tracking + preview/ # Preview image matching + tdlib/ # TDLib client, channel scanning, topic/forum handling + upload/ # Telegram upload logic + util/ # Config, logger + worker.ts # Main processing pipeline + index.ts # Entry point + scheduler prisma/ schema.prisma # Database schema seed.ts # Seed data @@ -125,6 +206,8 @@ prisma/ Environment variables (see `.env.example`): +### Application + | Variable | Description | Default | |----------|-------------|---------| | `DATABASE_URL` | PostgreSQL connection string | Required | @@ -133,6 +216,20 @@ Environment variables (see `.env.example`): | `AUTH_GITHUB_ID` | GitHub OAuth client ID | Optional | | `AUTH_GITHUB_SECRET` | GitHub OAuth client secret | Optional | | `NEXT_PUBLIC_APP_URL` | Public application URL | `http://localhost:3000` | +| `SEED_DATABASE` | Seed the database on app container start | `false` | + +### Telegram Worker + +| Variable | Description | Default | 
+|----------|-------------|---------| +| `TELEGRAM_API_ID` | Telegram API ID (from [my.telegram.org](https://my.telegram.org/apps)) | Required | +| `TELEGRAM_API_HASH` | Telegram API hash | Required | +| `WORKER_INTERVAL_MINUTES` | Scan interval in minutes | `60` | +| `WORKER_TEMP_DIR` | Temp directory for downloads | `/tmp/zips` | +| `TDLIB_STATE_DIR` | TDLib session state persistence directory | `/data/tdlib` | +| `WORKER_MAX_ZIP_SIZE_MB` | Max archive size to process (MB) | `4096` | +| `MULTIPART_TIMEOUT_HOURS` | Max time span for multipart set parts (0 = no limit) | `0` | +| `LOG_LEVEL` | Worker log level (`debug`, `info`, `warn`, `error`) | `info` | ## Health Check diff --git a/TELEGRAM_INTEGRATION_PLAN.md b/TELEGRAM_INTEGRATION_PLAN.md new file mode 100644 index 0000000..df6524e --- /dev/null +++ b/TELEGRAM_INTEGRATION_PLAN.md @@ -0,0 +1,1192 @@ +# Telegram ZIP Ingestion & Indexing — Integration Plan + +> **Status:** Planning phase — no implementation code yet +> **Date:** 2026-02-24 +> **Base system:** DragonsStash — Next.js 16 / Prisma 7.4 / PostgreSQL 16 / Docker + +--- + +## 1. Architecture Summary + +### Current State + +DragonsStash is a monolithic Next.js 16 App Router application for 3D printing inventory management. It uses: + +- **Prisma 7.4** with `@prisma/adapter-pg` and native `pg.Pool` connection pooling +- **NextAuth.js 5 beta** with JWT strategy (credentials + optional GitHub OAuth) +- **Docker** multi-stage build (`node:20-alpine`), standalone output +- **PostgreSQL 16-alpine** via docker-compose +- **No background job infrastructure** — all mutations are synchronous Server Actions + +### Proposed Architecture + +``` +┌─────────────────────────────────────────────────┐ +│ Docker Compose │ +│ │ +│ ┌──────────────┐ ┌────────────────────────┐ │ +│ │ next-app │ │ telegram-worker │ │ +│ │ (control │ │ (data plane) │ │ +│ │ plane) │ │ │ │ +│ │ Port 3000 │ │ - TDLib per account │ │ +│ │ │ │ - ZIP processing │ │ +│ │ - Admin UI │ │ - Upload to channel │ │ +│ │ - API routes│ │ - Metadata indexing │ │ +│ │ - Auth │ │ │ │ +│ └──────┬───────┘ └───────────┬────────────┘ │ +│ │ │ │ +│ └──────────┬───────────┘ │ +│ │ │ +│ ┌──────────▼──────────┐ │ +│ │ PostgreSQL 16 │ │ +│ │ (shared state) │ │ +│ └─────────────────────┘ │ +│ │ +│ Volumes: │ +│ - postgres_data (persistent) │ +│ - tdlib_state (persistent per account) │ +│ - tmp_zips (ephemeral, bounded) │ +└─────────────────────────────────────────────────┘ +``` + +**Key principle:** The Next.js app is the **control plane** (UI, API, scheduling triggers). The worker container is the **data plane** (TDLib sessions, ZIP download/hash/upload). They communicate exclusively through PostgreSQL. + +--- + +## 2. Proposed Folder Structure + +``` +DragonsStash/ +├── src/ # Existing Next.js app (unchanged) +│ ├── app/ +│ │ ├── (app)/ +│ │ │ ├── telegram/ # NEW — admin UI pages +│ │ │ │ ├── accounts/ # Manage Telegram accounts +│ │ │ │ │ └── [id]/ +│ │ │ │ │ └── auth/ # [Q2] Phone code entry UI for TDLib auth +│ │ │ │ ├── channels/ # Manage source/destination channels +│ │ │ │ ├── packages/ # Browse indexed ZIPs +│ │ │ │ └── ingestion/ # Ingestion run history & status +│ │ │ └── ...existing... +│ │ └── api/ +│ │ ├── ...existing... 
+│ │ ├── zips/ # NEW — ZIP query endpoints +│ │ │ ├── route.ts # GET /api/zips +│ │ │ ├── search/ +│ │ │ │ └── route.ts # GET /api/zips/search +│ │ │ └── [id]/ +│ │ │ ├── route.ts # GET /api/zips/:id +│ │ │ └── files/ +│ │ │ └── route.ts # GET /api/zips/:id/files +│ │ └── ingestion/ # NEW — ingestion control endpoints +│ │ ├── trigger/ +│ │ │ └── route.ts # POST /api/ingestion/trigger +│ │ └── status/ +│ │ └── route.ts # GET /api/ingestion/status +│ ├── lib/ +│ │ ├── ...existing... +│ │ └── telegram/ # NEW — shared types & DB queries +│ │ ├── queries.ts # Prisma queries for telegram models +│ │ └── types.ts # Shared TypeScript types +│ └── schemas/ +│ ├── ...existing... +│ └── telegram.ts # NEW — Zod schemas for telegram models +│ +├── worker/ # NEW — separate process, NOT bundled by Next.js +│ ├── Dockerfile # Worker-specific Dockerfile (Debian, not Alpine) +│ ├── package.json # Worker-only dependencies (tdl, node-stream-zip, unrar, etc.) +│ ├── tsconfig.json # Worker TS config (Node target, not bundler) +│ ├── src/ +│ │ ├── index.ts # Entry point — spawns per-account workers +│ │ ├── scheduler.ts # Hourly scheduler with jitter +│ │ ├── worker.ts # Single-account worker loop +│ │ ├── tdlib/ +│ │ │ ├── client.ts # TDLib client wrapper +│ │ │ └── download.ts # File download logic +│ │ ├── archive/ # Renamed from zip/ — handles ZIP + RAR +│ │ │ ├── hash.ts # Streaming SHA-256 (single + concatenated multipart) +│ │ │ ├── detect.ts # Archive type & multipart detection +│ │ │ ├── zip-reader.ts # ZIP central directory reader (yauzl) +│ │ │ ├── rar-reader.ts # RAR metadata reader (via unrar binary) +│ │ │ ├── multipart.ts # Multipart grouping & concatenation logic +│ │ │ └── split.ts # Byte-level splitting for >2GB re-upload +│ │ ├── upload/ +│ │ │ └── channel.ts # Upload to private channel +│ │ ├── db/ +│ │ │ ├── client.ts # Prisma client (shared schema) +│ │ │ ├── locks.ts # Advisory lock helpers +│ │ │ └── queries.ts # Worker-specific DB operations +│ │ └── util/ +│ │ ├── logger.ts # Structured logging +│ │ └── config.ts # Environment config +│ └── tests/ +│ └── ... +│ +├── prisma/ +│ ├── schema.prisma # MODIFIED — add telegram models +│ └── migrations/ # NEW migration(s) added +│ +├── docker-compose.yml # MODIFIED — add worker service +├── docker-compose.dev.yml # MODIFIED — add worker service for dev +└── ...existing config files... +``` + +### Boundary Rules + +| Concern | Lives in | Reason | +|---------|----------|--------| +| Telegram admin UI | `src/app/(app)/telegram/` | Part of existing authenticated app | +| API routes for querying ZIPs | `src/app/api/zips/`, `src/app/api/ingestion/` | Served by Next.js, uses existing auth | +| Shared Prisma schema | `prisma/schema.prisma` | Single source of truth for all models | +| Worker process | `worker/` | Separate Node.js process, own Dockerfile, own dependencies | +| TDLib native bindings | `worker/` only | Never in the Next.js bundle | +| ZIP processing | `worker/` only | I/O-heavy, must not block Next.js | + +### Why not a monorepo / separate package? + +- The project is a single repo today. Adding a `worker/` directory is the lightest change. +- The worker shares the Prisma schema but has its own `package.json` — no dependency contamination. +- No need for turborepo/nx complexity for two processes. + +--- + +## 3. Database Schema Proposal + +### New Models + +All new tables are prefixed with `telegram_` or `tg_` to avoid collision with existing models. Added to `prisma/schema.prisma`. 
+ +```prisma +// ────────────────────────────────────────────── +// Telegram Accounts +// ────────────────────────────────────────────── + +model TelegramAccount { + id String @id @default(cuid()) + phone String @unique // Phone number (encrypted at rest recommended) + displayName String? // Friendly label + apiId Int // Telegram API credentials + apiHash String // Telegram API credentials + sessionPath String // Path to TDLib session directory + isActive Boolean @default(true) // Enabled/disabled toggle + authState AuthState @default(PENDING) // [Q2] TDLib auth state for admin UI flow + authCode String? // Temporary: phone code entered via admin UI + lastSeenAt DateTime? // Last successful TDLib connection + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + // Relations + channelMaps AccountChannelMap[] + ingestionRuns IngestionRun[] + + @@index([isActive]) + @@map("telegram_accounts") +} + +enum AuthState { + PENDING // Account created, not yet authenticated + AWAITING_CODE // Worker requested code, waiting for admin to enter it + AWAITING_PASSWORD // 2FA password needed + AUTHENTICATED // Session active + EXPIRED // Session expired, needs re-auth +} + +// ────────────────────────────────────────────── +// Source & Destination Channels +// ────────────────────────────────────────────── + +model TelegramChannel { + id String @id @default(cuid()) + telegramId BigInt @unique // Telegram's numeric channel ID + title String // Channel title (display only) + type ChannelType // SOURCE or DESTINATION + isActive Boolean @default(true) + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + // Relations + accountMaps AccountChannelMap[] + packages Package[] // ZIPs sourced from / uploaded to + + @@index([type, isActive]) + @@map("telegram_channels") +} + +enum ChannelType { + SOURCE + DESTINATION +} + +// ────────────────────────────────────────────── +// Account ↔ Channel Mapping (many-to-many) +// ────────────────────────────────────────────── + +model AccountChannelMap { + id String @id @default(cuid()) + accountId String + channelId String + role ChannelRole @default(READER) // READER for source, WRITER for destination + lastProcessedMessageId BigInt? // [Q3] Last Telegram message ID processed for this account+channel + createdAt DateTime @default(now()) + + account TelegramAccount @relation(fields: [accountId], references: [id], onDelete: Cascade) + channel TelegramChannel @relation(fields: [channelId], references: [id], onDelete: Cascade) + + @@unique([accountId, channelId]) + @@index([accountId]) + @@index([channelId]) + @@map("account_channel_map") +} + +enum ChannelRole { + READER + WRITER +} + +// ────────────────────────────────────────────── +// Packages (indexed archives — ZIP + RAR) +// ────────────────────────────────────────────── + +model Package { + id String @id @default(cuid()) + contentHash String @unique // SHA-256 of full content (concatenated for multipart) + fileName String // Original filename (first part if multipart) + fileSize BigInt // Total size in bytes (sum of all parts) + archiveType ArchiveType // ZIP or RAR + sourceChannelId String // Channel it was found in + sourceMessageId BigInt // Telegram message ID (first part if multipart) + destChannelId String? // Channel it was re-uploaded to + destMessageId BigInt? // Telegram message ID after upload (first part) + isMultipart Boolean @default(false) // Was this a multipart archive? 
+ partCount Int @default(1) // Number of parts (1 if single) + fileCount Int @default(0) // Number of entries inside archive + indexedAt DateTime @default(now()) + createdAt DateTime @default(now()) + + // Relations + sourceChannel TelegramChannel @relation(fields: [sourceChannelId], references: [id]) + files PackageFile[] + ingestionRun IngestionRun? @relation(fields: [ingestionRunId], references: [id]) + ingestionRunId String? + + @@index([sourceChannelId]) + @@index([destChannelId]) + @@index([fileName]) + @@index([indexedAt]) + @@index([archiveType]) + @@index([contentHash]) // Already unique, but explicit for search + @@map("packages") +} + +enum ArchiveType { + ZIP + RAR +} + +// ────────────────────────────────────────────── +// Package Files (metadata only — no binary storage) +// ────────────────────────────────────────────── + +model PackageFile { + id String @id @default(cuid()) + packageId String + path String // Full path inside archive + fileName String // Leaf filename + extension String? // Lowercase file extension + compressedSize BigInt @default(0) // Compressed size (from ZIP central dir or RAR header) + uncompressedSize BigInt @default(0) // Uncompressed size + crc32 String? // CRC-32 (available in both ZIP and RAR) + + package Package @relation(fields: [packageId], references: [id], onDelete: Cascade) + + @@index([packageId]) + @@index([extension]) + @@index([fileName]) + @@map("package_files") +} + +// ────────────────────────────────────────────── +// Ingestion Runs (observability) +// ────────────────────────────────────────────── + +model IngestionRun { + id String @id @default(cuid()) + accountId String + status IngestionStatus @default(RUNNING) + startedAt DateTime @default(now()) + finishedAt DateTime? + messagesScanned Int @default(0) + zipsFound Int @default(0) + zipsDuplicate Int @default(0) + zipsIngested Int @default(0) + errorMessage String? + + account TelegramAccount @relation(fields: [accountId], references: [id]) + packages Package[] + + @@index([accountId]) + @@index([status]) + @@index([startedAt]) + @@map("ingestion_runs") +} + +enum IngestionStatus { + RUNNING + COMPLETED + FAILED + CANCELLED +} +``` + +### Index Strategy + +| Table | Index | Purpose | +|-------|-------|---------| +| `packages` | `contentHash` (UNIQUE) | Global deduplication — the core constraint | +| `packages` | `sourceChannelId` | Filter ZIPs by source channel | +| `packages` | `fileName` | Search by filename | +| `packages` | `indexedAt` | Sort by recency | +| `package_files` | `packageId` | Lookup files per package | +| `package_files` | `extension` | Filter by file type (e.g., `.stl`, `.gcode`) | +| `package_files` | `fileName` | Full-text-like search on filenames | +| `ingestion_runs` | `accountId` + `status` | Find running jobs per account | +| `telegram_accounts` | `isActive` | Filter active accounts | +| `telegram_channels` | `type` + `isActive` | Filter active source/destination channels | + +### Full-Text Search Consideration + +For `GET /api/zips/search?q=`, Prisma's `contains` with `mode: insensitive` is sufficient for moderate data volumes (<100k packages). If search becomes a bottleneck: +- Add a PostgreSQL `GIN` index with `pg_trgm` on `package_files.fileName` +- This can be done via a raw SQL migration later without schema changes + +### Migration Approach + +1. Create a new Prisma migration: `npx prisma migrate dev --name add_telegram_models` +2. This is purely additive — no existing tables are modified +3. 
Deploy with existing `docker-entrypoint.sh` which already runs `prisma migrate deploy` +4. No data migration needed — all new tables start empty + +--- + +## 4. Worker Lifecycle Design + +### 4.1 Process Model + +``` +telegram-worker container +│ +├── index.ts (main process) +│ ├── Reads active accounts from DB +│ ├── Starts scheduler +│ └── Handles SIGTERM/SIGINT for graceful shutdown +│ +├── scheduler.ts +│ ├── Runs on configurable interval (default: 60 min) +│ ├── Adds random jitter (0–5 min) to avoid thundering herd +│ └── For each active account → enqueue work +│ +└── worker.ts (per-account execution) + ├── Acquires PostgreSQL advisory lock (account-specific) + ├── If lock fails → skip (another instance is running) + ├── Creates TDLib client for account + ├── Iterates source channels + ├── For each new message with ZIP attachment: + │ ├── Download ZIP to temp directory + │ ├── Stream SHA-256 hash + │ ├── Check contentHash uniqueness in DB + │ ├── If duplicate → delete temp file, record skip + │ ├── If new: + │ │ ├── Read central directory for metadata + │ │ ├── If >2GB → repack into parts + │ │ ├── Upload to destination channel via TDLib + │ │ ├── Insert Package + PackageFile rows + │ │ └── Delete temp file + │ └── Update ingestion run counters + ├── Finalize ingestion run (status = COMPLETED or FAILED) + └── Release advisory lock +``` + +### 4.2 Advisory Lock Strategy + +PostgreSQL advisory locks prevent concurrent ingestion for the same account, even across multiple worker containers (for future horizontal scaling). + +``` +Lock ID derivation: + lock_id = hash(account.id) → stable 64-bit integer + +Acquisition: + SELECT pg_try_advisory_lock($lock_id) + → Returns true if acquired, false if held by another session + +Release: + SELECT pg_advisory_unlock($lock_id) + → Explicitly released at end of worker run + +Crash recovery: + Advisory locks are session-scoped — if the worker process dies, + the DB connection closes and the lock is automatically released. +``` + +### 4.3 Worker Loop Pseudocode + +``` +async function runWorkerForAccount(accountId: string) { + const lockId = stableHash(accountId) + + // 1. Acquire lock + const acquired = await db.$queryRaw`SELECT pg_try_advisory_lock(${lockId})` + if (!acquired) { + log.info(`Account ${accountId} already locked, skipping`) + return + } + + try { + // 2. Create ingestion run record + const run = await db.ingestionRun.create({ + data: { accountId, status: 'RUNNING' } + }) + + // 3. Initialize TDLib client + const client = await createTdlibClient(account) + + // 4. Get assigned source channels + const channels = await getSourceChannels(accountId) + + for (const channel of channels) { + // 5. Get messages since last processed message + const mapping = await getChannelMapping(accountId, channel.id) + const messages = await getChannelMessages(client, channel.telegramId, mapping.lastProcessedMessageId) + + // 6. Detect archives and group multipart sets + const archiveSets = groupArchiveSets(messages) + // archiveSets = [{ type: 'ZIP'|'RAR', parts: [msg, msg, ...], baseName: '...' }, ...] + + for (const archiveSet of archiveSets) { + run.messagesScanned += archiveSet.parts.length + const tempPaths: string[] = [] + + try { + // 7. Download all parts + for (const part of archiveSet.parts) { + const tempPath = path.join(TEMP_DIR, `${run.id}_${part.id}_${part.fileName}`) + await downloadFile(client, part.fileId, tempPath) + tempPaths.push(tempPath) + } + + // 8. 
Concatenated SHA-256 hash (streams all parts in order)
+          const contentHash = await hashParts(tempPaths)
+
+          // 9. Deduplicate
+          const exists = await db.package.findUnique({ where: { contentHash } })
+          if (exists) {
+            run.zipsDuplicate++
+            continue  // temp files deleted in finally
+          }
+
+          // 10. Read archive metadata (without extraction)
+          let entries: FileEntry[] = []
+          if (archiveSet.type === 'ZIP') {
+            // Read central directory from last part (or reassembled file)
+            entries = await readZipCentralDirectory(tempPaths)
+          } else {
+            // RAR: unrar l -v on first part auto-discovers other parts
+            entries = await readRarContents(tempPaths[0])
+          }
+
+          // 11. Prepare upload — byte-level split if single file >2GB
+          const totalSize = archiveSet.parts.reduce((sum, p) => sum + p.fileSize, 0n)
+          let uploadPaths = tempPaths
+          if (!archiveSet.isMultipart && totalSize > 2n * 1024n * 1024n * 1024n) {
+            uploadPaths = await byteLevelSplit(tempPaths[0])
+          }
+
+          // 12. Upload to destination channel
+          const destResult = await uploadToChannel(client, destChannel, uploadPaths)
+
+          // 13. Persist metadata
+          await db.package.create({
+            data: {
+              contentHash,
+              fileName: archiveSet.parts[0].fileName,
+              fileSize: totalSize,
+              archiveType: archiveSet.type,
+              sourceChannelId: channel.id,
+              sourceMessageId: archiveSet.parts[0].id,
+              destChannelId: destChannel.id,
+              destMessageId: destResult.messageId,
+              isMultipart: archiveSet.parts.length > 1 || uploadPaths.length > 1,
+              partCount: uploadPaths.length,
+              fileCount: entries.length,
+              ingestionRunId: run.id,
+              files: {
+                create: entries.map(e => ({
+                  path: e.path,
+                  fileName: e.fileName,
+                  extension: e.extension,
+                  compressedSize: e.compressedSize,
+                  uncompressedSize: e.uncompressedSize,
+                  crc32: e.crc32,
+                }))
+              }
+            }
+          })
+
+          run.zipsIngested++
+        } finally {
+          // 14. ALWAYS delete all temp files (originals + any split parts)
+          await deleteFiles(...tempPaths, ...uploadPaths)
+        }
+      }
+
+      // 15. Update last processed message ID
+      const lastMsg = messages[messages.length - 1]
+      if (lastMsg) {
+        await db.accountChannelMap.update({
+          where: { id: mapping.id },
+          data: { lastProcessedMessageId: lastMsg.id }
+        })
+      }
+    }
+
+    // 16. Finalize run
+    await db.ingestionRun.update({
+      where: { id: run.id },
+      data: { status: 'COMPLETED', finishedAt: new Date(), ...run.counters }
+    })
+
+  } catch (error) {
+    // 17. Record failure
+    await db.ingestionRun.update({
+      where: { id: run.id },
+      data: { status: 'FAILED', finishedAt: new Date(), errorMessage: error.message }
+    })
+  } finally {
+    // 18. Release lock
+    await db.$queryRaw`SELECT pg_advisory_unlock(${lockId})`
+    // 19. Destroy TDLib client
+    await client?.close()
+  }
+}
+```
+
+### 4.4 Crash Recovery
+
+| Scenario | Recovery |
+|----------|----------|
+| Worker process crashes mid-ingestion | Advisory lock auto-released on DB disconnect. Next scheduled run picks up. Partial `IngestionRun` with `RUNNING` status is detected on startup and marked `FAILED`. |
+| DB connection lost | Worker catches error, marks run as `FAILED`, exits. Scheduler retries on next cycle. |
+| TDLib rate-limited (420/429) | Exponential backoff with max 5 retries. If exhausted, marks run as `FAILED` with error message. |
+| Temp file left on disk | On worker startup, sweep `TEMP_DIR` and delete all files (no state depends on temp files). |
+| Duplicate detection race | `contentHash` UNIQUE constraint is the final guard — `INSERT` will fail with unique violation, which is caught and treated as a duplicate. 
| + +### 4.5 Scheduler Design + +``` +┌─────────────────────────────────────────────┐ +│ Scheduler (runs in main worker process) │ +│ │ +│ setInterval(runCycle, INTERVAL_MS) │ +│ + random jitter: Math.random() * 5min │ +│ │ +│ runCycle(): │ +│ accounts = db.telegramAccount.findMany({ │ +│ where: { isActive: true } │ +│ }) │ +│ for (account of accounts): │ +│ // Sequential, not parallel │ +│ await runWorkerForAccount(account.id) │ +│ │ +│ // Also responds to manual triggers: │ +│ // Polls ingestion_trigger table or uses │ +│ // PostgreSQL LISTEN/NOTIFY │ +└─────────────────────────────────────────────┘ +``` + +**Manual trigger mechanism:** The `POST /api/ingestion/trigger` API route writes a row to a lightweight `ingestion_triggers` table (or uses `pg_notify`). The worker polls this table or listens on a channel. + +--- + +## 5. Docker Strategy + +### 5.1 Recommended Architecture: Separate Containers + +**Reason:** TDLib requires Debian/Ubuntu (not Alpine) and native compilation. The Next.js app uses `node:20-alpine`. Mixing them bloats the app image and introduces risk. + +### 5.2 Worker Dockerfile + +```dockerfile +# worker/Dockerfile +FROM node:20-bookworm-slim AS base + +# TDLib system dependencies + unrar for RAR metadata reading +RUN apt-get update && apt-get install -y \ + libssl-dev \ + zlib1g-dev \ + unrar \ + && rm -rf /var/lib/apt/lists/* + +# Pre-built TDLib binary (or build from source in multi-stage) +COPY --from=tdlib-builder /usr/local/lib/libtdjson.so /usr/local/lib/ +RUN ldconfig + +WORKDIR /app + +COPY package.json package-lock.json ./ +RUN npm ci --production + +COPY prisma/ ./prisma/ +RUN npx prisma generate + +COPY dist/ ./dist/ + +# Non-root user +RUN addgroup --system worker && adduser --system --ingroup worker worker +USER worker + +# Volumes +VOLUME ["/data/tdlib", "/tmp/zips"] + +CMD ["node", "dist/index.js"] +``` + +### 5.3 Updated docker-compose.yml + +```yaml +services: + app: + # ...existing config unchanged... + depends_on: + db: + condition: service_healthy + + worker: + build: + context: . + dockerfile: worker/Dockerfile + environment: + DATABASE_URL: ${DATABASE_URL} + WORKER_INTERVAL_MINUTES: 60 + WORKER_TEMP_DIR: /tmp/zips + TDLIB_STATE_DIR: /data/tdlib + LOG_LEVEL: info + volumes: + - tdlib_state:/data/tdlib # Persistent TDLib sessions + - tmp_zips:/tmp/zips # Ephemeral ZIP processing + depends_on: + db: + condition: service_healthy + restart: unless-stopped + # Resource limits (optional but recommended) + deploy: + resources: + limits: + memory: 1G + reservations: + memory: 256M + + db: + # ...existing config unchanged... + +volumes: + postgres_data: + tdlib_state: + tmp_zips: # Disk-backed (not tmpfs) — 16GB RAM is not enough for large archives +``` + +### 5.4 Volume Plan + +| Volume | Type | Purpose | Lifecycle | +|--------|------|---------|-----------| +| `postgres_data` | Persistent | Database storage | Permanent | +| `tdlib_state` | Persistent | TDLib session databases (one subdirectory per account) | Permanent — losing this requires re-authentication | +| `tmp_zips` | Disk-backed volume | Temporary archive download/processing | Worker sweeps on startup + deletes after each archive. Not RAM-bound. 
| + +### 5.5 Environment Variable Separation + +| Variable | App | Worker | Description | +|----------|-----|--------|-------------| +| `DATABASE_URL` | Yes | Yes | Shared PostgreSQL connection | +| `AUTH_SECRET` | Yes | No | NextAuth session secret | +| `NEXT_PUBLIC_APP_URL` | Yes | No | Public URL | +| `WORKER_INTERVAL_MINUTES` | No | Yes | Scheduler interval | +| `WORKER_TEMP_DIR` | No | Yes | Temp ZIP storage path | +| `TDLIB_STATE_DIR` | No | Yes | TDLib session storage path | +| `WORKER_MAX_ZIP_SIZE_MB` | No | Yes | Max ZIP size before rejecting (default: 4096) | +| `TELEGRAM_API_KEY` | Yes | No | [Q4] Static API key for external app access (MVP) | +| `LOG_LEVEL` | Optional | Yes | Logging verbosity | + +--- + +## 6. Archive Processing Strategy + +Supports **ZIP** and **RAR** archives, including multipart variants of both. + +### 6.1 Supported Archive Formats + +| Format | Single file | Multipart patterns | Metadata reader | +|--------|------------|-------------------|-----------------| +| ZIP | `.zip` | `.zip.001`/`.002`/... or `.z01`/`.z02`/...+`.zip` | `yauzl` — reads central directory without extraction | +| RAR | `.rar` | `.part1.rar`/`.part2.rar`/... or `.r00`/`.r01`/...+`.rar` | `unrar l -v` — lists contents via CLI binary | + +### 6.2 Processing Pipeline + +``` +Messages scanned in source channel + │ + ▼ +Detect archive attachments (.zip, .rar, .z01, .r01, .part1.rar, etc.) + │ + ▼ +Group multipart sets (by filename pattern + message proximity) + │ + ├── Single-file archive → download one file + └── Multipart set → download ALL parts + │ + ▼ +Concatenated SHA-256 hash (stream all parts in order through hasher) + │ ┌──────────────────────┐ + ▼ │ contentHash exists? │ +Check contentHash against packages │ YES → delete all temp │ + │ │ files & skip │ + │ NO (new archive) └──────────────────────┘ + ▼ +Reassemble if multipart (concatenate parts into single file) + │ + ▼ +Read archive metadata + ├── ZIP → yauzl central directory reader (no extraction) + └── RAR → `unrar l -v ` (lists contents without extraction) + │ + ▼ +Prepare for upload + ├── Total size ≤2GB → upload as-is (single file) + ├── Total size >2GB → byte-level split into ≤2GB parts + └── Originally multipart → re-upload original parts as-is + │ + ▼ +Upload to destination channel via TDLib + │ + ▼ +Insert Package + PackageFile rows in single transaction + │ + ▼ +DELETE all temp files immediately (in finally block) +``` + +### 6.3 Multipart Grouping Logic + +Archives split into multiple parts arrive as **separate Telegram messages**. The worker must group them before processing. + +**Detection rules:** + +``` +For a message with filename "pack.zip.003": + → base = "pack.zip", part = 3, type = ZIP_NUMBERED + +For a message with filename "pack.z02": + → base = "pack", part = 2, type = ZIP_LEGACY (final part is "pack.zip") + +For a message with filename "pack.part2.rar": + → base = "pack", part = 2, type = RAR_PART + +For a message with filename "pack.r01": + → base = "pack", part = 1, type = RAR_LEGACY (final part is "pack.rar") +``` + +**Grouping strategy:** +1. Scan channel messages and build a map: `base_name → [parts]` +2. A multipart set is complete when parts form a contiguous sequence (1..N) +3. **Timeout:** If parts span >24 hours of messages, treat as incomplete — log warning, skip +4. 
Incomplete sets are retried on next ingestion run (parts may still be uploading to source)
+
+### 6.4 Concatenated Hashing
+
+For multipart archives, all parts are streamed through a single SHA-256 hasher **in order**:
+
+```typescript
+import { createReadStream } from 'fs'
+import { createHash } from 'crypto'
+
+async function hashParts(filePaths: string[]): Promise<string> {
+  const hash = createHash('sha256')
+  for (const filePath of filePaths) {
+    // Stream each part through the same hasher, in part order
+    for await (const chunk of createReadStream(filePath)) {
+      hash.update(chunk)
+    }
+  }
+  return hash.digest('hex')
+}
+```
+
+- Memory: O(1) — streams 64KB chunks regardless of total size
+- For single files, this is equivalent to hashing one file
+- Part order is determined by the numeric suffix (sorted ascending)
+
+### 6.5 Metadata Reading
+
+**ZIP (via `yauzl`):**
+- Opens the (reassembled) ZIP file
+- Iterates central directory entries at the end of the file — **no extraction**
+- Collects: `path`, `fileName`, `extension`, `compressedSize`, `uncompressedSize`, `crc32`
+- Memory: O(n) where n = number of entries (metadata only, typically <1MB)
+
+**RAR (via `unrar` binary):**
+- Runs `unrar l -v <first-part.rar>` as a child process
+- Parses stdout for file list with sizes and CRC
+- **No extraction** — `l` (list) mode only
+- Collects same fields: `path`, `fileName`, `extension`, `compressedSize`, `uncompressedSize`, `crc32`
+- Requires `unrar` installed in worker Docker image
+
+**Fallback:** If metadata reading fails (corrupted archive, unsupported format), the package is still ingested with `fileCount = 0` and no `PackageFile` rows. A warning is logged. The archive is still hashed, uploaded, and deduplicated — just without internal file listing.
+
+### 6.6 Re-upload Strategy
+
+| Scenario | Action |
+|----------|--------|
+| Single file ≤2GB | Upload as-is |
+| Single file >2GB | Byte-level split into ≤2GB chunks, upload each as separate message |
+| Originally multipart, each part ≤2GB | Re-upload each original part as-is (preserving original split) |
+| Originally multipart, any part >2GB | This shouldn't happen (Telegram's own limit) — log error, skip |
+
+**Byte-level splitting** uses `fs.createReadStream` with `start`/`end` byte offsets. Parts are named `filename.zip.001`, `.002`, etc. No decompression or recompression involved.
+
+### 6.7 Disk Usage Guarantees
+
+- **Bounded by `WORKER_MAX_ZIP_SIZE_MB` env var** (default: 4096MB per archive set)
+- **One archive set per worker at a time** (sequential per account)
+- **Immediate deletion** of all temp files after upload or on any error (in `finally` block)
+- **Startup cleanup:** Worker sweeps `TEMP_DIR` on boot
+
+**Worst-case disk usage scenarios:**
+
+| Scenario | Temp disk needed | Notes |
+|----------|-----------------|-------|
+| Single 2GB ZIP | 2GB | Trivial |
+| Single 10GB ZIP → split for upload | ~20GB (original + parts) | Needs free disk space |
+| Multipart RAR (10 × 2GB parts) | 20GB (parts) | No reassembly needed for RAR |
+| Multipart ZIP (10 × 2GB parts, no reassembly) | 20GB (parts only) | Central dir read from last part |
+| Multipart ZIP (10 × 2GB parts) + reassembly fallback | ~40GB (parts + reassembled) | Only if last-part read fails |
+
+Disk space is bounded per-archive-set, not globally. The worker processes one set at a time and deletes everything before moving to the next. Ensure the host has sufficient free disk space for the largest expected archive set. 
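+To make the "original + parts" doubling in the table above concrete, here is a minimal sketch of the byte-level splitter from §6.6. It is illustrative only — the `byteLevelSplit` name matches the §4.3 pseudocode, but the chunk constant and part-naming scheme are assumptions from this plan, not implemented code:
+
+```typescript
+import { createReadStream, createWriteStream } from 'fs'
+import { stat } from 'fs/promises'
+import { pipeline } from 'stream/promises'
+
+// Telegram's per-file upload ceiling assumed throughout this plan
+const CHUNK_SIZE = 2 * 1024 * 1024 * 1024 // 2GB
+
+// Split a file into ≤2GB parts named <file>.001, <file>.002, ...
+// Raw byte copy — no decompression or recompression involved.
+async function byteLevelSplit(filePath: string): Promise<string[]> {
+  const { size } = await stat(filePath)
+  const parts: string[] = []
+  for (let offset = 0, index = 1; offset < size; offset += CHUNK_SIZE, index++) {
+    const partPath = `${filePath}.${String(index).padStart(3, '0')}`
+    // createReadStream treats `end` as inclusive, hence the -1
+    const end = Math.min(offset + CHUNK_SIZE, size) - 1
+    await pipeline(
+      createReadStream(filePath, { start: offset, end }),
+      createWriteStream(partPath),
+    )
+    parts.push(partPath)
+  }
+  return parts
+}
+```
+
+Note that the original file and all split parts coexist on disk until the `finally` cleanup runs — exactly the worst case the table accounts for.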
+**Optimization for multipart archives:** Avoid reassembly into a single file when possible:
+- **Hashing:** Stream parts in order — no reassembly needed
+- **ZIP metadata:** Read central directory from last part only (it's stored at the end) — avoids full reassembly in most cases
+- **RAR metadata:** Run `unrar l -v` on the first part (it auto-discovers subsequent parts if co-located) — no reassembly needed
+- **Full reassembly** only needed if the above approaches fail (corrupted or non-standard split)
+
+---
+
+## 7. API Route Plan
+
+### 7.1 Endpoint List
+
+| Method | Path | Description | Auth Required |
+|--------|------|-------------|---------------|
+| `GET` | `/api/zips` | List packages with pagination & filters | Yes |
+| `GET` | `/api/zips/:id` | Get single package details | Yes |
+| `GET` | `/api/zips/:id/files` | List files inside a package | Yes |
+| `GET` | `/api/zips/search?q=` | Search packages/files by name | Yes |
+| `POST` | `/api/ingestion/trigger` | Trigger manual ingestion for account(s) | Yes (ADMIN) |
+| `GET` | `/api/ingestion/status` | Get current ingestion status | Yes |
+
+### 7.2 Endpoint Details
+
+#### `GET /api/zips`
+
+**Query Parameters:**
+```
+?page=1           (default: 1)
+&limit=25         (default: 25, max: 100)
+&channelId=...    (filter by source channel)
+&sortBy=indexedAt (indexedAt | fileName | fileSize)
+&order=desc       (asc | desc)
+```
+
+**Response:**
+```json
+{
+  "items": [
+    {
+      "id": "clx...",
+      "fileName": "model-pack-v2.zip",
+      "fileSize": 1073741824,
+      "contentHash": "a1b2c3...",
+      "archiveType": "ZIP",
+      "fileCount": 47,
+      "sourceChannel": { "id": "...", "title": "3D Models Group" },
+      "isMultipart": false,
+      "indexedAt": "2026-02-24T10:00:00Z"
+    }
+  ],
+  "pagination": {
+    "page": 1,
+    "limit": 25,
+    "total": 1234,
+    "totalPages": 50
+  }
+}
+```
+
+#### `GET /api/zips/:id`
+
+**Response:**
+```json
+{
+  "id": "clx...",
+  "fileName": "model-pack-v2.zip",
+  "fileSize": 1073741824,
+  "contentHash": "a1b2c3d4...",
+  "archiveType": "ZIP",
+  "fileCount": 47,
+  "sourceChannel": { "id": "...", "title": "3D Models Group" },
+  "destChannel": { "id": "...", "title": "Archive Channel" },
+  "destMessageId": 12345,
+  "isMultipart": false,
+  "partCount": 1,
+  "indexedAt": "2026-02-24T10:00:00Z",
+  "ingestionRun": { "id": "...", "startedAt": "..." }
+}
+```
+
+#### `GET /api/zips/:id/files`
+
+**Query Parameters:**
+```
+?page=1
+&limit=50       (default: 50, max: 500)
+&extension=stl  (filter by extension)
+```
+
+**Response:**
+```json
+{
+  "items": [
+    {
+      "id": "clx...",
+      "path": "models/dragon/body.stl",
+      "fileName": "body.stl",
+      "extension": "stl",
+      "compressedSize": 524288,
+      "uncompressedSize": 1048576,
+      "crc32": "deadbeef"
+    }
+  ],
+  "pagination": { ... }
+}
+```
+
+#### `GET /api/zips/search?q=`
+
+**Query Parameters:**
+```
+?q=dragon        (search term — matches filename, file paths)
+&page=1
+&limit=25
+&searchIn=files  (packages | files | both; default: both)
+```
+
+**Response:** Same format as `GET /api/zips` but with an additional `matchedFiles` count per package.
+
+#### `POST /api/ingestion/trigger`
+
+**Request Body:**
+```json
+{
+  "accountId": "clx..." // Optional — omit to trigger all active accounts
+}
+```
+
+**Response:**
+```json
+{
+  "triggered": true,
+  "accountIds": ["clx..."],
+  "message": "Ingestion queued for 1 account(s)"
+}
+```
+
+**Implementation:** Inserts into an `ingestion_triggers` table or sends `pg_notify('ingestion_trigger', accountId)`. Returns immediately — does NOT wait for ingestion to complete. 
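+As a sketch of the `pg_notify` variant — assuming the app exposes a NextAuth `auth()` helper and Prisma client at `@/lib/auth` and `@/lib/prisma`, and a `role` field on the session user; all of these are this plan's guesses at the repo layout, not confirmed exports:
+
+```typescript
+// src/app/api/ingestion/trigger/route.ts — illustrative sketch only
+import { NextResponse } from 'next/server'
+import { z } from 'zod'
+import { auth } from '@/lib/auth'     // assumed export
+import { prisma } from '@/lib/prisma' // assumed export
+
+const bodySchema = z.object({ accountId: z.string().optional() })
+
+export async function POST(request: Request) {
+  // ADMIN-only, per the endpoint table above (session shape is assumed)
+  const session = await auth()
+  if (session?.user?.role !== 'ADMIN') {
+    return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
+  }
+
+  const parsed = bodySchema.safeParse(await request.json().catch(() => ({})))
+  if (!parsed.success) {
+    return NextResponse.json({ error: 'Invalid body' }, { status: 400 })
+  }
+
+  // Resolve targets: one explicit account, or all active accounts
+  const accounts = await prisma.telegramAccount.findMany({
+    where: parsed.data.accountId ? { id: parsed.data.accountId } : { isActive: true },
+    select: { id: true },
+  })
+
+  // Fire-and-forget: the worker LISTENs on this channel; do not await ingestion
+  for (const { id } of accounts) {
+    await prisma.$queryRaw`SELECT pg_notify('ingestion_trigger', ${id})`
+  }
+
+  return NextResponse.json({
+    triggered: true,
+    accountIds: accounts.map((a) => a.id),
+    message: `Ingestion queued for ${accounts.length} account(s)`,
+  })
+}
+```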
+ +#### `GET /api/ingestion/status` + +**Response:** +```json +{ + "accounts": [ + { + "id": "clx...", + "displayName": "Bot Account 1", + "isActive": true, + "lastRun": { + "id": "clx...", + "status": "COMPLETED", + "startedAt": "2026-02-24T09:00:00Z", + "finishedAt": "2026-02-24T09:12:34Z", + "messagesScanned": 150, + "zipsFound": 12, + "zipsDuplicate": 3, + "zipsIngested": 9 + }, + "currentRun": null + } + ] +} +``` + +### 7.3 Authentication Strategy + +**For existing admin UI routes:** Use the existing NextAuth.js session — these routes are already behind the middleware auth check. + +**For external app API access (MVP):** Single static API key via `TELEGRAM_API_KEY` env var. API route middleware checks `X-API-Key` header against this value first, then falls back to NextAuth session. No DB table needed. Upgrade to dynamic key management later if needed. + +### 7.4 Security Considerations + +- All endpoints require authentication (no public access) +- `POST /api/ingestion/trigger` requires ADMIN role +- Rate limiting on search endpoint (prevent abuse) +- No binary data returned — metadata only +- Input validation with Zod on all query parameters +- Pagination enforced with max limits to prevent large responses + +--- + +## 8. Environment Audit Checklist + +### Node.js & Runtime + +| Check | Status | Notes | +|-------|--------|-------| +| Node.js version | **20.x** (current) | Compatible with TDLib bindings. Node 20 is LTS until 2026-10. | +| `node:20-alpine` for Next.js | **OK** | Keep as-is for app container | +| `node:20-bookworm-slim` for worker | **Required** | TDLib needs glibc, not musl (Alpine). Debian Bookworm is the right base. | +| ES module support | **OK** | tsconfig targets ES2017, worker can use same | + +### TDLib + +| Check | Status | Notes | +|-------|--------|-------| +| TDLib Node.js binding | Use `tdl` npm package | Wraps `libtdjson.so` via FFI | +| `libtdjson.so` availability | Must compile or use pre-built | Pre-built for Debian available via GitHub releases | +| Required OS packages | `libssl-dev`, `zlib1g-dev`, `unrar` | TDLib runtime + RAR metadata reading. Build needs `cmake`, `g++`, `git` (multi-stage). | +| TDLib state persistence | Volume-mount `/data/tdlib` | One subdirectory per account. Losing this = re-auth required. | +| TDLib version | Use latest stable (1.8.x+) | Check `tdl` compatibility matrix | + +### PostgreSQL + +| Check | Status | Notes | +|-------|--------|-------| +| PostgreSQL version | **16-alpine** (current) | Fully compatible, supports advisory locks, `pg_trgm`, `BigInt` | +| Connection pooling | **`pg.Pool`** via `@prisma/adapter-pg` | Worker needs its own pool (separate process). Default pool size = 10 is fine. | +| Max connections | Check `max_connections` | Default is 100. App + worker + Prisma Studio = ~30 connections typical. Safe. | +| Advisory lock support | **Built-in** | `pg_try_advisory_lock()` / `pg_advisory_unlock()` — no extensions needed | +| `BigInt` column support | **OK** | Prisma 7.4 supports `BigInt` natively. Telegram IDs need `BigInt`. | +| `pg_trgm` extension | **Not installed** | Optional — only needed if full-text search on filenames becomes a requirement | + +### Docker + +| Check | Status | Notes | +|-------|--------|-------| +| Multi-service compose | **Supported** | Current compose already has `app` + `db`. Adding `worker` is straightforward. 
|
+| tmpfs volume | **Rejected** | Large archive sets can exceed RAM — temp storage uses a disk-backed volume instead |
+| Health checks | **Exists for `db`** | Add health check for worker (e.g., check DB connectivity) |
+| Resource limits | **Not set** | Recommend adding `memory: 1G` limit for worker |
+
+### Disk I/O
+
+| Check | Status | Notes |
+|-------|--------|-------|
+| Temp archive storage | Disk-backed Docker volume | Not RAM-bound. Cleaned by worker on startup + after each set. |
+| Max single set I/O | Depends on archive size | One set at a time. Bounded by `WORKER_MAX_ZIP_SIZE_MB`. |
+| TDLib state I/O | Low | Session DB is small (<10MB per account) |
+| PostgreSQL I/O | Moderate | Package metadata is small. 10k packages ≈ few MB. |
+
+### Logging
+
+| Check | Status | Notes |
+|-------|--------|-------|
+| App logging | Next.js default (console) | No change needed |
+| Worker logging | **Needs structured logging** | Use `pino` for JSON-structured logs. Docker captures stdout. |
+| Log volume | Moderate | Log ingestion run summaries, not per-message details |
+
+---
+
+## 9. Risk Assessment
+
+### Risk Matrix
+
+| # | Risk | Likelihood | Impact | Mitigation |
+|---|------|-----------|--------|------------|
+| R1 | **Telegram rate limiting (420 FLOOD_WAIT)** | High | Medium — ingestion paused | Exponential backoff with jitter. Respect `retry_after` from Telegram. Sequential processing per account. Configurable inter-message delay (default: 1s). |
+| R2 | **DB contention on `packages.contentHash` unique check** | Low | Low — single writer per account | Advisory locks serialize writes per account. Unique constraint handles races at DB level. No read contention (separate queries). |
+| R3 | **Multi-account race on same ZIP** | Medium | Low — duplicate insert fails safely | `contentHash` UNIQUE constraint is the ultimate guard. Worker catches unique violation and treats as duplicate. No data corruption possible. |
+| R4 | **TDLib session invalidation** | Medium | High — account becomes unusable | Monitor `lastSeenAt`. Alert in admin UI when >2 hours stale. Document re-authentication procedure. Store session in persistent volume. |
+| R5 | **Worker OOM on large ZIP** | Low | Medium — worker crashes | Streaming hash (O(1) memory). Central directory reading is O(entries), not O(file size). `WORKER_MAX_ZIP_SIZE_MB` caps per-set disk usage. Container memory limit enforced. |
+| R6 | **Temp files not cleaned up** | Low | Low — bounded per set | `finally` blocks on all paths. Startup sweep of temp dir. Disk-backed temp volume is swept on every worker boot. |
+| R7 | **TDLib native dependency breakage on upgrade** | Medium | High — worker won't start | Pin TDLib version. Test upgrades in CI. Multi-stage Docker build isolates build dependencies. |
+| R8 | **PostgreSQL connection exhaustion** | Low | High — all services affected | Worker uses own pool (max 5 connections). App pool unchanged. Monitor with `pg_stat_activity`. Total < 50% of `max_connections`. |
+| R9 | **Schema migration breaks existing app** | Very Low | High — production down | New tables only — no modifications to existing tables. Test migration on staging first. Rollback = drop new tables. |
+| R10 | **Telegram account banned** | Medium | Medium — one account lost | Use multiple accounts across channels. Don't exceed rate limits. Implement per-account disable toggle. Monitor in admin UI. |
+| R11 | **Multipart ZIP reassembly failure** | Low | Low — single ZIP skipped | Log error, mark run as partial. Don't block ingestion of other ZIPs. Admin can investigate specific failures. 
| +| R12 | **Database grows too large** | Low (long-term) | Medium | `PackageFile` is metadata only (~200 bytes/row). 100k ZIPs × 100 files = 10M rows ≈ 2GB. Add retention policy if needed later. | + +### Critical Path Risks (ordered by priority) + +1. **TDLib compilation & runtime in Docker** — This is the highest-risk item. TDLib native compilation is complex. Mitigate by using pre-built binaries from `https://github.com/nicknisi/tdlib-builds` or building in a dedicated multi-stage Dockerfile. + +2. **Telegram rate limits** — Primary bottleneck for ingestion throughput. Cannot be eliminated, only managed. Design must be rate-limit-aware from day one. + +3. **TDLib session persistence** — Losing session state means manual re-authentication (phone code). Volume mount is critical and must survive container rebuilds. + +--- + +## 10. Assumptions & Open Questions + +### Assumptions Made + +| # | Assumption | Impact if Wrong | +|---|-----------|----------------| +| A1 | One Telegram account maps to multiple source channels | Schema supports this via `AccountChannelMap` | +| A2 | One shared destination channel for all re-uploads | If multiple destinations needed, `Package.destChannelId` already supports it | +| A3 | ZIP files are single-message attachments (not split across messages by Telegram) | Multipart detection logic may need adjustment | +| A4 | Worker runs 24/7 in Docker alongside the app | If serverless/on-demand execution needed, architecture changes | +| A5 | All Telegram accounts share the same `apiId`/`apiHash` | If not, the schema already supports per-account credentials | +| A6 | No need for real-time notifications (webhooks) on new ZIPs | If needed, add a webhook/event system later | +| A7 | Admin users manage Telegram config; regular users only query ZIPs | Role-based access matches existing `ADMIN`/`USER` enum | + +### Decisions (Confirmed) + +| # | Question | Decision | Implications | +|---|----------|----------|-------------| +| Q1 | Prisma schema sharing | **Shared** — single `prisma/schema.prisma` | Worker copies `prisma/` at build time. One migration path. Worker runs `prisma generate` in its own Dockerfile. | +| Q2 | TDLib authentication flow | **Admin UI** — Next.js page for phone code entry | Requires an `auth_state` column on `TelegramAccount` + a polling/SSE mechanism. Worker watches DB for auth completion. New page at `src/app/(app)/telegram/accounts/[id]/auth/`. | +| Q3 | Last processed message tracking | **In DB** — `lastProcessedMessageId BigInt?` on `AccountChannelMap` | Worker updates after each channel scan. Allows manual reset for re-processing. Survives TDLib session loss. | +| Q4 | API key management | **Env var for MVP** — single `TELEGRAM_API_KEY` in `.env` | API routes check `X-API-Key` header against env var. No DB table needed yet. Upgrade to dynamic keys later if needed. | +| Q5 | File search strategy | **Prisma `contains`** (case-insensitive `ILIKE`) | No extra extensions or indexes. Revisit with `pg_trgm` GIN index if search exceeds ~100k `PackageFile` rows. | +| Q6 | Repack strategy for >2GB | **Byte-level split** — raw file splitting into ≤2GB chunks | No decompression. Fast. Uses `fs.createReadStream` with `start`/`end` options. Parts named `filename.zip.001`, `.002`, etc. | +| Q7 | Worker package structure | **Standalone** `package.json` | Own `node_modules`, own lockfile. No npm workspace config. Simpler Docker builds. Copies `prisma/` from root at build time. 
| +| Q8 | Archive format support | **ZIP + RAR (full index)** | Both formats supported. RAR metadata via `unrar l -v` binary (no extraction). Worker Dockerfile includes `unrar` package. `ArchiveType` enum on `Package` model. | +| Q9 | Multipart hashing strategy | **Concatenate then hash** | All parts streamed in order through a single SHA-256 hasher. True content-level dedup. Disk must hold all parts simultaneously. Volume is disk-backed (not tmpfs) to avoid RAM pressure. | +| Q10 | Multipart metadata indexing | **Yes, full indexing** | ZIP: read central directory from last part. RAR: `unrar l -v` on first part auto-discovers siblings. Fallback: ingest without file listing if reading fails. | + +--- + +## Summary of Changes to Existing System + +| File/Area | Change Type | Risk | +|-----------|-------------|------| +| `prisma/schema.prisma` | **Add** new models (no modify) | Very Low | +| `prisma/migrations/` | **Add** new migration | Very Low | +| `docker-compose.yml` | **Modify** — add worker service + volumes | Low | +| `docker-compose.dev.yml` | **Modify** — add worker service | Low | +| `src/app/(app)/telegram/` | **Add** new pages | None — new route group | +| `src/app/api/zips/` | **Add** new API routes | None — new routes | +| `src/app/api/ingestion/` | **Add** new API routes | None — new routes | +| `src/lib/telegram/` | **Add** shared types & queries | None — new files | +| `worker/` | **Add** entire new directory | None — isolated process | +| Existing code | **No changes** | Zero risk | + +**Total impact on existing system: Minimal.** All changes are additive. No existing files are modified except `prisma/schema.prisma` (additive models) and `docker-compose.yml` (additive service). diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 6fd65a8..6566bf5 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -19,6 +19,8 @@ services: build: context: . 
dockerfile: worker/Dockerfile + env_file: + - .env.local environment: - DATABASE_URL=postgresql://dragons:stash@db:5432/dragonsstash - WORKER_INTERVAL_MINUTES=5 @@ -26,8 +28,6 @@ services: - TDLIB_STATE_DIR=/data/tdlib - WORKER_MAX_ZIP_SIZE_MB=4096 - LOG_LEVEL=debug - - TELEGRAM_API_ID=${TELEGRAM_API_ID} - - TELEGRAM_API_HASH=${TELEGRAM_API_HASH} volumes: - tdlib_dev_state:/data/tdlib - tmp_dev_zips:/tmp/zips diff --git a/prisma/migrations/20260225095116_channel_fetch_and_global_dest/migration.sql b/prisma/migrations/20260225095116_channel_fetch_and_global_dest/migration.sql new file mode 100644 index 0000000..db5eaf2 --- /dev/null +++ b/prisma/migrations/20260225095116_channel_fetch_and_global_dest/migration.sql @@ -0,0 +1,30 @@ +-- CreateEnum +CREATE TYPE "FetchStatus" AS ENUM ('PENDING', 'IN_PROGRESS', 'COMPLETED', 'FAILED'); + +-- CreateTable +CREATE TABLE "global_settings" ( + "key" VARCHAR(64) NOT NULL, + "value" TEXT NOT NULL, + "updatedAt" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "global_settings_pkey" PRIMARY KEY ("key") +); + +-- CreateTable +CREATE TABLE "channel_fetch_requests" ( + "id" TEXT NOT NULL, + "accountId" TEXT NOT NULL, + "status" "FetchStatus" NOT NULL DEFAULT 'PENDING', + "resultJson" TEXT, + "error" TEXT, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "channel_fetch_requests_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE INDEX "channel_fetch_requests_accountId_status_idx" ON "channel_fetch_requests"("accountId", "status"); + +-- AddForeignKey +ALTER TABLE "channel_fetch_requests" ADD CONSTRAINT "channel_fetch_requests_accountId_fkey" FOREIGN KEY ("accountId") REFERENCES "telegram_accounts"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 66f567b..3705232 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -384,6 +384,13 @@ enum IngestionStatus { CANCELLED } +enum FetchStatus { + PENDING + IN_PROGRESS + COMPLETED + FAILED +} + model TelegramAccount { id String @id @default(cuid()) phone String @unique @@ -397,6 +404,7 @@ model TelegramAccount { channelMaps AccountChannelMap[] ingestionRuns IngestionRun[] + fetchRequests ChannelFetchRequest[] @@index([isActive]) @@map("telegram_accounts") @@ -535,3 +543,26 @@ model TopicProgress { @@index([accountChannelMapId]) @@map("topic_progress") } + +model GlobalSetting { + key String @id @db.VarChar(64) + value String @db.Text + updatedAt DateTime @updatedAt + + @@map("global_settings") +} + +model ChannelFetchRequest { + id String @id @default(cuid()) + accountId String + status FetchStatus @default(PENDING) + resultJson String? @db.Text + error String? 
+ createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + account TelegramAccount @relation(fields: [accountId], references: [id], onDelete: Cascade) + + @@index([accountId, status]) + @@map("channel_fetch_requests") +} diff --git a/src/app/(app)/telegram/_components/account-columns.tsx b/src/app/(app)/telegram/_components/account-columns.tsx index e831e89..eab9a93 100644 --- a/src/app/(app)/telegram/_components/account-columns.tsx +++ b/src/app/(app)/telegram/_components/account-columns.tsx @@ -9,6 +9,7 @@ import { Link2, Play, KeyRound, + Download, } from "lucide-react"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; @@ -36,6 +37,7 @@ interface AccountColumnsProps { onViewLinks: (id: string) => void; onTriggerSync: (id: string) => void; onEnterCode: (account: AccountRow) => void; + onFetchChannels: (id: string) => void; } export function getAccountColumns({ @@ -45,6 +47,7 @@ export function getAccountColumns({ onViewLinks, onTriggerSync, onEnterCode, + onFetchChannels, }: AccountColumnsProps): ColumnDef[] { return [ { @@ -157,6 +160,13 @@ export function getAccountColumns({ Manage Channels + onFetchChannels(row.original.id)} + disabled={row.original.authState !== "AUTHENTICATED"} + > + + Fetch Channels + onTriggerSync(row.original.id)}> Sync Now diff --git a/src/app/(app)/telegram/_components/accounts-tab.tsx b/src/app/(app)/telegram/_components/accounts-tab.tsx index 92405e8..a51cd2f 100644 --- a/src/app/(app)/telegram/_components/accounts-tab.tsx +++ b/src/app/(app)/telegram/_components/accounts-tab.tsx @@ -1,12 +1,14 @@ "use client"; -import { useState, useTransition } from "react"; +import { useState, useEffect, useTransition } from "react"; +import { useRouter } from "next/navigation"; import { Plus, Play } from "lucide-react"; import { toast } from "sonner"; import { getAccountColumns } from "./account-columns"; import { AccountModal } from "./account-modal"; import { AccountLinksDrawer } from "./account-links-drawer"; import { AuthCodeDialog } from "./auth-code-dialog"; +import { ChannelPickerDialog } from "./channel-picker-dialog"; import { deleteAccount, toggleAccountActive, triggerIngestion } from "../actions"; import { DataTable } from "@/components/shared/data-table"; import { DeleteDialog } from "@/components/shared/delete-dialog"; @@ -19,12 +21,27 @@ interface AccountsTabProps { } export function AccountsTab({ accounts }: AccountsTabProps) { + const router = useRouter(); const [isPending, startTransition] = useTransition(); const [modalOpen, setModalOpen] = useState(false); const [editAccount, setEditAccount] = useState(); const [deleteId, setDeleteId] = useState(null); const [linksAccountId, setLinksAccountId] = useState(null); const [authCodeAccount, setAuthCodeAccount] = useState(null); + const [fetchChannelsAccountId, setFetchChannelsAccountId] = useState(null); + + // Auto-refresh when accounts are in transitional states (PENDING, AWAITING_CODE, AWAITING_PASSWORD) + const hasTransitional = accounts.some( + (a) => a.authState === "PENDING" || a.authState === "AWAITING_CODE" || a.authState === "AWAITING_PASSWORD" + ); + + useEffect(() => { + if (!hasTransitional) return; + const interval = setInterval(() => { + router.refresh(); + }, 3_000); + return () => clearInterval(interval); + }, [hasTransitional, router]); const columns = getAccountColumns({ onEdit: (account) => { @@ -48,6 +65,7 @@ export function AccountsTab({ accounts }: AccountsTabProps) { else toast.error(result.error); }); }, + onFetchChannels: 
(id) => setFetchChannelsAccountId(id), }); const { table } = useDataTable({ @@ -135,6 +153,14 @@ export function AccountsTab({ accounts }: AccountsTabProps) { if (!open) setAuthCodeAccount(null); }} /> + + { + if (!open) setFetchChannelsAccountId(null); + }} + /> ); } diff --git a/src/app/(app)/telegram/_components/channel-columns.tsx b/src/app/(app)/telegram/_components/channel-columns.tsx index 04f69ad..98bdfca 100644 --- a/src/app/(app)/telegram/_components/channel-columns.tsx +++ b/src/app/(app)/telegram/_components/channel-columns.tsx @@ -3,9 +3,10 @@ import { type ColumnDef } from "@tanstack/react-table"; import { MoreHorizontal, - Pencil, Trash2, Power, + ArrowDownToLine, + ArrowUpFromLine, } from "lucide-react"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; @@ -19,15 +20,15 @@ import { import type { ChannelRow } from "@/lib/telegram/admin-queries"; interface ChannelColumnsProps { - onEdit: (channel: ChannelRow) => void; onToggleActive: (id: string) => void; onDelete: (id: string) => void; + onSetType: (id: string, type: "SOURCE" | "DESTINATION") => void; } export function getChannelColumns({ - onEdit, onToggleActive, onDelete, + onSetType, }: ChannelColumnsProps): ColumnDef[] { return [ { @@ -105,10 +106,21 @@ export function getChannelColumns({ - onEdit(row.original)}> - - Edit - + {row.original.type === "SOURCE" ? ( + onSetType(row.original.id, "DESTINATION")} + > + + Set as Destination + + ) : ( + onSetType(row.original.id, "SOURCE")} + > + + Set as Source + + )} onToggleActive(row.original.id)} > diff --git a/src/app/(app)/telegram/_components/channel-picker-dialog.tsx b/src/app/(app)/telegram/_components/channel-picker-dialog.tsx new file mode 100644 index 0000000..eee8d58 --- /dev/null +++ b/src/app/(app)/telegram/_components/channel-picker-dialog.tsx @@ -0,0 +1,337 @@ +"use client"; + +import { useState, useEffect, useCallback, useTransition } from "react"; +import { Loader2, Search, CheckSquare, Square, Radio } from "lucide-react"; +import { toast } from "sonner"; +import { saveChannelSelections } from "../actions"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, + DialogFooter, +} from "@/components/ui/dialog"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Badge } from "@/components/ui/badge"; +import { Checkbox } from "@/components/ui/checkbox"; +import { ScrollArea } from "@/components/ui/scroll-area"; + +interface FetchedChannel { + chatId: string; + title: string; + type: "channel" | "supergroup"; + isForum: boolean; + memberCount: number | null; + alreadyLinked: boolean; + existingChannelId: string | null; +} + +interface ChannelPickerDialogProps { + accountId: string | null; + open: boolean; + onOpenChange: (open: boolean) => void; +} + +type FetchState = + | { phase: "idle" } + | { phase: "fetching"; requestId?: string } + | { phase: "loaded"; channels: FetchedChannel[] } + | { phase: "error"; message: string }; + +export function ChannelPickerDialog({ + accountId, + open, + onOpenChange, +}: ChannelPickerDialogProps) { + const [isPending, startTransition] = useTransition(); + const [fetchState, setFetchState] = useState({ phase: "idle" }); + const [selected, setSelected] = useState>(new Set()); + const [search, setSearch] = useState(""); + + // Start fetching when dialog opens + useEffect(() => { + if (!open || !accountId) { + setFetchState({ phase: "idle" }); + setSelected(new Set()); + setSearch(""); + return; + 
} + + let mounted = true; + + const startFetch = async () => { + setFetchState({ phase: "fetching" }); + + try { + // POST to create a fetch request + const postRes = await fetch( + `/api/telegram/accounts/${accountId}/fetch-channels`, + { method: "POST" } + ); + + if (!postRes.ok) { + let message = `Server error (${postRes.status})`; + try { + const err = await postRes.json(); + message = err.error || message; + } catch { + // response wasn't JSON + } + if (mounted) setFetchState({ phase: "error", message }); + return; + } + + const { requestId } = await postRes.json(); + if (mounted) setFetchState({ phase: "fetching", requestId }); + + // Poll for result + const poll = async () => { + for (let i = 0; i < 30; i++) { + await new Promise((r) => setTimeout(r, 2000)); + if (!mounted) return; + + const getRes = await fetch( + `/api/telegram/accounts/${accountId}/fetch-channels?requestId=${requestId}` + ); + if (!getRes.ok) continue; + + const data = await getRes.json(); + if (data.status === "COMPLETED") { + if (mounted) { + // Filter out already-linked channels + const available = (data.channels as FetchedChannel[]).filter( + (ch) => !ch.alreadyLinked + ); + setFetchState({ phase: "loaded", channels: available }); + } + return; + } else if (data.status === "FAILED") { + if (mounted) { + setFetchState({ + phase: "error", + message: data.error || "Fetch failed", + }); + } + return; + } + } + + if (mounted) { + setFetchState({ phase: "error", message: "Fetch timed out" }); + } + }; + + await poll(); + } catch (err) { + if (mounted) { + const message = err instanceof Error ? err.message : "Network error"; + setFetchState({ phase: "error", message: `Network error: ${message}` }); + } + } + }; + + startFetch(); + return () => { mounted = false; }; + }, [open, accountId]); + + const channels = + fetchState.phase === "loaded" ? fetchState.channels : []; + + const filteredChannels = channels.filter((ch) => + ch.title.toLowerCase().includes(search.toLowerCase()) + ); + + const toggleChannel = (chatId: string) => { + setSelected((prev) => { + const next = new Set(prev); + if (next.has(chatId)) { + next.delete(chatId); + } else { + next.add(chatId); + } + return next; + }); + }; + + const selectAll = () => { + setSelected(new Set(filteredChannels.map((ch) => ch.chatId))); + }; + + const deselectAll = () => { + setSelected(new Set()); + }; + + const handleSave = () => { + if (!accountId || selected.size === 0) return; + + const selectedChannels = channels + .filter((ch) => selected.has(ch.chatId)) + .map((ch) => ({ + telegramId: ch.chatId, + title: ch.title, + isForum: ch.isForum, + })); + + startTransition(async () => { + const result = await saveChannelSelections(accountId, selectedChannels); + if (result.success) { + toast.success(`${selectedChannels.length} channel(s) linked as source`); + onOpenChange(false); + } else { + toast.error(result.error); + } + }); + }; + + return ( + + + + Select Source Channels + + Choose which channels to scan for archives. Already-linked channels + are hidden. + + + + {fetchState.phase === "fetching" && ( +
+ +

+ Fetching channels from Telegram... +

+

+ This may take a few seconds +

+
+ )} + + {fetchState.phase === "error" && ( +
+

{fetchState.message}

+ +
+ )} + + {fetchState.phase === "loaded" && ( + <> + {channels.length === 0 ? ( +
+

+ All channels are already linked to this account. +

+
+ ) : ( + <> + {/* Search + bulk actions */} +
+
+ + setSearch(e.target.value)} + className="pl-9" + /> +
+ + +
+ +

+ {filteredChannels.length} channel(s) available + {selected.size > 0 && ` \u2014 ${selected.size} selected`} +

+ + {/* Channel list */} + +
+ {filteredChannels.map((ch) => ( + + ))} +
+
+ + )} + + )} + + + + + +
+
+ ); +} diff --git a/src/app/(app)/telegram/_components/channels-tab.tsx b/src/app/(app)/telegram/_components/channels-tab.tsx index 69deb6a..0c12b23 100644 --- a/src/app/(app)/telegram/_components/channels-tab.tsx +++ b/src/app/(app)/telegram/_components/channels-tab.tsx @@ -1,32 +1,29 @@ "use client"; import { useState, useTransition } from "react"; -import { Plus } from "lucide-react"; import { toast } from "sonner"; import { getChannelColumns } from "./channel-columns"; -import { ChannelModal } from "./channel-modal"; -import { deleteChannel, toggleChannelActive } from "../actions"; +import { DestinationCard } from "./destination-card"; +import { + deleteChannel, + toggleChannelActive, + setChannelType, +} from "../actions"; import { DataTable } from "@/components/shared/data-table"; import { DeleteDialog } from "@/components/shared/delete-dialog"; -import { Button } from "@/components/ui/button"; -import type { ChannelRow } from "@/lib/telegram/admin-queries"; +import type { ChannelRow, GlobalDestination } from "@/lib/telegram/admin-queries"; import { useDataTable } from "@/hooks/use-data-table"; interface ChannelsTabProps { channels: ChannelRow[]; + globalDestination: GlobalDestination; } -export function ChannelsTab({ channels }: ChannelsTabProps) { +export function ChannelsTab({ channels, globalDestination }: ChannelsTabProps) { const [isPending, startTransition] = useTransition(); - const [modalOpen, setModalOpen] = useState(false); - const [editChannel, setEditChannel] = useState(); const [deleteId, setDeleteId] = useState(null); const columns = getChannelColumns({ - onEdit: (channel) => { - setEditChannel(channel); - setModalOpen(true); - }, onToggleActive: (id) => { startTransition(async () => { const result = await toggleChannelActive(id); @@ -35,6 +32,13 @@ export function ChannelsTab({ channels }: ChannelsTabProps) { }); }, onDelete: (id) => setDeleteId(id), + onSetType: (id, type) => { + startTransition(async () => { + const result = await setChannelType(id, type); + if (result.success) toast.success(`Channel set as ${type.toLowerCase()}`); + else toast.error(result.error); + }); + }, }); const { table } = useDataTable({ @@ -58,30 +62,17 @@ export function ChannelsTab({ channels }: ChannelsTabProps) { return (
-
- -
+ + + {channels.length > 0 && ( +

+ Source channels are added per-account via the "Fetch Channels" button on the Accounts tab. +

+ )} - - { - setModalOpen(open); - if (!open) setEditChannel(undefined); - }} - channel={editChannel} + emptyMessage="No channels yet. Use "Fetch Channels" on an account to discover and add source channels." /> ({ phase: "idle" }); + + // Poll for worker result when creating + useEffect(() => { + if (createState.phase !== "creating" || !createState.requestId) return; + + let mounted = true; + const requestId = createState.requestId; + + const poll = async () => { + for (let i = 0; i < 60; i++) { + await new Promise((r) => setTimeout(r, 2000)); + if (!mounted) return; + + try { + const res = await fetch( + `/api/telegram/worker-request?requestId=${requestId}` + ); + if (!res.ok) continue; + + const data = await res.json(); + if (data.status === "COMPLETED" && data.result) { + if (mounted) { + setCreateState({ + phase: "done", + title: data.result.title, + telegramId: data.result.telegramId, + }); + toast.success(`Telegram group "${data.result.title}" created and set as destination!`); + setCreateOpen(false); + // Refresh the page to show the new destination + window.location.reload(); + } + return; + } else if (data.status === "FAILED") { + if (mounted) { + setCreateState({ + phase: "error", + message: data.error || "Worker failed to create the group", + }); + } + return; + } + } catch { + // Network blip — keep polling + } + } + + if (mounted) { + setCreateState({ phase: "error", message: "Timed out waiting for the worker" }); + } + }; + + poll(); + return () => { mounted = false; }; + }, [createState]); + + const handleCreate = () => { + if (!title.trim()) return; + + startTransition(async () => { + const result = await createDestinationViaWorker(title.trim()); + if (result.success) { + setCreateState({ phase: "creating", requestId: result.data.requestId }); + } else { + setCreateState({ phase: "error", message: result.error ?? "Unknown error" }); + } + }); + }; + + const handleOpenChange = (open: boolean) => { + setCreateOpen(open); + if (!open) { + // Reset state when closing (unless actively creating) + if (createState.phase !== "creating") { + setCreateState({ phase: "idle" }); + } + } + }; + + if (!destination) { + return ( + <> + + +
+ +
+

+ No destination channel configured +

+

+ Create a private Telegram group that all accounts will write + archives to. Requires at least one authenticated account. +

+
+
+ +
+
+ + + + ); + } + + return ( + <> + + +
+ +
+
+

{destination.title}

+ + DESTINATION + +
+
+ ID: {destination.telegramId} + {destination.inviteLink && ( + + + Invite link active + + )} +
+
+
+ +
+
+ + + + ); +} + +function CreateDestinationDialog({ + open, + onOpenChange, + title, + setTitle, + onSubmit, + createState, + isPending, +}: { + open: boolean; + onOpenChange: (open: boolean) => void; + title: string; + setTitle: (v: string) => void; + onSubmit: () => void; + createState: CreateState; + isPending: boolean; +}) { + const isCreating = createState.phase === "creating"; + + return ( + + + + Create Destination Channel + + A private Telegram group will be created automatically using one of + your authenticated accounts. All accounts will write archives here. + + + + {isCreating ? ( +
+ +

+ Creating Telegram group... +

+

+ This may take a few seconds +

+
+ ) : ( +
+ {createState.phase === "error" && ( +
+

{createState.message}

+
+ )} + +
+ + setTitle(e.target.value)} + /> +

+ This will be the name of the Telegram group. You can rename it later in Telegram. +

+
+
+ )} + + + + + +
+
+ ); +} diff --git a/src/app/(app)/telegram/_components/telegram-admin.tsx b/src/app/(app)/telegram/_components/telegram-admin.tsx index 8923caf..5312dc0 100644 --- a/src/app/(app)/telegram/_components/telegram-admin.tsx +++ b/src/app/(app)/telegram/_components/telegram-admin.tsx @@ -4,14 +4,23 @@ import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; import { PageHeader } from "@/components/shared/page-header"; import { AccountsTab } from "./accounts-tab"; import { ChannelsTab } from "./channels-tab"; -import type { AccountRow, ChannelRow } from "@/lib/telegram/admin-queries"; +import { WorkerStatusPanel } from "./worker-status-panel"; +import type { AccountRow, ChannelRow, GlobalDestination } from "@/lib/telegram/admin-queries"; +import type { IngestionAccountStatus } from "@/lib/telegram/types"; interface TelegramAdminProps { accounts: AccountRow[]; channels: ChannelRow[]; + ingestionStatus: IngestionAccountStatus[]; + globalDestination: GlobalDestination; } -export function TelegramAdmin({ accounts, channels }: TelegramAdminProps) { +export function TelegramAdmin({ + accounts, + channels, + ingestionStatus, + globalDestination, +}: TelegramAdminProps) { return (
+ + @@ -33,7 +44,7 @@ export function TelegramAdmin({ accounts, channels }: TelegramAdminProps) { - +
diff --git a/src/app/(app)/telegram/_components/worker-status-panel.tsx b/src/app/(app)/telegram/_components/worker-status-panel.tsx new file mode 100644 index 0000000..282ebef --- /dev/null +++ b/src/app/(app)/telegram/_components/worker-status-panel.tsx @@ -0,0 +1,340 @@ +"use client"; + +import { useEffect, useState, useCallback } from "react"; +import { + Loader2, + CheckCircle2, + XCircle, + Clock, + Radio, + AlertTriangle, + RefreshCw, +} from "lucide-react"; +import { Card, CardContent } from "@/components/ui/card"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { cn } from "@/lib/utils"; +import type { IngestionAccountStatus } from "@/lib/telegram/types"; + +interface WorkerStatusPanelProps { + initialStatus: IngestionAccountStatus[]; +} + +const AUTH_STATE_CONFIG: Record< + string, + { label: string; color: string; icon: string } +> = { + PENDING: { label: "Pending", color: "text-yellow-500", icon: "clock" }, + AWAITING_CODE: { + label: "Awaiting Code", + color: "text-orange-500", + icon: "alert", + }, + AWAITING_PASSWORD: { + label: "Awaiting Password", + color: "text-orange-500", + icon: "alert", + }, + AUTHENTICATED: { label: "Connected", color: "text-emerald-500", icon: "check" }, + EXPIRED: { label: "Expired", color: "text-red-500", icon: "x" }, +}; + +export function WorkerStatusPanel({ initialStatus }: WorkerStatusPanelProps) { + const [accounts, setAccounts] = useState(initialStatus); + const [error, setError] = useState(false); + const [nextRunCountdown, setNextRunCountdown] = useState(null); + + // Find active run + const activeRun = accounts.find((a) => a.currentRun); + const isRunning = !!activeRun; + + // Poll for status + useEffect(() => { + let timer: ReturnType; + let mounted = true; + + const poll = async () => { + try { + const res = await fetch("/api/ingestion/status"); + if (!res.ok) throw new Error("fetch failed"); + const data = await res.json(); + if (mounted) { + setAccounts(data.accounts ?? []); + setError(false); + } + } catch { + if (mounted) setError(true); + } + if (mounted) { + const interval = accounts.some((a) => a.currentRun) ? 2_000 : 10_000; + timer = setTimeout(poll, interval); + } + }; + + timer = setTimeout(poll, 2_000); + return () => { + mounted = false; + clearTimeout(timer); + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [isRunning]); + + // Countdown timer to next run + useEffect(() => { + if (isRunning) { + setNextRunCountdown(null); + return; + } + + // Estimate next run based on last run finish time + interval (5 min + up to 5 min jitter) + const lastFinished = accounts + .filter((a) => a.lastRun?.finishedAt) + .map((a) => new Date(a.lastRun!.finishedAt!).getTime()) + .sort((a, b) => b - a)[0]; + + if (!lastFinished) { + setNextRunCountdown(null); + return; + } + + const intervalMs = 5 * 60 * 1000; // 5 min base + const estimatedNext = lastFinished + intervalMs; + + const tick = () => { + const remaining = estimatedNext - Date.now(); + if (remaining <= 0) { + setNextRunCountdown("any moment..."); + } else { + const mins = Math.floor(remaining / 60_000); + const secs = Math.floor((remaining % 60_000) / 1_000); + setNextRunCountdown( + mins > 0 ? `~${mins}m ${secs}s` : `~${secs}s` + ); + } + }; + + tick(); + const interval = setInterval(tick, 1_000); + return () => clearInterval(interval); + }, [isRunning, accounts]); + + if (accounts.length === 0 && !error) { + return ( + + + +
+

No accounts configured

+

+ Add a Telegram account below to get started. You'll need your + phone number and the API credentials in your .env.local file. +

+
+
+
+ ); + } + + return ( + + + {/* Account status row */} +
+ {accounts.map((account) => { + const config = AUTH_STATE_CONFIG[account.authState] ?? AUTH_STATE_CONFIG.PENDING; + return ( +
+ {config.icon === "check" && ( + + )} + {config.icon === "clock" && ( + + )} + {config.icon === "alert" && ( + + )} + {config.icon === "x" && ( + + )} + + {account.displayName || account.phone} + + + {config.label} + +
+ ); + })} +
+ + {/* Divider */} +
+ + {/* Worker activity */} + {error ? ( +
+ + Could not reach worker status +
+ ) : isRunning && activeRun?.currentRun ? ( + + ) : ( + + )} + + + ); +} + +function RunningStatus({ + run, +}: { + run: NonNullable; +}) { + return ( +
+
+ + + {run.currentActivity ?? "Working..."} + +
+ + {/* Progress bar for downloads */} + {run.downloadPercent != null && run.downloadPercent > 0 && ( +
+
+
+
+ + {run.downloadPercent}% + +
+ )} + + {/* Stats line */} +
+ {run.currentChannel && ( + + Channel: {run.currentChannel} + + )} + {run.totalFiles != null && run.currentFileNum != null && ( + + Archive{" "} + + {run.currentFileNum}/{run.totalFiles} + + + )} + {run.zipsIngested > 0 && ( + + {run.zipsIngested} ingested + + )} + {run.zipsDuplicate > 0 && ( + + {run.zipsDuplicate} skipped + + )} +
+
+ ); +} + +function IdleStatus({ + accounts, + nextRunCountdown, +}: { + accounts: IngestionAccountStatus[]; + nextRunCountdown: string | null; +}) { + const lastRun = accounts + .filter((a) => a.lastRun) + .sort( + (a, b) => + new Date(b.lastRun!.finishedAt ?? b.lastRun!.startedAt).getTime() - + new Date(a.lastRun!.finishedAt ?? a.lastRun!.startedAt).getTime() + )[0]?.lastRun; + + const hasAuthenticated = accounts.some( + (a) => a.authState === "AUTHENTICATED" + ); + + return ( +
+
+ {lastRun ? ( + <> + {lastRun.status === "FAILED" ? ( + + ) : ( + + )} + + {lastRun.status === "FAILED" + ? `Last sync failed ${getTimeAgo(lastRun.finishedAt ?? lastRun.startedAt)}` + : `Last sync ${getTimeAgo(lastRun.finishedAt ?? lastRun.startedAt)} — ${lastRun.zipsIngested} new, ${lastRun.zipsDuplicate} skipped, ${lastRun.messagesScanned} messages`} + + + ) : hasAuthenticated ? ( + <> + + + Waiting for first sync... + + + ) : accounts.some((a) => a.authState === "PENDING") ? ( + <> + + + Pending account detected — worker will send an SMS code on the next cycle. Please wait... + + + ) : accounts.some( + (a) => a.authState === "AWAITING_CODE" || a.authState === "AWAITING_PASSWORD" + ) ? ( + <> + + + Waiting for you to enter the auth code — check the Accounts table below + + + ) : ( + <> + + + Worker idle — authenticate an account to start syncing + + + )} +
+ + {nextRunCountdown && hasAuthenticated && ( +
+ + + Next: {nextRunCountdown} + +
+ )} +
+ ); +} + +function getTimeAgo(dateStr: string): string { + const diff = Date.now() - new Date(dateStr).getTime(); + const mins = Math.floor(diff / 60_000); + if (mins < 1) return "just now"; + if (mins < 60) return `${mins}m ago`; + const hours = Math.floor(mins / 60); + if (hours < 24) return `${hours}h ago`; + const days = Math.floor(hours / 24); + return `${days}d ago`; +} diff --git a/src/app/(app)/telegram/actions.ts b/src/app/(app)/telegram/actions.ts index 65984af..18b0edf 100644 --- a/src/app/(app)/telegram/actions.ts +++ b/src/app/(app)/telegram/actions.ts @@ -258,6 +258,44 @@ export async function deleteChannel(id: string): Promise { } } +export async function setChannelType( + id: string, + type: "SOURCE" | "DESTINATION" +): Promise { + const admin = await requireAdmin(); + if (!admin.success) return admin; + + const existing = await prisma.telegramChannel.findUnique({ where: { id } }); + if (!existing) return { success: false, error: "Channel not found" }; + + try { + await prisma.telegramChannel.update({ + where: { id }, + data: { type }, + }); + revalidatePath(REVALIDATE_PATH); + return { success: true, data: undefined }; + } catch { + return { success: false, error: "Failed to update channel type" }; + } +} + +export async function triggerChannelSync(): Promise { + const admin = await requireAdmin(); + if (!admin.success) return admin; + + try { + // Signal the worker to do a channel sync via pg_notify + await prisma.$queryRawUnsafe( + `SELECT pg_notify('channel_sync', 'requested')` + ); + revalidatePath(REVALIDATE_PATH); + return { success: true, data: undefined }; + } catch { + return { success: false, error: "Failed to trigger channel sync" }; + } +} + // ── Account-Channel link actions ── export async function linkChannel( @@ -317,24 +355,42 @@ export async function triggerIngestion( if (!admin.success) return admin; try { - const res = await fetch( - `${process.env.NEXT_PUBLIC_APP_URL || "http://localhost:3000"}/api/ingestion/trigger`, - { - method: "POST", - headers: { - "Content-Type": "application/json", - "X-API-Key": process.env.INGESTION_API_KEY || "", - }, - body: JSON.stringify({ accountId }), - } - ); + // Find eligible accounts + const where: { isActive: boolean; authState: "AUTHENTICATED"; id?: string } = { + isActive: true, + authState: "AUTHENTICATED", + }; + if (accountId) where.id = accountId; - if (!res.ok) { - const data = await res.json().catch(() => ({})); - return { - success: false, - error: (data as { error?: string }).error || "Failed to trigger ingestion", - }; + const accounts = await prisma.telegramAccount.findMany({ + where, + select: { id: true }, + }); + + if (accounts.length === 0) { + return { success: false, error: "No eligible accounts found" }; + } + + // Create ingestion runs — the worker picks these up + for (const account of accounts) { + const existing = await prisma.ingestionRun.findFirst({ + where: { accountId: account.id, status: "RUNNING" }, + }); + if (!existing) { + await prisma.ingestionRun.create({ + data: { accountId: account.id, status: "RUNNING" }, + }); + } + } + + // pg_notify for immediate worker pickup + try { + await prisma.$queryRawUnsafe( + `SELECT pg_notify('ingestion_trigger', $1)`, + accounts.map((a) => a.id).join(",") + ); + } catch { + // Best-effort } revalidatePath(REVALIDATE_PATH); @@ -343,3 +399,227 @@ export async function triggerIngestion( return { success: false, error: "Failed to trigger ingestion" }; } } + +// ── Channel selection (from fetch results) ── + +export async function 
saveChannelSelections( + accountId: string, + channels: { telegramId: string; title: string; isForum: boolean }[] +): Promise { + const admin = await requireAdmin(); + if (!admin.success) return admin; + + const existing = await prisma.telegramAccount.findUnique({ + where: { id: accountId }, + }); + if (!existing) return { success: false, error: "Account not found" }; + + try { + let linked = 0; + for (const ch of channels) { + // Upsert the channel record + const channel = await prisma.telegramChannel.upsert({ + where: { telegramId: BigInt(ch.telegramId) }, + create: { + telegramId: BigInt(ch.telegramId), + title: ch.title, + type: "SOURCE", + isForum: ch.isForum, + }, + update: { + title: ch.title, + isForum: ch.isForum, + }, + }); + + // Create READER link (idempotent) + try { + await prisma.accountChannelMap.create({ + data: { accountId, channelId: channel.id, role: "READER" }, + }); + linked++; + } catch (err: unknown) { + // Unique constraint = already linked, that's fine + if (!(err instanceof Error && err.message.includes("Unique constraint"))) { + throw err; + } + } + } + + revalidatePath(REVALIDATE_PATH); + return { success: true, data: undefined }; + } catch { + return { success: false, error: "Failed to save channel selections" }; + } +} + +// ── Global destination channel ── + +export async function setGlobalDestination( + channelId: string +): Promise { + const admin = await requireAdmin(); + if (!admin.success) return admin; + + const channel = await prisma.telegramChannel.findUnique({ + where: { id: channelId }, + }); + if (!channel) return { success: false, error: "Channel not found" }; + + try { + // Set the channel type to DESTINATION + await prisma.telegramChannel.update({ + where: { id: channelId }, + data: { type: "DESTINATION" }, + }); + + // Save as global destination + await prisma.globalSetting.upsert({ + where: { key: "destination_channel_id" }, + create: { key: "destination_channel_id", value: channelId }, + update: { value: channelId }, + }); + + // Auto-create WRITER links for all active authenticated accounts + const accounts = await prisma.telegramAccount.findMany({ + where: { isActive: true, authState: "AUTHENTICATED" }, + select: { id: true }, + }); + + for (const account of accounts) { + try { + await prisma.accountChannelMap.create({ + data: { accountId: account.id, channelId, role: "WRITER" }, + }); + } catch { + // Already linked — ignore + } + } + + // Signal worker to generate invite link + try { + await prisma.$queryRawUnsafe( + `SELECT pg_notify('generate_invite', $1)`, + channelId + ); + } catch { + // Best-effort + } + + revalidatePath(REVALIDATE_PATH); + return { success: true, data: undefined }; + } catch { + return { success: false, error: "Failed to set global destination" }; + } +} + +export async function createDestinationChannel( + telegramId: string, + title: string +): Promise> { + const admin = await requireAdmin(); + if (!admin.success) return admin; + + try { + // Create the channel as DESTINATION + const channel = await prisma.telegramChannel.upsert({ + where: { telegramId: BigInt(telegramId) }, + create: { + telegramId: BigInt(telegramId), + title, + type: "DESTINATION", + }, + update: { + title, + type: "DESTINATION", + }, + }); + + // Set as global destination + await prisma.globalSetting.upsert({ + where: { key: "destination_channel_id" }, + create: { key: "destination_channel_id", value: channel.id }, + update: { value: channel.id }, + }); + + // Auto-create WRITER links for all active authenticated accounts + const accounts = 
await prisma.telegramAccount.findMany({ + where: { isActive: true, authState: "AUTHENTICATED" }, + select: { id: true }, + }); + + for (const account of accounts) { + try { + await prisma.accountChannelMap.create({ + data: { accountId: account.id, channelId: channel.id, role: "WRITER" }, + }); + } catch { + // Already linked + } + } + + // Signal worker to generate invite link + try { + await prisma.$queryRawUnsafe( + `SELECT pg_notify('generate_invite', $1)`, + channel.id + ); + } catch { + // Best-effort + } + + revalidatePath(REVALIDATE_PATH); + return { success: true, data: { id: channel.id } }; + } catch (err: unknown) { + if ( + err instanceof Error && + err.message.includes("Unique constraint failed") + ) { + return { success: false, error: "A channel with this Telegram ID already exists" }; + } + return { success: false, error: "Failed to create destination channel" }; + } +} + +/** + * Request the worker to create a new Telegram supergroup as the destination. + * Uses ChannelFetchRequest as a generic DB-mediated request with pg_notify. + * Returns the requestId so the UI can poll for completion. + */ +export async function createDestinationViaWorker( + title: string +): Promise> { + const admin = await requireAdmin(); + if (!admin.success) return admin; + + if (!title.trim()) return { success: false, error: "Title is required" }; + + try { + // Need at least one authenticated account for TDLib + const hasAccount = await prisma.telegramAccount.findFirst({ + where: { isActive: true, authState: "AUTHENTICATED" }, + select: { id: true }, + }); + if (!hasAccount) { + return { success: false, error: "At least one authenticated account is needed to create a Telegram group" }; + } + + // Create a fetch request to track progress (reusing the model as a generic worker request) + const fetchRequest = await prisma.channelFetchRequest.create({ + data: { + accountId: hasAccount.id, + status: "PENDING", + }, + }); + + // Signal worker via pg_notify + await prisma.$queryRawUnsafe( + `SELECT pg_notify('create_destination', $1)`, + JSON.stringify({ requestId: fetchRequest.id, title: title.trim() }) + ); + + return { success: true, data: { requestId: fetchRequest.id } }; + } catch { + return { success: false, error: "Failed to request destination creation" }; + } +} diff --git a/src/app/(app)/telegram/page.tsx b/src/app/(app)/telegram/page.tsx index 097ae89..de35a0d 100644 --- a/src/app/(app)/telegram/page.tsx +++ b/src/app/(app)/telegram/page.tsx @@ -1,6 +1,7 @@ import { auth } from "@/lib/auth"; import { redirect } from "next/navigation"; -import { listAccounts, listChannels } from "@/lib/telegram/admin-queries"; +import { listAccounts, listChannels, getGlobalDestination } from "@/lib/telegram/admin-queries"; +import { getIngestionStatus } from "@/lib/telegram/queries"; import { TelegramAdmin } from "./_components/telegram-admin"; export default async function TelegramPage() { @@ -8,10 +9,19 @@ export default async function TelegramPage() { if (!session?.user?.id) redirect("/login"); if (session.user.role !== "ADMIN") redirect("/dashboard"); - const [accounts, channels] = await Promise.all([ + const [accounts, channels, ingestionStatus, globalDestination] = await Promise.all([ listAccounts(), listChannels(), + getIngestionStatus(), + getGlobalDestination(), ]); - return ; + return ( + + ); } diff --git a/src/app/api/telegram/accounts/[accountId]/fetch-channels/route.ts b/src/app/api/telegram/accounts/[accountId]/fetch-channels/route.ts new file mode 100644 index 0000000..a4d052c --- /dev/null +++ 
b/src/app/api/telegram/accounts/[accountId]/fetch-channels/route.ts @@ -0,0 +1,149 @@ +import { NextResponse } from "next/server"; +import { authenticateApiRequest } from "@/lib/telegram/api-auth"; +import { prisma } from "@/lib/prisma"; + +export const dynamic = "force-dynamic"; + +/** + * POST: Create a channel fetch request for this account. + * Signals the worker via pg_notify to fetch channels from Telegram. + */ +export async function POST( + request: Request, + { params }: { params: Promise<{ accountId: string }> } +) { + const authResult = await authenticateApiRequest(request, true); + if ("error" in authResult) return authResult.error; + + const { accountId } = await params; + + try { + // Verify account exists and is authenticated + const account = await prisma.telegramAccount.findUnique({ + where: { id: accountId }, + select: { id: true, authState: true }, + }); + + if (!account) { + return NextResponse.json({ error: "Account not found" }, { status: 404 }); + } + + if (account.authState !== "AUTHENTICATED") { + return NextResponse.json( + { error: "Account must be authenticated to fetch channels" }, + { status: 400 } + ); + } + + // Check for an existing recent request that's still pending/in-progress + const existing = await prisma.channelFetchRequest.findFirst({ + where: { + accountId, + status: { in: ["PENDING", "IN_PROGRESS"] }, + }, + }); + + if (existing) { + return NextResponse.json({ requestId: existing.id }, { status: 202 }); + } + + // Also check for a recently completed request (within last 30 seconds) + const recent = await prisma.channelFetchRequest.findFirst({ + where: { + accountId, + status: "COMPLETED", + updatedAt: { gte: new Date(Date.now() - 30_000) }, + }, + orderBy: { createdAt: "desc" }, + }); + + if (recent) { + return NextResponse.json({ requestId: recent.id }, { status: 200 }); + } + + // Create a new fetch request + const fetchRequest = await prisma.channelFetchRequest.create({ + data: { accountId, status: "PENDING" }, + }); + + // Signal the worker via pg_notify + try { + await prisma.$queryRawUnsafe( + `SELECT pg_notify('channel_fetch', $1)`, + fetchRequest.id + ); + } catch { + // Best-effort — worker will also pick it up on next poll + } + + return NextResponse.json({ requestId: fetchRequest.id }, { status: 202 }); + } catch (err) { + console.error("fetch-channels POST error:", err); + return NextResponse.json( + { error: "Server error — try restarting the dev server if the schema changed" }, + { status: 500 } + ); + } +} + +/** + * GET: Poll for the result of a channel fetch request. + * Query param: ?requestId=xxx + */ +export async function GET( + request: Request, + { params }: { params: Promise<{ accountId: string }> } +) { + const authResult = await authenticateApiRequest(request, true); + if ("error" in authResult) return authResult.error; + + const { accountId } = await params; + const url = new URL(request.url); + const requestId = url.searchParams.get("requestId"); + + try { + if (!requestId) { + // Return the most recent completed fetch request for this account + const latest = await prisma.channelFetchRequest.findFirst({ + where: { accountId, status: "COMPLETED" }, + orderBy: { createdAt: "desc" }, + }); + + if (!latest) { + return NextResponse.json( + { status: "NOT_FOUND", channels: [] }, + { status: 200 } + ); + } + + return NextResponse.json({ + requestId: latest.id, + status: latest.status, + channels: latest.resultJson ? 
JSON.parse(latest.resultJson) : [], + }); + } + + const fetchRequest = await prisma.channelFetchRequest.findUnique({ + where: { id: requestId }, + }); + + if (!fetchRequest || fetchRequest.accountId !== accountId) { + return NextResponse.json({ error: "Request not found" }, { status: 404 }); + } + + return NextResponse.json({ + requestId: fetchRequest.id, + status: fetchRequest.status, + error: fetchRequest.error, + channels: fetchRequest.status === "COMPLETED" && fetchRequest.resultJson + ? JSON.parse(fetchRequest.resultJson) + : [], + }); + } catch (err) { + console.error("fetch-channels GET error:", err); + return NextResponse.json( + { error: "Server error — try restarting the dev server if the schema changed" }, + { status: 500 } + ); + } +} diff --git a/src/app/api/telegram/worker-request/route.ts b/src/app/api/telegram/worker-request/route.ts new file mode 100644 index 0000000..0d0df74 --- /dev/null +++ b/src/app/api/telegram/worker-request/route.ts @@ -0,0 +1,41 @@ +import { NextResponse } from "next/server"; +import { authenticateApiRequest } from "@/lib/telegram/api-auth"; +import { prisma } from "@/lib/prisma"; + +export const dynamic = "force-dynamic"; + +/** + * GET: Poll for the result of a worker request (ChannelFetchRequest used as generic request). + * Query param: ?requestId=xxx + */ +export async function GET(request: Request) { + const authResult = await authenticateApiRequest(request, true); + if ("error" in authResult) return authResult.error; + + const url = new URL(request.url); + const requestId = url.searchParams.get("requestId"); + + if (!requestId) { + return NextResponse.json( + { error: "requestId is required" }, + { status: 400 } + ); + } + + const fetchRequest = await prisma.channelFetchRequest.findUnique({ + where: { id: requestId }, + }); + + if (!fetchRequest) { + return NextResponse.json({ error: "Request not found" }, { status: 404 }); + } + + return NextResponse.json({ + requestId: fetchRequest.id, + status: fetchRequest.status, + error: fetchRequest.error, + result: fetchRequest.status === "COMPLETED" && fetchRequest.resultJson + ? JSON.parse(fetchRequest.resultJson) + : null, + }); +} diff --git a/src/lib/telegram/admin-queries.ts b/src/lib/telegram/admin-queries.ts index 15e68fd..bed517e 100644 --- a/src/lib/telegram/admin-queries.ts +++ b/src/lib/telegram/admin-queries.ts @@ -80,6 +80,42 @@ export type AccountChannelLinkRow = Awaited< ReturnType >[number]; +// ── Global destination ── + +export async function getGlobalDestination() { + try { + const setting = await prisma.globalSetting.findUnique({ + where: { key: "destination_channel_id" }, + }); + if (!setting) return null; + + const channel = await prisma.telegramChannel.findUnique({ + where: { id: setting.value }, + select: { id: true, title: true, telegramId: true, isActive: true }, + }); + + if (!channel) return null; + + // Also get the invite link if it exists + const inviteSetting = await prisma.globalSetting.findUnique({ + where: { key: "destination_invite_link" }, + }); + + return { + id: channel.id, + title: channel.title, + telegramId: channel.telegramId.toString(), + isActive: channel.isActive, + inviteLink: inviteSetting?.value ?? 
null, + }; + } catch (error) { + console.error("Failed to fetch global destination (restart dev server if schema changed):", error); + return null; + } +} + +export type GlobalDestination = Awaited>; + export async function getUnlinkedChannels(accountId: string) { const linked = await prisma.accountChannelMap.findMany({ where: { accountId }, diff --git a/telegram_test.html b/telegram_test.html new file mode 100644 index 0000000..43cbd38 --- /dev/null +++ b/telegram_test.html @@ -0,0 +1,25 @@ +Dragon's Stash
\ No newline at end of file diff --git a/worker/src/archive/multipart.ts b/worker/src/archive/multipart.ts index 055ea23..6af47e0 100644 --- a/worker/src/archive/multipart.ts +++ b/worker/src/archive/multipart.ts @@ -57,17 +57,19 @@ export function groupArchiveSets(messages: TelegramMessage[]): ArchiveSet[] { // Check if any single entry is the "final part" of a legacy split const allEntries = [...multipartEntries, ...singleEntries]; - // Check time span — skip if parts span too long - const dates = allEntries.map((e) => e.msg.date.getTime()); - const span = Math.max(...dates) - Math.min(...dates); - const maxSpanMs = config.multipartTimeoutHours * 60 * 60 * 1000; + // Check time span — skip if parts span too long (0 = no limit) + if (config.multipartTimeoutHours > 0) { + const dates = allEntries.map((e) => e.msg.date.getTime()); + const span = Math.max(...dates) - Math.min(...dates); + const maxSpanMs = config.multipartTimeoutHours * 60 * 60 * 1000; - if (span > maxSpanMs) { - log.warn( - { baseName, format, span: span / 3600000 }, - "Multipart set spans too long, skipping" - ); - continue; + if (span > maxSpanMs) { + log.warn( + { baseName, format, span: span / 3600000 }, + "Multipart set spans too long, skipping" + ); + continue; + } } // Sort by part number (singles get a very high number so they come last — they're the final part) diff --git a/worker/src/archive/split.ts b/worker/src/archive/split.ts index 136d3e6..17d6401 100644 --- a/worker/src/archive/split.ts +++ b/worker/src/archive/split.ts @@ -46,3 +46,35 @@ export async function byteLevelSplit(filePath: string): Promise { log.info({ filePath, parts: parts.length }, "File split complete"); return parts; } + +/** + * Concatenate multiple files into a single output file by streaming + * each input sequentially. Used for repacking multipart archives + * that have oversized parts (>2GB) before re-splitting. + */ +export async function concatenateFiles( + inputPaths: string[], + outputPath: string +): Promise { + const out = createWriteStream(outputPath); + + for (let i = 0; i < inputPaths.length; i++) { + log.info( + { part: i + 1, total: inputPaths.length, file: path.basename(inputPaths[i]) }, + "Concatenating part" + ); + await pipeline(createReadStream(inputPaths[i]), out, { end: false }); + } + + // Close the output stream + await new Promise((resolve, reject) => { + out.end(() => resolve()); + out.on("error", reject); + }); + + const stats = await stat(outputPath); + log.info( + { outputPath, totalBytes: stats.size, parts: inputPaths.length }, + "Concatenation complete" + ); +} diff --git a/worker/src/archive/zip-reader.ts b/worker/src/archive/zip-reader.ts index 53cbf7d..960ea15 100644 --- a/worker/src/archive/zip-reader.ts +++ b/worker/src/archive/zip-reader.ts @@ -1,5 +1,7 @@ import yauzl from "yauzl"; +import { open as fsOpen, stat as fsStat } from "fs/promises"; import path from "path"; +import { Readable } from "stream"; import { childLogger } from "../util/logger.js"; const log = childLogger("zip-reader"); @@ -15,20 +17,28 @@ export interface FileEntry { /** * Read the central directory of a ZIP file without extracting any contents. - * For multipart ZIPs, pass the paths sorted by part order. - * We attempt to read from the last part first (central directory is at the end). + * For multipart ZIPs (.zip.001, .zip.002 etc.), uses a custom random-access + * reader that spans all parts seamlessly so yauzl can find the central + * directory at the end of the combined data. 
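+ * On read failure this resolves with an empty (or partial) listing instead
+ * of throwing, so ingestion can fall back to storing the package without a
+ * file listing.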
*/ export async function readZipCentralDirectory( filePaths: string[] ): Promise { - // The central directory lives at the end of the last file - const targetFile = filePaths[filePaths.length - 1]; + if (filePaths.length === 1) { + return readSingleZip(filePaths[0]); + } - return new Promise((resolve, reject) => { + // Multipart: use a spanning random-access reader + return readMultipartZip(filePaths); +} + +/** Read a single (non-split) ZIP file. */ +function readSingleZip(targetFile: string): Promise { + return new Promise((resolve) => { yauzl.open(targetFile, { lazyEntries: true, autoClose: true }, (err, zipFile) => { if (err) { log.warn({ err, file: targetFile }, "Failed to open ZIP for reading"); - resolve([]); // Fallback: return empty on error + resolve([]); return; } @@ -36,13 +46,12 @@ export async function readZipCentralDirectory( zipFile.readEntry(); zipFile.on("entry", (entry: yauzl.Entry) => { - // Skip directories if (!entry.fileName.endsWith("/")) { const ext = path.extname(entry.fileName).toLowerCase(); entries.push({ path: entry.fileName, fileName: path.basename(entry.fileName), - extension: ext ? ext.slice(1) : null, // Remove leading dot + extension: ext ? ext.slice(1) : null, compressedSize: BigInt(entry.compressedSize), uncompressedSize: BigInt(entry.uncompressedSize), crc32: entry.crc32 !== 0 ? entry.crc32.toString(16).padStart(8, "0") : null, @@ -54,8 +63,144 @@ export async function readZipCentralDirectory( zipFile.on("end", () => resolve(entries)); zipFile.on("error", (error) => { log.warn({ error, file: targetFile }, "Error reading ZIP entries"); - resolve(entries); // Return whatever we got + resolve(entries); }); }); }); } + +/** + * Read a multipart split ZIP using yauzl's RandomAccessReader API. + * This creates a virtual "file" that spans all parts so yauzl can + * seek freely across the entire archive to read the central directory. + */ +async function readMultipartZip(filePaths: string[]): Promise { + // Get sizes of all parts + const partSizes: number[] = []; + for (const fp of filePaths) { + const s = await fsStat(fp); + partSizes.push(s.size); + } + const totalSize = partSizes.reduce((a, b) => a + b, 0); + + log.debug( + { parts: filePaths.length, totalSize }, + "Reading multipart ZIP via spanning reader" + ); + + return new Promise((resolve) => { + const reader = createMultiPartReader(filePaths, partSizes); + + yauzl.fromRandomAccessReader( + reader, + totalSize, + { lazyEntries: true, autoClose: true }, + (err, zipFile) => { + if (err) { + log.warn({ err }, "Failed to open multipart ZIP for reading"); + reader.close(() => {}); + resolve([]); + return; + } + + const entries: FileEntry[] = []; + + zipFile.readEntry(); + zipFile.on("entry", (entry: yauzl.Entry) => { + if (!entry.fileName.endsWith("/")) { + const ext = path.extname(entry.fileName).toLowerCase(); + entries.push({ + path: entry.fileName, + fileName: path.basename(entry.fileName), + extension: ext ? ext.slice(1) : null, + compressedSize: BigInt(entry.compressedSize), + uncompressedSize: BigInt(entry.uncompressedSize), + crc32: entry.crc32 !== 0 ? entry.crc32.toString(16).padStart(8, "0") : null, + }); + } + zipFile.readEntry(); + }); + + zipFile.on("end", () => { + log.info({ entries: entries.length }, "Multipart ZIP entries read"); + resolve(entries); + }); + zipFile.on("error", (error) => { + log.warn({ error }, "Error reading multipart ZIP entries"); + resolve(entries); + }); + } + ); + }); +} + +/** + * Create a yauzl RandomAccessReader that reads across multiple split part files. 
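+ * Example: part sizes [5, 5, 5] yield offsets [0, 5, 10]; global offset 7
+ * falls in part index 1 at local offset 2.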
+ * Maps a global offset to the correct part file and local offset. + * + * Uses Object.create to properly inherit from yauzl.RandomAccessReader + * (whose constructor + prototype is defined at runtime, not as a TS class). + */ +function createMultiPartReader( + filePaths: string[], + partSizes: number[] +): yauzl.RandomAccessReader { + // Build cumulative offset table + const partOffsets: number[] = []; + let offset = 0; + for (const size of partSizes) { + partOffsets.push(offset); + offset += size; + } + + // Create an instance by calling the parent constructor + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const reader = new (yauzl.RandomAccessReader as any)() as yauzl.RandomAccessReader; + + // Override _readStreamForRange — yauzl calls this to read a range of bytes + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (reader as any)._readStreamForRange = function (start: number, end: number): Readable { + const readable = new Readable({ read() {} }); + + readRange(start, end, readable).catch((err) => { + readable.destroy(err); + }); + + return readable; + }; + + async function readRange(start: number, end: number, readable: Readable): Promise { + let remaining = end - start; + let globalOffset = start; + + while (remaining > 0) { + // Find which part this offset falls in + let partIdx = partOffsets.length - 1; + for (let i = 0; i < partOffsets.length; i++) { + if (i + 1 < partOffsets.length && globalOffset < partOffsets[i + 1]) { + partIdx = i; + break; + } + } + + const localOffset = globalOffset - partOffsets[partIdx]; + const partRemaining = partSizes[partIdx] - localOffset; + const toRead = Math.min(remaining, partRemaining); + + const fh = await fsOpen(filePaths[partIdx], "r"); + try { + const buf = Buffer.alloc(toRead); + const { bytesRead } = await fh.read(buf, 0, toRead, localOffset); + readable.push(buf.subarray(0, bytesRead)); + remaining -= bytesRead; + globalOffset += bytesRead; + } finally { + await fh.close(); + } + } + + readable.push(null); // Signal end of stream + } + + return reader; +} diff --git a/worker/src/db/queries.ts b/worker/src/db/queries.ts index 7a50e38..ee03803 100644 --- a/worker/src/db/queries.ts +++ b/worker/src/db/queries.ts @@ -1,5 +1,5 @@ import { db } from "./client.js"; -import type { ArchiveType } from "@prisma/client"; +import type { ArchiveType, FetchStatus } from "@prisma/client"; export async function getActiveAccounts() { return db.telegramAccount.findMany({ @@ -7,6 +7,17 @@ export async function getActiveAccounts() { }); } +export async function getPendingAccounts() { + return db.telegramAccount.findMany({ + where: { isActive: true, authState: "PENDING" }, + }); +} + +export async function hasAnyChannels(): Promise { + const count = await db.telegramChannel.count(); + return count > 0; +} + export async function getSourceChannelMappings(accountId: string) { return db.accountChannelMap.findMany({ where: { @@ -18,26 +29,66 @@ export async function getSourceChannelMappings(accountId: string) { }); } -export async function getDestinationChannel(accountId: string) { - const mapping = await db.accountChannelMap.findFirst({ - where: { - accountId, - role: "WRITER", - channel: { type: "DESTINATION", isActive: true }, - }, - include: { channel: true }, +// ── Global destination channel ── + +export async function getGlobalDestinationChannel() { + const setting = await db.globalSetting.findUnique({ + where: { key: "destination_channel_id" }, + }); + if (!setting) return null; + return db.telegramChannel.findFirst({ 
+ where: { id: setting.value, type: "DESTINATION", isActive: true }, + }); +} + +export async function getGlobalSetting(key: string): Promise { + const setting = await db.globalSetting.findUnique({ where: { key } }); + return setting?.value ?? null; +} + +export async function setGlobalSetting(key: string, value: string) { + return db.globalSetting.upsert({ + where: { key }, + create: { key, value }, + update: { value }, }); - return mapping?.channel ?? null; } export async function packageExistsByHash(contentHash: string) { - const pkg = await db.package.findUnique({ - where: { contentHash }, + const pkg = await db.package.findFirst({ + where: { contentHash, destMessageId: { not: null } }, select: { id: true }, }); return pkg !== null; } +/** + * Check if a package already exists for a given source message ID + * AND was successfully uploaded to the destination (destMessageId is set). + * Used as an early skip before downloading. + */ +export async function packageExistsBySourceMessage( + sourceChannelId: string, + sourceMessageId: bigint +): Promise { + const pkg = await db.package.findFirst({ + where: { sourceChannelId, sourceMessageId, destMessageId: { not: null } }, + select: { id: true }, + }); + return pkg !== null; +} + +/** + * Delete orphaned Package rows that have the same content hash but never + * completed the upload (destMessageId is null). Called before creating a + * new complete record to avoid unique constraint violations. + */ +export async function deleteOrphanedPackageByHash(contentHash: string): Promise { + await db.package.deleteMany({ + where: { contentHash, destMessageId: null }, + }); +} + export interface CreatePackageInput { contentHash: string; fileName: string; @@ -228,6 +279,57 @@ export async function getAccountAuthCode(accountId: string) { return account; } +// ── Channel sync (auto-discovery from Telegram) ── + +export interface UpsertChannelInput { + telegramId: bigint; + title: string; + type: "SOURCE" | "DESTINATION"; + isForum: boolean; +} + +/** + * Upsert a channel by telegramId. Returns the channel record. + * If it already exists, update title and forum status. + */ +export async function upsertChannel(input: UpsertChannelInput) { + return db.telegramChannel.upsert({ + where: { telegramId: input.telegramId }, + create: { + telegramId: input.telegramId, + title: input.title, + type: input.type, + isForum: input.isForum, + }, + update: { + title: input.title, + isForum: input.isForum, + }, + }); +} + +/** + * Link an account to a channel if not already linked. + * Uses a try/catch on unique constraint to make it idempotent. 
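+ * Returns the created link, or null when the account/channel pair is
+ * already linked.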
+ */
+export async function ensureAccountChannelLink(
+  accountId: string,
+  channelId: string,
+  role: "READER" | "WRITER"
+) {
+  try {
+    return await db.accountChannelMap.create({
+      data: { accountId, channelId, role },
+    });
+  } catch (err: unknown) {
+    // Already linked — ignore unique constraint violation
+    if (err instanceof Error && err.message.includes("Unique constraint")) {
+      return null;
+    }
+    throw err;
+  }
+}
+
 // ── Forum / Topic progress ──
 
 export async function setChannelForum(channelId: string, isForum: boolean) {
@@ -268,3 +370,50 @@ export async function upsertTopicProgress(
     },
   });
 }
+
+// ── Channel fetch requests (DB-mediated communication with web app) ──
+
+export async function getChannelFetchRequest(requestId: string) {
+  return db.channelFetchRequest.findUnique({
+    where: { id: requestId },
+    include: { account: true },
+  });
+}
+
+export async function updateFetchRequestStatus(
+  requestId: string,
+  status: FetchStatus,
+  extra?: { resultJson?: string; error?: string }
+) {
+  return db.channelFetchRequest.update({
+    where: { id: requestId },
+    data: {
+      status,
+      resultJson: extra?.resultJson ?? undefined,
+      error: extra?.error ?? undefined,
+    },
+  });
+}
+
+export async function getAccountLinkedChannelIds(accountId: string): Promise<Set<string>> {
+  const links = await db.accountChannelMap.findMany({
+    where: { accountId },
+    select: { channel: { select: { telegramId: true } } },
+  });
+  return new Set(links.map((l) => l.channel.telegramId.toString()));
+}
+
+export async function getExistingChannelsByTelegramId(): Promise<Map<string, string>> {
+  const channels = await db.telegramChannel.findMany({
+    select: { id: true, telegramId: true },
+  });
+  const map = new Map<string, string>();
+  for (const ch of channels) {
+    map.set(ch.telegramId.toString(), ch.id);
+  }
+  return map;
+}
+
+export async function getAccountById(accountId: string) {
+  return db.telegramAccount.findUnique({ where: { id: accountId } });
+}
diff --git a/worker/src/fetch-listener.ts b/worker/src/fetch-listener.ts
new file mode 100644
index 0000000..f431dc8
--- /dev/null
+++ b/worker/src/fetch-listener.ts
@@ -0,0 +1,206 @@
+import type pg from "pg";
+import { pool } from "./db/client.js";
+import { childLogger } from "./util/logger.js";
+import { withTdlibMutex } from "./util/mutex.js";
+import { processFetchRequest } from "./worker.js";
+import { generateInviteLink, createSupergroup } from "./tdlib/chats.js";
+import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
+import {
+  getGlobalDestinationChannel,
+  getGlobalSetting,
+  setGlobalSetting,
+  getActiveAccounts,
+  upsertChannel,
+  ensureAccountChannelLink,
+} from "./db/queries.js";
+
+const log = childLogger("fetch-listener");
+
+let pgClient: pg.PoolClient | null = null;
+
+/**
+ * Start listening for pg_notify signals from the web app.
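+ *
+ * The web app is expected to signal with plain pg_notify; a sketch of the
+ * sending side (the exact call lives in the web app, not in this file):
+ *
+ *   await db.$executeRaw`SELECT pg_notify('channel_fetch', ${requestId})`;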
+ *
+ * Channels:
+ * - `channel_fetch` — payload = requestId → fetch channels for an account
+ * - `generate_invite` — payload = channelId → generate invite link for destination
+ * - `create_destination` — payload = JSON { requestId, title } → create supergroup via TDLib
+ */
+export async function startFetchListener(): Promise<void> {
+  pgClient = await pool.connect();
+  await pgClient.query("LISTEN channel_fetch");
+  await pgClient.query("LISTEN generate_invite");
+  await pgClient.query("LISTEN create_destination");
+
+  pgClient.on("notification", (msg) => {
+    if (msg.channel === "channel_fetch" && msg.payload) {
+      handleChannelFetch(msg.payload);
+    } else if (msg.channel === "generate_invite" && msg.payload) {
+      handleGenerateInvite(msg.payload);
+    } else if (msg.channel === "create_destination" && msg.payload) {
+      handleCreateDestination(msg.payload);
+    }
+  });
+
+  log.info("Fetch listener started (channel_fetch, generate_invite, create_destination)");
+}
+
+export function stopFetchListener(): void {
+  if (pgClient) {
+    pgClient.release();
+    pgClient = null;
+  }
+  log.info("Fetch listener stopped");
+}
+
+// ── Channel fetch handler ──
+
+// Chain promises to ensure sequential execution
+let fetchQueue: Promise<void> = Promise.resolve();
+
+function handleChannelFetch(requestId: string): void {
+  fetchQueue = fetchQueue.then(async () => {
+    try {
+      await withTdlibMutex("fetch-channels", () =>
+        processFetchRequest(requestId)
+      );
+    } catch (err) {
+      log.error({ err, requestId }, "Failed to process fetch request");
+    }
+  });
+}
+
+// ── Invite link generation handler ──
+
+function handleGenerateInvite(channelId: string): void {
+  fetchQueue = fetchQueue.then(async () => {
+    try {
+      await withTdlibMutex("generate-invite", async () => {
+        const destChannel = await getGlobalDestinationChannel();
+        if (!destChannel || destChannel.id !== channelId) {
+          log.warn({ channelId }, "Destination channel mismatch, skipping invite generation");
+          return;
+        }
+
+        // Use the first available authenticated account to generate the link
+        const accounts = await getActiveAccounts();
+        if (accounts.length === 0) {
+          log.warn("No authenticated accounts to generate invite link");
+          return;
+        }
+
+        const account = accounts[0];
+        const client = await createTdlibClient({ id: account.id, phone: account.phone });
+
+        try {
+          const link = await generateInviteLink(client, destChannel.telegramId);
+          await setGlobalSetting("destination_invite_link", link);
+          log.info({ link }, "Invite link generated and saved");
+        } finally {
+          await closeTdlibClient(client);
+        }
+      });
+    } catch (err) {
+      log.error({ err, channelId }, "Failed to generate invite link");
+    }
+  });
+}
+
+// ── Create destination supergroup handler ──
+
+function handleCreateDestination(payload: string): void {
+  fetchQueue = fetchQueue.then(async () => {
+    let requestId: string | undefined;
+    try {
+      const parsed = JSON.parse(payload) as { requestId: string; title: string };
+      requestId = parsed.requestId;
+
+      await withTdlibMutex("create-destination", async () => {
+        const { db } = await import("./db/client.js");
+
+        // Mark the request as in-progress
+        await db.channelFetchRequest.update({
+          where: { id: parsed.requestId },
+          data: { status: "IN_PROGRESS" },
+        });
+
+        // Use the first available authenticated account
+        const accounts = await getActiveAccounts();
+        if (accounts.length === 0) {
+          throw new Error("No authenticated accounts available to create the group");
+        }
+
+        const account = accounts[0];
+        const client = await createTdlibClient({ id: account.id, phone: account.phone });
+
+        try {
+          // Create the supergroup via TDLib
+          const result = await createSupergroup(client, parsed.title);
+          log.info({ chatId: result.chatId.toString(), title: result.title }, "Supergroup created");
+
+          // Upsert it as a DESTINATION channel in the DB
+          const channel = await upsertChannel({
+            telegramId: result.chatId,
+            title: result.title,
+            type: "DESTINATION",
+            isForum: false,
+          });
+
+          // Set as global destination
+          await setGlobalSetting("destination_channel_id", channel.id);
+
+          // Generate an invite link
+          const link = await generateInviteLink(client, result.chatId);
+          await setGlobalSetting("destination_invite_link", link);
+          log.info({ link }, "Invite link generated for new destination");
+
+          // Link all authenticated accounts as WRITER
+          for (const acc of accounts) {
+            try {
+              await ensureAccountChannelLink(acc.id, channel.id, "WRITER");
+            } catch {
+              // Already linked
+            }
+          }
+
+          // Mark fetch request as completed with the channel info
+          await db.channelFetchRequest.update({
+            where: { id: parsed.requestId },
+            data: {
+              status: "COMPLETED",
+              resultJson: JSON.stringify({
+                channelId: channel.id,
+                telegramId: result.chatId.toString(),
+                title: result.title,
+                inviteLink: link,
+              }),
+            },
+          });
+
+          log.info(
+            { channelId: channel.id, telegramId: result.chatId.toString() },
+            "Destination channel created and configured"
+          );
+        } finally {
+          await closeTdlibClient(client);
+        }
+      });
+    } catch (err) {
+      log.error({ err, payload }, "Failed to create destination channel");
+      if (requestId) {
+        try {
+          const { db } = await import("./db/client.js");
+          await db.channelFetchRequest.update({
+            where: { id: requestId },
+            data: {
+              status: "FAILED",
+              error: err instanceof Error ? err.message : String(err),
+            },
+          });
+        } catch {
+          // Best-effort
+        }
+      }
+    }
+  });
+}
diff --git a/worker/src/index.ts b/worker/src/index.ts
index a93eeca..1407f56 100644
--- a/worker/src/index.ts
+++ b/worker/src/index.ts
@@ -4,6 +4,7 @@ import { logger } from "./util/logger.js";
 import { markStaleRunsAsFailed } from "./db/queries.js";
 import { cleanupTempDir } from "./worker.js";
 import { startScheduler, stopScheduler } from "./scheduler.js";
+import { startFetchListener, stopFetchListener } from "./fetch-listener.js";
 import { db, pool } from "./db/client.js";
 
 const log = logger.child({ module: "main" });
@@ -20,6 +21,9 @@ async function main(): Promise<void> {
   await cleanupTempDir();
   await markStaleRunsAsFailed();
 
+  // Start the fetch listener (pg_notify for on-demand channel fetching)
+  await startFetchListener();
+
   // Start the scheduler
   await startScheduler();
 }
@@ -28,6 +32,7 @@ async function main(): Promise<void> {
 function shutdown(signal: string): void {
   log.info({ signal }, "Shutdown signal received");
   stopScheduler();
+  stopFetchListener();
 
   // Close DB connections
   Promise.all([db.$disconnect(), pool.end()])
diff --git a/worker/src/scheduler.ts b/worker/src/scheduler.ts
index afac97e..f7dbc23 100644
--- a/worker/src/scheduler.ts
+++ b/worker/src/scheduler.ts
@@ -1,15 +1,22 @@
 import { config } from "./util/config.js";
 import { childLogger } from "./util/logger.js";
-import { getActiveAccounts } from "./db/queries.js";
-import { runWorkerForAccount } from "./worker.js";
+import { withTdlibMutex } from "./util/mutex.js";
+import { getActiveAccounts, getPendingAccounts } from "./db/queries.js";
+import { runWorkerForAccount, authenticateAccount } from "./worker.js";
 
 const log = childLogger("scheduler");
 
 let running = false;
 let timer: ReturnType<typeof setTimeout> | null = null;
+let cycleCount = 0;
 
 /**
- * Run one ingestion cycle: process all active, authenticated accounts sequentially.
+ * Run one ingestion cycle:
+ * 1. Authenticate any PENDING accounts (triggers SMS code flow + auto-fetch channels)
+ * 2. Process all active AUTHENTICATED accounts for ingestion
+ *
+ * All TDLib operations are wrapped in the mutex to ensure only one client
+ * runs at a time (also shared with the fetch listener for on-demand requests).
  */
 async function runCycle(): Promise<void> {
   if (running) {
@@ -18,20 +25,38 @@
   }
 
   running = true;
-  log.info("Starting ingestion cycle");
+  cycleCount++;
+  log.info({ cycle: cycleCount }, "Starting ingestion cycle");
 
   try {
+    // ── Phase 1: Authenticate pending accounts ──
+    const pendingAccounts = await getPendingAccounts();
+    if (pendingAccounts.length > 0) {
+      log.info(
+        { count: pendingAccounts.length },
+        "Found pending accounts, starting authentication"
+      );
+      for (const account of pendingAccounts) {
+        await withTdlibMutex(`auth:${account.phone}`, () =>
+          authenticateAccount(account)
+        );
+      }
+    }
+
+    // ── Phase 2: Ingest for authenticated accounts ──
     const accounts = await getActiveAccounts();
     if (accounts.length === 0) {
-      log.info("No active authenticated accounts, nothing to do");
+      log.info("No active authenticated accounts, nothing to ingest");
       return;
     }
 
    log.info({ accountCount: accounts.length }, "Processing accounts");
 
     for (const account of accounts) {
-      await runWorkerForAccount(account);
+      await withTdlibMutex(`ingest:${account.phone}`, () =>
+        runWorkerForAccount(account)
+      );
     }
 
     log.info("Ingestion cycle complete");
diff --git a/worker/src/tdlib/chats.ts b/worker/src/tdlib/chats.ts
new file mode 100644
index 0000000..cabfbce
--- /dev/null
+++ b/worker/src/tdlib/chats.ts
@@ -0,0 +1,162 @@
+import type { Client } from "tdl";
+import { childLogger } from "../util/logger.js";
+import { config } from "../util/config.js";
+
+const log = childLogger("chats");
+
+export interface TelegramChatInfo {
+  chatId: bigint;
+  title: string;
+  type: "channel" | "supergroup" | "group" | "private" | "other";
+  isForum: boolean;
+  memberCount?: number;
+}
+
+/**
+ * Fetch all chats the account is a member of.
+ * Uses TDLib's getChats to load the chat list, then getChat for details.
+ * Filters to channels and supergroups only (groups/privates are not useful for ingestion).
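+ *
+ * Shape of one returned entry (illustrative values only):
+ *
+ *   { chatId: -1001234567890n, title: "Some Channel", type: "channel", isForum: false }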
+ */
+export async function getAccountChats(
+  client: Client
+): Promise<TelegramChatInfo[]> {
+  const chats: TelegramChatInfo[] = [];
+
+  // Load the main chat list
+  let hasMore = true;
+
+  while (hasMore) {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const result = (await client.invoke({
+      _: "getChats",
+      chat_list: { _: "chatListMain" },
+      limit: 100,
+    })) as { chat_ids: number[] };
+
+    if (!result.chat_ids || result.chat_ids.length === 0) {
+      break;
+    }
+
+    for (const chatId of result.chat_ids) {
+      try {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const chat = (await client.invoke({
+          _: "getChat",
+          chat_id: chatId,
+        })) as any;
+
+        const chatType = chat.type?._;
+        let type: TelegramChatInfo["type"] = "other";
+        let isForum = false;
+
+        if (chatType === "chatTypeSupergroup") {
+          // Get supergroup details to check if it's a channel or group
+          try {
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const sg = (await client.invoke({
+              _: "getSupergroup",
+              supergroup_id: chat.type.supergroup_id,
+            })) as any;
+
+            type = sg.is_channel ? "channel" : "supergroup";
+            isForum = sg.is_forum ?? false;
+          } catch {
+            type = "supergroup";
+          }
+        } else if (chatType === "chatTypeBasicGroup") {
+          type = "group";
+        } else if (chatType === "chatTypePrivate" || chatType === "chatTypeSecret") {
+          type = "private";
+        }
+
+        // Only include channels and supergroups
+        if (type === "channel" || type === "supergroup") {
+          chats.push({
+            chatId: BigInt(chatId),
+            title: chat.title ?? `Chat ${chatId}`,
+            type,
+            isForum,
+          });
+        }
+      } catch (err) {
+        log.warn({ chatId, err }, "Failed to get chat details, skipping");
+      }
+    }
+
+    // getChats with chatListMain returns all chats at once in newer TDLib
+    // versions, so we break after the first batch
+    hasMore = false;
+
+    await sleep(config.apiDelayMs);
+  }
+
+  log.info(
+    { total: chats.length },
+    "Fetched channels/supergroups from Telegram"
+  );
+
+  return chats;
+}
+
+/**
+ * Generate an invite link for a chat. The account must be an admin or have
+ * invite link permissions.
+ */
+export async function generateInviteLink(
+  client: Client,
+  chatId: bigint
+): Promise<string> {
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  const result = (await client.invoke({
+    _: "createChatInviteLink",
+    chat_id: Number(chatId),
+    name: "DragonsStash Auto-Join",
+    creates_join_request: false,
+  })) as any;
+
+  const link = result.invite_link as string;
+  log.info({ chatId: chatId.toString(), link }, "Generated invite link");
+  return link;
+}
+
+/**
+ * Create a new supergroup (private group) via TDLib.
+ * Returns the chat ID and title.
+ */
+export async function createSupergroup(
+  client: Client,
+  title: string
+): Promise<{ chatId: bigint; title: string }> {
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  const result = (await client.invoke({
+    _: "createNewSupergroupChat",
+    title,
+    is_forum: false,
+    is_channel: false,
+    description: "DragonsStash archive destination — all accounts write here",
+  })) as any;
+
+  const chatId = BigInt(result.id);
+  log.info({ chatId: chatId.toString(), title }, "Created new supergroup");
+  return { chatId, title: result.title ?? title };
+}
+
+/**
+ * Join a chat using an invite link.
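+ * TDLib rejects the call when the link is invalid or the account already
+ * belongs to the chat; callers here treat such failures as benign (see
+ * authenticateAccount in worker.ts).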
+ */
+export async function joinChatByInviteLink(
+  client: Client,
+  inviteLink: string
+): Promise<void> {
+  await client.invoke({
+    _: "joinChatByInviteLink",
+    invite_link: inviteLink,
+  });
+  log.info({ inviteLink }, "Joined chat by invite link");
+}
+
+function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
diff --git a/worker/src/tdlib/download.ts b/worker/src/tdlib/download.ts
index da0df5f..4f40eca 100644
--- a/worker/src/tdlib/download.ts
+++ b/worker/src/tdlib/download.ts
@@ -1,5 +1,5 @@
 import type { Client } from "tdl";
-import { readFile, rename, stat } from "fs/promises";
+import { readFile, rename, copyFile, unlink, stat } from "fs/promises";
 import { config } from "../util/config.js";
 import { childLogger } from "../util/logger.js";
 import { isArchiveAttachment } from "../archive/detect.js";
@@ -69,19 +69,26 @@ export interface ChannelScanResult {
 }
 
 /**
- * Fetch messages from a channel since a given message ID.
+ * Fetch messages from a channel, stopping once we've scanned past the
+ * last-processed boundary (with one page of lookback for multipart safety).
  * Collects both archive attachments AND photo messages (for preview matching).
  * Returns messages in chronological order (oldest first).
+ *
+ * When `lastProcessedMessageId` is null (first run), scans everything.
+ * The worker applies a post-grouping filter to skip fully-processed sets,
+ * and keeps `packageExistsBySourceMessage` as a safety net.
  */
 export async function getChannelMessages(
   client: Client,
   chatId: bigint,
-  fromMessageId?: bigint | null,
+  lastProcessedMessageId?: bigint | null,
   limit = 100
 ): Promise<ChannelScanResult> {
   const archives: TelegramMessage[] = [];
   const photos: TelegramPhoto[] = [];
-  let currentFromId = fromMessageId ? Number(fromMessageId) : 0;
+  const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
+
+  let currentFromId = 0;
 
   // eslint-disable-next-line no-constant-condition
   while (true) {
@@ -114,8 +121,6 @@ export async function getChannelMessages(
       const photo = msg.content?.photo;
       const caption = msg.content?.caption?.text ?? "";
       if (photo?.sizes && photo.sizes.length > 0) {
-        // Pick the smallest size for thumbnail (type "s" or "m")
-        // TDLib photo sizes are ordered from smallest to largest
        const smallest = photo.sizes[0];
         photos.push({
           id: BigInt(msg.id),
@@ -128,13 +133,22 @@ export async function getChannelMessages(
     }
 
     currentFromId = result.messages[result.messages.length - 1].id;
+
+    // Stop scanning once we've gone past the boundary (this page is the lookback)
+    if (boundary && currentFromId < boundary) break;
+
     if (result.messages.length < 100) break;
 
     // Rate limit delay
     await sleep(config.apiDelayMs);
   }
 
-  // Return in chronological order (oldest first)
+  log.info(
+    { chatId: chatId.toString(), archives: archives.length, photos: photos.length },
+    "Channel scan complete"
+  );
+
+  // Reverse to chronological order (oldest first) so the worker processes old→new
   return {
     archives: archives.reverse(),
     photos: photos.reverse(),
@@ -380,8 +394,23 @@ async function verifyAndMove(
     "File verified and complete"
   );
 
-  // Move from TDLib's cache to our temp directory
-  await rename(localPath, destPath);
+  // Move from TDLib's cache to our temp directory.
+  // Use rename first (fast, same filesystem), fall back to copy+delete
+  // when source and destination are on different filesystems (EXDEV).
+  try {
+    await rename(localPath, destPath);
+  } catch (err: unknown) {
+    if ((err as NodeJS.ErrnoException).code === "EXDEV") {
+      log.debug(
+        { fileId, fileName },
+        "Cross-device rename — falling back to copy + unlink"
+      );
+      await copyFile(localPath, destPath);
+      await unlink(localPath);
+    } else {
+      throw err;
+    }
+  }
 }
 
 function sleep(ms: number): Promise<void> {
diff --git a/worker/src/tdlib/topics.ts b/worker/src/tdlib/topics.ts
index 6eff4e1..23f02c5 100644
--- a/worker/src/tdlib/topics.ts
+++ b/worker/src/tdlib/topics.ts
@@ -125,29 +125,43 @@ export async function getForumTopicList(
 }
 
 /**
- * Fetch messages from a specific forum topic (thread).
- * Uses getMessageThreadHistory to scan within a topic.
+ * Fetch messages from a specific forum topic (thread), stopping once
+ * we've scanned past the last-processed boundary (with one page of lookback).
+ * Uses searchChatMessages with message_thread_id to scan within a topic.
+ *
+ * Returns messages in chronological order (oldest first).
+ *
+ * When `lastProcessedMessageId` is null (first run), scans everything.
+ * The worker applies a post-grouping filter to skip fully-processed sets,
+ * and keeps `packageExistsBySourceMessage` as a safety net.
  */
 export async function getTopicMessages(
   client: Client,
   chatId: bigint,
   topicId: bigint,
-  fromMessageId?: bigint | null,
+  lastProcessedMessageId?: bigint | null,
   limit = 100
 ): Promise<ChannelScanResult> {
   const archives: TelegramMessage[] = [];
   const photos: TelegramPhoto[] = [];
-  let currentFromId = fromMessageId ? Number(fromMessageId) : 0;
+  const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
+
+  let currentFromId = 0;
 
   // eslint-disable-next-line no-constant-condition
   while (true) {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
     const result = (await client.invoke({
-      _: "getMessageThreadHistory",
+      _: "searchChatMessages",
       chat_id: Number(chatId),
-      message_id: Number(topicId),
+      query: "",
+      message_thread_id: Number(topicId),
       from_message_id: currentFromId,
       offset: 0,
       limit: Math.min(limit, 100),
+      filter: null,
+      sender_id: null,
+      saved_messages_topic_id: 0,
     })) as {
       messages?: {
         id: number;
@@ -206,11 +220,21 @@ export async function getTopicMessages(
     }
 
     currentFromId = result.messages[result.messages.length - 1].id;
+
+    // Stop scanning once we've gone past the boundary (this page is the lookback)
+    if (boundary && currentFromId < boundary) break;
+
     if (result.messages.length < 100) break;
 
     await sleep(config.apiDelayMs);
   }
 
+  log.info(
+    { chatId: chatId.toString(), topicId: topicId.toString(), archives: archives.length, photos: photos.length },
+    "Topic scan complete"
+  );
+
+  // Reverse to chronological order (oldest first) so the worker processes old→new
   return {
     archives: archives.reverse(),
     photos: photos.reverse(),
diff --git a/worker/src/upload/channel.ts b/worker/src/upload/channel.ts
index c06057e..79f8978 100644
--- a/worker/src/upload/channel.ts
+++ b/worker/src/upload/channel.ts
@@ -1,3 +1,5 @@
+import path from "path";
+import { stat } from "fs/promises";
 import type { Client } from "tdl";
 import { config } from "../util/config.js";
 import { childLogger } from "../util/logger.js";
@@ -11,7 +13,13 @@ export interface UploadResult {
 /**
  * Upload one or more files to a destination Telegram channel.
  * For multipart archives, each file is sent as a separate message.
- * Returns the message ID of the first uploaded message.
+ * Returns the **final** (server-assigned) message ID of the first uploaded message.
+ *
+ * IMPORTANT: `sendMessage` returns a *temporary* message immediately.
+ * The actual file upload happens asynchronously in TDLib. We listen for
+ * `updateMessageSendSucceeded` to get the real server-side message ID and
+ * to make sure the upload is fully committed before we clean up temp files
+ * or close the TDLib client (which would cancel pending uploads).
  */
 export async function uploadToChannel(
   client: Client,
@@ -26,31 +34,24 @@
 
     const fileCaption = i === 0 && caption ? caption : undefined;
 
-    log.debug(
-      { chatId: Number(chatId), filePath, part: i + 1, total: filePaths.length },
+    const fileName = path.basename(filePath);
+    let fileSizeMB = 0;
+    try {
+      const s = await stat(filePath);
+      fileSizeMB = Math.round(s.size / (1024 * 1024));
+    } catch {
+      // Non-critical
+    }
+
+    log.info(
+      { chatId: Number(chatId), fileName, sizeMB: fileSizeMB, part: i + 1, total: filePaths.length },
       "Uploading file to channel"
     );
 
-    const result = (await client.invoke({
-      _: "sendMessage",
-      chat_id: Number(chatId),
-      input_message_content: {
-        _: "inputMessageDocument",
-        document: {
-          _: "inputFileLocal",
-          path: filePath,
-        },
-        caption: fileCaption
-          ? {
-              _: "formattedText",
-              text: fileCaption,
-            }
-          : undefined,
-      },
-    })) as { id: number };
+    const serverMsgId = await sendAndWaitForUpload(client, chatId, filePath, fileCaption, fileName, fileSizeMB);
 
     if (i === 0) {
-      firstMessageId = BigInt(result.id);
+      firstMessageId = serverMsgId;
     }
 
     // Rate limit delay between uploads
@@ -65,12 +66,133 @@
 
   log.info(
     { chatId: Number(chatId), messageId: Number(firstMessageId), files: filePaths.length },
-    "Upload complete"
+    "All uploads confirmed by Telegram"
   );
 
   return { messageId: firstMessageId };
 }
 
+/**
+ * Send a single file message and wait for Telegram to confirm the upload.
+ * Returns the final server-assigned message ID.
+ */
+async function sendAndWaitForUpload(
+  client: Client,
+  chatId: bigint,
+  filePath: string,
+  caption: string | undefined,
+  fileName: string,
+  fileSizeMB: number
+): Promise<bigint> {
+  // Send the message — this returns a temporary message immediately
+  const tempMsg = (await client.invoke({
+    _: "sendMessage",
+    chat_id: Number(chatId),
+    input_message_content: {
+      _: "inputMessageDocument",
+      document: {
+        _: "inputFileLocal",
+        path: filePath,
+      },
+      caption: caption
+        ? {
+            _: "formattedText",
+            text: caption,
+          }
+        : undefined,
+    },
+  })) as { id: number };
+
+  const tempMsgId = tempMsg.id;
+
+  log.debug(
+    { fileName, tempMsgId },
+    "Message queued, waiting for upload confirmation"
+  );
+
+  // Wait for the actual upload to complete
+  return new Promise<bigint>((resolve, reject) => {
+    let settled = false;
+    let lastLoggedPercent = 0;
+
+    // Timeout: 10 minutes per GB, minimum 10 minutes
+    const timeoutMs = Math.max(
+      10 * 60_000,
+      (fileSizeMB / 1024) * 10 * 60_000
+    );
+
+    const timer = setTimeout(() => {
+      if (!settled) {
+        settled = true;
+        cleanup();
+        reject(
+          new Error(
+            `Upload timed out after ${Math.round(timeoutMs / 60_000)}min for ${fileName}`
+          )
+        );
+      }
+    }, timeoutMs);
+
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const handleUpdate = (update: any) => {
+      // Track upload progress via updateFile events
+      if (update?._ === "updateFile") {
+        const file = update.file;
+        if (file?.remote?.is_uploading_active && file.expected_size > 0) {
+          const uploaded = file.remote.uploaded_size ?? 0;
+          const total = file.expected_size;
+          const percent = Math.round((uploaded / total) * 100);
+          if (percent >= lastLoggedPercent + 20) {
+            lastLoggedPercent = percent - (percent % 20);
+            log.info(
+              { fileName, uploaded, total, percent: `${percent}%` },
+              "Upload progress"
+            );
+          }
+        }
+      }
+
+      // The event we're waiting for: the upload succeeded and Telegram has
+      // assigned the final server-side message ID
+      if (update?._ === "updateMessageSendSucceeded") {
+        const msg = update.message;
+        const oldMsgId = update.old_message_id;
+        if (oldMsgId === tempMsgId) {
+          if (!settled) {
+            settled = true;
+            cleanup();
+            const finalId = BigInt(msg.id);
+            log.info(
+              { fileName, tempMsgId, finalMsgId: Number(finalId) },
+              "Upload confirmed by Telegram"
+            );
+            resolve(finalId);
+          }
+        }
+      }
+
+      // Upload failed
+      if (update?._ === "updateMessageSendFailed") {
+        const oldMsgId = update.old_message_id;
+        if (oldMsgId === tempMsgId) {
+          if (!settled) {
+            settled = true;
+            cleanup();
+            const errorMsg = update.error?.message ?? "Unknown upload error";
+            reject(new Error(`Upload failed for ${fileName}: ${errorMsg}`));
+          }
+        }
+      }
+    };
+
+    const cleanup = () => {
+      clearTimeout(timer);
+      client.off("update", handleUpdate);
+    };
+
+    client.on("update", handleUpdate);
+  });
+}
+
 function sleep(ms: number): Promise<void> {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
diff --git a/worker/src/util/config.ts b/worker/src/util/config.ts
index 57bc09c..331a64f 100644
--- a/worker/src/util/config.ts
+++ b/worker/src/util/config.ts
@@ -9,8 +9,8 @@ export const config = {
   telegramApiHash: process.env.TELEGRAM_API_HASH ?? "",
   /** Maximum jitter added to scheduler interval (in minutes) */
   jitterMinutes: 5,
-  /** Maximum time between multipart archive parts (in hours) */
-  multipartTimeoutHours: 24,
+  /** Maximum time span for multipart archive parts (in hours). 0 = no limit. */
+  multipartTimeoutHours: parseInt(process.env.MULTIPART_TIMEOUT_HOURS ?? "0", 10),
   /** Delay between Telegram API calls (in ms) to avoid rate limits */
   apiDelayMs: 1000,
   /** Max retries for rate-limited requests */
diff --git a/worker/src/util/mutex.ts b/worker/src/util/mutex.ts
new file mode 100644
index 0000000..f35f193
--- /dev/null
+++ b/worker/src/util/mutex.ts
@@ -0,0 +1,40 @@
+import { childLogger } from "./logger.js";
+
+const log = childLogger("mutex");
+
+let locked = false;
+let holder = "";
+const queue: Array<{ resolve: () => void; label: string }> = [];
+
+/**
+ * Ensures only one TDLib client runs at a time across the entire worker process.
+ * Both the scheduler (auth, ingestion) and the fetch listener acquire this
+ * before creating any TDLib client.
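+ *
+ * Typical usage (mirrors scheduler.ts):
+ *
+ *   await withTdlibMutex(`ingest:${account.phone}`, () =>
+ *     runWorkerForAccount(account)
+ *   );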
+ */
+export async function withTdlibMutex<T>(
+  label: string,
+  fn: () => Promise<T>
+): Promise<T> {
+  if (locked) {
+    log.info({ waiting: label, holder }, "Waiting for TDLib mutex");
+    await new Promise<void>((resolve) => queue.push({ resolve, label }));
+  }
+
+  locked = true;
+  holder = label;
+  log.debug({ label }, "TDLib mutex acquired");
+
+  try {
+    return await fn();
+  } finally {
+    const next = queue.shift();
+    if (next) {
+      // Hand the lock to the next waiter without unlocking, so a fresh caller
+      // can't slip in between release and wake-up
+      holder = next.label;
+      log.debug({ next: next.label }, "TDLib mutex releasing to next waiter");
+      next.resolve();
+    } else {
+      locked = false;
+      holder = "";
+      log.debug({ label }, "TDLib mutex released");
+    }
+  }
+}
diff --git a/worker/src/worker.ts b/worker/src/worker.ts
index aaf9be0..945e447 100644
--- a/worker/src/worker.ts
+++ b/worker/src/worker.ts
@@ -1,12 +1,13 @@
 import path from "path";
-import { unlink, readdir } from "fs/promises";
+import { unlink, readdir, mkdir, rm } from "fs/promises";
 import { config } from "./util/config.js";
 import { childLogger } from "./util/logger.js";
 import { tryAcquireLock, releaseLock } from "./db/locks.js";
 import {
   getSourceChannelMappings,
-  getDestinationChannel,
+  getGlobalDestinationChannel,
   packageExistsByHash,
+  packageExistsBySourceMessage,
   createPackageWithFiles,
   createIngestionRun,
   completeIngestionRun,
@@ -16,9 +17,19 @@ import {
   setChannelForum,
   getTopicProgress,
   upsertTopicProgress,
+  upsertChannel,
+  ensureAccountChannelLink,
+  getGlobalSetting,
+  getChannelFetchRequest,
+  updateFetchRequestStatus,
+  getAccountLinkedChannelIds,
+  getExistingChannelsByTelegramId,
+  getAccountById,
+  deleteOrphanedPackageByHash,
 } from "./db/queries.js";
 import type { ActivityUpdate } from "./db/queries.js";
 import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
+import { getAccountChats, joinChatByInviteLink } from "./tdlib/chats.js";
 import { getChannelMessages, downloadFile, downloadPhotoThumbnail } from "./tdlib/download.js";
 import type { DownloadProgress, ChannelScanResult } from "./tdlib/download.js";
 import { isChatForum, getForumTopicList, getTopicMessages } from "./tdlib/topics.js";
@@ -29,13 +40,203 @@ import { extractCreatorFromFileName } from "./archive/creator.js";
 import { hashParts } from "./archive/hash.js";
 import { readZipCentralDirectory } from "./archive/zip-reader.js";
 import { readRarContents } from "./archive/rar-reader.js";
-import { byteLevelSplit } from "./archive/split.js";
+import { byteLevelSplit, concatenateFiles } from "./archive/split.js";
 import { uploadToChannel } from "./upload/channel.js";
 import type { TelegramAccount, TelegramChannel } from "@prisma/client";
 import type { Client } from "tdl";
 
 const log = childLogger("worker");
 
+/**
+ * Authenticate a PENDING account by creating a TDLib client.
+ * TDLib will send an SMS code to the phone number, and the client.login()
+ * callbacks set the authState to AWAITING_CODE. Once the admin enters the
+ * code via the UI, pollForAuthCode picks it up and completes the login.
+ *
+ * After successful auth:
+ * 1. Fetches channels from Telegram and writes them as a ChannelFetchRequest
+ *    (so the admin can select sources in the UI)
+ * 2. Auto-joins the destination group if an invite link is configured
+ */
+export async function authenticateAccount(
+  account: TelegramAccount
+): Promise<void> {
+  const aLog = childLogger("auth", { accountId: account.id, phone: account.phone });
+  aLog.info("Starting authentication flow");
+
+  let client: Client | undefined;
+  try {
+    client = await createTdlibClient({
+      id: account.id,
+      phone: account.phone,
+    });
+    aLog.info("Authentication successful");
+
+    // Auto-fetch channels and create a fetch request result
+    aLog.info("Fetching channels from Telegram...");
+    await createAutoFetchRequest(client, account.id, aLog);
+
+    // Auto-join the destination group if an invite link exists
+    const inviteLink = await getGlobalSetting("destination_invite_link");
+    if (inviteLink) {
+      aLog.info("Attempting to join destination group via invite link...");
+      try {
+        await joinChatByInviteLink(client, inviteLink);
+        // Link this account as WRITER to the destination channel
+        const destChannel = await getGlobalDestinationChannel();
+        if (destChannel) {
+          await ensureAccountChannelLink(account.id, destChannel.id, "WRITER");
+          aLog.info({ destChannel: destChannel.title }, "Joined destination group and linked as WRITER");
+        }
+      } catch (err) {
+        // May already be a member — that's fine
+        aLog.warn({ err }, "Could not join destination group (may already be a member)");
+        // Still try to link as WRITER
+        const destChannel = await getGlobalDestinationChannel();
+        if (destChannel) {
+          await ensureAccountChannelLink(account.id, destChannel.id, "WRITER");
+        }
+      }
+    }
+  } catch (err) {
+    aLog.error({ err }, "Authentication failed");
+  } finally {
+    if (client) {
+      await closeTdlibClient(client);
+    }
+  }
+}
+
+/**
+ * Process a ChannelFetchRequest: fetch channels from Telegram,
+ * enrich with DB state, and write the result JSON.
+ * Called by the fetch listener (pg_notify) and by authenticateAccount.
+ */
+export async function processFetchRequest(requestId: string): Promise<void> {
+  const aLog = childLogger("fetch-request", { requestId });
+  const request = await getChannelFetchRequest(requestId);
+
+  if (!request || request.status !== "PENDING") {
+    aLog.warn("Fetch request not found or not pending, skipping");
+    return;
+  }
+
+  await updateFetchRequestStatus(requestId, "IN_PROGRESS");
+  aLog.info({ accountId: request.accountId }, "Processing fetch request");
+
+  const client = await createTdlibClient({
+    id: request.account.id,
+    phone: request.account.phone,
+  });
+
+  try {
+    const chats = await getAccountChats(client);
+
+    // Enrich with DB state
+    const linkedTelegramIds = await getAccountLinkedChannelIds(request.accountId);
+    const existingChannels = await getExistingChannelsByTelegramId();
+
+    const enrichedChats = chats.map((chat) => {
+      const telegramIdStr = chat.chatId.toString();
+      return {
+        chatId: telegramIdStr,
+        title: chat.title,
+        type: chat.type,
+        isForum: chat.isForum,
+        memberCount: chat.memberCount ?? null,
+        alreadyLinked: linkedTelegramIds.has(telegramIdStr),
+        existingChannelId: existingChannels.get(telegramIdStr) ?? null,
+      };
+    });
+
+    // Also upsert channel metadata while we have the data
+    for (const chat of chats) {
+      try {
+        await upsertChannel({
+          telegramId: chat.chatId,
+          title: chat.title,
+          type: "SOURCE",
+          isForum: chat.isForum,
+        });
+      } catch {
+        // Non-critical — metadata sync can fail silently
+      }
+    }
+
+    await updateFetchRequestStatus(requestId, "COMPLETED", {
+      resultJson: JSON.stringify(enrichedChats),
+    });
+
+    aLog.info(
+      { total: chats.length, linked: linkedTelegramIds.size },
+      "Fetch request completed"
+    );
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    aLog.error({ err }, "Fetch request failed");
+    await updateFetchRequestStatus(requestId, "FAILED", { error: message });
+  } finally {
+    await closeTdlibClient(client);
+  }
+}
+
+/**
+ * Internal helper called after authentication to auto-create a fetch request
+ * with the channel list (so the UI can show the picker immediately).
+ */
+async function createAutoFetchRequest(
+  client: Client,
+  accountId: string,
+  aLog: ReturnType<typeof childLogger>
+): Promise<void> {
+  const chats = await getAccountChats(client);
+
+  const linkedTelegramIds = await getAccountLinkedChannelIds(accountId);
+  const existingChannels = await getExistingChannelsByTelegramId();
+
+  const enrichedChats = chats.map((chat) => {
+    const telegramIdStr = chat.chatId.toString();
+    return {
+      chatId: telegramIdStr,
+      title: chat.title,
+      type: chat.type,
+      isForum: chat.isForum,
+      memberCount: chat.memberCount ?? null,
+      alreadyLinked: linkedTelegramIds.has(telegramIdStr),
+      existingChannelId: existingChannels.get(telegramIdStr) ?? null,
+    };
+  });
+
+  // Upsert channel metadata
+  for (const chat of chats) {
+    try {
+      await upsertChannel({
+        telegramId: chat.chatId,
+        title: chat.title,
+        type: "SOURCE",
+        isForum: chat.isForum,
+      });
+    } catch {
+      // Non-critical
+    }
+  }
+
+  // Create the fetch request record with the result already filled in
+  const { db } = await import("./db/client.js");
+  await db.channelFetchRequest.create({
+    data: {
+      accountId,
+      status: "COMPLETED",
+      resultJson: JSON.stringify(enrichedChats),
+    },
+  });
+
+  aLog.info(
+    { total: chats.length },
+    "Auto-fetch request created with channel list"
+  );
+}
+
 /**
  * Throttle DB writes for download progress to avoid hammering the DB.
  * Only writes if at least 2 seconds have passed since the last write.
@@ -140,17 +341,18 @@ export async function runWorkerForAccount(
   };
 
   try {
-    // 4. Get assigned source channels and destination
+    // 4. Get assigned source channels and global destination
     const channelMappings = await getSourceChannelMappings(account.id);
-    const destChannel = await getDestinationChannel(account.id);
+    const destChannel = await getGlobalDestinationChannel();
 
     if (!destChannel) {
-      throw new Error("No active destination channel configured");
+      throw new Error("No global destination channel configured — set one in the admin UI");
     }
 
     for (const mapping of channelMappings) {
       const channel = mapping.channel;
 
+      try {
       // ── Check if channel is a forum ──
       const forum = await isChatForum(client, channel.telegramId);
       if (forum !== channel.isForum) {
@@ -198,61 +400,63 @@ export async function runWorkerForAccount(
       );
 
       for (const topic of topics) {
-        const progress = topicProgressList.find(
-          (tp) => tp.topicId === topic.topicId
-        );
-
-        await updateRunActivity(activeRunId, {
-          currentActivity: `Scanning topic "${topic.name}" in "${channel.title}"`,
-          currentStep: "scanning",
-          currentChannel: `${channel.title} › ${topic.name}`,
-          currentFile: null,
-          currentFileNum: null,
-          totalFiles: null,
-          downloadedBytes: null,
-          totalBytes: null,
-          downloadPercent: null,
-        });
-
-        const scanResult = await getTopicMessages(
-          client,
-          channel.telegramId,
-          topic.topicId,
-          progress?.lastProcessedMessageId
-        );
-
-        if (scanResult.archives.length === 0) {
-          accountLog.debug(
-            { channelId: channel.id, topic: topic.name },
-            "No new archives in topic"
+        try {
+          const progress = topicProgressList.find(
+            (tp) => tp.topicId === topic.topicId
           );
-          continue;
-        }
 
-        accountLog.info(
-          { topic: topic.name, archives: scanResult.archives.length, photos: scanResult.photos.length },
-          "Found messages in topic"
-        );
+          await updateRunActivity(activeRunId, {
+            currentActivity: `Scanning topic "${topic.name}" in "${channel.title}"`,
+            currentStep: "scanning",
+            currentChannel: `${channel.title} › ${topic.name}`,
+            currentFile: null,
+            currentFileNum: null,
+            totalFiles: null,
+            downloadedBytes: null,
+            totalBytes: null,
+            downloadPercent: null,
+          });
 
-        // Process archives with topic creator
-        pipelineCtx.topicCreator = topic.name;
-        pipelineCtx.sourceTopicId = topic.topicId;
-        pipelineCtx.channelTitle = `${channel.title} › ${topic.name}`;
-
-        await processArchiveSets(pipelineCtx, scanResult, run.id);
-
-        // Update topic progress
-        const allMsgIds = [
-          ...scanResult.archives.map((m) => m.id),
-          ...scanResult.photos.map((p) => p.id),
-        ];
-        if (allMsgIds.length > 0) {
-          const maxId = allMsgIds.reduce((a, b) => (a > b ? a : b));
-          await upsertTopicProgress(
-            mapping.id,
+          const scanResult = await getTopicMessages(
+            client,
+            channel.telegramId,
             topic.topicId,
-            topic.name,
-            maxId
+            progress?.lastProcessedMessageId
+          );
+
+          if (scanResult.archives.length === 0) {
+            accountLog.debug(
+              { channelId: channel.id, topic: topic.name },
+              "No new archives in topic"
+            );
+            continue;
+          }
+
+          accountLog.info(
+            { topic: topic.name, archives: scanResult.archives.length, photos: scanResult.photos.length },
+            "Found messages in topic"
+          );
+
+          // Process archives with topic creator
+          pipelineCtx.topicCreator = topic.name;
+          pipelineCtx.sourceTopicId = topic.topicId;
+          pipelineCtx.channelTitle = `${channel.title} › ${topic.name}`;
+
+          const maxProcessedId = await processArchiveSets(pipelineCtx, scanResult, run.id, progress?.lastProcessedMessageId);
+
+          // Only advance progress to the highest successfully processed message
+          if (maxProcessedId) {
+            await upsertTopicProgress(
+              mapping.id,
+              topic.topicId,
+              topic.name,
+              maxProcessedId
+            );
+          }
+        } catch (topicErr) {
+          accountLog.warn(
+            { err: topicErr, channelId: channel.id, topic: topic.name, topicId: topic.topicId.toString() },
+            "Failed to process topic, skipping"
           );
         }
       }
@@ -296,18 +500,19 @@ export async function runWorkerForAccount(
       pipelineCtx.sourceTopicId = null;
       pipelineCtx.channelTitle = channel.title;
 
-      await processArchiveSets(pipelineCtx, scanResult, run.id);
+      const maxProcessedId = await processArchiveSets(pipelineCtx, scanResult, run.id, mapping.lastProcessedMessageId);
 
-      // Update last processed message
-      const allMsgIds = [
-        ...scanResult.archives.map((m) => m.id),
-        ...scanResult.photos.map((p) => p.id),
-      ];
-      if (allMsgIds.length > 0) {
-        const maxId = allMsgIds.reduce((a, b) => (a > b ? a : b));
-        await updateLastProcessedMessage(mapping.id, maxId);
+      // Only advance progress to the highest successfully processed message
+      if (maxProcessedId) {
+        await updateLastProcessedMessage(mapping.id, maxProcessedId);
       }
     }
+      } catch (channelErr) {
+        accountLog.warn(
+          { err: channelErr, channelId: channel.id, title: channel.title },
+          "Failed to process channel, skipping to next"
+        );
+      }
+    }
 
     // ── Done ──
@@ -332,16 +537,37 @@ export async function runWorkerForAccount(
 /**
  * Process a scan result through the archive pipeline:
  * group → download → hash → dedup → metadata → split → upload → preview → index.
+ *
+ * Returns the highest message ID that was successfully processed (ingested or
+ * confirmed duplicate). The caller should only advance the progress boundary
+ * to this value — never to the max of all scanned messages.
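+ *
+ * Example of the intended semantics: sets A (msgs 10–11), B (msg 12), and
+ * C (msgs 13–14) are processed in order; if B fails, the watermark stays at
+ * 11 so B is rescanned on the next run, even though C succeeded.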
 */
 async function processArchiveSets(
   ctx: PipelineContext,
   scanResult: ChannelScanResult,
-  ingestionRunId: string
-): Promise<void> {
+  ingestionRunId: string,
+  lastProcessedMessageId?: bigint | null
+): Promise<bigint | null> {
   const { client, runId, channelTitle, channel, throttled, counters, accountLog } = ctx;
 
   // Group into archive sets
-  const archiveSets = groupArchiveSets(scanResult.archives);
+  let archiveSets = groupArchiveSets(scanResult.archives);
+
+  // Filter out sets where ALL parts are at or below the boundary (already processed)
+  if (lastProcessedMessageId) {
+    const totalBefore = archiveSets.length;
+    archiveSets = archiveSets.filter((set) =>
+      set.parts.some((p) => p.id > lastProcessedMessageId)
+    );
+    const filtered = totalBefore - archiveSets.length;
+    if (filtered > 0) {
+      accountLog.info(
+        { filtered, remaining: archiveSets.length },
+        "Filtered out already-processed archive sets"
+      );
+    }
+  }
+
   counters.zipsFound += archiveSets.length;
 
   // Match preview photos to archive sets
@@ -369,16 +595,38 @@ async function processArchiveSets(
     zipsFound: counters.zipsFound,
   });
 
+  // Track the highest message ID that was successfully processed. Once a set
+  // fails, the watermark must stop advancing, or the failed set would be
+  // skipped forever when a later set succeeds.
+  let maxProcessedId: bigint | null = null;
+  let watermarkBlocked = false;
+
   for (let setIdx = 0; setIdx < archiveSets.length; setIdx++) {
-    await processOneArchiveSet(
-      ctx,
-      archiveSets[setIdx],
-      setIdx,
-      archiveSets.length,
-      previewMatches,
-      ingestionRunId
-    );
+    try {
+      await processOneArchiveSet(
+        ctx,
+        archiveSets[setIdx],
+        setIdx,
+        archiveSets.length,
+        previewMatches,
+        ingestionRunId
+      );
+
+      // Set completed (ingested or confirmed duplicate) — advance watermark
+      const setMaxId = archiveSets[setIdx].parts.reduce(
+        (max, p) => (p.id > max ? p.id : max),
+        0n
+      );
+      if (!watermarkBlocked && setMaxId > (maxProcessedId ?? 0n)) {
+        maxProcessedId = setMaxId;
+      }
+    } catch (setErr) {
+      // If a set fails, do NOT advance the watermark past it
+      watermarkBlocked = true;
+      accountLog.warn(
+        { err: setErr, baseName: archiveSets[setIdx].baseName },
+        "Archive set failed, watermark will not advance past this set"
+      );
+    }
+  }
+
+  return maxProcessedId;
 }
 
 /**
@@ -400,17 +648,43 @@ async function processOneArchiveSet(
   counters.messagesScanned += archiveSet.parts.length;
 
   const archiveName = archiveSet.parts[0].fileName;
+
+  // ── Early skip: check if this archive set was already ingested ──
+  // This avoids re-downloading large archives that were processed in a prior run.
+  const alreadyIngested = await packageExistsBySourceMessage(
+    channel.id,
+    archiveSet.parts[0].id
+  );
+  if (alreadyIngested) {
+    counters.zipsDuplicate++;
+    accountLog.debug(
+      { fileName: archiveName, sourceMessageId: Number(archiveSet.parts[0].id) },
+      "Archive already ingested (by source message), skipping"
+    );
+    await updateRunActivity(runId, {
+      currentActivity: `Skipped ${archiveName} (already ingested)`,
+      currentStep: "deduplicating",
+      currentChannel: channelTitle,
+      currentFile: archiveName,
+      currentFileNum: setIdx + 1,
+      totalFiles: totalSets,
+      zipsDuplicate: counters.zipsDuplicate,
+    });
+    return;
+  }
+
   const tempPaths: string[] = [];
   let splitPaths: string[] = [];
 
+  // Per-set subdirectory so uploaded files keep their original filenames
+  const setDir = path.join(config.tempDir, `${ingestionRunId}_${archiveSet.parts[0].id}`);
+  await mkdir(setDir, { recursive: true });
+
   try {
     // ── Downloading ──
     for (let partIdx = 0; partIdx < archiveSet.parts.length; partIdx++) {
       const part = archiveSet.parts[partIdx];
-      const tempPath = path.join(
-        config.tempDir,
-        `${ingestionRunId}_${part.id}_${part.fileName}`
-      );
+      const tempPath = path.join(setDir, part.fileName);
 
       const partLabel =
         archiveSet.parts.length > 1
          ? ` (part ${partIdx + 1}/${archiveSet.parts.length})`
          : "";
@@ -526,14 +800,33 @@ async function processOneArchiveSet(
      accountLog.warn({ err, baseName: archiveSet.baseName }, "Failed to read archive metadata, ingesting without file list");
    }
 
-    // ── Splitting (if needed) ──
-    let uploadPaths = tempPaths;
+    // ── Splitting / Repacking (if needed) ──
+    let uploadPaths = [...tempPaths];
     const totalSize = archiveSet.parts.reduce(
       (sum, p) => sum + p.fileSize,
       0n
     );
+    const MAX_UPLOAD_SIZE = 2n * 1024n * 1024n * 1024n;
+    const hasOversizedPart = archiveSet.parts.some((p) => p.fileSize > MAX_UPLOAD_SIZE);
 
-    if (!archiveSet.isMultipart && totalSize > 2n * 1024n * 1024n * 1024n) {
+    if (hasOversizedPart) {
+      // Full repack: concatenate all parts → single file → re-split into uniform 2GB chunks
+      await updateRunActivity(runId, {
+        currentActivity: `Repacking ${archiveName} (parts >2GB, concatenating + re-splitting)`,
+        currentStep: "splitting",
+        currentChannel: channelTitle,
+        currentFile: archiveName,
+        currentFileNum: setIdx + 1,
+        totalFiles: totalSets,
+      });
+      const concatPath = path.join(setDir, `${archiveSet.baseName}.concat`);
+      await concatenateFiles(tempPaths, concatPath);
+      splitPaths = await byteLevelSplit(concatPath);
+      uploadPaths = splitPaths;
+      // Clean up the concat intermediate file
+      await unlink(concatPath).catch(() => {});
+    } else if (!archiveSet.isMultipart && totalSize > MAX_UPLOAD_SIZE) {
+      // Single file >2GB: split directly
       await updateRunActivity(runId, {
         currentActivity: `Splitting ${archiveName} for upload (>2GB)`,
         currentStep: "splitting",
@@ -595,6 +888,9 @@ async function processOneArchiveSet(
         totalFiles: totalSets,
       });
 
+      // Clean up any orphaned record (same hash but no dest upload) before creating
+      await deleteOrphanedPackageByHash(contentHash);
+
       await createPackageWithFiles({
         contentHash,
         fileName: archiveName,
@@ -632,8 +928,9 @@ async function processOneArchiveSet(
       "Archive ingested"
     );
   } finally {
-    // ALWAYS delete temp files
+    // ALWAYS delete temp files and the set directory
    await deleteFiles([...tempPaths, ...splitPaths]);
+    await rm(setDir, { recursive: true, force: true }).catch(() => {});
   }
 }
 
@@ -648,16 +945,16 @@
 }
 
 /**
- * Clean up any leftover temp files from previous runs.
+ * Clean up any leftover temp files/directories from previous runs.
  */
 export async function cleanupTempDir(): Promise<void> {
   try {
-    const files = await readdir(config.tempDir);
-    for (const file of files) {
-      await unlink(path.join(config.tempDir, file)).catch(() => {});
+    const entries = await readdir(config.tempDir);
+    for (const entry of entries) {
+      await rm(path.join(config.tempDir, entry), { recursive: true, force: true }).catch(() => {});
     }
-    if (files.length > 0) {
-      log.info({ count: files.length }, "Cleaned up stale temp files");
+    if (entries.length > 0) {
+      log.info({ count: entries.length }, "Cleaned up stale temp files");
     }
   } catch {
     // Directory might not exist yet