feat: add Telegram integration with forum topic support and creator tracking

Adds a full Telegram ZIP ingestion pipeline: a TDLib worker service scans source
channels for archive files, deduplicates them by content hash, extracts metadata,
uploads them to an archive channel, and indexes them in Postgres. Forum supergroups
are scanned per topic, with each topic name used as the creator. Filename-based
creator extraction (e.g. "Mammoth Factory - 2026-01.zip") serves as a fallback.

Includes an admin UI for managing accounts/channels, a simplified account setup
(API credentials via env vars), an auth code/password submission dialog, a
package browser with a creator column, and live ingestion activity tracking.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
xCyanGrizzly
2026-02-24 16:02:06 +01:00
parent beb9cfb312
commit b427193d17
70 changed files with 8627 additions and 2 deletions

92
worker/src/scheduler.ts Normal file
View File

@@ -0,0 +1,92 @@
import { config } from "./util/config.js";
import { childLogger } from "./util/logger.js";
import { getActiveAccounts } from "./db/queries.js";
import { runWorkerForAccount } from "./worker.js";
// Child logger created with the name "scheduler"; used for every log line in this module.
const log = childLogger("scheduler");
// True while runCycle() is executing; runCycle() checks it to skip overlapping cycles.
let running = false;
// Handle of the most recently armed setTimeout for the next cycle.
// Null until the first cycle is scheduled, and reset to null by stopScheduler().
let timer: ReturnType<typeof setTimeout> | null = null;
/**
 * Execute a single ingestion pass.
 *
 * Fetches the active accounts and hands them to `runWorkerForAccount` one at a
 * time, awaiting each before starting the next. The module-level `running` flag
 * prevents overlapping passes: if a pass is already in flight, this call logs a
 * warning and returns without doing any work.
 *
 * Errors thrown while fetching or processing accounts are logged and swallowed,
 * so the returned promise does not reject because of them.
 */
async function runCycle(): Promise<void> {
  // Re-entrancy guard: never run two cycles at the same time.
  if (running) {
    log.warn("Previous cycle still running, skipping");
    return;
  }

  running = true;
  log.info("Starting ingestion cycle");

  try {
    const activeAccounts = await getActiveAccounts();
    const accountCount = activeAccounts.length;

    if (accountCount > 0) {
      log.info({ accountCount }, "Processing accounts");
      // Deliberately sequential: each account is fully processed before the next starts.
      for (const current of activeAccounts) {
        await runWorkerForAccount(current);
      }
      log.info("Ingestion cycle complete");
    } else {
      log.info("No active authenticated accounts, nothing to do");
    }
  } catch (err) {
    log.error({ err }, "Ingestion cycle failed");
  } finally {
    // Always release the guard, whether the cycle succeeded, failed, or had nothing to do.
    running = false;
  }
}
/**
 * Arm a one-shot timer for the next ingestion cycle.
 *
 * The delay is `config.workerIntervalMinutes` plus a random jitter in the range
 * `[0, config.jitterMinutes)` minutes. When the timer fires it runs one cycle and
 * then re-arms itself, unless the module-level `timer` no longer refers to this
 * timer's handle. That happens when stopScheduler() reset `timer` while the cycle
 * was still running; in that case the chain ends instead of silently restarting.
 */
function scheduleNext(): void {
  const intervalMs = config.workerIntervalMinutes * 60 * 1000;
  const jitterMs = Math.random() * config.jitterMinutes * 60 * 1000;
  const delay = intervalMs + jitterMs;
  log.info(
    { nextRunInMinutes: Math.round(delay / 60000) },
    "Next cycle scheduled"
  );

  const handle: ReturnType<typeof setTimeout> = setTimeout(() => {
    // `void` marks the promise as intentionally not awaited; failures are handled inside.
    void (async () => {
      try {
        await runCycle();
      } catch (err) {
        // runCycle() logs its own failures; this only guards against an unexpected
        // throw so the scheduling chain is not lost to an unhandled rejection.
        log.error({ err }, "Scheduled cycle threw unexpectedly");
      }
      // clearTimeout() on an already-fired timer is a no-op, so a stop requested
      // during the cycle is only visible through `timer` having been replaced.
      if (timer !== handle) {
        return;
      }
      scheduleNext();
    })();
  }, delay);
  timer = handle;
}
/**
 * Boot the scheduler.
 *
 * Logs the configured interval and jitter, awaits one ingestion cycle right away,
 * and then arms the timer for the following cycle. The returned promise resolves
 * once the first cycle has finished and the next one has been scheduled.
 */
export async function startScheduler(): Promise<void> {
  const startupSettings = {
    intervalMinutes: config.workerIntervalMinutes,
    jitterMinutes: config.jitterMinutes,
  };
  log.info(startupSettings, "Scheduler starting");

  // First cycle runs immediately rather than waiting a full interval.
  await runCycle();

  // All later cycles are driven by the timer chain.
  scheduleNext();
}
/**
 * Halt the scheduler.
 *
 * Cancels the pending timer, if there is one, and resets the module-level handle
 * to null. A cycle that is already executing is not interrupted by this call.
 */
export function stopScheduler(): void {
  const pending = timer;
  if (pending) {
    clearTimeout(pending);
    timer = null;
  }
  log.info("Scheduler stopped");
}