feat: add Telegram integration with forum topic support and creator tracking

Adds full Telegram ZIP ingestion pipeline: TDLib worker service scans source
channels for archive files, deduplicates by content hash, extracts metadata,
uploads to archive channel, and indexes in Postgres. Forum supergroups are
scanned per-topic with topic names used as creator. Filename-based creator
extraction (e.g. "Mammoth Factory - 2026-01.zip") serves as fallback.

Includes admin UI for managing accounts/channels, simplified account setup
(API credentials via env vars), auth code/password submission dialog,
package browser with creator column, and live ingestion activity tracking.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
xCyanGrizzly
2026-02-24 16:02:06 +01:00
parent beb9cfb312
commit b427193d17
70 changed files with 8627 additions and 2 deletions

View File

@@ -0,0 +1,21 @@
/**
 * Extract a creator name from common archive file naming patterns.
 *
 * Priority in the worker: topic name > filename extraction.
 * This is the fallback when no forum topic name is available.
 *
 * Patterns handled (split on ` - `):
 *   "Mammoth Factory - 2026-01.zip"        → "Mammoth Factory"
 *   "Artist Name - Pack Title.part01.rar"  → "Artist Name"
 *   "some_random_file.zip"                 → null
 */
export function extractCreatorFromFileName(fileName: string): string | null {
  // Drop trailing archive extensions (.zip, .rar, .part01.rar, .z01, …)
  const withoutExtension = fileName.replace(
    /(\.(part\d+\.rar|z\d{2}|zip|rar))+$/i,
    ""
  );
  // The creator is everything before the first " - " separator; a separator
  // at position 0 (or no separator at all) means there is no creator prefix.
  const separatorIndex = withoutExtension.indexOf(" - ");
  if (separatorIndex < 1) return null;
  const candidate = withoutExtension.substring(0, separatorIndex).trim();
  return candidate.length > 0 ? candidate : null;
}

View File

@@ -0,0 +1,96 @@
export type ArchiveFormat = "ZIP" | "RAR";
export interface MultipartInfo {
  baseName: string;
  partNumber: number;
  format: ArchiveFormat;
  pattern: "ZIP_NUMBERED" | "ZIP_LEGACY" | "RAR_PART" | "RAR_LEGACY" | "SINGLE";
}

/** A recognizer for one multipart archive naming convention. */
interface MultipartRule {
  regex: RegExp;
  format: ArchiveFormat;
  pattern: MultipartInfo["pattern"];
  getBaseName: (match: RegExpMatchArray) => string;
  getPartNumber: (match: RegExpMatchArray) => number;
}

/** Multipart naming conventions, tried in order. */
const MULTIPART_RULES: MultipartRule[] = [
  // pack.zip.001, pack.zip.002 — numeric suffix after the full .zip name
  {
    regex: /^(.+\.zip)\.(\d{3,})$/i,
    format: "ZIP",
    pattern: "ZIP_NUMBERED",
    getBaseName: (m) => m[1],
    getPartNumber: (m) => parseInt(m[2], 10),
  },
  // pack.z01, pack.z02 (legacy split — final part is pack.zip)
  {
    regex: /^(.+)\.z(\d{2,})$/i,
    format: "ZIP",
    pattern: "ZIP_LEGACY",
    getBaseName: (m) => m[1],
    getPartNumber: (m) => parseInt(m[2], 10),
  },
  // pack.part1.rar, pack.part2.rar
  {
    regex: /^(.+)\.part(\d+)\.rar$/i,
    format: "RAR",
    pattern: "RAR_PART",
    getBaseName: (m) => m[1],
    getPartNumber: (m) => parseInt(m[2], 10),
  },
  // pack.r00, pack.r01 (legacy split — final part is pack.rar)
  {
    regex: /^(.+)\.r(\d{2,})$/i,
    format: "RAR",
    pattern: "RAR_LEGACY",
    getBaseName: (m) => m[1],
    getPartNumber: (m) => parseInt(m[2], 10),
  },
];

/**
 * Build the info record for a standalone archive. partNumber -1 signals
 * "could be a single file OR the final part of a legacy split set".
 */
function singlePartInfo(
  fileName: string,
  stripRe: RegExp,
  format: ArchiveFormat
): MultipartInfo {
  return {
    baseName: fileName.replace(stripRe, ""),
    partNumber: -1,
    format,
    pattern: "SINGLE",
  };
}

/**
 * Detect whether a filename is an archive and, if so, extract multipart info.
 * Returns null for names that match no known archive convention.
 */
export function detectArchive(fileName: string): MultipartInfo | null {
  // Explicit multipart conventions take precedence over the bare extensions.
  for (const rule of MULTIPART_RULES) {
    const match = fileName.match(rule.regex);
    if (!match) continue;
    return {
      baseName: rule.getBaseName(match),
      partNumber: rule.getPartNumber(match),
      format: rule.format,
      pattern: rule.pattern,
    };
  }
  // Bare .zip — standalone, or the final part of a ZIP_LEGACY set
  if (/\.zip$/i.test(fileName)) return singlePartInfo(fileName, /\.zip$/i, "ZIP");
  // Bare .rar — standalone, or the final part of a RAR_LEGACY set
  if (/\.rar$/i.test(fileName)) return singlePartInfo(fileName, /\.rar$/i, "RAR");
  return null;
}

/** True when the filename matches any archive convention we should process. */
export function isArchiveAttachment(fileName: string): boolean {
  return Boolean(detectArchive(fileName));
}

View File

@@ -0,0 +1,25 @@
import { createReadStream } from "fs";
import { createHash } from "crypto";
import { pipeline } from "stream/promises";
import { PassThrough } from "stream";
/**
 * Compute SHA-256 hash of one or more files by streaming them in order.
 * Memory usage: O(1) — chunks are folded into the hash as they are read.
 * For multipart archives, pass all parts sorted by part number; the result
 * is the digest over the concatenated bytes of every file.
 */
export async function hashParts(filePaths: string[]): Promise<string> {
  const digest = createHash("sha256");
  for (const partPath of filePaths) {
    // Async iteration over the read stream yields Buffer chunks and
    // propagates stream errors as rejections.
    for await (const chunk of createReadStream(partPath)) {
      digest.update(chunk);
    }
  }
  return digest.digest("hex");
}

View File

@@ -0,0 +1,100 @@
import { detectArchive, type ArchiveFormat, type MultipartInfo } from "./detect.js";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
const log = childLogger("multipart");
/** Minimal projection of a Telegram message carrying an archive attachment. */
export interface TelegramMessage {
  /** Telegram message ID. */
  id: bigint;
  /** Attachment filename as posted. */
  fileName: string;
  /** TDLib file identifier, used later for downloading. */
  fileId: string;
  /** Attachment size in bytes. */
  fileSize: bigint;
  /** Message timestamp. */
  date: Date;
}
/** One logical archive: a single file, or an ordered multipart set. */
export interface ArchiveSet {
  type: ArchiveFormat;
  /** Shared base name the parts were grouped under. */
  baseName: string;
  /** Member messages; for multipart sets, sorted by part number. */
  parts: TelegramMessage[];
  isMultipart: boolean;
}
/**
 * Group messages into archive sets (single files + multipart groups).
 * Messages should be pre-filtered to only include archive attachments.
 *
 * Grouping key is format + lowercased base name. A group containing any
 * explicit multipart entry becomes one multipart set (absorbing SINGLE
 * entries with the same base name — they may be the final part of a legacy
 * split); a group of only SINGLE entries yields one set per entry.
 * Multipart groups whose messages span longer than the configured timeout
 * are skipped entirely.
 */
export function groupArchiveSets(messages: TelegramMessage[]): ArchiveSet[] {
  type Annotated = { msg: TelegramMessage; info: MultipartInfo };

  // Annotate each message with its detection result, dropping non-archives.
  const annotated = messages
    .map((msg) => ({ msg, info: detectArchive(msg.fileName) }))
    .filter((x): x is Annotated => x.info !== null);

  // Bucket by format + base name (case-insensitive).
  const byKey = new Map<string, Annotated[]>();
  for (const entry of annotated) {
    const key = `${entry.info.format}:${entry.info.baseName.toLowerCase()}`;
    const bucket = byKey.get(key);
    if (bucket) {
      bucket.push(entry);
    } else {
      byKey.set(key, [entry]);
    }
  }

  const sets: ArchiveSet[] = [];
  for (const bucket of byKey.values()) {
    const format = bucket[0].info.format;
    const baseName = bucket[0].info.baseName;
    const explicitParts = bucket.filter((e) => e.info.pattern !== "SINGLE");
    const singles = bucket.filter((e) => e.info.pattern === "SINGLE");

    if (explicitParts.length === 0) {
      // Only standalone archives — each one is its own set.
      for (const single of singles) {
        sets.push({
          type: format,
          baseName: single.info.baseName,
          parts: [single.msg],
          isMultipart: false,
        });
      }
      continue;
    }

    // Multipart set: singles with the same base name may be the final part
    // of a legacy split, so they join the set.
    const members = [...explicitParts, ...singles];

    // Reject sets whose parts were posted too far apart in time.
    const timestamps = members.map((e) => e.msg.date.getTime());
    const spanMs = Math.max(...timestamps) - Math.min(...timestamps);
    const allowedMs = config.multipartTimeoutHours * 60 * 60 * 1000;
    if (spanMs > allowedMs) {
      log.warn(
        { baseName, format, span: spanMs / 3600000 },
        "Multipart set spans too long, skipping"
      );
      continue;
    }

    // Order by part number; SINGLE entries (partNumber -1) sort last —
    // they are the final part of a legacy split.
    const sortKey = (e: Annotated) =>
      e.info.partNumber === -1 ? 999999 : e.info.partNumber;
    members.sort((a, b) => sortKey(a) - sortKey(b));

    sets.push({
      type: format,
      baseName,
      parts: members.map((e) => e.msg),
      isMultipart: true,
    });
  }
  return sets;
}

View File

@@ -0,0 +1,90 @@
import { execFile } from "child_process";
import { promisify } from "util";
import path from "path";
import { childLogger } from "../util/logger.js";
import type { FileEntry } from "./zip-reader.js";
const execFileAsync = promisify(execFile);
const log = childLogger("rar-reader");

/**
 * List the contents of a RAR archive via the `unrar` CLI without extracting.
 * unrar automatically discovers sibling parts when they're co-located.
 * Any failure (missing binary, timeout, corrupt archive, oversized listing)
 * is logged and resolves to an empty list.
 */
export async function readRarContents(
  firstPartPath: string
): Promise<FileEntry[]> {
  const execOptions = {
    timeout: 30_000,
    maxBuffer: 10 * 1024 * 1024, // 10MB of listing output for very large archives
  };
  try {
    const listing = await execFileAsync(
      "unrar",
      ["l", "-v", firstPartPath],
      execOptions
    );
    return parseUnrarOutput(listing.stdout);
  } catch (err) {
    log.warn({ err, file: firstPartPath }, "Failed to read RAR contents");
    return []; // fallback: best effort, never throw
  }
}
/**
 * Parse the tabular output of `unrar l -v`.
 *
 * Example output format:
 *   Archive: test.rar
 *   Details: RAR 5
 *
 *   Attributes      Size    Packed Ratio   Date   Time   CRC-32  Name
 *   -----------  --------- --------- ----- -------- -----  -------- ----
 *      ..A....       12345     10234  83%  2024-01-15 10:30  DEADBEEF folder/file.stl
 *   -----------  --------- --------- ----- -------- -----  -------- ----
 *
 * Only rows between the first and second separator line are file entries;
 * directory rows (trailing slash/backslash) are skipped.
 */
const RAR_ENTRY_LINE =
  /^\S+\s+(\d+)\s+(\d+)\s+\d+%\s+\S+\s+\S+\s+([0-9A-Fa-f]+)\s+(.+)$/;

function parseUnrarOutput(output: string): FileEntry[] {
  const entries: FileEntry[] = [];
  let separatorsSeen = 0;
  for (const rawLine of output.split("\n")) {
    const line = rawLine.trim();
    // Separator lines (runs of dashes) delimit the file table.
    if (/^-{5,}/.test(line)) {
      separatorsSeen++;
      continue;
    }
    // Inside the table only after the first separator and before the second.
    if (separatorsSeen !== 1) continue;

    const m = line.match(RAR_ENTRY_LINE);
    if (!m) continue;
    const [, sizeStr, packedStr, crcHex, entryPath] = m;
    // Directory entries end with a path separator — skip them.
    if (entryPath.endsWith("/") || entryPath.endsWith("\\")) continue;

    const ext = path.extname(entryPath).toLowerCase();
    entries.push({
      path: entryPath,
      fileName: path.basename(entryPath),
      extension: ext ? ext.slice(1) : null, // without the leading dot
      compressedSize: BigInt(packedStr),
      uncompressedSize: BigInt(sizeStr),
      crc32: crcHex.toLowerCase(),
    });
  }
  return entries;
}

View File

@@ -0,0 +1,48 @@
import { createReadStream, createWriteStream } from "fs";
import { stat } from "fs/promises";
import path from "path";
import { pipeline } from "stream/promises";
import { childLogger } from "../util/logger.js";
const log = childLogger("split");
/** 2GB in bytes — Telegram's file size limit */
const MAX_PART_SIZE = 2n * 1024n * 1024n * 1024n;

/**
 * Split a file into ≤2GB parts using byte-level splitting (parts are named
 * `<original>.001`, `<original>.002`, …). Returns paths to the split parts;
 * if the file is already ≤2GB, returns the original path untouched.
 */
export async function byteLevelSplit(filePath: string): Promise<string[]> {
  const { size } = await stat(filePath);
  if (BigInt(size) <= MAX_PART_SIZE) {
    return [filePath];
  }

  const chunkSize = Number(MAX_PART_SIZE);
  const partCount = Math.ceil(size / chunkSize);
  const dir = path.dirname(filePath);
  const base = path.basename(filePath);
  log.info({ filePath, fileSize: size, totalParts: partCount }, "Splitting file");

  const produced: string[] = [];
  for (let index = 0; index < partCount; index++) {
    const suffix = String(index + 1).padStart(3, "0");
    const target = path.join(dir, `${base}.${suffix}`);
    // Inclusive byte range for this part; the last part is clamped to EOF.
    const start = index * chunkSize;
    const end = Math.min(start + chunkSize - 1, size - 1);
    await pipeline(
      createReadStream(filePath, { start, end }),
      createWriteStream(target)
    );
    produced.push(target);
  }
  log.info({ filePath, parts: produced.length }, "File split complete");
  return produced;
}

View File

@@ -0,0 +1,61 @@
import yauzl from "yauzl";
import path from "path";
import { childLogger } from "../util/logger.js";
const log = childLogger("zip-reader");
/** Metadata for one file inside an archive (shared by the ZIP and RAR readers). */
export interface FileEntry {
  /** Full path inside the archive. */
  path: string;
  /** Basename of `path`. */
  fileName: string;
  /** Lowercased extension without the dot, or null when the name has none. */
  extension: string | null;
  compressedSize: bigint;
  uncompressedSize: bigint;
  /** Lowercase hex CRC-32, or null when unavailable. */
  crc32: string | null;
}
/**
 * Read the central directory of a ZIP file without extracting any contents.
 * For multipart ZIPs, pass the paths sorted by part order — the central
 * directory lives at the end of the archive, so only the last part is opened.
 * Errors are non-fatal: the promise resolves with whatever entries were
 * collected so far (possibly an empty list).
 */
export async function readZipCentralDirectory(
  filePaths: string[]
): Promise<FileEntry[]> {
  const lastPart = filePaths[filePaths.length - 1];
  return new Promise((resolve) => {
    yauzl.open(lastPart, { lazyEntries: true, autoClose: true }, (err, zipFile) => {
      if (err) {
        log.warn({ err, file: lastPart }, "Failed to open ZIP for reading");
        resolve([]); // fallback: best effort, never reject
        return;
      }
      const collected: FileEntry[] = [];
      zipFile.on("entry", (entry: yauzl.Entry) => {
        // Directory entries end with "/" and carry no file data.
        if (!entry.fileName.endsWith("/")) {
          collected.push(toFileEntry(entry));
        }
        zipFile.readEntry(); // lazyEntries mode: pull the next entry
      });
      zipFile.on("end", () => resolve(collected));
      zipFile.on("error", (error) => {
        log.warn({ error, file: lastPart }, "Error reading ZIP entries");
        resolve(collected); // return whatever we got
      });
      zipFile.readEntry(); // kick off iteration
    });
  });
}

/** Map a yauzl entry to our shared FileEntry shape. */
function toFileEntry(entry: yauzl.Entry): FileEntry {
  const ext = path.extname(entry.fileName).toLowerCase();
  return {
    path: entry.fileName,
    fileName: path.basename(entry.fileName),
    extension: ext ? ext.slice(1) : null, // without the leading dot
    compressedSize: BigInt(entry.compressedSize),
    uncompressedSize: BigInt(entry.uncompressedSize),
    // A zero CRC is treated as "absent" (mirrors the original behavior;
    // NOTE(review): a real CRC-32 can legitimately be 0).
    crc32: entry.crc32 !== 0 ? entry.crc32.toString(16).padStart(8, "0") : null,
  };
}

14
worker/src/db/client.ts Normal file
View File

@@ -0,0 +1,14 @@
import { PrismaClient } from "@prisma/client";
import { PrismaPg } from "@prisma/adapter-pg";
import pg from "pg";
import { config } from "../util/config.js";
// Single shared pg pool; kept small — the worker processes accounts sequentially.
const pool = new pg.Pool({
  connectionString: config.databaseUrl,
  max: 5,
});
// Prisma rides on the same pg pool via the driver adapter.
const adapter = new PrismaPg(pool);
export const db = new PrismaClient({ adapter });
// The raw pool is exported for SQL Prisma can't express (e.g. advisory locks).
export { pool };

56
worker/src/db/locks.ts Normal file
View File

@@ -0,0 +1,56 @@
import { pool } from "./client.js";
import { childLogger } from "../util/logger.js";
const log = childLogger("locks");
/**
 * Derive a stable lock ID from an account ID string using the classic
 * 31-multiplier string hash, reduced to an unsigned 32-bit integer.
 *
 * `>>> 0` (rather than the previous Math.abs) is used for the final
 * reduction: Math.abs(-2147483648) is 2147483648, which does not fit in a
 * signed 32-bit int, and abs also collapses +h/-h pairs onto the same ID.
 */
function hashToLockId(accountId: string): number {
  let hash = 0;
  for (let i = 0; i < accountId.length; i++) {
    hash = (hash << 5) - hash + accountId.charCodeAt(i); // hash * 31 + char
    hash |= 0; // keep intermediate values in 32-bit range
  }
  return hash >>> 0;
}
/**
 * Clients currently holding an advisory lock, keyed by account ID.
 *
 * PostgreSQL advisory locks are *session*-scoped: a lock must be released on
 * the same connection that acquired it. The previous implementation released
 * the client back to the pool right after acquiring, so (a) the pool could
 * hand the lock-holding session to other queries, and (b) releaseLock ran on
 * a potentially different session, where pg_advisory_unlock fails and the
 * lock stayed held until that pooled connection closed.
 */
const lockHolders = new Map<string, Awaited<ReturnType<typeof pool.connect>>>();
/**
 * Try to acquire a PostgreSQL advisory lock for an account.
 * Returns true if acquired, false if already held by another session.
 * On success the underlying client stays checked out of the pool until
 * releaseLock() is called for the same account.
 */
export async function tryAcquireLock(accountId: string): Promise<boolean> {
  const lockId = hashToLockId(accountId);
  const client = await pool.connect();
  let acquired = false;
  try {
    const result = await client.query<{ pg_try_advisory_lock: boolean }>(
      "SELECT pg_try_advisory_lock($1)",
      [lockId]
    );
    acquired = result.rows[0]?.pg_try_advisory_lock ?? false;
    if (acquired) {
      // Hold on to this client — the lock lives on its session.
      lockHolders.set(accountId, client);
      log.debug({ accountId, lockId }, "Advisory lock acquired");
    } else {
      log.debug({ accountId, lockId }, "Advisory lock already held");
    }
    return acquired;
  } finally {
    // Only return the client to the pool when we do NOT hold the lock.
    if (!acquired) client.release();
  }
}
/**
 * Release the advisory lock for an account on the session that acquired it,
 * then return that client to the pool. No-op when this process holds no lock
 * for the account.
 */
export async function releaseLock(accountId: string): Promise<void> {
  const client = lockHolders.get(accountId);
  if (!client) {
    log.debug({ accountId }, "No advisory lock held by this process");
    return;
  }
  lockHolders.delete(accountId);
  const lockId = hashToLockId(accountId);
  try {
    await client.query("SELECT pg_advisory_unlock($1)", [lockId]);
    log.debug({ accountId, lockId }, "Advisory lock released");
  } finally {
    client.release();
  }
}

270
worker/src/db/queries.ts Normal file
View File

@@ -0,0 +1,270 @@
import { db } from "./client.js";
import type { ArchiveType } from "@prisma/client";
/** Accounts eligible for an ingestion cycle: active AND fully authenticated. */
export async function getActiveAccounts() {
  return db.telegramAccount.findMany({
    where: { isActive: true, authState: "AUTHENTICATED" },
  });
}
/**
 * READER mappings for this account onto active SOURCE channels — the
 * channels the account scans for archives. Channel row is included.
 */
export async function getSourceChannelMappings(accountId: string) {
  return db.accountChannelMap.findMany({
    where: {
      accountId,
      role: "READER",
      channel: { type: "SOURCE", isActive: true },
    },
    include: { channel: true },
  });
}
/**
 * The account's active DESTINATION (archive upload) channel via its WRITER
 * mapping, or null when none is configured.
 */
export async function getDestinationChannel(accountId: string) {
  const mapping = await db.accountChannelMap.findFirst({
    where: {
      accountId,
      role: "WRITER",
      channel: { type: "DESTINATION", isActive: true },
    },
    include: { channel: true },
  });
  return mapping?.channel ?? null;
}
/** Dedup check: true when a package with this content hash is already indexed. */
export async function packageExistsByHash(contentHash: string) {
  const pkg = await db.package.findUnique({
    where: { contentHash },
    select: { id: true },
  });
  return pkg !== null;
}
/** Input for createPackageWithFiles: one indexed archive plus its file listing. */
export interface CreatePackageInput {
  /** SHA-256 over all archive parts, used for deduplication. */
  contentHash: string;
  fileName: string;
  fileSize: bigint;
  archiveType: ArchiveType;
  sourceChannelId: string;
  sourceMessageId: bigint;
  /** Forum topic the archive was found in, when the source is a forum. */
  sourceTopicId?: bigint | null;
  destChannelId?: string;
  destMessageId?: bigint;
  isMultipart: boolean;
  partCount: number;
  ingestionRunId: string;
  /** Creator name (forum topic name, or derived from the filename), if known. */
  creator?: string | null;
  /** Raw preview-image bytes, when a preview photo was matched. */
  previewData?: Buffer | null;
  previewMsgId?: bigint | null;
  files: {
    path: string;
    fileName: string;
    extension: string | null;
    compressedSize: bigint;
    uncompressedSize: bigint;
    crc32: string | null;
  }[];
}
/**
 * Create the package row together with its nested file rows in one query.
 * `?? undefined` (and plain undefined) leave optional columns unset so they
 * take their DB defaults instead of being written as NULL.
 */
export async function createPackageWithFiles(input: CreatePackageInput) {
  return db.package.create({
    data: {
      contentHash: input.contentHash,
      fileName: input.fileName,
      fileSize: input.fileSize,
      archiveType: input.archiveType,
      sourceChannelId: input.sourceChannelId,
      sourceMessageId: input.sourceMessageId,
      sourceTopicId: input.sourceTopicId ?? undefined,
      destChannelId: input.destChannelId,
      destMessageId: input.destMessageId,
      isMultipart: input.isMultipart,
      partCount: input.partCount,
      fileCount: input.files.length,
      ingestionRunId: input.ingestionRunId,
      creator: input.creator ?? undefined,
      // Prisma bytes columns take a Uint8Array
      previewData: input.previewData ? new Uint8Array(input.previewData) : undefined,
      previewMsgId: input.previewMsgId ?? undefined,
      files: {
        create: input.files,
      },
    },
  });
}
/** Open a new RUNNING ingestion run row for an account. */
export async function createIngestionRun(accountId: string) {
  return db.ingestionRun.create({
    data: {
      accountId,
      status: "RUNNING",
      currentActivity: "Starting ingestion run",
      currentStep: "initializing",
      lastActivityAt: new Date(),
    },
  });
}
/**
 * Live-progress snapshot written while a run is executing.
 * Fields left undefined are not touched in the DB.
 */
export interface ActivityUpdate {
  currentActivity: string;
  currentStep: string;
  currentChannel?: string | null;
  currentFile?: string | null;
  currentFileNum?: number | null;
  totalFiles?: number | null;
  downloadedBytes?: bigint | null;
  totalBytes?: bigint | null;
  downloadPercent?: number | null;
  messagesScanned?: number;
  zipsFound?: number;
  zipsDuplicate?: number;
  zipsIngested?: number;
}
/**
 * Write a live-activity update and bump lastActivityAt.
 *
 * NOTE(review): `?? undefined` means passing null here does NOT clear a
 * column — it leaves the stored value unchanged. Activity columns are only
 * cleared when the run reaches a terminal state (see CLEAR_ACTIVITY).
 * Counter fields are written only when explicitly provided.
 */
export async function updateRunActivity(
  runId: string,
  activity: ActivityUpdate
) {
  return db.ingestionRun.update({
    where: { id: runId },
    data: {
      currentActivity: activity.currentActivity,
      currentStep: activity.currentStep,
      currentChannel: activity.currentChannel ?? undefined,
      currentFile: activity.currentFile ?? undefined,
      currentFileNum: activity.currentFileNum ?? undefined,
      totalFiles: activity.totalFiles ?? undefined,
      downloadedBytes: activity.downloadedBytes ?? undefined,
      totalBytes: activity.totalBytes ?? undefined,
      downloadPercent: activity.downloadPercent ?? undefined,
      lastActivityAt: new Date(),
      ...(activity.messagesScanned !== undefined && { messagesScanned: activity.messagesScanned }),
      ...(activity.zipsFound !== undefined && { zipsFound: activity.zipsFound }),
      ...(activity.zipsDuplicate !== undefined && { zipsDuplicate: activity.zipsDuplicate }),
      ...(activity.zipsIngested !== undefined && { zipsIngested: activity.zipsIngested }),
    },
  });
}
/**
 * Column values that reset the live-activity fields when a run reaches a
 * terminal state. Deliberately does NOT include lastActivityAt: this object
 * is evaluated once at module load, so a timestamp captured here would be
 * frozen at process start. (The previous version included
 * `lastActivityAt: new Date()` here, which stamped every completed/failed
 * run with the worker's boot time instead of the finish time.)
 */
const CLEAR_ACTIVITY = {
  currentActivity: null,
  currentStep: null,
  currentChannel: null,
  currentFile: null,
  currentFileNum: null,
  totalFiles: null,
  downloadedBytes: null,
  totalBytes: null,
  downloadPercent: null,
};
/**
 * Mark a run COMPLETED, persist its final counters, and clear live activity.
 */
export async function completeIngestionRun(
  runId: string,
  counters: {
    messagesScanned: number;
    zipsFound: number;
    zipsDuplicate: number;
    zipsIngested: number;
  }
) {
  return db.ingestionRun.update({
    where: { id: runId },
    data: {
      status: "COMPLETED",
      finishedAt: new Date(),
      lastActivityAt: new Date(), // stamped per call, not at module load
      ...counters,
      ...CLEAR_ACTIVITY,
    },
  });
}
/** Mark a run FAILED with an error message and clear live activity. */
export async function failIngestionRun(runId: string, errorMessage: string) {
  return db.ingestionRun.update({
    where: { id: runId },
    data: {
      status: "FAILED",
      finishedAt: new Date(),
      lastActivityAt: new Date(), // stamped per call, not at module load
      errorMessage,
      ...CLEAR_ACTIVITY,
    },
  });
}
/** Persist the scan high-water mark for a channel mapping. */
export async function updateLastProcessedMessage(
  mappingId: string,
  messageId: bigint
) {
  return db.accountChannelMap.update({
    where: { id: mappingId },
    data: { lastProcessedMessageId: messageId },
  });
}
/**
 * Startup recovery: any run still marked RUNNING must be a leftover from a
 * previous process, so flag it FAILED with an explanatory message.
 */
export async function markStaleRunsAsFailed() {
  return db.ingestionRun.updateMany({
    where: { status: "RUNNING" },
    data: {
      status: "FAILED",
      finishedAt: new Date(),
      errorMessage: "Worker restarted — run was still marked as RUNNING",
    },
  });
}
/**
 * Transition an account's auth state.
 * - lastSeenAt is bumped only when transitioning to AUTHENTICATED.
 * - authCode: pass a string to store a submitted code, null to clear it, or
 *   omit it (undefined) to leave the stored value untouched.
 *   NOTE(review): callers that omit it keep any previously stored code in
 *   the row — confirm that is intended for each transition.
 */
export async function updateAccountAuthState(
  accountId: string,
  authState: "PENDING" | "AWAITING_CODE" | "AWAITING_PASSWORD" | "AUTHENTICATED" | "EXPIRED",
  authCode?: string | null
) {
  return db.telegramAccount.update({
    where: { id: accountId },
    data: { authState, authCode, lastSeenAt: authState === "AUTHENTICATED" ? new Date() : undefined },
  });
}
/** Read the auth code (and current state) submitted via the admin UI, if any. */
export async function getAccountAuthCode(accountId: string) {
  const account = await db.telegramAccount.findUnique({
    where: { id: accountId },
    select: { authCode: true, authState: true },
  });
  return account;
}
// ── Forum / Topic progress ──
/** Record whether a channel is a forum supergroup (forums are scanned per-topic). */
export async function setChannelForum(channelId: string, isForum: boolean) {
  return db.telegramChannel.update({
    where: { id: channelId },
    data: { isForum },
  });
}
/** All per-topic progress rows for one account↔channel mapping. */
export async function getTopicProgress(mappingId: string) {
  return db.topicProgress.findMany({
    where: { accountChannelMapId: mappingId },
  });
}
/**
 * Create or update the per-topic scan high-water mark, caching the topic
 * name alongside it (forum topic names are used as the package creator).
 */
export async function upsertTopicProgress(
  mappingId: string,
  topicId: bigint,
  topicName: string | null,
  lastProcessedMessageId: bigint
) {
  return db.topicProgress.upsert({
    where: {
      accountChannelMapId_topicId: {
        accountChannelMapId: mappingId,
        topicId,
      },
    },
    create: {
      accountChannelMapId: mappingId,
      topicId,
      topicName,
      lastProcessedMessageId,
    },
    update: {
      topicName,
      lastProcessedMessageId,
    },
  });
}

50
worker/src/index.ts Normal file
View File

@@ -0,0 +1,50 @@
import { mkdir } from "fs/promises";
import { config } from "./util/config.js";
import { logger } from "./util/logger.js";
import { markStaleRunsAsFailed } from "./db/queries.js";
import { cleanupTempDir } from "./worker.js";
import { startScheduler, stopScheduler } from "./scheduler.js";
import { db, pool } from "./db/client.js";
const log = logger.child({ module: "main" });
/**
 * Worker entry point: prepare directories, clear stale state left by a
 * previous process, then hand control to the scheduler (whose pending timer
 * keeps the process alive between cycles).
 */
async function main(): Promise<void> {
  log.info("DragonsStash Telegram Worker starting");
  // Redact the connection string before logging the configuration.
  log.info({ config: { ...config, databaseUrl: "***" } }, "Configuration loaded");
  // Ensure temp directory exists
  await mkdir(config.tempDir, { recursive: true });
  await mkdir(config.tdlibStateDir, { recursive: true });
  // Clean up stale state from a previous process
  await cleanupTempDir();
  await markStaleRunsAsFailed();
  // Start the scheduler (runs an immediate cycle, then recurs on a timer)
  await startScheduler();
}
// Graceful shutdown.
// NOTE(review): this does not wait for an in-flight ingestion cycle; a run
// active at shutdown stays RUNNING and is marked FAILED by
// markStaleRunsAsFailed() on the next boot — confirm that is acceptable.
function shutdown(signal: string): void {
  log.info({ signal }, "Shutdown signal received");
  stopScheduler();
  // Close DB connections
  Promise.all([db.$disconnect(), pool.end()])
    .then(() => {
      log.info("Shutdown complete");
      process.exit(0);
    })
    .catch((err) => {
      log.error({ err }, "Error during shutdown");
      process.exit(1);
    });
}
process.on("SIGTERM", () => shutdown("SIGTERM"));
process.on("SIGINT", () => shutdown("SIGINT"));
main().catch((err) => {
  log.fatal({ err }, "Worker failed to start");
  process.exit(1);
});

View File

@@ -0,0 +1,86 @@
import { childLogger } from "../util/logger.js";
const log = childLogger("preview-match");
export interface TelegramPhoto {
id: bigint;
date: Date;
/** Caption text on the photo message (if any). */
caption: string;
/** The smallest photo size available — used as thumbnail. */
fileId: string;
fileSize: number;
}
export interface ArchiveRef {
baseName: string;
firstMessageId: bigint;
firstMessageDate: Date;
}
/**
 * Match preview photos to archives.
 *
 * A photo counts as a preview for an archive when:
 *  1. its normalized caption and the archive's normalized base name contain
 *     each other (either direction), and
 *  2. it was posted within ±6 hours of the archive's first message.
 *
 * For each archive the closest-in-time matching photo wins.
 * Returns a map keyed by the archive's baseName.
 */
export function matchPreviewToArchive(
  photos: TelegramPhoto[],
  archives: ArchiveRef[]
): Map<string, TelegramPhoto> {
  const TIME_WINDOW_MS = 6 * 60 * 60 * 1000; // ±6 hours
  const matched = new Map<string, TelegramPhoto>();
  for (const archive of archives) {
    const needle = normalizeForMatch(archive.baseName);
    if (!needle) continue;

    let best: TelegramPhoto | null = null;
    let bestDelta = Infinity;
    for (const photo of photos) {
      const delta = Math.abs(
        photo.date.getTime() - archive.firstMessageDate.getTime()
      );
      // Outside the window, or no closer than the current best → skip early.
      if (delta > TIME_WINDOW_MS || delta >= bestDelta) continue;

      const haystack = normalizeForMatch(photo.caption);
      if (!haystack) continue;
      if (haystack.includes(needle) || needle.includes(haystack)) {
        best = photo;
        bestDelta = delta;
      }
    }

    if (best) {
      log.debug(
        { baseName: archive.baseName, photoId: best.id.toString() },
        "Matched preview photo to archive"
      );
      matched.set(archive.baseName, best);
    }
  }
  return matched;
}
/**
 * Lowercase, strip a short trailing extension, and collapse separator runs
 * (underscore, dash, dot, whitespace) into single spaces for fuzzy matching.
 */
function normalizeForMatch(input: string): string {
  const lowered = input.toLowerCase();
  const withoutExtension = lowered.replace(/\.[a-z0-9]{1,5}$/i, "");
  return withoutExtension.replace(/[_\-.\s]+/g, " ").trim();
}

92
worker/src/scheduler.ts Normal file
View File

@@ -0,0 +1,92 @@
import { config } from "./util/config.js";
import { childLogger } from "./util/logger.js";
import { getActiveAccounts } from "./db/queries.js";
import { runWorkerForAccount } from "./worker.js";
const log = childLogger("scheduler");
// Re-entrancy guard: true while a cycle is executing.
let running = false;
// Pending timer for the next cycle; null when none is scheduled.
let timer: ReturnType<typeof setTimeout> | null = null;
/**
 * Run one ingestion cycle: process all active, authenticated accounts sequentially.
 * The `running` flag makes this a no-op when the previous cycle is still in
 * flight, so timer-driven cycles cannot overlap.
 */
async function runCycle(): Promise<void> {
  if (running) {
    log.warn("Previous cycle still running, skipping");
    return;
  }
  running = true;
  log.info("Starting ingestion cycle");
  try {
    const accounts = await getActiveAccounts();
    if (accounts.length === 0) {
      log.info("No active authenticated accounts, nothing to do");
      return;
    }
    log.info({ accountCount: accounts.length }, "Processing accounts");
    // Accounts are processed one at a time (sequential awaits).
    for (const account of accounts) {
      await runWorkerForAccount(account);
    }
    log.info("Ingestion cycle complete");
  } catch (err) {
    log.error({ err }, "Ingestion cycle failed");
  } finally {
    running = false;
  }
}
/**
 * Schedule the next cycle with jitter.
 * Jitter is additive only, so the effective delay lies in
 * [interval, interval + jitter] minutes.
 */
function scheduleNext(): void {
  const intervalMs = config.workerIntervalMinutes * 60 * 1000;
  const jitterMs = Math.random() * config.jitterMinutes * 60 * 1000;
  const delay = intervalMs + jitterMs;
  log.info(
    { nextRunInMinutes: Math.round(delay / 60000) },
    "Next cycle scheduled"
  );
  // runCycle never rejects (errors are caught inside), so this async
  // callback cannot produce an unhandled rejection.
  timer = setTimeout(async () => {
    await runCycle();
    scheduleNext();
  }, delay);
}
/**
 * Start the scheduler. Runs an immediate first cycle, then schedules subsequent ones.
 */
export async function startScheduler(): Promise<void> {
  log.info(
    {
      intervalMinutes: config.workerIntervalMinutes,
      jitterMinutes: config.jitterMinutes,
    },
    "Scheduler starting"
  );
  // Run immediately on start
  await runCycle();
  // Then schedule recurring cycles
  scheduleNext();
}
/**
 * Stop the scheduler gracefully.
 * Only cancels the *pending* timer — it does not interrupt a cycle that is
 * already in flight.
 */
export function stopScheduler(): void {
  if (timer) {
    clearTimeout(timer);
    timer = null;
  }
  log.info("Scheduler stopped");
}

120
worker/src/tdlib/client.ts Normal file
View File

@@ -0,0 +1,120 @@
import tdl, { createClient, type Client } from "tdl";
import { getTdjson } from "prebuilt-tdlib";
import path from "path";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import {
updateAccountAuthState,
getAccountAuthCode,
} from "../db/queries.js";
const log = childLogger("tdlib-client");
// Point tdl at the prebuilt tdjson shared library (no system TDLib required).
tdl.configure({ tdjson: getTdjson() });
/** The subset of account fields needed to open a TDLib session. */
interface AccountConfig {
  id: string;
  phone: string;
}
/**
 * Create and authenticate a TDLib client for a Telegram account.
 *
 * Authentication flow communicates with the admin UI via the database:
 * - Worker sets authState to AWAITING_CODE / AWAITING_PASSWORD when TDLib
 *   asks for the phone code or the 2FA password
 * - Admin enters the value via the UI, which writes it to the authCode field
 * - Worker polls the DB for the value and feeds it to TDLib
 *
 * The account is marked AUTHENTICATED only after login() succeeds; on any
 * auth failure it is marked EXPIRED and the error is rethrown.
 */
export async function createTdlibClient(
  account: AccountConfig
): Promise<Client> {
  const dbPath = path.join(config.tdlibStateDir, account.id);
  const client = createClient({
    apiId: config.telegramApiId,
    apiHash: config.telegramApiHash,
    databaseDirectory: dbPath,
    filesDirectory: path.join(dbPath, "files"),
  });
  client.on("error", (err) => {
    log.error({ err, accountId: account.id }, "TDLib client error");
  });
  try {
    await client.login(() => ({
      getPhoneNumber: async () => {
        log.info({ accountId: account.id }, "TDLib requesting phone number");
        return account.phone;
      },
      getAuthCode: async () => {
        log.info({ accountId: account.id }, "TDLib requesting auth code");
        // Clear any stale code BEFORE polling: a leftover value from a
        // previous attempt would otherwise be replayed instantly.
        await updateAccountAuthState(account.id, "AWAITING_CODE", null);
        const code = await pollForAuthCode(account.id);
        if (!code) {
          throw new Error("Auth code not provided within timeout");
        }
        // Consume the code but stay in AWAITING_CODE: TDLib has not yet
        // verified it, and a wrong code makes TDLib call getAuthCode again.
        // (Previously the state flipped to AUTHENTICATED here, which hid
        // the retry prompt from the admin UI.)
        await updateAccountAuthState(account.id, "AWAITING_CODE", null);
        return code;
      },
      getPassword: async () => {
        log.info({ accountId: account.id }, "TDLib requesting 2FA password");
        // Same clear-before-poll / clear-after-read dance as getAuthCode;
        // the password is transported through the shared authCode field.
        await updateAccountAuthState(account.id, "AWAITING_PASSWORD", null);
        const password = await pollForAuthCode(account.id);
        if (!password) {
          throw new Error("2FA password not provided within timeout");
        }
        await updateAccountAuthState(account.id, "AWAITING_PASSWORD", null);
        return password;
      },
    }));
    await updateAccountAuthState(account.id, "AUTHENTICATED");
    log.info({ accountId: account.id }, "TDLib client authenticated");
    return client;
  } catch (err) {
    log.error({ err, accountId: account.id }, "TDLib authentication failed");
    await updateAccountAuthState(account.id, "EXPIRED");
    throw err;
  }
}
/**
 * Poll the database for an auth code/password submitted via the admin UI.
 * Checks every 5 seconds until the deadline (default 5 minutes); returns the
 * value, or null on timeout.
 */
async function pollForAuthCode(
  accountId: string,
  timeoutMs = 300_000
): Promise<string | null> {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    const account = await getAccountAuthCode(accountId);
    const code = account?.authCode;
    if (code) return code;
    await sleep(5000);
  }
  return null;
}
/** Promise-based delay helper. */
const sleep = (ms: number): Promise<void> =>
  new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Close a TDLib client gracefully.
 * Close errors are logged and swallowed — shutdown paths should never throw.
 */
export async function closeTdlibClient(client: Client): Promise<void> {
  try {
    await client.close();
  } catch (err) {
    log.warn({ err }, "Error closing TDLib client");
  }
}

View File

@@ -0,0 +1,389 @@
import type { Client } from "tdl";
import { readFile, rename, stat } from "fs/promises";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import { isArchiveAttachment } from "../archive/detect.js";
import type { TelegramMessage } from "../archive/multipart.js";
import type { TelegramPhoto } from "../preview/match.js";
const log = childLogger("download");
// The Td* interfaces below declare only the fields this module reads from
// TDLib's JSON objects — they are deliberately partial. Shapes are assumed
// to mirror TDLib's JSON API (photoSize / message / file); confirm against
// the TDLib documentation before adding fields.
interface TdPhotoSize {
  type: string;
  photo: {
    id: number;
    size: number;
    expected_size: number;
    local?: {
      path?: string;
      is_downloading_active?: boolean;
      is_downloading_completed?: boolean;
      downloaded_size?: number;
    };
  };
  width: number;
  height: number;
}
interface TdMessage {
  id: number;
  date: number; // unix seconds
  content: {
    _: string;
    document?: {
      file_name?: string;
      document?: {
        id: number;
        size: number;
        local?: {
          path?: string;
          is_downloading_completed?: boolean;
        };
      };
    };
    photo?: {
      sizes?: TdPhotoSize[];
    };
    caption?: {
      text?: string;
    };
  };
}
interface TdFile {
  id: number;
  size: number;
  expected_size: number;
  local: {
    path: string;
    is_downloading_active: boolean;
    is_downloading_completed: boolean;
    downloaded_size: number;
    download_offset: number;
  };
}
/** Result of scanning a channel: archive documents plus preview-photo candidates. */
export interface ChannelScanResult {
  archives: TelegramMessage[];
  photos: TelegramPhoto[];
}
/**
 * Page through a chat's history, collecting archive attachments and photo
 * messages (photos are candidates for preview matching).
 * Returns both lists in chronological order (oldest first).
 *
 * NOTE(review): TDLib's getChatHistory returns messages *older* than
 * `from_message_id` (newest first, walking backward in time), so despite the
 * original "since a given message ID" description this scans BACKWARD from
 * the given ID — confirm callers pass the boundary they intend.
 *
 * NOTE(review): `limit` caps only the page size (≤100); the loop keeps
 * paging until history is exhausted, so the total is unbounded.
 */
export async function getChannelMessages(
  client: Client,
  chatId: bigint,
  fromMessageId?: bigint | null,
  limit = 100
): Promise<ChannelScanResult> {
  const archives: TelegramMessage[] = [];
  const photos: TelegramPhoto[] = [];
  // 0 means "start from the latest message" in getChatHistory.
  // assumes chat/message IDs fit in a double — TODO confirm for large IDs
  let currentFromId = fromMessageId ? Number(fromMessageId) : 0;
  // eslint-disable-next-line no-constant-condition
  while (true) {
    const result = (await client.invoke({
      _: "getChatHistory",
      chat_id: Number(chatId),
      from_message_id: currentFromId,
      offset: 0,
      limit: Math.min(limit, 100),
      only_local: false,
    })) as { messages: TdMessage[] };
    if (!result.messages || result.messages.length === 0) break;
    for (const msg of result.messages) {
      // Check for archive documents
      const doc = msg.content?.document;
      if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
        archives.push({
          id: BigInt(msg.id),
          fileName: doc.file_name,
          fileId: String(doc.document.id),
          fileSize: BigInt(doc.document.size),
          date: new Date(msg.date * 1000),
        });
        continue;
      }
      // Check for photo messages (potential previews)
      const photo = msg.content?.photo;
      const caption = msg.content?.caption?.text ?? "";
      if (photo?.sizes && photo.sizes.length > 0) {
        // Pick the smallest size for thumbnail (type "s" or "m")
        // TDLib photo sizes are ordered from smallest to largest
        const smallest = photo.sizes[0];
        photos.push({
          id: BigInt(msg.id),
          date: new Date(msg.date * 1000),
          caption,
          fileId: String(smallest.photo.id),
          fileSize: smallest.photo.size || smallest.photo.expected_size,
        });
      }
    }
    // Continue paging from the oldest message seen so far.
    currentFromId = result.messages[result.messages.length - 1].id;
    // NOTE(review): compares against the hard-coded page size, not `limit` —
    // with limit < 100 the first page always ends the loop.
    if (result.messages.length < 100) break;
    // Rate limit delay
    await sleep(config.apiDelayMs);
  }
  // Return in chronological order (oldest first)
  return {
    archives: archives.reverse(),
    photos: photos.reverse(),
  };
}
/**
 * Download a photo thumbnail from Telegram and return its raw bytes.
 * Uses synchronous download (photos are small, typically < 100KB).
 * Returns null if download fails (non-critical).
 */
export async function downloadPhotoThumbnail(
  client: Client,
  fileId: string
): Promise<Buffer | null> {
  try {
    const file = (await client.invoke({
      _: "downloadFile",
      file_id: parseInt(fileId, 10),
      priority: 1, // Low priority — thumbnails are nice-to-have
      offset: 0,
      limit: 0,
      synchronous: true, // Small file — wait for it
    })) as TdFile;
    // Guard clause: bail out unless TDLib reports a completed local copy
    if (!file?.local?.is_downloading_completed || !file.local.path) {
      return null;
    }
    const data = await readFile(file.local.path);
    log.debug({ fileId, bytes: data.length }, "Downloaded photo thumbnail");
    return data;
  } catch (err) {
    log.warn({ fileId, err }, "Failed to download photo thumbnail");
    return null;
  }
}
/** Snapshot of an in-flight Telegram file download, reported to callers. */
export interface DownloadProgress {
  fileId: string;
  fileName: string;
  downloadedBytes: number;
  totalBytes: number;
  percent: number; // 0-100, rounded
  isComplete: boolean;
}
/** Invoked once with the initial state, then on each TDLib updateFile event. */
export type ProgressCallback = (progress: DownloadProgress) => void;
/**
 * Download a file from Telegram to a local path with progress tracking
 * and integrity verification.
 *
 * Progress flow:
 * 1. Starts async download via TDLib
 * 2. Listens for `updateFile` events to track download progress
 * 3. Logs progress at every 10% increment
 * 4. Once complete, verifies the local file size matches the expected size
 * 5. Moves the file from TDLib's cache to the destination path
 *
 * Verification:
 * - Compares actual file size on disk to the expected size from Telegram
 * - Throws on mismatch (partial/corrupt download)
 * - Throws on timeout (configurable, scales with file size)
 * - Throws if download stops without completing (network error, etc.)
 */
export async function downloadFile(
  client: Client,
  fileId: string,
  destPath: string,
  expectedSize: bigint,
  fileName: string,
  onProgress?: ProgressCallback
): Promise<void> {
  const numericId = parseInt(fileId, 10);
  const totalBytes = Number(expectedSize);
  log.info(
    { fileId, fileName, destPath, totalBytes },
    "Starting file download"
  );
  // Report initial progress
  onProgress?.({
    fileId,
    fileName,
    downloadedBytes: 0,
    totalBytes,
    percent: 0,
    isComplete: false,
  });
  return new Promise<void>((resolve, reject) => {
    let lastLoggedPercent = 0;
    // `settled` guards every terminal path (timeout / complete / stopped /
    // invoke error) so the promise resolves or rejects exactly once.
    let settled = false;
    // Timeout: 10 minutes per GB, minimum 5 minutes
    const timeoutMs = Math.max(
      5 * 60_000,
      (totalBytes / (1024 * 1024 * 1024)) * 10 * 60_000
    );
    const timer = setTimeout(() => {
      if (!settled) {
        settled = true;
        cleanup();
        reject(
          new Error(
            `Download timed out after ${Math.round(timeoutMs / 60_000)}min for ${fileName}`
          )
        );
      }
    }, timeoutMs);
    // Listen for file update events to track progress
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const handleUpdate = (update: any) => {
      if (update?._ !== "updateFile") return;
      const file = update.file as TdFile | undefined;
      if (!file || file.id !== numericId) return;
      const downloaded = file.local.downloaded_size;
      const percent =
        totalBytes > 0 ? Math.round((downloaded / totalBytes) * 100) : 0;
      // Log at every 10% increment
      if (percent >= lastLoggedPercent + 10) {
        // Snap to the nearest 10% boundary below `percent`
        lastLoggedPercent = percent - (percent % 10);
        log.info(
          { fileId, fileName, downloaded, totalBytes, percent: `${percent}%` },
          "Download progress"
        );
      }
      // Report to callback
      onProgress?.({
        fileId,
        fileName,
        downloadedBytes: downloaded,
        totalBytes,
        percent,
        isComplete: file.local.is_downloading_completed,
      });
      // Download finished
      if (file.local.is_downloading_completed) {
        if (!settled) {
          settled = true;
          cleanup();
          verifyAndMove(file.local.path, destPath, totalBytes, fileName, fileId)
            .then(resolve)
            .catch(reject);
        }
      }
      // Download stopped without completing (network error, cancelled, etc.)
      // NOTE(review): an updateFile event for this file id that arrives before
      // TDLib marks the download active would also match this condition and
      // reject early — confirm TDLib always reports is_downloading_active=true
      // in the first event after downloadFile is invoked.
      if (
        !file.local.is_downloading_active &&
        !file.local.is_downloading_completed
      ) {
        if (!settled) {
          settled = true;
          cleanup();
          reject(
            new Error(
              `Download stopped unexpectedly for ${fileName} ` +
                `(${downloaded}/${totalBytes} bytes, ${percent}%)`
            )
          );
        }
      }
    };
    // Tear down the timer and listener on every terminal path
    const cleanup = () => {
      clearTimeout(timer);
      client.off("update", handleUpdate);
    };
    // Subscribe to updates BEFORE starting download
    client.on("update", handleUpdate);
    // Start async download (non-blocking — progress via updateFile events)
    client
      .invoke({
        _: "downloadFile",
        file_id: numericId,
        // High priority for archive payloads (thumbnails use priority 1)
        priority: 32,
        offset: 0,
        limit: 0,
        synchronous: false,
      })
      .then((result: unknown) => {
        // If the file was already cached locally, invoke returns immediately
        const file = result as TdFile | undefined;
        if (file?.local?.is_downloading_completed && !settled) {
          settled = true;
          cleanup();
          verifyAndMove(file.local.path, destPath, totalBytes, fileName, fileId)
            .then(resolve)
            .catch(reject);
        }
      })
      .catch((err: unknown) => {
        if (!settled) {
          settled = true;
          cleanup();
          reject(err);
        }
      });
  });
}
/**
* Verify the downloaded file's size matches the expected size,
* then move it to the destination path.
*/
async function verifyAndMove(
localPath: string,
destPath: string,
expectedBytes: number,
fileName: string,
fileId: string
): Promise<void> {
const stats = await stat(localPath);
const actualBytes = stats.size;
if (expectedBytes > 0 && actualBytes !== expectedBytes) {
log.error(
{ fileId, fileName, expectedBytes, actualBytes },
"Download size mismatch — file is incomplete or corrupted"
);
throw new Error(
`Download verification failed for ${fileName}: ` +
`expected ${expectedBytes} bytes, got ${actualBytes} bytes`
);
}
log.info(
{ fileId, fileName, bytes: actualBytes, destPath },
"File verified and complete"
);
// Move from TDLib's cache to our temp directory
await rename(localPath, destPath);
}
/** Pause for the given number of milliseconds (rate-limit helper). */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}

222
worker/src/tdlib/topics.ts Normal file
View File

@@ -0,0 +1,222 @@
import type { Client } from "tdl";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import { isArchiveAttachment } from "../archive/detect.js";
import type { TelegramMessage } from "../archive/multipart.js";
import type { TelegramPhoto } from "../preview/match.js";
import type { ChannelScanResult } from "./download.js";
const log = childLogger("topics");
/** A forum topic (thread) inside a Telegram forum supergroup. */
export interface ForumTopic {
  topicId: bigint; // TDLib message_thread_id of the topic
  name: string; // topic title; used downstream as the creator name
}
/**
 * Check if a chat is a forum supergroup (topics enabled).
 * Returns false (after logging a warning) when the lookup fails.
 */
export async function isChatForum(
  client: Client,
  chatId: bigint
): Promise<boolean> {
  try {
    const chat = (await client.invoke({
      _: "getChat",
      chat_id: Number(chatId),
    })) as {
      type?: {
        _: string;
        supergroup_id?: number;
        is_forum?: boolean;
      };
    };
    // Only supergroups can be forums
    if (chat.type?._ !== "chatTypeSupergroup") return false;
    if (chat.type.is_forum) return true;
    // Also check via getSupergroup for older TDLib versions
    if (chat.type.supergroup_id) {
      const sg = (await client.invoke({
        _: "getSupergroup",
        supergroup_id: chat.type.supergroup_id,
      })) as { is_forum?: boolean };
      return sg.is_forum === true;
    }
    return false;
  } catch (err) {
    log.warn({ err, chatId: chatId.toString() }, "Failed to check if chat is forum");
    return false;
  }
}
/**
 * Get all forum topics in a supergroup.
 * Pages through getForumTopics until TDLib stops returning next-offset
 * values; the "General" topic is skipped because it is not creator-specific.
 */
export async function getForumTopicList(
  client: Client,
  chatId: bigint
): Promise<ForumTopic[]> {
  const topics: ForumTopic[] = [];
  const offset = { date: 0, messageId: 0, threadId: 0 };
  // eslint-disable-next-line no-constant-condition
  while (true) {
    const page = (await client.invoke({
      _: "getForumTopics",
      chat_id: Number(chatId),
      query: "",
      offset_date: offset.date,
      offset_message_id: offset.messageId,
      offset_message_thread_id: offset.threadId,
      limit: 100,
    })) as {
      topics?: {
        info?: {
          message_thread_id?: number;
          name?: string;
          is_general?: boolean;
        };
      }[];
      next_offset_date?: number;
      next_offset_message_id?: number;
      next_offset_message_thread_id?: number;
    };
    if (!page.topics || page.topics.length === 0) break;
    for (const entry of page.topics) {
      const info = entry.info;
      if (!info?.message_thread_id) continue;
      // Skip the "General" topic — it's not creator-specific
      if (info.is_general) continue;
      topics.push({
        topicId: BigInt(info.message_thread_id),
        name: info.name ?? "Unnamed",
      });
    }
    // All next-offset fields absent/zero means this was the last page
    const hasMore =
      Boolean(page.next_offset_date) ||
      Boolean(page.next_offset_message_id) ||
      Boolean(page.next_offset_message_thread_id);
    if (!hasMore) break;
    offset.date = page.next_offset_date ?? 0;
    offset.messageId = page.next_offset_message_id ?? 0;
    offset.threadId = page.next_offset_message_thread_id ?? 0;
    await sleep(config.apiDelayMs);
  }
  log.info(
    { chatId: chatId.toString(), topicCount: topics.length },
    "Enumerated forum topics"
  );
  return topics;
}
/**
 * Fetch messages from a specific forum topic (thread).
 * Uses getMessageThreadHistory to scan within a topic.
 *
 * NOTE(review): like getChatHistory, getMessageThreadHistory with `offset: 0`
 * pages BACKWARDS (older than `from_message_id`) — confirm this matches the
 * intended "since message X" semantics at the call sites.
 *
 * @param topicId the topic's message_thread_id
 * @param fromMessageId message id to page from (0 / null = newest message)
 * @param limit requested page size per API call (capped at TDLib's max of 100)
 */
export async function getTopicMessages(
  client: Client,
  chatId: bigint,
  topicId: bigint,
  fromMessageId?: bigint | null,
  limit = 100
): Promise<ChannelScanResult> {
  const archives: TelegramMessage[] = [];
  const photos: TelegramPhoto[] = [];
  const pageSize = Math.min(limit, 100);
  let currentFromId = fromMessageId ? Number(fromMessageId) : 0;
  // eslint-disable-next-line no-constant-condition
  while (true) {
    const result = (await client.invoke({
      _: "getMessageThreadHistory",
      chat_id: Number(chatId),
      message_id: Number(topicId),
      from_message_id: currentFromId,
      offset: 0,
      limit: pageSize,
    })) as {
      messages?: {
        id: number;
        date: number;
        content: {
          _: string;
          document?: {
            file_name?: string;
            document?: {
              id: number;
              size: number;
            };
          };
          photo?: {
            sizes?: {
              type: string;
              photo: { id: number; size: number; expected_size: number };
              width: number;
              height: number;
            }[];
          };
          caption?: { text?: string };
        };
      }[];
    };
    // TDLib may return FEWER messages than requested even when more exist,
    // so the only reliable end-of-history signal is an empty page.
    // (The previous `length < 100` check could terminate the scan early,
    // and was also wrong whenever `limit` was below 100.)
    if (!result.messages || result.messages.length === 0) break;
    for (const msg of result.messages) {
      // Check for archive documents
      const doc = msg.content?.document;
      if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
        archives.push({
          id: BigInt(msg.id),
          fileName: doc.file_name,
          fileId: String(doc.document.id),
          fileSize: BigInt(doc.document.size),
          date: new Date(msg.date * 1000),
        });
        continue;
      }
      // Check for photo messages (potential previews)
      const photo = msg.content?.photo;
      const caption = msg.content?.caption?.text ?? "";
      if (photo?.sizes && photo.sizes.length > 0) {
        const smallest = photo.sizes[0];
        photos.push({
          id: BigInt(msg.id),
          date: new Date(msg.date * 1000),
          caption,
          fileId: String(smallest.photo.id),
          fileSize: smallest.photo.size || smallest.photo.expected_size,
        });
      }
    }
    currentFromId = result.messages[result.messages.length - 1].id;
    await sleep(config.apiDelayMs);
  }
  return {
    archives: archives.reverse(),
    photos: photos.reverse(),
  };
}
/** Pause for the given number of milliseconds (rate-limit helper). */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}

View File

@@ -0,0 +1,76 @@
import type { Client } from "tdl";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
const log = childLogger("upload");
/** Result of uploading an archive set to the destination channel. */
export interface UploadResult {
  messageId: bigint; // message id of the FIRST uploaded part
}
/**
 * Upload one or more files to a destination Telegram channel.
 * For multipart archives, each file is sent as a separate message;
 * only the first message carries the caption.
 * Returns the message ID of the first uploaded message.
 *
 * NOTE(review): sendMessage resolves with a provisional message whose id can
 * change once TDLib actually finishes sending (updateMessageSendSucceeded) —
 * confirm the stored id remains valid for later lookups.
 */
export async function uploadToChannel(
  client: Client,
  chatId: bigint,
  filePaths: string[],
  caption?: string
): Promise<UploadResult> {
  let firstMessageId: bigint | null = null;
  for (const [index, filePath] of filePaths.entries()) {
    const partCaption = index === 0 && caption ? caption : undefined;
    log.debug(
      { chatId: Number(chatId), filePath, part: index + 1, total: filePaths.length },
      "Uploading file to channel"
    );
    const sent = (await client.invoke({
      _: "sendMessage",
      chat_id: Number(chatId),
      input_message_content: {
        _: "inputMessageDocument",
        document: {
          _: "inputFileLocal",
          path: filePath,
        },
        caption: partCaption
          ? {
              _: "formattedText",
              text: partCaption,
            }
          : undefined,
      },
    })) as { id: number };
    if (index === 0) {
      firstMessageId = BigInt(sent.id);
    }
    // Rate limit delay between uploads
    if (index < filePaths.length - 1) {
      await sleep(config.apiDelayMs);
    }
  }
  if (firstMessageId === null) {
    throw new Error("Upload failed: no messages sent");
  }
  log.info(
    { chatId: Number(chatId), messageId: Number(firstMessageId), files: filePaths.length },
    "Upload complete"
  );
  return { messageId: firstMessageId };
}
/** Pause for the given number of milliseconds (rate-limit helper). */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}

18
worker/src/util/config.ts Normal file
View File

@@ -0,0 +1,18 @@
/** Read an integer environment variable, falling back on missing or non-numeric values. */
function envInt(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined) return fallback;
  const parsed = parseInt(raw, 10);
  // parseInt returns NaN for garbage input — never let NaN leak into intervals/limits
  return Number.isNaN(parsed) ? fallback : parsed;
}

/** Permitted log levels; anything else falls back to "info". */
const LOG_LEVELS = ["debug", "info", "warn", "error"] as const;

/** Read LOG_LEVEL, validating against the allowed set instead of blindly casting. */
function envLogLevel(): (typeof LOG_LEVELS)[number] {
  const raw = process.env.LOG_LEVEL ?? "info";
  return (LOG_LEVELS as readonly string[]).includes(raw)
    ? (raw as (typeof LOG_LEVELS)[number])
    : "info";
}

/** Worker configuration, sourced from environment variables with safe defaults. */
export const config = {
  databaseUrl: process.env.DATABASE_URL ?? "",
  workerIntervalMinutes: envInt("WORKER_INTERVAL_MINUTES", 60),
  tempDir: process.env.WORKER_TEMP_DIR ?? "/tmp/zips",
  tdlibStateDir: process.env.TDLIB_STATE_DIR ?? "/data/tdlib",
  maxZipSizeMB: envInt("WORKER_MAX_ZIP_SIZE_MB", 4096),
  logLevel: envLogLevel(),
  telegramApiId: envInt("TELEGRAM_API_ID", 0),
  telegramApiHash: process.env.TELEGRAM_API_HASH ?? "",
  /** Maximum jitter added to scheduler interval (in minutes) */
  jitterMinutes: 5,
  /** Maximum time between multipart archive parts (in hours) */
  multipartTimeoutHours: 24,
  /** Delay between Telegram API calls (in ms) to avoid rate limits */
  apiDelayMs: 1000,
  /** Max retries for rate-limited requests */
  maxRetries: 5,
} as const;

14
worker/src/util/logger.ts Normal file
View File

@@ -0,0 +1,14 @@
import pino from "pino";
import { config } from "./config.js";
// In debug mode, route output through pino/file to fd 1 (stdout);
// otherwise let pino use its default destination.
const debugTransport =
  config.logLevel === "debug"
    ? { target: "pino/file", options: { destination: 1 } }
    : undefined;

/** Root pino logger for the worker process. */
export const logger = pino({
  level: config.logLevel,
  transport: debugTransport,
});
/** Create a child logger tagged with a module name plus optional extra bindings. */
export function childLogger(name: string, extra?: Record<string, unknown>) {
  const bindings = { module: name, ...extra };
  return logger.child(bindings);
}

665
worker/src/worker.ts Normal file
View File

@@ -0,0 +1,665 @@
import path from "path";
import { unlink, readdir } from "fs/promises";
import { config } from "./util/config.js";
import { childLogger } from "./util/logger.js";
import { tryAcquireLock, releaseLock } from "./db/locks.js";
import {
getSourceChannelMappings,
getDestinationChannel,
packageExistsByHash,
createPackageWithFiles,
createIngestionRun,
completeIngestionRun,
failIngestionRun,
updateLastProcessedMessage,
updateRunActivity,
setChannelForum,
getTopicProgress,
upsertTopicProgress,
} from "./db/queries.js";
import type { ActivityUpdate } from "./db/queries.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { getChannelMessages, downloadFile, downloadPhotoThumbnail } from "./tdlib/download.js";
import type { DownloadProgress, ChannelScanResult } from "./tdlib/download.js";
import { isChatForum, getForumTopicList, getTopicMessages } from "./tdlib/topics.js";
import { matchPreviewToArchive } from "./preview/match.js";
import { groupArchiveSets } from "./archive/multipart.js";
import type { ArchiveSet } from "./archive/multipart.js";
import { extractCreatorFromFileName } from "./archive/creator.js";
import { hashParts } from "./archive/hash.js";
import { readZipCentralDirectory } from "./archive/zip-reader.js";
import { readRarContents } from "./archive/rar-reader.js";
import { byteLevelSplit } from "./archive/split.js";
import { uploadToChannel } from "./upload/channel.js";
import type { TelegramAccount, TelegramChannel } from "@prisma/client";
import type { Client } from "tdl";
const log = childLogger("worker");
/**
 * Throttle DB writes for download progress to avoid hammering the DB.
 * Only writes if at least `minIntervalMs` has passed since the last write;
 * otherwise the latest update is buffered and flushed when the interval
 * elapses. Only the most recent update is kept — intermediate ones are
 * intentionally dropped.
 *
 * @param runId ingestion run to write activity for
 * @param minIntervalMs minimum gap between DB writes (default 2s)
 */
function createThrottledActivityUpdater(runId: string, minIntervalMs = 2000) {
  let lastWriteTime = 0;
  let pendingUpdate: ActivityUpdate | null = null;
  let flushTimer: ReturnType<typeof setTimeout> | null = null;
  const flush = async () => {
    if (pendingUpdate) {
      const update = pendingUpdate;
      pendingUpdate = null;
      lastWriteTime = Date.now();
      // Activity tracking is best-effort — never let a DB hiccup kill a run
      await updateRunActivity(runId, update).catch(() => {});
    }
  };
  return {
    update: (activity: ActivityUpdate) => {
      pendingUpdate = activity;
      const elapsed = Date.now() - lastWriteTime;
      if (elapsed >= minIntervalMs) {
        if (flushTimer) {
          clearTimeout(flushTimer);
          // BUG FIX: the handle must be reset too, otherwise the
          // `else if (!flushTimer)` branch below would see a stale
          // (already-cleared) timer and never schedule another flush,
          // leaving the final buffered update stuck forever.
          flushTimer = null;
        }
        void flush();
      } else if (!flushTimer) {
        flushTimer = setTimeout(() => {
          flushTimer = null;
          void flush();
        }, minIntervalMs - elapsed);
      }
    },
    flush,
  };
}
/** Shared context passed to the archive processing pipeline. */
interface PipelineContext {
  client: Client; // authenticated TDLib client for the current account
  runId: string; // ingestion run being tracked in the DB
  channelTitle: string; // display label (includes topic name when scanning a forum topic)
  channel: TelegramChannel; // source channel DB row
  destChannelTelegramId: bigint; // Telegram chat id of the archive (destination) channel
  destChannelId: string; // DB id of the archive channel
  throttled: ReturnType<typeof createThrottledActivityUpdater>; // rate-limited activity writer
  counters: {
    messagesScanned: number;
    zipsFound: number;
    zipsDuplicate: number;
    zipsIngested: number;
  };
  /** Creator from forum topic name (null for non-forum). */
  topicCreator: string | null;
  /** Forum topic ID (null for non-forum). */
  sourceTopicId: bigint | null;
  accountLog: ReturnType<typeof childLogger>; // logger pre-tagged with account id/phone
}
/**
 * Run a full ingestion cycle for a single Telegram account.
 * Every step writes live activity to the DB so the admin UI can display it.
 *
 * Flow: acquire advisory lock → create ingestion run → connect TDLib →
 * for each assigned source channel, scan flat or per-topic (forums) →
 * process found archives → record progress → complete/fail the run →
 * always release the lock.
 */
export async function runWorkerForAccount(
  account: TelegramAccount
): Promise<void> {
  const accountLog = childLogger("worker", { accountId: account.id, phone: account.phone });
  // 1. Acquire advisory lock
  const acquired = await tryAcquireLock(account.id);
  if (!acquired) {
    accountLog.info("Account already locked, skipping");
    return;
  }
  let runId: string | undefined;
  try {
    // 2. Create ingestion run
    const run = await createIngestionRun(account.id);
    runId = run.id;
    // Non-optional alias so closures below see `string`, not `string | undefined`
    const activeRunId = runId;
    accountLog.info({ runId }, "Ingestion run started");
    const throttled = createThrottledActivityUpdater(activeRunId);
    // 3. Initialize TDLib client
    await updateRunActivity(activeRunId, {
      currentActivity: "Connecting to Telegram",
      currentStep: "connecting",
    });
    const client = await createTdlibClient({
      id: account.id,
      phone: account.phone,
    });
    const counters = {
      messagesScanned: 0,
      zipsFound: 0,
      zipsDuplicate: 0,
      zipsIngested: 0,
    };
    try {
      // 4. Get assigned source channels and destination
      const channelMappings = await getSourceChannelMappings(account.id);
      const destChannel = await getDestinationChannel(account.id);
      if (!destChannel) {
        throw new Error("No active destination channel configured");
      }
      for (const mapping of channelMappings) {
        const channel = mapping.channel;
        // ── Check if channel is a forum ──
        // Persist the flag whenever it changed since the last run
        const forum = await isChatForum(client, channel.telegramId);
        if (forum !== channel.isForum) {
          await setChannelForum(channel.id, forum);
          accountLog.info(
            { channelId: channel.id, title: channel.title, isForum: forum },
            "Updated channel forum status"
          );
        }
        // Mutable context reused across topics/channel; creator fields are
        // overwritten per scan before processArchiveSets is called
        const pipelineCtx: PipelineContext = {
          client,
          runId: activeRunId,
          channelTitle: channel.title,
          channel,
          destChannelTelegramId: destChannel.telegramId,
          destChannelId: destChannel.id,
          throttled,
          counters,
          topicCreator: null,
          sourceTopicId: null,
          accountLog,
        };
        if (forum) {
          // ── Forum channel: scan per-topic ──
          await updateRunActivity(activeRunId, {
            currentActivity: `Enumerating topics in "${channel.title}"`,
            currentStep: "scanning",
            currentChannel: channel.title,
            currentFile: null,
            currentFileNum: null,
            totalFiles: null,
            downloadedBytes: null,
            totalBytes: null,
            downloadPercent: null,
          });
          const topics = await getForumTopicList(client, channel.telegramId);
          const topicProgressList = await getTopicProgress(mapping.id);
          accountLog.info(
            { channelId: channel.id, title: channel.title, topicCount: topics.length },
            "Scanning forum channel by topic"
          );
          for (const topic of topics) {
            // Resume from the last processed message id for this topic (if any)
            const progress = topicProgressList.find(
              (tp) => tp.topicId === topic.topicId
            );
            await updateRunActivity(activeRunId, {
              currentActivity: `Scanning topic "${topic.name}" in "${channel.title}"`,
              currentStep: "scanning",
              currentChannel: `${channel.title} ${topic.name}`,
              currentFile: null,
              currentFileNum: null,
              totalFiles: null,
              downloadedBytes: null,
              totalBytes: null,
              downloadPercent: null,
            });
            const scanResult = await getTopicMessages(
              client,
              channel.telegramId,
              topic.topicId,
              progress?.lastProcessedMessageId
            );
            if (scanResult.archives.length === 0) {
              accountLog.debug(
                { channelId: channel.id, topic: topic.name },
                "No new archives in topic"
              );
              continue;
            }
            accountLog.info(
              { topic: topic.name, archives: scanResult.archives.length, photos: scanResult.photos.length },
              "Found messages in topic"
            );
            // Process archives with topic creator
            pipelineCtx.topicCreator = topic.name;
            pipelineCtx.sourceTopicId = topic.topicId;
            pipelineCtx.channelTitle = `${channel.title} ${topic.name}`;
            await processArchiveSets(pipelineCtx, scanResult, run.id);
            // Update topic progress
            const allMsgIds = [
              ...scanResult.archives.map((m) => m.id),
              ...scanResult.photos.map((p) => p.id),
            ];
            if (allMsgIds.length > 0) {
              // Highest message id seen in this scan (bigint-safe max)
              const maxId = allMsgIds.reduce((a, b) => (a > b ? a : b));
              await upsertTopicProgress(
                mapping.id,
                topic.topicId,
                topic.name,
                maxId
              );
            }
          }
        } else {
          // ── Non-forum channel: flat scan (existing behavior) ──
          await updateRunActivity(activeRunId, {
            currentActivity: `Scanning "${channel.title}" for new archives`,
            currentStep: "scanning",
            currentChannel: channel.title,
            currentFile: null,
            currentFileNum: null,
            totalFiles: null,
            downloadedBytes: null,
            totalBytes: null,
            downloadPercent: null,
          });
          accountLog.info(
            { channelId: channel.id, title: channel.title },
            "Processing source channel"
          );
          const scanResult = await getChannelMessages(
            client,
            channel.telegramId,
            mapping.lastProcessedMessageId
          );
          if (scanResult.archives.length === 0) {
            accountLog.debug({ channelId: channel.id }, "No new archives");
            continue;
          }
          accountLog.info(
            { archives: scanResult.archives.length, photos: scanResult.photos.length },
            "Found messages in channel"
          );
          // For non-forum, creator comes from filename (set to null, resolved per-archive)
          pipelineCtx.topicCreator = null;
          pipelineCtx.sourceTopicId = null;
          pipelineCtx.channelTitle = channel.title;
          await processArchiveSets(pipelineCtx, scanResult, run.id);
          // Update last processed message
          const allMsgIds = [
            ...scanResult.archives.map((m) => m.id),
            ...scanResult.photos.map((p) => p.id),
          ];
          if (allMsgIds.length > 0) {
            // Highest message id seen in this scan (bigint-safe max)
            const maxId = allMsgIds.reduce((a, b) => (a > b ? a : b));
            await updateLastProcessedMessage(mapping.id, maxId);
          }
        }
      }
      // ── Done ──
      await completeIngestionRun(activeRunId, counters);
      accountLog.info({ counters }, "Ingestion run completed");
    } finally {
      await closeTdlibClient(client);
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    accountLog.error({ err }, "Ingestion run failed");
    if (runId) {
      await failIngestionRun(runId, message).catch((e) =>
        accountLog.error({ e }, "Failed to mark run as failed")
      );
    }
  } finally {
    await releaseLock(account.id);
  }
}
/**
 * Process a scan result through the archive pipeline:
 * group → download → hash → dedup → metadata → split → upload → preview → index.
 *
 * Groups raw messages into archive sets, pairs preview photos with sets,
 * records counters/activity, then runs each set through processOneArchiveSet.
 */
async function processArchiveSets(
  ctx: PipelineContext,
  scanResult: ChannelScanResult,
  ingestionRunId: string
): Promise<void> {
  // Destructure only the fields used HERE (`client`, `channel`, `throttled`
  // were previously destructured but never read); the full context is still
  // forwarded to processOneArchiveSet.
  const { runId, channelTitle, counters, accountLog } = ctx;
  // Group into archive sets
  const archiveSets = groupArchiveSets(scanResult.archives);
  counters.zipsFound += archiveSets.length;
  // Match preview photos to archive sets
  const previewMatches = matchPreviewToArchive(
    scanResult.photos,
    archiveSets.map((s) => ({
      baseName: s.baseName,
      firstMessageId: s.parts[0].id,
      firstMessageDate: s.parts[0].date,
    }))
  );
  if (previewMatches.size > 0) {
    accountLog.info(
      { matched: previewMatches.size, total: archiveSets.length },
      "Matched preview photos to archives"
    );
  }
  await updateRunActivity(runId, {
    currentActivity: `Found ${archiveSets.length} archive(s) in "${channelTitle}"`,
    currentStep: "scanning",
    currentChannel: channelTitle,
    totalFiles: archiveSets.length,
    zipsFound: counters.zipsFound,
  });
  for (let setIdx = 0; setIdx < archiveSets.length; setIdx++) {
    await processOneArchiveSet(
      ctx,
      archiveSets[setIdx],
      setIdx,
      archiveSets.length,
      previewMatches,
      ingestionRunId
    );
  }
}
/**
 * Process a single archive set through the full pipeline:
 * download all parts → hash → dedup check → read archive metadata →
 * optional split (>2GB) → upload to archive channel → optional preview
 * thumbnail → resolve creator → persist package + file index.
 * Temp files are always deleted, even on failure.
 */
async function processOneArchiveSet(
  ctx: PipelineContext,
  archiveSet: ArchiveSet,
  setIdx: number,
  totalSets: number,
  previewMatches: Map<string, { id: bigint; fileId: string }>,
  ingestionRunId: string
): Promise<void> {
  const {
    client, runId, channelTitle, channel,
    destChannelTelegramId, destChannelId,
    throttled, counters, topicCreator, sourceTopicId, accountLog,
  } = ctx;
  // Each part of a multipart set arrived as its own Telegram message
  counters.messagesScanned += archiveSet.parts.length;
  // Display name for the whole set = the first part's filename
  const archiveName = archiveSet.parts[0].fileName;
  const tempPaths: string[] = [];
  let splitPaths: string[] = [];
  try {
    // ── Downloading ──
    for (let partIdx = 0; partIdx < archiveSet.parts.length; partIdx++) {
      const part = archiveSet.parts[partIdx];
      const tempPath = path.join(
        config.tempDir,
        `${ingestionRunId}_${part.id}_${part.fileName}`
      );
      const partLabel = archiveSet.parts.length > 1
        ? ` (part ${partIdx + 1}/${archiveSet.parts.length})`
        : "";
      await updateRunActivity(runId, {
        currentActivity: `Downloading ${part.fileName}${partLabel}`,
        currentStep: "downloading",
        currentChannel: channelTitle,
        currentFile: part.fileName,
        currentFileNum: setIdx + 1,
        totalFiles: totalSets,
        downloadedBytes: 0n,
        totalBytes: part.fileSize,
        downloadPercent: 0,
        messagesScanned: counters.messagesScanned,
      });
      accountLog.info(
        {
          fileName: part.fileName,
          fileSize: Number(part.fileSize),
          part: partIdx + 1,
          totalParts: archiveSet.parts.length,
        },
        "Downloading archive part"
      );
      await downloadFile(
        client,
        part.fileId,
        tempPath,
        part.fileSize,
        part.fileName,
        (progress: DownloadProgress) => {
          // Progress goes through the throttled writer to limit DB traffic
          throttled.update({
            currentActivity: `Downloading ${part.fileName}${partLabel}${progress.percent}%`,
            currentStep: "downloading",
            currentChannel: channelTitle,
            currentFile: part.fileName,
            currentFileNum: setIdx + 1,
            totalFiles: totalSets,
            downloadedBytes: BigInt(progress.downloadedBytes),
            totalBytes: BigInt(progress.totalBytes),
            downloadPercent: progress.percent,
          });
        }
      );
      await throttled.flush();
      tempPaths.push(tempPath);
    }
    // ── Hashing ──
    await updateRunActivity(runId, {
      currentActivity: `Computing hash for ${archiveName}`,
      currentStep: "hashing",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
      downloadedBytes: null,
      totalBytes: null,
      downloadPercent: null,
    });
    const contentHash = await hashParts(tempPaths);
    // ── Deduplicating ──
    await updateRunActivity(runId, {
      currentActivity: `Checking if ${archiveName} is a duplicate`,
      currentStep: "deduplicating",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
    });
    const exists = await packageExistsByHash(contentHash);
    if (exists) {
      counters.zipsDuplicate++;
      accountLog.debug({ contentHash }, "Duplicate archive, skipping");
      await updateRunActivity(runId, {
        currentActivity: `Skipped ${archiveName} (duplicate)`,
        currentStep: "deduplicating",
        currentChannel: channelTitle,
        currentFile: archiveName,
        currentFileNum: setIdx + 1,
        totalFiles: totalSets,
        zipsDuplicate: counters.zipsDuplicate,
      });
      return;
    }
    // ── Reading metadata ──
    await updateRunActivity(runId, {
      currentActivity: `Reading file list from ${archiveName}`,
      currentStep: "reading_metadata",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
    });
    let entries: { path: string; fileName: string; extension: string | null; compressedSize: bigint; uncompressedSize: bigint; crc32: string | null }[] = [];
    try {
      if (archiveSet.type === "ZIP") {
        entries = await readZipCentralDirectory(tempPaths);
      } else {
        entries = await readRarContents(tempPaths[0]);
      }
    } catch (err) {
      // Metadata extraction is best-effort — the package is still ingested
      accountLog.warn({ err, baseName: archiveSet.baseName }, "Failed to read archive metadata, ingesting without file list");
    }
    // ── Splitting (if needed) ──
    let uploadPaths = tempPaths;
    const totalSize = archiveSet.parts.reduce(
      (sum, p) => sum + p.fileSize,
      0n
    );
    // NOTE(review): the 2 GiB threshold presumably mirrors Telegram's upload
    // size limit — confirm (premium accounts allow larger uploads).
    if (!archiveSet.isMultipart && totalSize > 2n * 1024n * 1024n * 1024n) {
      await updateRunActivity(runId, {
        currentActivity: `Splitting ${archiveName} for upload (>2GB)`,
        currentStep: "splitting",
        currentChannel: channelTitle,
        currentFile: archiveName,
        currentFileNum: setIdx + 1,
        totalFiles: totalSets,
      });
      splitPaths = await byteLevelSplit(tempPaths[0]);
      uploadPaths = splitPaths;
    }
    // ── Uploading ──
    const uploadLabel = uploadPaths.length > 1
      ? ` (${uploadPaths.length} parts)`
      : "";
    await updateRunActivity(runId, {
      currentActivity: `Uploading ${archiveName} to archive channel${uploadLabel}`,
      currentStep: "uploading",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
    });
    // NOTE(review): sendMessage can return a provisional message id — confirm
    // destResult.messageId is stable before persisting it below.
    const destResult = await uploadToChannel(
      client,
      destChannelTelegramId,
      uploadPaths
    );
    // ── Preview thumbnail ──
    let previewData: Buffer | null = null;
    let previewMsgId: bigint | null = null;
    const matchedPhoto = previewMatches.get(archiveSet.baseName);
    if (matchedPhoto) {
      await updateRunActivity(runId, {
        currentActivity: `Downloading preview image for ${archiveName}`,
        currentStep: "preview",
        currentChannel: channelTitle,
        currentFile: archiveName,
        currentFileNum: setIdx + 1,
        totalFiles: totalSets,
      });
      previewData = await downloadPhotoThumbnail(client, matchedPhoto.fileId);
      previewMsgId = matchedPhoto.id;
    }
    // ── Resolve creator: topic name > filename extraction > null ──
    const creator = topicCreator ?? extractCreatorFromFileName(archiveName) ?? null;
    // ── Indexing ──
    await updateRunActivity(runId, {
      currentActivity: `Saving metadata for ${archiveName} (${entries.length} files)`,
      currentStep: "indexing",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
    });
    await createPackageWithFiles({
      contentHash,
      fileName: archiveName,
      fileSize: totalSize,
      archiveType: archiveSet.type,
      sourceChannelId: channel.id,
      sourceMessageId: archiveSet.parts[0].id,
      sourceTopicId,
      destChannelId,
      destMessageId: destResult.messageId,
      isMultipart:
        archiveSet.parts.length > 1 || uploadPaths.length > 1,
      partCount: uploadPaths.length,
      ingestionRunId,
      creator,
      previewData,
      previewMsgId,
      files: entries,
    });
    counters.zipsIngested++;
    await updateRunActivity(runId, {
      currentActivity: `Ingested ${archiveName} (${entries.length} files indexed)`,
      currentStep: "complete",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
      zipsIngested: counters.zipsIngested,
    });
    accountLog.info(
      { fileName: archiveName, contentHash, fileCount: entries.length, creator },
      "Archive ingested"
    );
  } finally {
    // ALWAYS delete temp files
    await deleteFiles([...tempPaths, ...splitPaths]);
  }
}
/** Best-effort removal of temp files; missing or already-deleted files are ignored. */
async function deleteFiles(paths: string[]): Promise<void> {
  for (const filePath of paths) {
    // File may already be deleted or never created — swallow failures
    await unlink(filePath).catch(() => {});
  }
}
/**
 * Clean up any leftover temp files from previous runs.
 * Silently does nothing when the temp directory does not exist yet.
 */
export async function cleanupTempDir(): Promise<void> {
  try {
    const leftovers = await readdir(config.tempDir);
    for (const name of leftovers) {
      await unlink(path.join(config.tempDir, name)).catch(() => {});
    }
    if (leftovers.length > 0) {
      log.info({ count: leftovers.length }, "Cleaned up stale temp files");
    }
  } catch {
    // Directory might not exist yet
  }
}