feat: add preview management, channel controls, invite polish, and recovery

- Auto-extract preview images from ZIP/RAR/7z archives during ingestion
- Upload custom preview images via package drawer
- Select preview from archive contents with on-demand extraction UI
- Manually add Telegram channels by t.me link, username, or invite link
- Invite code UX: bulk create, copy link, usage tracking, delete confirm
- Incomplete upload recovery: verify dest messages on worker startup
- Rebuild package DB by scanning destination channel with live progress

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
admin
2026-03-22 00:09:59 +01:00
parent bf093cdfca
commit ab558e00f5
26 changed files with 3028 additions and 98 deletions

View File

@@ -0,0 +1,33 @@
import path from "path";
/** Lowercase extensions (no dot) that we treat as images inside archives. */
const IMAGE_EXTENSIONS = new Set(["jpg", "jpeg", "png", "webp", "gif", "bmp"]);

/** Lowercased extension of a path, without the leading dot ("" if none). */
function extensionOf(filePath: string): string {
  return path.extname(filePath).toLowerCase().slice(1);
}

/**
 * Check if a file path within an archive is an image.
 */
export function isImageFile(filePath: string): boolean {
  return IMAGE_EXTENSIONS.has(extensionOf(filePath));
}

/** MIME type for each supported image extension. */
const MIME_TYPES: Record<string, string> = {
  jpg: "image/jpeg",
  jpeg: "image/jpeg",
  png: "image/png",
  webp: "image/webp",
  gif: "image/gif",
  bmp: "image/bmp",
};

/**
 * Get the MIME type for an image file extension.
 * Unknown extensions fall back to "application/octet-stream".
 */
export function getImageMimeType(filePath: string): string {
  return MIME_TYPES[extensionOf(filePath)] ?? "application/octet-stream";
}

View File

@@ -438,3 +438,35 @@ export async function getExistingChannelsByTelegramId(): Promise<Map<string, str
/** Look up a Telegram account row by primary key; resolves to null when absent. */
export async function getAccountById(accountId: string) {
  return db.telegramAccount.findUnique({ where: { id: accountId } });
}
/**
 * Find packages that claim to be uploaded (both destMessageId and
 * destChannelId set) but whose destination messages may no longer exist
 * in Telegram — these are the candidates for startup verification.
 *
 * Groups by destChannelId so the caller can batch-verify per channel.
 */
export async function getPackagesWithDestMessage() {
  // "Uploaded" means both destination fields are populated.
  const uploadedFilter = {
    destMessageId: { not: null },
    destChannelId: { not: null },
  };
  return db.package.findMany({
    where: uploadedFilter,
    select: {
      id: true,
      fileName: true,
      contentHash: true,
      destChannelId: true,
      destMessageId: true,
      // Source channel's Telegram id, needed to re-fetch if recovery is required.
      sourceChannel: { select: { telegramId: true } },
    },
  });
}
/**
 * Clear a package's destination channel/message so the next ingestion
 * run treats it as not-yet-uploaded and re-processes it from scratch.
 */
export async function resetPackageDestination(packageId: string) {
  const clearedDestination = { destChannelId: null, destMessageId: null };
  return db.package.update({
    where: { id: packageId },
    data: clearedDestination,
  });
}

View File

@@ -0,0 +1,217 @@
import path from "path";
import { mkdir, rm } from "fs/promises";
import { db } from "./db/client.js";
import { config } from "./util/config.js";
import { childLogger } from "./util/logger.js";
import { withTdlibMutex } from "./util/mutex.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { downloadFile } from "./tdlib/download.js";
import { getActiveAccounts } from "./db/queries.js";
import { extractPreviewImage } from "./preview/extract.js";
import { getImageMimeType } from "./archive/extract-image.js";
const log = childLogger("extract-listener");
/**
 * Process a single archive extract request.
 * Downloads the archive from Telegram (dest channel), extracts the
 * requested image file, and writes the result to the DB.
 *
 * Request lifecycle: PENDING → IN_PROGRESS → COMPLETED (imageData set)
 * or FAILED (error set). Exits silently when the request is missing or
 * no longer PENDING, so duplicate pg_notify deliveries are harmless.
 *
 * @param requestId Primary key of the archiveExtractRequest row to process.
 */
export async function processExtractRequest(requestId: string): Promise<void> {
  // Load the request along with the package fields needed to locate the
  // uploaded archive in the destination channel.
  const request = await db.archiveExtractRequest.findUnique({
    where: { id: requestId },
    include: {
      package: {
        select: {
          id: true,
          fileName: true,
          fileSize: true,
          archiveType: true,
          destChannelId: true,
          destMessageId: true,
          isMultipart: true,
          partCount: true,
        },
      },
    },
  });
  if (!request || request.status !== "PENDING") {
    // Already claimed by another run, or deleted — nothing to do.
    log.debug({ requestId }, "Extract request not found or not pending");
    return;
  }
  const pkg = request.package;
  if (!pkg.destChannelId || !pkg.destMessageId) {
    // There is no uploaded copy to download from; fail fast.
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: { status: "FAILED", error: "Package has no destination upload" },
    });
    return;
  }
  // Multipart archives require downloading and reassembling all parts,
  // which is too complex for on-demand extraction. Reject early.
  if (pkg.isMultipart && pkg.partCount > 1) {
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: { status: "FAILED", error: "Image extraction is not supported for multipart archives" },
    });
    return;
  }
  // Check for a cached result first: if another request for the same
  // package+filePath already completed, reuse its data.
  const cached = await db.archiveExtractRequest.findFirst({
    where: {
      packageId: pkg.id,
      filePath: request.filePath,
      status: "COMPLETED",
      imageData: { not: null },
      id: { not: requestId },
    },
    select: { imageData: true, contentType: true },
  });
  if (cached?.imageData) {
    log.info({ requestId, filePath: request.filePath }, "Reusing cached extraction result");
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: {
        status: "COMPLETED",
        imageData: cached.imageData,
        contentType: cached.contentType,
      },
    });
    return;
  }
  // Claim the request before starting the slow download/extract work.
  await db.archiveExtractRequest.update({
    where: { id: requestId },
    data: { status: "IN_PROGRESS" },
  });
  log.info(
    { requestId, packageId: pkg.id, filePath: request.filePath, archiveType: pkg.archiveType },
    "Processing extract request"
  );
  // Per-request scratch directory; removed unconditionally in the finally below.
  const tempDir = path.join(config.tempDir, `extract_${requestId}`);
  try {
    await mkdir(tempDir, { recursive: true });
    // Wrap the entire TDLib session in the mutex so no other TDLib
    // operation can run concurrently (TDLib is single-session).
    await withTdlibMutex("extract", async () => {
      const accounts = await getActiveAccounts();
      if (accounts.length === 0) {
        throw new Error("No authenticated Telegram accounts available");
      }
      // Any authenticated account will do; use the first.
      const account = accounts[0];
      const client = await createTdlibClient({ id: account.id, phone: account.phone });
      try {
        // Load chat list so TDLib can find the dest channel
        try {
          await client.invoke({
            _: "getChats",
            chat_list: { _: "chatListMain" },
            limit: 1000,
          });
        } catch {
          // May already be loaded
        }
        // Get the dest channel telegram ID
        const destChannel = await db.telegramChannel.findUnique({
          where: { id: pkg.destChannelId! },
          select: { telegramId: true },
        });
        if (!destChannel) {
          throw new Error("Destination channel not found in DB");
        }
        const chatId = Number(destChannel.telegramId);
        const messageId = Number(pkg.destMessageId);
        // Get the file_id from the destination message
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const message = await client.invoke({
          _: "getMessage",
          chat_id: chatId,
          message_id: messageId,
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
        }) as any;
        const doc = message?.content?.document;
        if (!doc?.document?.id) {
          throw new Error("Could not find document in destination message");
        }
        const fileId = String(doc.document.id);
        // Prefer Telegram's file name; fall back to the stored package name.
        const fileName = doc.file_name || pkg.fileName;
        const archivePath = path.join(tempDir, fileName);
        log.info(
          { requestId, fileName, fileId, chatId, messageId },
          "Downloading archive for extraction"
        );
        await downloadFile(
          client,
          fileId,
          archivePath,
          pkg.fileSize,
          fileName
        );
        // Extract the requested image using the existing CLI-based extractor.
        // This pipes the file to stdout (no temp files needed for the extracted image).
        const imageData = await extractPreviewImage(
          archivePath,
          pkg.archiveType as "ZIP" | "RAR" | "SEVEN_Z" | "DOCUMENT",
          request.filePath
        );
        if (!imageData) {
          throw new Error(`Could not extract "${request.filePath}" from archive`);
        }
        // Cap at 5MB for safety
        if (imageData.length > 5 * 1024 * 1024) {
          throw new Error(`Extracted image is too large (${(imageData.length / 1024 / 1024).toFixed(1)}MB)`);
        }
        const contentType = getImageMimeType(request.filePath);
        await db.archiveExtractRequest.update({
          where: { id: requestId },
          data: {
            status: "COMPLETED",
            imageData: new Uint8Array(imageData),
            contentType,
          },
        });
        log.info(
          { requestId, filePath: request.filePath, bytes: imageData.length },
          "Image extracted successfully"
        );
      } finally {
        // Always tear down the TDLib client, even on failure.
        await closeTdlibClient(client).catch(() => {});
      }
    });
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err);
    log.error({ err, requestId }, "Extract request failed");
    // Best-effort failure write — the original error has already been logged.
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: { status: "FAILED", error: errMsg },
    }).catch(() => {});
  } finally {
    // Remove the scratch directory regardless of outcome.
    await rm(tempDir, { recursive: true, force: true }).catch(() => {});
  }
}

View File

@@ -3,7 +3,9 @@ import { pool } from "./db/client.js";
import { childLogger } from "./util/logger.js";
import { withTdlibMutex } from "./util/mutex.js";
import { processFetchRequest } from "./worker.js";
import { generateInviteLink, createSupergroup } from "./tdlib/chats.js";
import { processExtractRequest } from "./extract-listener.js";
import { rebuildPackageDatabase } from "./rebuild.js";
import { generateInviteLink, createSupergroup, searchPublicChat } from "./tdlib/chats.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { triggerImmediateCycle } from "./scheduler.js";
import {
@@ -13,6 +15,7 @@ import {
getActiveAccounts,
upsertChannel,
ensureAccountChannelLink,
updateFetchRequestStatus,
} from "./db/queries.js";
const log = childLogger("fetch-listener");
@@ -31,6 +34,8 @@ const RECONNECT_DELAY_MS = 5_000;
* - `generate_invite` — payload = channelId → generate invite link for destination
* - `create_destination` — payload = JSON { requestId, title } → create supergroup via TDLib
* - `ingestion_trigger` — trigger an immediate ingestion cycle
* - `join_channel` — payload = JSON { requestId, input, accountId } → join/lookup channel by link/username
* - `rebuild_packages` — payload = requestId → rebuild package DB from destination channel
*
* If the underlying connection is lost, the listener automatically reconnects
* so that pg_notify signals are never silently dropped.
@@ -47,6 +52,9 @@ async function connectListener(): Promise<void> {
await pgClient.query("LISTEN generate_invite");
await pgClient.query("LISTEN create_destination");
await pgClient.query("LISTEN ingestion_trigger");
await pgClient.query("LISTEN join_channel");
await pgClient.query("LISTEN archive_extract");
await pgClient.query("LISTEN rebuild_packages");
pgClient.on("notification", (msg) => {
if (msg.channel === "channel_fetch" && msg.payload) {
@@ -57,6 +65,12 @@ async function connectListener(): Promise<void> {
handleCreateDestination(msg.payload);
} else if (msg.channel === "ingestion_trigger") {
handleIngestionTrigger();
} else if (msg.channel === "join_channel" && msg.payload) {
handleJoinChannel(msg.payload);
} else if (msg.channel === "archive_extract" && msg.payload) {
handleArchiveExtract(msg.payload);
} else if (msg.channel === "rebuild_packages" && msg.payload) {
handleRebuildPackages(msg.payload);
}
});
@@ -82,7 +96,7 @@ async function connectListener(): Promise<void> {
}
});
log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger)");
log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages)");
} catch (err) {
log.error({ err }, "Failed to start fetch listener — retrying");
scheduleReconnect();
@@ -260,6 +274,217 @@ function handleCreateDestination(payload: string): void {
});
}
// ── Join channel handler ──
/**
 * Parse a Telegram link/username into its type and identifier.
 *
 * Recognized forms:
 * - @username or bare username          → public chat search
 * - https://t.me/username               → public chat search
 * - https://t.me/+INVITE_HASH           → join by invite link
 * - https://t.me/joinchat/INVITE_HASH   → join by invite link (legacy)
 * (telegram.me variants of all link forms are accepted too)
 *
 * Returns null when the input matches none of the above.
 */
function parseTelegramInput(input: string): { type: "username"; username: string } | { type: "invite"; link: string } | null {
  const value = input.trim();

  // Private invite links: t.me/+HASH or legacy t.me/joinchat/HASH.
  const inviteRes = [
    /^https?:\/\/t\.me\/\+([a-zA-Z0-9_-]+)$/,
    /^https?:\/\/t\.me\/joinchat\/([a-zA-Z0-9_-]+)$/,
    /^https?:\/\/telegram\.me\/\+([a-zA-Z0-9_-]+)$/,
    /^https?:\/\/telegram\.me\/joinchat\/([a-zA-Z0-9_-]+)$/,
  ];
  if (inviteRes.some((re) => re.test(value))) {
    return { type: "invite", link: value };
  }

  // Public channel link: https://t.me/<username>
  const fromLink = /^https?:\/\/(?:t\.me|telegram\.me)\/([a-zA-Z][a-zA-Z0-9_]{3,31})$/.exec(value);
  if (fromLink) {
    return { type: "username", username: fromLink[1] };
  }

  // Bare username, with or without a leading "@".
  const bare = /^@?([a-zA-Z][a-zA-Z0-9_]{3,31})$/.exec(value);
  if (bare) {
    return { type: "username", username: bare[1] };
  }

  return null;
}
/**
 * Handle a `join_channel` pg_notify: join (or look up) a Telegram channel
 * from a user-supplied link/username, register it as an active SOURCE
 * channel, and link the chosen account to it as READER.
 *
 * Payload is JSON: { requestId, input, accountId }. Outcome is recorded
 * on the fetch-request row: IN_PROGRESS → COMPLETED (with channel JSON)
 * or FAILED (with a user-facing error message). Work is chained onto
 * `fetchQueue` so it serializes with all other TDLib handlers.
 */
function handleJoinChannel(payload: string): void {
  fetchQueue = fetchQueue.then(async () => {
    let requestId: string | undefined;
    try {
      const parsed = JSON.parse(payload) as { requestId: string; input: string; accountId: string };
      requestId = parsed.requestId;
      await withTdlibMutex("join-channel", async () => {
        await updateFetchRequestStatus(requestId!, "IN_PROGRESS");
        // Prefer the requested account; fall back to any active one.
        const accounts = await getActiveAccounts();
        const account = accounts.find((a) => a.id === parsed.accountId) ?? accounts[0];
        if (!account) {
          throw new Error("No authenticated accounts available");
        }
        const client = await createTdlibClient({ id: account.id, phone: account.phone });
        try {
          const linkInfo = parseTelegramInput(parsed.input);
          if (!linkInfo) {
            throw new Error(
              "Invalid input. Use a t.me link (e.g. https://t.me/channel_name), " +
              "an invite link (e.g. https://t.me/+abc123), or a @username."
            );
          }
          let chatInfo: { chatId: bigint; title: string; type: string; isForum: boolean };
          if (linkInfo.type === "username") {
            // Public chat: search by username
            const result = await searchPublicChat(client, linkInfo.username);
            if (!result) {
              throw new Error(`Public channel "@${linkInfo.username}" not found. Check the username and try again.`);
            }
            // Only channels/supergroups can be ingestion sources.
            if (result.type !== "channel" && result.type !== "supergroup") {
              throw new Error(`"@${linkInfo.username}" is a ${result.type}, not a channel or group. Only channels and supergroups are supported.`);
            }
            chatInfo = { chatId: result.chatId, title: result.title, type: result.type, isForum: result.isForum };
          } else {
            // Private/invite link: join first, then get chat info
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            let joinResult: any;
            try {
              joinResult = await client.invoke({
                _: "joinChatByInviteLink",
                invite_link: linkInfo.link,
              });
            } catch (joinErr: unknown) {
              const msg = joinErr instanceof Error ? joinErr.message : String(joinErr);
              // "INVITE_REQUEST_SENT" means the chat requires admin approval
              if (msg.includes("INVITE_REQUEST_SENT")) {
                throw new Error("Join request sent. An admin of that channel must approve it before it can be added.");
              }
              // Already a member is fine
              // NOTE(review): INVITE_HASH_EXPIRED is also tolerated here —
              // presumably because an expired hash can still resolve via
              // checkChatInviteLink below; confirm against TDLib behavior.
              if (!msg.includes("USER_ALREADY_PARTICIPANT") && !msg.includes("INVITE_HASH_EXPIRED")) {
                throw new Error(`Failed to join via invite link: ${msg}`);
              }
              // If already a participant, we need to get chat info from the link
              // Try checkChatInviteLink to get the chat id
              try {
                // eslint-disable-next-line @typescript-eslint/no-explicit-any
                const checkResult = (await client.invoke({
                  _: "checkChatInviteLink",
                  invite_link: linkInfo.link,
                })) as any;
                if (checkResult.chat_id) {
                  joinResult = { id: checkResult.chat_id };
                } else {
                  throw joinErr;
                }
              } catch {
                // Re-throw the original join error, not the check error.
                throw joinErr;
              }
            }
            // Get full chat info
            const chatId = joinResult?.id ?? joinResult?.chat_id;
            if (!chatId) {
              throw new Error("Joined channel but could not determine chat ID.");
            }
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            const chat = (await client.invoke({ _: "getChat", chat_id: chatId })) as any;
            let type: string = "other";
            let isForum = false;
            if (chat.type?._ === "chatTypeSupergroup") {
              try {
                // eslint-disable-next-line @typescript-eslint/no-explicit-any
                const sg = (await client.invoke({
                  _: "getSupergroup",
                  supergroup_id: chat.type.supergroup_id,
                })) as any;
                type = sg.is_channel ? "channel" : "supergroup";
                isForum = sg.is_forum ?? false;
              } catch {
                // Best-effort: if supergroup details fail, still allow it.
                type = "supergroup";
              }
            } else if (chat.type?._ === "chatTypeBasicGroup") {
              type = "group";
            }
            if (type !== "channel" && type !== "supergroup") {
              throw new Error(`The joined chat is a ${type}, not a channel or group. Only channels and supergroups are supported.`);
            }
            chatInfo = { chatId: BigInt(chatId), title: chat.title ?? "Unknown", type, isForum };
          }
          // Upsert channel in DB (active as source by default since user explicitly added it)
          const channel = await upsertChannel({
            telegramId: chatInfo.chatId,
            title: chatInfo.title,
            type: "SOURCE",
            isForum: chatInfo.isForum,
            isActive: true,
          });
          // Link the account as READER
          await ensureAccountChannelLink(account.id, channel.id, "READER");
          log.info(
            { channelId: channel.id, telegramId: chatInfo.chatId.toString(), title: chatInfo.title },
            "Channel joined and added"
          );
          await updateFetchRequestStatus(requestId!, "COMPLETED", {
            resultJson: JSON.stringify({
              channelId: channel.id,
              telegramId: chatInfo.chatId.toString(),
              title: chatInfo.title,
              type: chatInfo.type,
              isForum: chatInfo.isForum,
            }),
          });
        } finally {
          await closeTdlibClient(client);
        }
      });
    } catch (err) {
      log.error({ err, payload }, "Failed to join channel");
      // Surface the failure to the UI when we managed to parse a requestId.
      if (requestId) {
        try {
          await updateFetchRequestStatus(requestId, "FAILED", {
            error: err instanceof Error ? err.message : String(err),
          });
        } catch {
          // Best-effort
        }
      }
    }
  });
}
// ── Archive extract handler ──
/**
 * Queue processing of an archive-extract request. The job is chained
 * onto `fetchQueue` so it never overlaps other queued TDLib work, and
 * failures are logged rather than breaking the queue chain.
 */
function handleArchiveExtract(requestId: string): void {
  const job = async (): Promise<void> => {
    try {
      log.info({ requestId }, "Archive extract request received");
      await processExtractRequest(requestId);
    } catch (err) {
      log.error({ err, requestId }, "Failed to process archive extract request");
    }
  };
  fetchQueue = fetchQueue.then(job);
}
// ── Ingestion trigger handler ──
function handleIngestionTrigger(): void {
@@ -272,3 +497,17 @@ function handleIngestionTrigger(): void {
}
});
}
// ── Package database rebuild handler ──
/**
 * Queue a package-database rebuild. Runs under the TDLib mutex (the
 * rebuild opens its own client session) and is chained onto `fetchQueue`
 * so it serializes with all other handlers; errors are logged, never thrown.
 */
function handleRebuildPackages(requestId: string): void {
  const job = async (): Promise<void> => {
    try {
      await withTdlibMutex("rebuild-packages", () => rebuildPackageDatabase(requestId));
    } catch (err) {
      log.error({ err, requestId }, "Failed to rebuild package database");
    }
  };
  fetchQueue = fetchQueue.then(job);
}

View File

@@ -3,6 +3,7 @@ import { config } from "./util/config.js";
import { logger } from "./util/logger.js";
import { markStaleRunsAsFailed } from "./db/queries.js";
import { cleanupTempDir } from "./worker.js";
import { recoverIncompleteUploads } from "./recovery.js";
import { startScheduler, stopScheduler } from "./scheduler.js";
import { startFetchListener, stopFetchListener } from "./fetch-listener.js";
import { db, pool } from "./db/client.js";
@@ -26,6 +27,10 @@ async function main(): Promise<void> {
await cleanupTempDir();
await markStaleRunsAsFailed();
// Verify destination messages exist for all "uploaded" packages.
// Resets any packages whose dest message is missing so they get re-processed.
await recoverIncompleteUploads();
// Start the fetch listener (pg_notify for on-demand channel fetching)
await startFetchListener();

View File

@@ -0,0 +1,111 @@
import { execFile } from "child_process";
import { promisify } from "util";
import { childLogger } from "../util/logger.js";
import type { FileEntry } from "../archive/zip-reader.js";
const execFileAsync = promisify(execFile);
const log = childLogger("preview-extract");
/** Max bytes we'll accept for an extracted preview image (2MB). */
const MAX_PREVIEW_BYTES = 2 * 1024 * 1024;
/** Image extensions we consider valid previews, in priority order. */
const IMAGE_EXTENSIONS = new Set(["jpg", "jpeg", "png"]);
/**
 * Pick the best preview image from the file entries list.
 *
 * Preference order: files with preview-like names (preview.jpg, 01.jpg,
 * insta.jpg) at shallow archive depth beat arbitrary images buried in
 * subdirectories. Images over 2MB uncompressed are excluded — they are
 * probably textures, not previews. Returns null when nothing qualifies.
 */
export function pickPreviewFile(entries: FileEntry[]): FileEntry | null {
  const sizeLimit = BigInt(MAX_PREVIEW_BYTES);
  const images = entries.filter(
    (e) =>
      !!e.extension &&
      IMAGE_EXTENSIONS.has(e.extension.toLowerCase()) &&
      e.uncompressedSize <= sizeLimit
  );
  if (images.length === 0) return null;

  // Lower score wins: preview-ish names score 0-2, everything else 10,
  // plus one point per directory level of nesting.
  const scoreFor = (entry: FileEntry): number => {
    const depth = entry.path.split("/").length - 1;
    const name = entry.fileName.toLowerCase();
    let nameScore = 10; // default for arbitrary images
    if (/^(preview|thumb|cover|insta)\b/i.test(name)) {
      nameScore = 0; // dedicated preview-style name
    } else if (/^0*[1-2]\.(jpe?g|png)$/i.test(name)) {
      nameScore = 1; // 01.jpg, 1.jpg, 02.jpg — common preview filenames
    } else if (/^0*[3-9]\.(jpe?g|png)$/i.test(name)) {
      nameScore = 2;
    }
    return nameScore + depth;
  };

  // Single pass keeping the first entry with the strictly-lowest score
  // (matches the stable-sort tie-breaking of picking the earliest entry).
  let best = images[0];
  let bestScore = scoreFor(best);
  for (const candidate of images.slice(1)) {
    const candidateScore = scoreFor(candidate);
    if (candidateScore < bestScore) {
      best = candidate;
      bestScore = candidateScore;
    }
  }
  return best;
}
/**
 * Extract a single file from an archive and return its contents as a Buffer.
 *
 * Each archive type maps to a CLI invocation that pipes the named file to
 * stdout (so no temp file is written for the extracted image):
 * - ZIP: unzip -p
 * - RAR: unrar p -inul
 * - 7Z:  7z e -so
 *
 * Returns null for DOCUMENT archives, empty output, or any extraction
 * failure (timeout, output over MAX_PREVIEW_BYTES, missing entry, …).
 */
export async function extractPreviewImage(
  archivePath: string,
  archiveType: "ZIP" | "RAR" | "SEVEN_Z" | "DOCUMENT",
  filePath: string
): Promise<Buffer | null> {
  if (archiveType === "DOCUMENT") return null;

  // Tool + arguments per archive type; all stream the file to stdout.
  const commands: Record<"ZIP" | "RAR" | "SEVEN_Z", [string, string[]]> = {
    ZIP: ["unzip", ["-p", archivePath, filePath]],
    RAR: ["unrar", ["p", "-inul", archivePath, filePath]],
    SEVEN_Z: ["7z", ["e", "-so", archivePath, filePath]],
  };
  const [tool, args] = commands[archiveType];

  try {
    const result = await execFileAsync(tool, args, {
      timeout: 15000,
      maxBuffer: MAX_PREVIEW_BYTES,
      encoding: "buffer",
    });
    const data = result.stdout as unknown as Buffer;
    if (data.length === 0) {
      log.warn({ archivePath, filePath }, "Extracted preview image is empty");
      return null;
    }
    log.debug(
      { archivePath, filePath, bytes: data.length },
      "Extracted preview image from archive"
    );
    return data;
  } catch (err) {
    log.warn({ err, archivePath, filePath }, "Failed to extract preview image from archive");
    return null;
  }
}

411
worker/src/rebuild.ts Normal file
View File

@@ -0,0 +1,411 @@
import type { Client } from "tdl";
import { config } from "./util/config.js";
import { childLogger } from "./util/logger.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { invokeWithTimeout, MAX_SCAN_PAGES } from "./tdlib/download.js";
import { isArchiveAttachment } from "./archive/detect.js";
import { extractCreatorFromFileName } from "./archive/creator.js";
import { groupArchiveSets } from "./archive/multipart.js";
import type { TelegramMessage } from "./archive/multipart.js";
import {
getActiveAccounts,
getGlobalDestinationChannel,
} from "./db/queries.js";
import { db } from "./db/client.js";
const log = childLogger("rebuild");
/** Live progress snapshot for a package-DB rebuild, serialized into the fetch request's resultJson. */
export interface RebuildProgress {
  // Lifecycle state of the rebuild request.
  status: "PENDING" | "IN_PROGRESS" | "COMPLETED" | "FAILED";
  // Total messages examined in the destination channel so far.
  messagesScanned: number;
  // Document messages recognized as archive files.
  documentsFound: number;
  // Package rows created, or existing rows re-linked to a destination message.
  packagesCreated: number;
  // Archive sets skipped (already linked, hash conflict, or no source channel).
  packagesSkipped: number;
  // Failure reason; only set when status is FAILED.
  error?: string;
}
/**
 * Scan the destination channel for uploaded archive files and rebuild
 * the package database from what's actually there.
 *
 * Uses searchChatMessages (not getChatHistory) because the destination
 * channel may be a hidden-history supergroup.
 *
 * For each document found:
 * 1. Check if a Package record with that destMessageId already exists -> skip
 * 2. Try to match by fileName to an existing package without destMessageId -> update it
 * 3. Otherwise create a minimal Package record (no file listing, no content hash)
 *
 * This is a "best-effort" rebuild. It restores the mapping between destination
 * messages and package records so that the bot can deliver files. It does NOT
 * re-download archives or rebuild file listings (those require the source channel).
 *
 * Status and live progress are written to the channelFetchRequest row
 * identified by `requestId` (IN_PROGRESS → COMPLETED/FAILED).
 */
export async function rebuildPackageDatabase(
  requestId: string
): Promise<void> {
  log.info({ requestId }, "Starting package database rebuild");
  try {
    await db.channelFetchRequest.update({
      where: { id: requestId },
      data: { status: "IN_PROGRESS" },
    });
    // Get an authenticated account for TDLib
    const accounts = await getActiveAccounts();
    if (accounts.length === 0) {
      throw new Error("No authenticated accounts available");
    }
    const destChannel = await getGlobalDestinationChannel();
    if (!destChannel) {
      throw new Error("No destination channel configured");
    }
    const account = accounts[0];
    const client = await createTdlibClient({
      id: account.id,
      phone: account.phone,
    });
    try {
      // Mutable progress object; re-serialized on every update below.
      const progress: RebuildProgress = {
        status: "IN_PROGRESS",
        messagesScanned: 0,
        documentsFound: 0,
        packagesCreated: 0,
        packagesSkipped: 0,
      };
      // Write initial progress
      await updateRebuildProgress(requestId, progress);
      // Scan the destination channel for all document messages
      const archiveMessages = await scanDestinationChannel(
        client,
        destChannel.telegramId,
        async (scanned) => {
          progress.messagesScanned = scanned;
          await updateRebuildProgress(requestId, progress);
        }
      );
      progress.documentsFound = archiveMessages.length;
      await updateRebuildProgress(requestId, progress);
      log.info(
        {
          messagesScanned: progress.messagesScanned,
          documentsFound: archiveMessages.length,
        },
        "Destination channel scan complete"
      );
      // Group into archive sets (handles multipart)
      const archiveSets = groupArchiveSets(archiveMessages);
      log.info(
        { archiveSets: archiveSets.length, totalMessages: archiveMessages.length },
        "Grouped into archive sets"
      );
      // Get ALL source channels so we can try to match
      const sourceChannels = await db.telegramChannel.findMany({
        where: { type: "SOURCE" },
        select: { id: true, title: true },
      });
      // Use the first source channel as a fallback for unmatched packages
      const fallbackSourceId = sourceChannels[0]?.id ?? null;
      // Process each archive set
      for (const archiveSet of archiveSets) {
        // The first part carries the canonical file name and dest message id.
        const firstPart = archiveSet.parts[0];
        const fileName = firstPart.fileName;
        const destMessageId = firstPart.id;
        const totalSize = archiveSet.parts.reduce(
          (sum, p) => sum + p.fileSize,
          0n
        );
        // 1. Check if a package with this destMessageId already exists
        const existingByDest = await db.package.findFirst({
          where: {
            destChannelId: destChannel.id,
            destMessageId,
          },
          select: { id: true },
        });
        if (existingByDest) {
          progress.packagesSkipped++;
          await updateRebuildProgress(requestId, progress);
          continue;
        }
        // 2. Try to match by fileName to an existing package without destMessageId
        // NOTE(review): fileName is not guaranteed unique across sources, so
        // this can attach the dest message to the wrong same-named package —
        // acceptable for a best-effort rebuild.
        const existingByName = await db.package.findFirst({
          where: {
            fileName,
            destMessageId: null,
          },
          select: { id: true },
        });
        if (existingByName) {
          // Update existing record with destination info
          await db.package.update({
            where: { id: existingByName.id },
            data: {
              destChannelId: destChannel.id,
              destMessageId,
              isMultipart: archiveSet.parts.length > 1,
              partCount: archiveSet.parts.length,
            },
          });
          progress.packagesCreated++;
          log.debug({ fileName, destMessageId: Number(destMessageId) }, "Updated existing package with dest info");
          await updateRebuildProgress(requestId, progress);
          continue;
        }
        // 3. Create a new minimal Package record
        // We don't have the source message or content hash, so generate a placeholder hash
        const placeholderHash = `rebuild:${destChannel.id}:${destMessageId}`;
        const creator = extractCreatorFromFileName(fileName) ?? null;
        const archiveType = archiveSet.type;
        // We need a sourceChannelId (required FK). Use fallback if available.
        if (!fallbackSourceId) {
          log.warn(
            { fileName },
            "No source channels exist — cannot create package record without a source channel"
          );
          progress.packagesSkipped++;
          await updateRebuildProgress(requestId, progress);
          continue;
        }
        try {
          await db.package.create({
            data: {
              contentHash: placeholderHash,
              fileName,
              fileSize: totalSize,
              archiveType,
              sourceChannelId: fallbackSourceId,
              sourceMessageId: 0n, // Unknown — rebuilt from destination
              destChannelId: destChannel.id,
              destMessageId,
              isMultipart: archiveSet.parts.length > 1,
              partCount: archiveSet.parts.length,
              fileCount: 0,
              creator,
            },
          });
          progress.packagesCreated++;
          log.debug(
            { fileName, destMessageId: Number(destMessageId), creator },
            "Created new package from destination"
          );
        } catch (err) {
          // Unique constraint on contentHash — might be a race or duplicate
          if (err instanceof Error && err.message.includes("Unique constraint")) {
            log.debug({ fileName, placeholderHash }, "Package already exists (hash conflict), skipping");
            progress.packagesSkipped++;
          } else {
            throw err;
          }
        }
        await updateRebuildProgress(requestId, progress);
      }
      // Done
      progress.status = "COMPLETED";
      await updateRebuildProgress(requestId, progress);
      await db.channelFetchRequest.update({
        where: { id: requestId },
        data: {
          status: "COMPLETED",
          resultJson: JSON.stringify(progress),
        },
      });
      log.info(
        {
          messagesScanned: progress.messagesScanned,
          documentsFound: progress.documentsFound,
          packagesCreated: progress.packagesCreated,
          packagesSkipped: progress.packagesSkipped,
        },
        "Package database rebuild complete"
      );
    } finally {
      await closeTdlibClient(client);
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    log.error({ err, requestId }, "Package database rebuild failed");
    // Record failure both in the status column and in the progress JSON
    // that the UI polls.
    await db.channelFetchRequest.update({
      where: { id: requestId },
      data: {
        status: "FAILED",
        error: message,
        resultJson: JSON.stringify({
          status: "FAILED",
          error: message,
        }),
      },
    });
  }
}
/**
 * Scan the destination channel for document messages using searchChatMessages.
 * Returns archive messages in chronological order (oldest first).
 *
 * searchChatMessages is used (rather than getChatHistory) because the
 * destination channel may be a hidden-history supergroup.
 *
 * Termination: an empty result page, the MAX_SCAN_PAGES cap, or stuck
 * pagination (from_message_id not advancing). Pages are paced with
 * config.apiDelayMs between requests.
 *
 * @param client     Authenticated TDLib client.
 * @param chatId     Telegram chat id of the destination channel.
 * @param onProgress Optional callback with the running scanned-message count
 *                   (throttled to at most once every 2 seconds).
 */
async function scanDestinationChannel(
  client: Client,
  chatId: bigint,
  onProgress?: (messagesScanned: number) => Promise<void>
): Promise<TelegramMessage[]> {
  const archives: TelegramMessage[] = [];
  let currentFromId = 0;
  let totalScanned = 0;
  let pageCount = 0;
  let lastProgressUpdate = 0;
  // eslint-disable-next-line no-constant-condition
  while (true) {
    if (pageCount >= MAX_SCAN_PAGES) {
      log.warn(
        { chatId: chatId.toString(), pageCount, totalScanned },
        "Hit max page limit for destination scan, stopping"
      );
      break;
    }
    pageCount++;
    const previousFromId = currentFromId;
    const result = await invokeWithTimeout<{
      messages?: {
        id: number;
        date: number;
        content: {
          _: string;
          document?: {
            file_name?: string;
            document?: {
              id: number;
              size: number;
            };
          };
        };
      }[];
    }>(client, {
      _: "searchChatMessages",
      chat_id: Number(chatId),
      query: "",
      from_message_id: currentFromId,
      offset: 0,
      limit: 100,
      filter: { _: "searchMessagesFilterDocument" },
      sender_id: null,
      message_thread_id: 0,
      saved_messages_topic_id: 0,
    });
    // An empty page means we've reached the beginning of the history.
    if (!result.messages || result.messages.length === 0) break;
    totalScanned += result.messages.length;
    for (const msg of result.messages) {
      const doc = msg.content?.document;
      // Keep only document messages whose file name looks like an archive.
      if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
        archives.push({
          id: BigInt(msg.id),
          fileName: doc.file_name,
          fileId: String(doc.document.id),
          fileSize: BigInt(doc.document.size),
          date: new Date(msg.date * 1000),
        });
      }
    }
    // Throttle progress updates to every 2 seconds
    const now = Date.now();
    if (onProgress && now - lastProgressUpdate >= 2000) {
      lastProgressUpdate = now;
      await onProgress(totalScanned);
    }
    currentFromId = result.messages[result.messages.length - 1].id;
    // Stuck detection: pagination cursor must advance every page.
    if (currentFromId === previousFromId) {
      log.warn(
        { chatId: chatId.toString(), currentFromId, totalScanned },
        "Pagination stuck, breaking"
      );
      break;
    }
    // FIX: do NOT stop when a page holds fewer than `limit` messages.
    // TDLib's searchChatMessages may return fewer results than requested
    // even when older messages remain, so breaking on a short page could
    // silently truncate the scan; only an empty page marks the end.
    await sleep(config.apiDelayMs);
  }
  // Final progress update
  if (onProgress) {
    await onProgress(totalScanned);
  }
  log.info(
    {
      chatId: chatId.toString(),
      archives: archives.length,
      totalScanned,
      pages: pageCount,
    },
    "Destination channel scan complete"
  );
  // Reverse to chronological order (oldest first)
  return archives.reverse();
}
/**
 * Persist the rebuild progress into the fetch request's resultJson field.
 * Writes are throttled to one every 2 seconds while IN_PROGRESS; any
 * terminal/status change is written immediately. DB errors are swallowed —
 * progress display is advisory and must never abort the rebuild.
 */
let lastUpdateTime = 0;
async function updateRebuildProgress(
  requestId: string,
  progress: RebuildProgress
): Promise<void> {
  const now = Date.now();
  const throttled = progress.status === "IN_PROGRESS" && now - lastUpdateTime < 2000;
  if (throttled) return;

  lastUpdateTime = now;
  try {
    await db.channelFetchRequest.update({
      where: { id: requestId },
      data: { resultJson: JSON.stringify(progress) },
    });
  } catch {
    // Best-effort
  }
}
/** Resolve after roughly `ms` milliseconds (plain setTimeout delay). */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}

187
worker/src/recovery.ts Normal file
View File

@@ -0,0 +1,187 @@
import { childLogger } from "./util/logger.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { withFloodWait } from "./util/retry.js";
import {
getActiveAccounts,
getPackagesWithDestMessage,
resetPackageDestination,
getGlobalDestinationChannel,
} from "./db/queries.js";
import type { Client } from "tdl";
const log = childLogger("recovery");
/**
 * Verify that destination messages still exist in Telegram for all
 * packages that claim to be uploaded. If a message is missing (deleted
 * or never actually committed), reset the package so the next ingestion
 * run will re-download and re-upload it.
 *
 * This handles the case where the worker crashed mid-upload: TDLib may
 * have returned a temporary message ID that was stored as destMessageId
 * but the upload never completed server-side, or the message was later
 * deleted from the destination channel.
 *
 * All failure paths are non-fatal: if recovery cannot run (no accounts,
 * no destination channel, client error) the worker proceeds normally.
 *
 * Called once on worker startup, before the scheduler begins.
 */
export async function recoverIncompleteUploads(): Promise<void> {
  const packages = await getPackagesWithDestMessage();
  if (packages.length === 0) {
    log.debug("No packages with destination messages to verify");
    return;
  }
  // We need a TDLib client to verify messages. Use the first active account.
  const accounts = await getActiveAccounts();
  if (accounts.length === 0) {
    log.info("No active accounts available for upload verification, skipping recovery");
    return;
  }
  const destChannel = await getGlobalDestinationChannel();
  if (!destChannel) {
    log.info("No destination channel configured, skipping recovery");
    return;
  }
  // Group packages by destChannelId for efficient verification.
  // NOTE(review): the grouping is currently informational only — every
  // message below is verified against the single global destination
  // channel's telegramId, not the package's own destChannelId. Confirm
  // that all uploaded packages live in that one channel.
  const byChannel = new Map<string, typeof packages>();
  for (const pkg of packages) {
    const channelId = pkg.destChannelId!;
    if (!byChannel.has(channelId)) {
      byChannel.set(channelId, []);
    }
    byChannel.get(channelId)!.push(pkg);
  }
  log.info(
    { totalPackages: packages.length, channels: byChannel.size },
    "Verifying destination messages exist in Telegram"
  );
  const account = accounts[0];
  let client: Client | undefined;
  try {
    client = await createTdlibClient({ id: account.id, phone: account.phone });
    // Load the chat list so TDLib can resolve chat IDs
    try {
      await client.invoke({
        _: "getChats",
        chat_list: { _: "chatListMain" },
        limit: 1000,
      });
    } catch {
      // May already be loaded
    }
    let resetCount = 0;
    let verifiedCount = 0;
    // Sequential verification (one getMessage per package); throttling/
    // flood-wait handling is delegated to verifyMessageExists.
    for (const [, channelPackages] of byChannel) {
      for (const pkg of channelPackages) {
        const exists = await verifyMessageExists(
          client,
          destChannel.telegramId,
          pkg.destMessageId!
        );
        if (exists) {
          verifiedCount++;
        } else {
          log.warn(
            {
              packageId: pkg.id,
              fileName: pkg.fileName,
              destMessageId: Number(pkg.destMessageId),
            },
            "Destination message missing in Telegram, resetting package for re-upload"
          );
          // Clear the package's destination so the next ingestion run
          // re-downloads and re-uploads it from scratch.
          await resetPackageDestination(pkg.id);
          resetCount++;
        }
      }
    }
    if (resetCount > 0) {
      log.info(
        { resetCount, verifiedCount, totalChecked: packages.length },
        "Upload recovery complete — packages reset for re-processing"
      );
    } else {
      log.info(
        { verifiedCount, totalChecked: packages.length },
        "Upload recovery complete — all destination messages verified"
      );
    }
  } catch (err) {
    // Swallow and log: a failed recovery pass simply retries on the
    // next worker startup.
    log.error({ err }, "Upload recovery failed (non-fatal, will retry next startup)");
  } finally {
    if (client) {
      await closeTdlibClient(client);
    }
  }
}
/**
 * Determine whether a message is still present in a Telegram chat and
 * still carries document content (all of our uploads are documents).
 *
 * Returns false when the message was deleted, never existed, or exists
 * but is no longer a document. Transient lookup failures (network
 * trouble, etc.) are treated as "exists" so that packages are never
 * reset because of a temporary error.
 */
async function verifyMessageExists(
  client: Client,
  chatTelegramId: bigint,
  messageId: bigint
): Promise<boolean> {
  let raw: unknown;
  try {
    raw = await withFloodWait(
      () =>
        client.invoke({
          _: "getMessage",
          chat_id: Number(chatTelegramId),
          message_id: Number(messageId),
        }),
      "getMessage:verify"
    );
  } catch (err) {
    // TDLib signals a deleted/unknown message via error code 404 or a
    // "not found" message text.
    const text = err instanceof Error ? err.message : String(err);
    const errorCode = (err as { code?: number })?.code;
    if (errorCode === 404 || text.includes("not found") || text.includes("Not Found")) {
      return false;
    }
    // Any other failure is assumed transient: report the message as
    // existing so we do not reset packages on flaky lookups.
    log.warn(
      { err, messageId: Number(messageId) },
      "Could not verify message (assuming it exists)"
    );
    return true;
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const message = raw as any;
  if (!message?.content) {
    return false;
  }
  if (message.content._ === "messageDocument") {
    return true;
  }
  // The message exists but its content was cleared or replaced with
  // something other than a document — treat as missing.
  log.debug(
    {
      messageId: Number(messageId),
      contentType: message.content._,
    },
    "Destination message exists but is not a document"
  );
  return false;
}

View File

@@ -176,6 +176,63 @@ export async function joinChatByInviteLink(
log.info({ inviteLink }, "Joined chat by invite link");
}
/**
 * Look up a public chat by its @username.
 * Resolves to the chat's info on success, or null when no such public
 * chat exists (the TDLib error is logged, not thrown).
 */
export async function searchPublicChat(
  client: Client,
  username: string
): Promise<TelegramChatInfo | null> {
  try {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const chat = (await withFloodWait(
      () => client.invoke({ _: "searchPublicChat", username }),
      "searchPublicChat"
    )) as any;

    let type: TelegramChatInfo["type"] = "other";
    let isForum = false;
    switch (chat.type?._) {
      case "chatTypeSupergroup":
        // Supergroups need a second call to distinguish broadcast
        // channels from regular supergroups and to read the forum flag.
        try {
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          const supergroup = (await withFloodWait(
            () =>
              client.invoke({
                _: "getSupergroup",
                supergroup_id: chat.type.supergroup_id,
              }),
            "getSupergroup"
          )) as any;
          type = supergroup.is_channel ? "channel" : "supergroup";
          isForum = supergroup.is_forum ?? false;
        } catch {
          // Details unavailable — fall back to a plain supergroup.
          type = "supergroup";
        }
        break;
      case "chatTypeBasicGroup":
        type = "group";
        break;
      case "chatTypePrivate":
      case "chatTypeSecret":
        type = "private";
        break;
    }

    log.info({ username, chatId: chat.id, type }, "Found public chat");
    return {
      chatId: BigInt(chat.id),
      title: chat.title ?? username,
      type,
      isForum,
    };
  } catch (err) {
    log.warn({ username, err }, "Public chat not found");
    return null;
  }
}
/** Pause for `ms` milliseconds via setTimeout. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => setTimeout(done, ms));
}

View File

@@ -34,6 +34,7 @@ import { getChannelMessages, downloadFile, downloadPhotoThumbnail } from "./tdli
import type { DownloadProgress, ChannelScanResult } from "./tdlib/download.js";
import { isChatForum, getForumTopicList, getTopicMessages } from "./tdlib/topics.js";
import { matchPreviewToArchive } from "./preview/match.js";
import { pickPreviewFile, extractPreviewImage } from "./preview/extract.js";
import { groupArchiveSets } from "./archive/multipart.js";
import type { ArchiveSet } from "./archive/multipart.js";
import { extractCreatorFromFileName, extractCreatorFromChannelTitle } from "./archive/creator.js";
@@ -971,6 +972,23 @@ async function processOneArchiveSet(
previewMsgId = matchedPhoto.id;
}
// ── Fallback: extract preview image from inside the archive ──
if (!previewData && entries.length > 0 && archiveSet.type !== "DOCUMENT") {
const previewEntry = pickPreviewFile(entries);
if (previewEntry) {
accountLog.debug(
{ fileName: archiveName, previewFile: previewEntry.path },
"Attempting to extract preview image from archive"
);
const archiveTypeForExtract = archiveSet.type === "7Z" ? "SEVEN_Z" as const : archiveSet.type as "ZIP" | "RAR";
previewData = await extractPreviewImage(
tempPaths[0],
archiveTypeForExtract,
previewEntry.path
);
}
}
// ── Resolve creator: topic name > filename extraction > channel title > null ──
const creator = topicCreator
?? extractCreatorFromFileName(archiveName)