feat: add preview management, channel controls, invite polish, and recovery

- Auto-extract preview images from ZIP/RAR/7z archives during ingestion
- Upload custom preview images via package drawer
- Select preview from archive contents with on-demand extraction UI
- Manually add Telegram channels by t.me link, username, or invite link
- Invite code UX: bulk create, copy link, usage tracking, delete confirm
- Incomplete upload recovery: verify dest messages on worker startup
- Rebuild package DB by scanning destination channel with live progress

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
admin
2026-03-22 00:09:59 +01:00
parent bf093cdfca
commit ab558e00f5
26 changed files with 3028 additions and 98 deletions

View File

@@ -0,0 +1,33 @@
import path from "path";
/** Lowercase extensions (no dot) that we treat as images inside archives. */
const IMAGE_EXTENSIONS = new Set(["jpg", "jpeg", "png", "webp", "gif", "bmp"]);

/** Lowercased extension of a path, without the leading dot ("" if none). */
function extensionOf(filePath: string): string {
  return path.extname(filePath).toLowerCase().slice(1);
}

/**
 * Check if a file path within an archive is an image.
 */
export function isImageFile(filePath: string): boolean {
  return IMAGE_EXTENSIONS.has(extensionOf(filePath));
}

/** MIME type for each supported image extension. */
const MIME_TYPES: Record<string, string> = {
  jpg: "image/jpeg",
  jpeg: "image/jpeg",
  png: "image/png",
  webp: "image/webp",
  gif: "image/gif",
  bmp: "image/bmp",
};

/**
 * Get the MIME type for an image file extension.
 * Unknown extensions fall back to "application/octet-stream".
 */
export function getImageMimeType(filePath: string): string {
  return MIME_TYPES[extensionOf(filePath)] ?? "application/octet-stream";
}

View File

@@ -438,3 +438,35 @@ export async function getExistingChannelsByTelegramId(): Promise<Map<string, str
/** Look up a Telegram account row by primary key; resolves to null when absent. */
export async function getAccountById(accountId: string) {
  return db.telegramAccount.findUnique({ where: { id: accountId } });
}
/**
 * Find packages that claim to be uploaded (both destMessageId and
 * destChannelId set) but whose destination messages may no longer exist
 * in Telegram — these are the candidates for startup verification.
 *
 * Groups by destChannelId so the caller can batch-verify per channel.
 */
export async function getPackagesWithDestMessage() {
  // "Uploaded" means both destination fields are populated.
  const uploadedFilter = {
    destMessageId: { not: null },
    destChannelId: { not: null },
  };
  return db.package.findMany({
    where: uploadedFilter,
    select: {
      id: true,
      fileName: true,
      contentHash: true,
      destChannelId: true,
      destMessageId: true,
      // Source channel's Telegram id, needed to re-fetch if recovery is required.
      sourceChannel: { select: { telegramId: true } },
    },
  });
}
/**
 * Clear a package's destination channel/message so the next ingestion
 * run treats it as not-yet-uploaded and re-processes it from scratch.
 */
export async function resetPackageDestination(packageId: string) {
  const clearedDestination = { destChannelId: null, destMessageId: null };
  return db.package.update({
    where: { id: packageId },
    data: clearedDestination,
  });
}

View File

@@ -0,0 +1,217 @@
import path from "path";
import { mkdir, rm } from "fs/promises";
import { db } from "./db/client.js";
import { config } from "./util/config.js";
import { childLogger } from "./util/logger.js";
import { withTdlibMutex } from "./util/mutex.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { downloadFile } from "./tdlib/download.js";
import { getActiveAccounts } from "./db/queries.js";
import { extractPreviewImage } from "./preview/extract.js";
import { getImageMimeType } from "./archive/extract-image.js";
const log = childLogger("extract-listener");
/**
 * Process a single archive extract request.
 * Downloads the archive from Telegram (dest channel), extracts the
 * requested image file, and writes the result to the DB.
 *
 * Request lifecycle: PENDING → IN_PROGRESS → COMPLETED (imageData set)
 * or FAILED (error set). Exits silently when the request is missing or
 * no longer PENDING, so duplicate pg_notify deliveries are harmless.
 *
 * @param requestId Primary key of the archiveExtractRequest row to process.
 */
export async function processExtractRequest(requestId: string): Promise<void> {
  // Load the request along with the package fields needed to locate the
  // uploaded archive in the destination channel.
  const request = await db.archiveExtractRequest.findUnique({
    where: { id: requestId },
    include: {
      package: {
        select: {
          id: true,
          fileName: true,
          fileSize: true,
          archiveType: true,
          destChannelId: true,
          destMessageId: true,
          isMultipart: true,
          partCount: true,
        },
      },
    },
  });
  if (!request || request.status !== "PENDING") {
    // Already claimed by another run, or deleted — nothing to do.
    log.debug({ requestId }, "Extract request not found or not pending");
    return;
  }
  const pkg = request.package;
  if (!pkg.destChannelId || !pkg.destMessageId) {
    // There is no uploaded copy to download from; fail fast.
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: { status: "FAILED", error: "Package has no destination upload" },
    });
    return;
  }
  // Multipart archives require downloading and reassembling all parts,
  // which is too complex for on-demand extraction. Reject early.
  if (pkg.isMultipart && pkg.partCount > 1) {
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: { status: "FAILED", error: "Image extraction is not supported for multipart archives" },
    });
    return;
  }
  // Check for a cached result first: if another request for the same
  // package+filePath already completed, reuse its data.
  const cached = await db.archiveExtractRequest.findFirst({
    where: {
      packageId: pkg.id,
      filePath: request.filePath,
      status: "COMPLETED",
      imageData: { not: null },
      id: { not: requestId },
    },
    select: { imageData: true, contentType: true },
  });
  if (cached?.imageData) {
    log.info({ requestId, filePath: request.filePath }, "Reusing cached extraction result");
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: {
        status: "COMPLETED",
        imageData: cached.imageData,
        contentType: cached.contentType,
      },
    });
    return;
  }
  // Claim the request before starting the slow download/extract work.
  await db.archiveExtractRequest.update({
    where: { id: requestId },
    data: { status: "IN_PROGRESS" },
  });
  log.info(
    { requestId, packageId: pkg.id, filePath: request.filePath, archiveType: pkg.archiveType },
    "Processing extract request"
  );
  // Per-request scratch directory; removed unconditionally in the finally below.
  const tempDir = path.join(config.tempDir, `extract_${requestId}`);
  try {
    await mkdir(tempDir, { recursive: true });
    // Wrap the entire TDLib session in the mutex so no other TDLib
    // operation can run concurrently (TDLib is single-session).
    await withTdlibMutex("extract", async () => {
      const accounts = await getActiveAccounts();
      if (accounts.length === 0) {
        throw new Error("No authenticated Telegram accounts available");
      }
      // Any authenticated account will do; use the first.
      const account = accounts[0];
      const client = await createTdlibClient({ id: account.id, phone: account.phone });
      try {
        // Load chat list so TDLib can find the dest channel
        try {
          await client.invoke({
            _: "getChats",
            chat_list: { _: "chatListMain" },
            limit: 1000,
          });
        } catch {
          // May already be loaded
        }
        // Get the dest channel telegram ID
        const destChannel = await db.telegramChannel.findUnique({
          where: { id: pkg.destChannelId! },
          select: { telegramId: true },
        });
        if (!destChannel) {
          throw new Error("Destination channel not found in DB");
        }
        const chatId = Number(destChannel.telegramId);
        const messageId = Number(pkg.destMessageId);
        // Get the file_id from the destination message
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const message = await client.invoke({
          _: "getMessage",
          chat_id: chatId,
          message_id: messageId,
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
        }) as any;
        const doc = message?.content?.document;
        if (!doc?.document?.id) {
          throw new Error("Could not find document in destination message");
        }
        const fileId = String(doc.document.id);
        // Prefer Telegram's file name; fall back to the stored package name.
        const fileName = doc.file_name || pkg.fileName;
        const archivePath = path.join(tempDir, fileName);
        log.info(
          { requestId, fileName, fileId, chatId, messageId },
          "Downloading archive for extraction"
        );
        await downloadFile(
          client,
          fileId,
          archivePath,
          pkg.fileSize,
          fileName
        );
        // Extract the requested image using the existing CLI-based extractor.
        // This pipes the file to stdout (no temp files needed for the extracted image).
        const imageData = await extractPreviewImage(
          archivePath,
          pkg.archiveType as "ZIP" | "RAR" | "SEVEN_Z" | "DOCUMENT",
          request.filePath
        );
        if (!imageData) {
          throw new Error(`Could not extract "${request.filePath}" from archive`);
        }
        // Cap at 5MB for safety
        if (imageData.length > 5 * 1024 * 1024) {
          throw new Error(`Extracted image is too large (${(imageData.length / 1024 / 1024).toFixed(1)}MB)`);
        }
        const contentType = getImageMimeType(request.filePath);
        await db.archiveExtractRequest.update({
          where: { id: requestId },
          data: {
            status: "COMPLETED",
            imageData: new Uint8Array(imageData),
            contentType,
          },
        });
        log.info(
          { requestId, filePath: request.filePath, bytes: imageData.length },
          "Image extracted successfully"
        );
      } finally {
        // Always tear down the TDLib client, even on failure.
        await closeTdlibClient(client).catch(() => {});
      }
    });
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err);
    log.error({ err, requestId }, "Extract request failed");
    // Best-effort failure write — the original error has already been logged.
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: { status: "FAILED", error: errMsg },
    }).catch(() => {});
  } finally {
    // Remove the scratch directory regardless of outcome.
    await rm(tempDir, { recursive: true, force: true }).catch(() => {});
  }
}

View File

@@ -3,7 +3,9 @@ import { pool } from "./db/client.js";
import { childLogger } from "./util/logger.js";
import { withTdlibMutex } from "./util/mutex.js";
import { processFetchRequest } from "./worker.js";
import { generateInviteLink, createSupergroup } from "./tdlib/chats.js";
import { processExtractRequest } from "./extract-listener.js";
import { rebuildPackageDatabase } from "./rebuild.js";
import { generateInviteLink, createSupergroup, searchPublicChat } from "./tdlib/chats.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { triggerImmediateCycle } from "./scheduler.js";
import {
@@ -13,6 +15,7 @@ import {
getActiveAccounts,
upsertChannel,
ensureAccountChannelLink,
updateFetchRequestStatus,
} from "./db/queries.js";
const log = childLogger("fetch-listener");
@@ -31,6 +34,8 @@ const RECONNECT_DELAY_MS = 5_000;
* - `generate_invite` — payload = channelId → generate invite link for destination
* - `create_destination` — payload = JSON { requestId, title } → create supergroup via TDLib
* - `ingestion_trigger` — trigger an immediate ingestion cycle
* - `join_channel` — payload = JSON { requestId, input, accountId } → join/lookup channel by link/username
* - `rebuild_packages` — payload = requestId → rebuild package DB from destination channel
*
* If the underlying connection is lost, the listener automatically reconnects
* so that pg_notify signals are never silently dropped.
@@ -47,6 +52,9 @@ async function connectListener(): Promise<void> {
await pgClient.query("LISTEN generate_invite");
await pgClient.query("LISTEN create_destination");
await pgClient.query("LISTEN ingestion_trigger");
await pgClient.query("LISTEN join_channel");
await pgClient.query("LISTEN archive_extract");
await pgClient.query("LISTEN rebuild_packages");
pgClient.on("notification", (msg) => {
if (msg.channel === "channel_fetch" && msg.payload) {
@@ -57,6 +65,12 @@ async function connectListener(): Promise<void> {
handleCreateDestination(msg.payload);
} else if (msg.channel === "ingestion_trigger") {
handleIngestionTrigger();
} else if (msg.channel === "join_channel" && msg.payload) {
handleJoinChannel(msg.payload);
} else if (msg.channel === "archive_extract" && msg.payload) {
handleArchiveExtract(msg.payload);
} else if (msg.channel === "rebuild_packages" && msg.payload) {
handleRebuildPackages(msg.payload);
}
});
@@ -82,7 +96,7 @@ async function connectListener(): Promise<void> {
}
});
log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger)");
log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages)");
} catch (err) {
log.error({ err }, "Failed to start fetch listener — retrying");
scheduleReconnect();
@@ -260,6 +274,217 @@ function handleCreateDestination(payload: string): void {
});
}
// ── Join channel handler ──
/**
 * Parse a Telegram link/username into its type and identifier.
 *
 * Recognized forms:
 * - @username or bare username          → public chat search
 * - https://t.me/username               → public chat search
 * - https://t.me/+INVITE_HASH           → join by invite link
 * - https://t.me/joinchat/INVITE_HASH   → join by invite link (legacy)
 * (telegram.me variants of all link forms are accepted too)
 *
 * Returns null when the input matches none of the above.
 */
function parseTelegramInput(input: string): { type: "username"; username: string } | { type: "invite"; link: string } | null {
  const value = input.trim();

  // Private invite links: t.me/+HASH or legacy t.me/joinchat/HASH.
  const inviteRes = [
    /^https?:\/\/t\.me\/\+([a-zA-Z0-9_-]+)$/,
    /^https?:\/\/t\.me\/joinchat\/([a-zA-Z0-9_-]+)$/,
    /^https?:\/\/telegram\.me\/\+([a-zA-Z0-9_-]+)$/,
    /^https?:\/\/telegram\.me\/joinchat\/([a-zA-Z0-9_-]+)$/,
  ];
  if (inviteRes.some((re) => re.test(value))) {
    return { type: "invite", link: value };
  }

  // Public channel link: https://t.me/<username>
  const fromLink = /^https?:\/\/(?:t\.me|telegram\.me)\/([a-zA-Z][a-zA-Z0-9_]{3,31})$/.exec(value);
  if (fromLink) {
    return { type: "username", username: fromLink[1] };
  }

  // Bare username, with or without a leading "@".
  const bare = /^@?([a-zA-Z][a-zA-Z0-9_]{3,31})$/.exec(value);
  if (bare) {
    return { type: "username", username: bare[1] };
  }

  return null;
}
/**
 * Handle a `join_channel` pg_notify: join (or look up) a Telegram channel
 * from a user-supplied link/username, register it as an active SOURCE
 * channel, and link the chosen account to it as READER.
 *
 * Payload is JSON: { requestId, input, accountId }. Outcome is recorded
 * on the fetch-request row: IN_PROGRESS → COMPLETED (with channel JSON)
 * or FAILED (with a user-facing error message). Work is chained onto
 * `fetchQueue` so it serializes with all other TDLib handlers.
 */
function handleJoinChannel(payload: string): void {
  fetchQueue = fetchQueue.then(async () => {
    let requestId: string | undefined;
    try {
      const parsed = JSON.parse(payload) as { requestId: string; input: string; accountId: string };
      requestId = parsed.requestId;
      await withTdlibMutex("join-channel", async () => {
        await updateFetchRequestStatus(requestId!, "IN_PROGRESS");
        // Prefer the requested account; fall back to any active one.
        const accounts = await getActiveAccounts();
        const account = accounts.find((a) => a.id === parsed.accountId) ?? accounts[0];
        if (!account) {
          throw new Error("No authenticated accounts available");
        }
        const client = await createTdlibClient({ id: account.id, phone: account.phone });
        try {
          const linkInfo = parseTelegramInput(parsed.input);
          if (!linkInfo) {
            throw new Error(
              "Invalid input. Use a t.me link (e.g. https://t.me/channel_name), " +
              "an invite link (e.g. https://t.me/+abc123), or a @username."
            );
          }
          let chatInfo: { chatId: bigint; title: string; type: string; isForum: boolean };
          if (linkInfo.type === "username") {
            // Public chat: search by username
            const result = await searchPublicChat(client, linkInfo.username);
            if (!result) {
              throw new Error(`Public channel "@${linkInfo.username}" not found. Check the username and try again.`);
            }
            // Only channels/supergroups can be ingestion sources.
            if (result.type !== "channel" && result.type !== "supergroup") {
              throw new Error(`"@${linkInfo.username}" is a ${result.type}, not a channel or group. Only channels and supergroups are supported.`);
            }
            chatInfo = { chatId: result.chatId, title: result.title, type: result.type, isForum: result.isForum };
          } else {
            // Private/invite link: join first, then get chat info
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            let joinResult: any;
            try {
              joinResult = await client.invoke({
                _: "joinChatByInviteLink",
                invite_link: linkInfo.link,
              });
            } catch (joinErr: unknown) {
              const msg = joinErr instanceof Error ? joinErr.message : String(joinErr);
              // "INVITE_REQUEST_SENT" means the chat requires admin approval
              if (msg.includes("INVITE_REQUEST_SENT")) {
                throw new Error("Join request sent. An admin of that channel must approve it before it can be added.");
              }
              // Already a member is fine
              // NOTE(review): INVITE_HASH_EXPIRED is also tolerated here —
              // presumably because an expired hash can still resolve via
              // checkChatInviteLink below; confirm against TDLib behavior.
              if (!msg.includes("USER_ALREADY_PARTICIPANT") && !msg.includes("INVITE_HASH_EXPIRED")) {
                throw new Error(`Failed to join via invite link: ${msg}`);
              }
              // If already a participant, we need to get chat info from the link
              // Try checkChatInviteLink to get the chat id
              try {
                // eslint-disable-next-line @typescript-eslint/no-explicit-any
                const checkResult = (await client.invoke({
                  _: "checkChatInviteLink",
                  invite_link: linkInfo.link,
                })) as any;
                if (checkResult.chat_id) {
                  joinResult = { id: checkResult.chat_id };
                } else {
                  throw joinErr;
                }
              } catch {
                // Re-throw the original join error, not the check error.
                throw joinErr;
              }
            }
            // Get full chat info
            const chatId = joinResult?.id ?? joinResult?.chat_id;
            if (!chatId) {
              throw new Error("Joined channel but could not determine chat ID.");
            }
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            const chat = (await client.invoke({ _: "getChat", chat_id: chatId })) as any;
            let type: string = "other";
            let isForum = false;
            if (chat.type?._ === "chatTypeSupergroup") {
              try {
                // eslint-disable-next-line @typescript-eslint/no-explicit-any
                const sg = (await client.invoke({
                  _: "getSupergroup",
                  supergroup_id: chat.type.supergroup_id,
                })) as any;
                type = sg.is_channel ? "channel" : "supergroup";
                isForum = sg.is_forum ?? false;
              } catch {
                // Best-effort: if supergroup details fail, still allow it.
                type = "supergroup";
              }
            } else if (chat.type?._ === "chatTypeBasicGroup") {
              type = "group";
            }
            if (type !== "channel" && type !== "supergroup") {
              throw new Error(`The joined chat is a ${type}, not a channel or group. Only channels and supergroups are supported.`);
            }
            chatInfo = { chatId: BigInt(chatId), title: chat.title ?? "Unknown", type, isForum };
          }
          // Upsert channel in DB (active as source by default since user explicitly added it)
          const channel = await upsertChannel({
            telegramId: chatInfo.chatId,
            title: chatInfo.title,
            type: "SOURCE",
            isForum: chatInfo.isForum,
            isActive: true,
          });
          // Link the account as READER
          await ensureAccountChannelLink(account.id, channel.id, "READER");
          log.info(
            { channelId: channel.id, telegramId: chatInfo.chatId.toString(), title: chatInfo.title },
            "Channel joined and added"
          );
          await updateFetchRequestStatus(requestId!, "COMPLETED", {
            resultJson: JSON.stringify({
              channelId: channel.id,
              telegramId: chatInfo.chatId.toString(),
              title: chatInfo.title,
              type: chatInfo.type,
              isForum: chatInfo.isForum,
            }),
          });
        } finally {
          await closeTdlibClient(client);
        }
      });
    } catch (err) {
      log.error({ err, payload }, "Failed to join channel");
      // Surface the failure to the UI when we managed to parse a requestId.
      if (requestId) {
        try {
          await updateFetchRequestStatus(requestId, "FAILED", {
            error: err instanceof Error ? err.message : String(err),
          });
        } catch {
          // Best-effort
        }
      }
    }
  });
}
// ── Archive extract handler ──
/**
 * Queue processing of an archive-extract request. The job is chained
 * onto `fetchQueue` so it never overlaps other queued TDLib work, and
 * failures are logged rather than breaking the queue chain.
 */
function handleArchiveExtract(requestId: string): void {
  const job = async (): Promise<void> => {
    try {
      log.info({ requestId }, "Archive extract request received");
      await processExtractRequest(requestId);
    } catch (err) {
      log.error({ err, requestId }, "Failed to process archive extract request");
    }
  };
  fetchQueue = fetchQueue.then(job);
}
// ── Ingestion trigger handler ──
function handleIngestionTrigger(): void {
@@ -272,3 +497,17 @@ function handleIngestionTrigger(): void {
}
});
}
// ── Package database rebuild handler ──
/**
 * Queue a package-database rebuild. Runs under the TDLib mutex (the
 * rebuild opens its own client session) and is chained onto `fetchQueue`
 * so it serializes with all other handlers; errors are logged, never thrown.
 */
function handleRebuildPackages(requestId: string): void {
  const job = async (): Promise<void> => {
    try {
      await withTdlibMutex("rebuild-packages", () => rebuildPackageDatabase(requestId));
    } catch (err) {
      log.error({ err, requestId }, "Failed to rebuild package database");
    }
  };
  fetchQueue = fetchQueue.then(job);
}

View File

@@ -3,6 +3,7 @@ import { config } from "./util/config.js";
import { logger } from "./util/logger.js";
import { markStaleRunsAsFailed } from "./db/queries.js";
import { cleanupTempDir } from "./worker.js";
import { recoverIncompleteUploads } from "./recovery.js";
import { startScheduler, stopScheduler } from "./scheduler.js";
import { startFetchListener, stopFetchListener } from "./fetch-listener.js";
import { db, pool } from "./db/client.js";
@@ -26,6 +27,10 @@ async function main(): Promise<void> {
await cleanupTempDir();
await markStaleRunsAsFailed();
// Verify destination messages exist for all "uploaded" packages.
// Resets any packages whose dest message is missing so they get re-processed.
await recoverIncompleteUploads();
// Start the fetch listener (pg_notify for on-demand channel fetching)
await startFetchListener();

View File

@@ -0,0 +1,111 @@
import { execFile } from "child_process";
import { promisify } from "util";
import { childLogger } from "../util/logger.js";
import type { FileEntry } from "../archive/zip-reader.js";
const execFileAsync = promisify(execFile);
const log = childLogger("preview-extract");
/** Max bytes we'll accept for an extracted preview image (2MB). */
const MAX_PREVIEW_BYTES = 2 * 1024 * 1024;
/** Image extensions we consider valid previews, in priority order. */
const IMAGE_EXTENSIONS = new Set(["jpg", "jpeg", "png"]);
/**
 * Pick the best preview image from the file entries list.
 *
 * Preference order: files with preview-like names (preview.jpg, 01.jpg,
 * insta.jpg) at shallow archive depth beat arbitrary images buried in
 * subdirectories. Images over 2MB uncompressed are excluded — they are
 * probably textures, not previews. Returns null when nothing qualifies.
 */
export function pickPreviewFile(entries: FileEntry[]): FileEntry | null {
  const sizeLimit = BigInt(MAX_PREVIEW_BYTES);
  const images = entries.filter(
    (e) =>
      !!e.extension &&
      IMAGE_EXTENSIONS.has(e.extension.toLowerCase()) &&
      e.uncompressedSize <= sizeLimit
  );
  if (images.length === 0) return null;

  // Lower score wins: preview-ish names score 0-2, everything else 10,
  // plus one point per directory level of nesting.
  const scoreFor = (entry: FileEntry): number => {
    const depth = entry.path.split("/").length - 1;
    const name = entry.fileName.toLowerCase();
    let nameScore = 10; // default for arbitrary images
    if (/^(preview|thumb|cover|insta)\b/i.test(name)) {
      nameScore = 0; // dedicated preview-style name
    } else if (/^0*[1-2]\.(jpe?g|png)$/i.test(name)) {
      nameScore = 1; // 01.jpg, 1.jpg, 02.jpg — common preview filenames
    } else if (/^0*[3-9]\.(jpe?g|png)$/i.test(name)) {
      nameScore = 2;
    }
    return nameScore + depth;
  };

  // Single pass keeping the first entry with the strictly-lowest score
  // (matches the stable-sort tie-breaking of picking the earliest entry).
  let best = images[0];
  let bestScore = scoreFor(best);
  for (const candidate of images.slice(1)) {
    const candidateScore = scoreFor(candidate);
    if (candidateScore < bestScore) {
      best = candidate;
      bestScore = candidateScore;
    }
  }
  return best;
}
/**
 * Extract a single file from an archive and return its contents as a Buffer.
 *
 * Each archive type maps to a CLI invocation that pipes the named file to
 * stdout (so no temp file is written for the extracted image):
 * - ZIP: unzip -p
 * - RAR: unrar p -inul
 * - 7Z:  7z e -so
 *
 * Returns null for DOCUMENT archives, empty output, or any extraction
 * failure (timeout, output over MAX_PREVIEW_BYTES, missing entry, …).
 */
export async function extractPreviewImage(
  archivePath: string,
  archiveType: "ZIP" | "RAR" | "SEVEN_Z" | "DOCUMENT",
  filePath: string
): Promise<Buffer | null> {
  if (archiveType === "DOCUMENT") return null;

  // Tool + arguments per archive type; all stream the file to stdout.
  const commands: Record<"ZIP" | "RAR" | "SEVEN_Z", [string, string[]]> = {
    ZIP: ["unzip", ["-p", archivePath, filePath]],
    RAR: ["unrar", ["p", "-inul", archivePath, filePath]],
    SEVEN_Z: ["7z", ["e", "-so", archivePath, filePath]],
  };
  const [tool, args] = commands[archiveType];

  try {
    const result = await execFileAsync(tool, args, {
      timeout: 15000,
      maxBuffer: MAX_PREVIEW_BYTES,
      encoding: "buffer",
    });
    const data = result.stdout as unknown as Buffer;
    if (data.length === 0) {
      log.warn({ archivePath, filePath }, "Extracted preview image is empty");
      return null;
    }
    log.debug(
      { archivePath, filePath, bytes: data.length },
      "Extracted preview image from archive"
    );
    return data;
  } catch (err) {
    log.warn({ err, archivePath, filePath }, "Failed to extract preview image from archive");
    return null;
  }
}

411
worker/src/rebuild.ts Normal file
View File

@@ -0,0 +1,411 @@
import type { Client } from "tdl";
import { config } from "./util/config.js";
import { childLogger } from "./util/logger.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { invokeWithTimeout, MAX_SCAN_PAGES } from "./tdlib/download.js";
import { isArchiveAttachment } from "./archive/detect.js";
import { extractCreatorFromFileName } from "./archive/creator.js";
import { groupArchiveSets } from "./archive/multipart.js";
import type { TelegramMessage } from "./archive/multipart.js";
import {
getActiveAccounts,
getGlobalDestinationChannel,
} from "./db/queries.js";
import { db } from "./db/client.js";
const log = childLogger("rebuild");
/** Live progress snapshot for a package-DB rebuild, serialized into the fetch request's resultJson. */
export interface RebuildProgress {
  // Lifecycle state of the rebuild request.
  status: "PENDING" | "IN_PROGRESS" | "COMPLETED" | "FAILED";
  // Total messages examined in the destination channel so far.
  messagesScanned: number;
  // Document messages recognized as archive files.
  documentsFound: number;
  // Package rows created, or existing rows re-linked to a destination message.
  packagesCreated: number;
  // Archive sets skipped (already linked, hash conflict, or no source channel).
  packagesSkipped: number;
  // Failure reason; only set when status is FAILED.
  error?: string;
}
/**
 * Scan the destination channel for uploaded archive files and rebuild
 * the package database from what's actually there.
 *
 * Uses searchChatMessages (not getChatHistory) because the destination
 * channel may be a hidden-history supergroup.
 *
 * For each document found:
 * 1. Check if a Package record with that destMessageId already exists -> skip
 * 2. Try to match by fileName to an existing package without destMessageId -> update it
 * 3. Otherwise create a minimal Package record (no file listing, no content hash)
 *
 * This is a "best-effort" rebuild. It restores the mapping between destination
 * messages and package records so that the bot can deliver files. It does NOT
 * re-download archives or rebuild file listings (those require the source channel).
 *
 * Status and live progress are written to the channelFetchRequest row
 * identified by `requestId` (IN_PROGRESS → COMPLETED/FAILED).
 */
export async function rebuildPackageDatabase(
  requestId: string
): Promise<void> {
  log.info({ requestId }, "Starting package database rebuild");
  try {
    await db.channelFetchRequest.update({
      where: { id: requestId },
      data: { status: "IN_PROGRESS" },
    });
    // Get an authenticated account for TDLib
    const accounts = await getActiveAccounts();
    if (accounts.length === 0) {
      throw new Error("No authenticated accounts available");
    }
    const destChannel = await getGlobalDestinationChannel();
    if (!destChannel) {
      throw new Error("No destination channel configured");
    }
    const account = accounts[0];
    const client = await createTdlibClient({
      id: account.id,
      phone: account.phone,
    });
    try {
      // Mutable progress object; re-serialized on every update below.
      const progress: RebuildProgress = {
        status: "IN_PROGRESS",
        messagesScanned: 0,
        documentsFound: 0,
        packagesCreated: 0,
        packagesSkipped: 0,
      };
      // Write initial progress
      await updateRebuildProgress(requestId, progress);
      // Scan the destination channel for all document messages
      const archiveMessages = await scanDestinationChannel(
        client,
        destChannel.telegramId,
        async (scanned) => {
          progress.messagesScanned = scanned;
          await updateRebuildProgress(requestId, progress);
        }
      );
      progress.documentsFound = archiveMessages.length;
      await updateRebuildProgress(requestId, progress);
      log.info(
        {
          messagesScanned: progress.messagesScanned,
          documentsFound: archiveMessages.length,
        },
        "Destination channel scan complete"
      );
      // Group into archive sets (handles multipart)
      const archiveSets = groupArchiveSets(archiveMessages);
      log.info(
        { archiveSets: archiveSets.length, totalMessages: archiveMessages.length },
        "Grouped into archive sets"
      );
      // Get ALL source channels so we can try to match
      const sourceChannels = await db.telegramChannel.findMany({
        where: { type: "SOURCE" },
        select: { id: true, title: true },
      });
      // Use the first source channel as a fallback for unmatched packages
      const fallbackSourceId = sourceChannels[0]?.id ?? null;
      // Process each archive set
      for (const archiveSet of archiveSets) {
        // The first part carries the canonical file name and dest message id.
        const firstPart = archiveSet.parts[0];
        const fileName = firstPart.fileName;
        const destMessageId = firstPart.id;
        const totalSize = archiveSet.parts.reduce(
          (sum, p) => sum + p.fileSize,
          0n
        );
        // 1. Check if a package with this destMessageId already exists
        const existingByDest = await db.package.findFirst({
          where: {
            destChannelId: destChannel.id,
            destMessageId,
          },
          select: { id: true },
        });
        if (existingByDest) {
          progress.packagesSkipped++;
          await updateRebuildProgress(requestId, progress);
          continue;
        }
        // 2. Try to match by fileName to an existing package without destMessageId
        // NOTE(review): fileName is not guaranteed unique across sources, so
        // this can attach the dest message to the wrong same-named package —
        // acceptable for a best-effort rebuild.
        const existingByName = await db.package.findFirst({
          where: {
            fileName,
            destMessageId: null,
          },
          select: { id: true },
        });
        if (existingByName) {
          // Update existing record with destination info
          await db.package.update({
            where: { id: existingByName.id },
            data: {
              destChannelId: destChannel.id,
              destMessageId,
              isMultipart: archiveSet.parts.length > 1,
              partCount: archiveSet.parts.length,
            },
          });
          progress.packagesCreated++;
          log.debug({ fileName, destMessageId: Number(destMessageId) }, "Updated existing package with dest info");
          await updateRebuildProgress(requestId, progress);
          continue;
        }
        // 3. Create a new minimal Package record
        // We don't have the source message or content hash, so generate a placeholder hash
        const placeholderHash = `rebuild:${destChannel.id}:${destMessageId}`;
        const creator = extractCreatorFromFileName(fileName) ?? null;
        const archiveType = archiveSet.type;
        // We need a sourceChannelId (required FK). Use fallback if available.
        if (!fallbackSourceId) {
          log.warn(
            { fileName },
            "No source channels exist — cannot create package record without a source channel"
          );
          progress.packagesSkipped++;
          await updateRebuildProgress(requestId, progress);
          continue;
        }
        try {
          await db.package.create({
            data: {
              contentHash: placeholderHash,
              fileName,
              fileSize: totalSize,
              archiveType,
              sourceChannelId: fallbackSourceId,
              sourceMessageId: 0n, // Unknown — rebuilt from destination
              destChannelId: destChannel.id,
              destMessageId,
              isMultipart: archiveSet.parts.length > 1,
              partCount: archiveSet.parts.length,
              fileCount: 0,
              creator,
            },
          });
          progress.packagesCreated++;
          log.debug(
            { fileName, destMessageId: Number(destMessageId), creator },
            "Created new package from destination"
          );
        } catch (err) {
          // Unique constraint on contentHash — might be a race or duplicate
          if (err instanceof Error && err.message.includes("Unique constraint")) {
            log.debug({ fileName, placeholderHash }, "Package already exists (hash conflict), skipping");
            progress.packagesSkipped++;
          } else {
            throw err;
          }
        }
        await updateRebuildProgress(requestId, progress);
      }
      // Done
      progress.status = "COMPLETED";
      await updateRebuildProgress(requestId, progress);
      await db.channelFetchRequest.update({
        where: { id: requestId },
        data: {
          status: "COMPLETED",
          resultJson: JSON.stringify(progress),
        },
      });
      log.info(
        {
          messagesScanned: progress.messagesScanned,
          documentsFound: progress.documentsFound,
          packagesCreated: progress.packagesCreated,
          packagesSkipped: progress.packagesSkipped,
        },
        "Package database rebuild complete"
      );
    } finally {
      await closeTdlibClient(client);
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    log.error({ err, requestId }, "Package database rebuild failed");
    // Record failure both in the status column and in the progress JSON
    // that the UI polls.
    await db.channelFetchRequest.update({
      where: { id: requestId },
      data: {
        status: "FAILED",
        error: message,
        resultJson: JSON.stringify({
          status: "FAILED",
          error: message,
        }),
      },
    });
  }
}
/**
 * Scan the destination channel for document messages using searchChatMessages.
 * Returns archive messages in chronological order (oldest first).
 *
 * searchChatMessages is used (rather than getChatHistory) because the
 * destination channel may be a hidden-history supergroup.
 *
 * Termination: an empty result page, the MAX_SCAN_PAGES cap, or stuck
 * pagination (from_message_id not advancing). Pages are paced with
 * config.apiDelayMs between requests.
 *
 * @param client     Authenticated TDLib client.
 * @param chatId     Telegram chat id of the destination channel.
 * @param onProgress Optional callback with the running scanned-message count
 *                   (throttled to at most once every 2 seconds).
 */
async function scanDestinationChannel(
  client: Client,
  chatId: bigint,
  onProgress?: (messagesScanned: number) => Promise<void>
): Promise<TelegramMessage[]> {
  const archives: TelegramMessage[] = [];
  let currentFromId = 0;
  let totalScanned = 0;
  let pageCount = 0;
  let lastProgressUpdate = 0;
  // eslint-disable-next-line no-constant-condition
  while (true) {
    if (pageCount >= MAX_SCAN_PAGES) {
      log.warn(
        { chatId: chatId.toString(), pageCount, totalScanned },
        "Hit max page limit for destination scan, stopping"
      );
      break;
    }
    pageCount++;
    const previousFromId = currentFromId;
    const result = await invokeWithTimeout<{
      messages?: {
        id: number;
        date: number;
        content: {
          _: string;
          document?: {
            file_name?: string;
            document?: {
              id: number;
              size: number;
            };
          };
        };
      }[];
    }>(client, {
      _: "searchChatMessages",
      chat_id: Number(chatId),
      query: "",
      from_message_id: currentFromId,
      offset: 0,
      limit: 100,
      filter: { _: "searchMessagesFilterDocument" },
      sender_id: null,
      message_thread_id: 0,
      saved_messages_topic_id: 0,
    });
    // An empty page means we've reached the beginning of the history.
    if (!result.messages || result.messages.length === 0) break;
    totalScanned += result.messages.length;
    for (const msg of result.messages) {
      const doc = msg.content?.document;
      // Keep only document messages whose file name looks like an archive.
      if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
        archives.push({
          id: BigInt(msg.id),
          fileName: doc.file_name,
          fileId: String(doc.document.id),
          fileSize: BigInt(doc.document.size),
          date: new Date(msg.date * 1000),
        });
      }
    }
    // Throttle progress updates to every 2 seconds
    const now = Date.now();
    if (onProgress && now - lastProgressUpdate >= 2000) {
      lastProgressUpdate = now;
      await onProgress(totalScanned);
    }
    currentFromId = result.messages[result.messages.length - 1].id;
    // Stuck detection: pagination cursor must advance every page.
    if (currentFromId === previousFromId) {
      log.warn(
        { chatId: chatId.toString(), currentFromId, totalScanned },
        "Pagination stuck, breaking"
      );
      break;
    }
    // FIX: do NOT stop when a page holds fewer than `limit` messages.
    // TDLib's searchChatMessages may return fewer results than requested
    // even when older messages remain, so breaking on a short page could
    // silently truncate the scan; only an empty page marks the end.
    await sleep(config.apiDelayMs);
  }
  // Final progress update
  if (onProgress) {
    await onProgress(totalScanned);
  }
  log.info(
    {
      chatId: chatId.toString(),
      archives: archives.length,
      totalScanned,
      pages: pageCount,
    },
    "Destination channel scan complete"
  );
  // Reverse to chronological order (oldest first)
  return archives.reverse();
}
/**
 * Persist the rebuild progress into the fetch request's resultJson field.
 * Writes are throttled to one every 2 seconds while IN_PROGRESS; any
 * terminal/status change is written immediately. DB errors are swallowed —
 * progress display is advisory and must never abort the rebuild.
 */
let lastUpdateTime = 0;
async function updateRebuildProgress(
  requestId: string,
  progress: RebuildProgress
): Promise<void> {
  const now = Date.now();
  const throttled = progress.status === "IN_PROGRESS" && now - lastUpdateTime < 2000;
  if (throttled) return;

  lastUpdateTime = now;
  try {
    await db.channelFetchRequest.update({
      where: { id: requestId },
      data: { resultJson: JSON.stringify(progress) },
    });
  } catch {
    // Best-effort
  }
}
/** Resolve after roughly `ms` milliseconds (plain setTimeout delay). */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}

187
worker/src/recovery.ts Normal file
View File

@@ -0,0 +1,187 @@
import { childLogger } from "./util/logger.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { withFloodWait } from "./util/retry.js";
import {
getActiveAccounts,
getPackagesWithDestMessage,
resetPackageDestination,
getGlobalDestinationChannel,
} from "./db/queries.js";
import type { Client } from "tdl";
const log = childLogger("recovery");
/**
 * Verify that destination messages still exist in Telegram for all
 * packages that claim to be uploaded. If a message is missing (deleted
 * or never actually committed), reset the package so the next ingestion
 * run will re-download and re-upload it.
 *
 * This handles the case where the worker crashed mid-upload: TDLib may
 * have returned a temporary message ID that was stored as destMessageId
 * but the upload never completed server-side, or the message was later
 * deleted from the destination channel.
 *
 * All failure paths are non-fatal: if recovery cannot run (no accounts,
 * no destination channel, client error) the worker proceeds normally.
 *
 * Called once on worker startup, before the scheduler begins.
 */
export async function recoverIncompleteUploads(): Promise<void> {
  const packages = await getPackagesWithDestMessage();
  if (packages.length === 0) {
    log.debug("No packages with destination messages to verify");
    return;
  }
  // We need a TDLib client to verify messages. Use the first active account.
  const accounts = await getActiveAccounts();
  if (accounts.length === 0) {
    log.info("No active accounts available for upload verification, skipping recovery");
    return;
  }
  const destChannel = await getGlobalDestinationChannel();
  if (!destChannel) {
    log.info("No destination channel configured, skipping recovery");
    return;
  }
  // Group packages by destChannelId for efficient verification.
  // NOTE(review): the grouping is currently informational only — every
  // message below is verified against the single global destination
  // channel's telegramId, not the package's own destChannelId. Confirm
  // that all uploaded packages live in that one channel.
  const byChannel = new Map<string, typeof packages>();
  for (const pkg of packages) {
    const channelId = pkg.destChannelId!;
    if (!byChannel.has(channelId)) {
      byChannel.set(channelId, []);
    }
    byChannel.get(channelId)!.push(pkg);
  }
  log.info(
    { totalPackages: packages.length, channels: byChannel.size },
    "Verifying destination messages exist in Telegram"
  );
  const account = accounts[0];
  let client: Client | undefined;
  try {
    client = await createTdlibClient({ id: account.id, phone: account.phone });
    // Load the chat list so TDLib can resolve chat IDs
    try {
      await client.invoke({
        _: "getChats",
        chat_list: { _: "chatListMain" },
        limit: 1000,
      });
    } catch {
      // May already be loaded
    }
    let resetCount = 0;
    let verifiedCount = 0;
    // Sequential verification (one getMessage per package); throttling/
    // flood-wait handling is delegated to verifyMessageExists.
    for (const [, channelPackages] of byChannel) {
      for (const pkg of channelPackages) {
        const exists = await verifyMessageExists(
          client,
          destChannel.telegramId,
          pkg.destMessageId!
        );
        if (exists) {
          verifiedCount++;
        } else {
          log.warn(
            {
              packageId: pkg.id,
              fileName: pkg.fileName,
              destMessageId: Number(pkg.destMessageId),
            },
            "Destination message missing in Telegram, resetting package for re-upload"
          );
          // Clear the package's destination so the next ingestion run
          // re-downloads and re-uploads it from scratch.
          await resetPackageDestination(pkg.id);
          resetCount++;
        }
      }
    }
    if (resetCount > 0) {
      log.info(
        { resetCount, verifiedCount, totalChecked: packages.length },
        "Upload recovery complete — packages reset for re-processing"
      );
    } else {
      log.info(
        { verifiedCount, totalChecked: packages.length },
        "Upload recovery complete — all destination messages verified"
      );
    }
  } catch (err) {
    // Swallow and log: a failed recovery pass simply retries on the
    // next worker startup.
    log.error({ err }, "Upload recovery failed (non-fatal, will retry next startup)");
  } finally {
    if (client) {
      await closeTdlibClient(client);
    }
  }
}
/**
 * Determine whether a message is still present in a Telegram chat and
 * still carries document content (all of our uploads are documents).
 *
 * Returns false when the message was deleted, never existed, or exists
 * but is no longer a document. Transient lookup failures (network
 * trouble, etc.) are treated as "exists" so that packages are never
 * reset because of a temporary error.
 */
async function verifyMessageExists(
  client: Client,
  chatTelegramId: bigint,
  messageId: bigint
): Promise<boolean> {
  let raw: unknown;
  try {
    raw = await withFloodWait(
      () =>
        client.invoke({
          _: "getMessage",
          chat_id: Number(chatTelegramId),
          message_id: Number(messageId),
        }),
      "getMessage:verify"
    );
  } catch (err) {
    // TDLib signals a deleted/unknown message via error code 404 or a
    // "not found" message text.
    const text = err instanceof Error ? err.message : String(err);
    const errorCode = (err as { code?: number })?.code;
    if (errorCode === 404 || text.includes("not found") || text.includes("Not Found")) {
      return false;
    }
    // Any other failure is assumed transient: report the message as
    // existing so we do not reset packages on flaky lookups.
    log.warn(
      { err, messageId: Number(messageId) },
      "Could not verify message (assuming it exists)"
    );
    return true;
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const message = raw as any;
  if (!message?.content) {
    return false;
  }
  if (message.content._ === "messageDocument") {
    return true;
  }
  // The message exists but its content was cleared or replaced with
  // something other than a document — treat as missing.
  log.debug(
    {
      messageId: Number(messageId),
      contentType: message.content._,
    },
    "Destination message exists but is not a document"
  );
  return false;
}

View File

@@ -176,6 +176,63 @@ export async function joinChatByInviteLink(
log.info({ inviteLink }, "Joined chat by invite link");
}
/**
 * Look up a public chat by its @username.
 * Resolves to the chat's info on success, or null when no such public
 * chat exists (the TDLib error is logged, not thrown).
 */
export async function searchPublicChat(
  client: Client,
  username: string
): Promise<TelegramChatInfo | null> {
  try {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const chat = (await withFloodWait(
      () => client.invoke({ _: "searchPublicChat", username }),
      "searchPublicChat"
    )) as any;

    let type: TelegramChatInfo["type"] = "other";
    let isForum = false;
    switch (chat.type?._) {
      case "chatTypeSupergroup":
        // Supergroups need a second call to distinguish broadcast
        // channels from regular supergroups and to read the forum flag.
        try {
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          const supergroup = (await withFloodWait(
            () =>
              client.invoke({
                _: "getSupergroup",
                supergroup_id: chat.type.supergroup_id,
              }),
            "getSupergroup"
          )) as any;
          type = supergroup.is_channel ? "channel" : "supergroup";
          isForum = supergroup.is_forum ?? false;
        } catch {
          // Details unavailable — fall back to a plain supergroup.
          type = "supergroup";
        }
        break;
      case "chatTypeBasicGroup":
        type = "group";
        break;
      case "chatTypePrivate":
      case "chatTypeSecret":
        type = "private";
        break;
    }

    log.info({ username, chatId: chat.id, type }, "Found public chat");
    return {
      chatId: BigInt(chat.id),
      title: chat.title ?? username,
      type,
      isForum,
    };
  } catch (err) {
    log.warn({ username, err }, "Public chat not found");
    return null;
  }
}
/** Pause for `ms` milliseconds via setTimeout. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => setTimeout(done, ms));
}

View File

@@ -34,6 +34,7 @@ import { getChannelMessages, downloadFile, downloadPhotoThumbnail } from "./tdli
import type { DownloadProgress, ChannelScanResult } from "./tdlib/download.js";
import { isChatForum, getForumTopicList, getTopicMessages } from "./tdlib/topics.js";
import { matchPreviewToArchive } from "./preview/match.js";
import { pickPreviewFile, extractPreviewImage } from "./preview/extract.js";
import { groupArchiveSets } from "./archive/multipart.js";
import type { ArchiveSet } from "./archive/multipart.js";
import { extractCreatorFromFileName, extractCreatorFromChannelTitle } from "./archive/creator.js";
@@ -971,6 +972,23 @@ async function processOneArchiveSet(
previewMsgId = matchedPhoto.id;
}
// ── Fallback: extract preview image from inside the archive ──
if (!previewData && entries.length > 0 && archiveSet.type !== "DOCUMENT") {
const previewEntry = pickPreviewFile(entries);
if (previewEntry) {
accountLog.debug(
{ fileName: archiveName, previewFile: previewEntry.path },
"Attempting to extract preview image from archive"
);
const archiveTypeForExtract = archiveSet.type === "7Z" ? "SEVEN_Z" as const : archiveSet.type as "ZIP" | "RAR";
previewData = await extractPreviewImage(
tempPaths[0],
archiveTypeForExtract,
previewEntry.path
);
}
}
// ── Resolve creator: topic name > filename extraction > channel title > null ──
const creator = topicCreator
?? extractCreatorFromFileName(archiveName)