Mirror of https://github.com/xCyanGrizzly/DragonsStash.git (synced 2026-05-10 22:01:16 +00:00)
feat: add preview management, channel controls, invite polish, and recovery
- Auto-extract preview images from ZIP/RAR/7z archives during ingestion
- Upload custom preview images via package drawer
- Select preview from archive contents with on-demand extraction UI
- Manually add Telegram channels by t.me link, username, or invite link
- Invite code UX: bulk create, copy link, usage tracking, delete confirm
- Incomplete upload recovery: verify dest messages on worker startup
- Rebuild package DB by scanning destination channel with live progress

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
@@ -3,7 +3,7 @@ FROM node:20-bookworm-slim AS deps
 
 RUN sed -i 's/^Components: main$/Components: main non-free/' /etc/apt/sources.list.d/debian.sources && \
     apt-get update && apt-get install -y \
-    libssl-dev zlib1g-dev unrar p7zip-full \
+    libssl-dev zlib1g-dev unzip unrar p7zip-full \
     && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app

@@ -26,7 +26,7 @@ FROM node:20-bookworm-slim AS runner
 
 RUN sed -i 's/^Components: main$/Components: main non-free/' /etc/apt/sources.list.d/debian.sources && \
     apt-get update && apt-get install -y \
-    libssl3 zlib1g unrar p7zip-full \
+    libssl3 zlib1g unzip unrar p7zip-full \
     && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app

worker/src/archive/extract-image.ts (new file, 33 lines)
@@ -0,0 +1,33 @@
import path from "path";

const IMAGE_EXTENSIONS = new Set(["jpg", "jpeg", "png", "webp", "gif", "bmp"]);

/**
 * Check if a file path within an archive is an image.
 */
export function isImageFile(filePath: string): boolean {
  const ext = path.extname(filePath).toLowerCase().slice(1);
  return IMAGE_EXTENSIONS.has(ext);
}

/**
 * Get the MIME type for an image file extension.
 */
export function getImageMimeType(filePath: string): string {
  const ext = path.extname(filePath).toLowerCase().slice(1);
  switch (ext) {
    case "jpg":
    case "jpeg":
      return "image/jpeg";
    case "png":
      return "image/png";
    case "webp":
      return "image/webp";
    case "gif":
      return "image/gif";
    case "bmp":
      return "image/bmp";
    default:
      return "application/octet-stream";
  }
}
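
For orientation, a quick usage sketch of the two helpers above (illustrative only; the file names are made up):

  import { isImageFile, getImageMimeType } from "./archive/extract-image.js";

  const listing = ["art/preview.jpg", "scene.blend", "tex/normal.PNG"];
  const images = listing.filter(isImageFile);  // ["art/preview.jpg", "tex/normal.PNG"]
  const mime = getImageMimeType(images[0]);    // "image/jpeg"
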
@@ -438,3 +438,35 @@ export async function getExistingChannelsByTelegramId(): Promise<Map<string, str
 export async function getAccountById(accountId: string) {
   return db.telegramAccount.findUnique({ where: { id: accountId } });
 }
+
+/**
+ * Find packages that have a destMessageId set (appear uploaded) but may
+ * reference messages that no longer exist in Telegram. These need
+ * verification on startup.
+ *
+ * Groups by destChannelId so the caller can batch-verify per channel.
+ */
+export async function getPackagesWithDestMessage() {
+  return db.package.findMany({
+    where: { destMessageId: { not: null }, destChannelId: { not: null } },
+    select: {
+      id: true,
+      fileName: true,
+      contentHash: true,
+      destChannelId: true,
+      destMessageId: true,
+      sourceChannel: { select: { telegramId: true } },
+    },
+  });
+}
+
+/**
+ * Reset a package's destination fields so it will be re-processed
+ * on the next ingestion run (treated as not-yet-uploaded).
+ */
+export async function resetPackageDestination(packageId: string) {
+  return db.package.update({
+    where: { id: packageId },
+    data: { destChannelId: null, destMessageId: null },
+  });
+}
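
As the doc comment says, grouping by destChannelId is left to the caller. A minimal sketch of that caller-side grouping, mirroring what recovery.ts later in this commit does:

  import { getPackagesWithDestMessage } from "./db/queries.js";

  const packages = await getPackagesWithDestMessage();
  const byChannel = new Map<string, typeof packages>();
  for (const pkg of packages) {
    const channelId = pkg.destChannelId!; // non-null per the query's where clause
    if (!byChannel.has(channelId)) byChannel.set(channelId, []);
    byChannel.get(channelId)!.push(pkg);
  }
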
worker/src/extract-listener.ts (new file, 217 lines)
@@ -0,0 +1,217 @@
import path from "path";
import { mkdir, rm } from "fs/promises";
import { db } from "./db/client.js";
import { config } from "./util/config.js";
import { childLogger } from "./util/logger.js";
import { withTdlibMutex } from "./util/mutex.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { downloadFile } from "./tdlib/download.js";
import { getActiveAccounts } from "./db/queries.js";
import { extractPreviewImage } from "./preview/extract.js";
import { getImageMimeType } from "./archive/extract-image.js";

const log = childLogger("extract-listener");

/**
 * Process a single archive extract request.
 * Downloads the archive from Telegram (dest channel), extracts the
 * requested image file, and writes the result to the DB.
 */
export async function processExtractRequest(requestId: string): Promise<void> {
  const request = await db.archiveExtractRequest.findUnique({
    where: { id: requestId },
    include: {
      package: {
        select: {
          id: true,
          fileName: true,
          fileSize: true,
          archiveType: true,
          destChannelId: true,
          destMessageId: true,
          isMultipart: true,
          partCount: true,
        },
      },
    },
  });

  if (!request || request.status !== "PENDING") {
    log.debug({ requestId }, "Extract request not found or not pending");
    return;
  }

  const pkg = request.package;
  if (!pkg.destChannelId || !pkg.destMessageId) {
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: { status: "FAILED", error: "Package has no destination upload" },
    });
    return;
  }

  // Multipart archives require downloading and reassembling all parts,
  // which is too complex for on-demand extraction. Reject early.
  if (pkg.isMultipart && pkg.partCount > 1) {
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: { status: "FAILED", error: "Image extraction is not supported for multipart archives" },
    });
    return;
  }

  // Check for a cached result first: if another request for the same
  // package+filePath already completed, reuse its data.
  const cached = await db.archiveExtractRequest.findFirst({
    where: {
      packageId: pkg.id,
      filePath: request.filePath,
      status: "COMPLETED",
      imageData: { not: null },
      id: { not: requestId },
    },
    select: { imageData: true, contentType: true },
  });

  if (cached?.imageData) {
    log.info({ requestId, filePath: request.filePath }, "Reusing cached extraction result");
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: {
        status: "COMPLETED",
        imageData: cached.imageData,
        contentType: cached.contentType,
      },
    });
    return;
  }

  await db.archiveExtractRequest.update({
    where: { id: requestId },
    data: { status: "IN_PROGRESS" },
  });

  log.info(
    { requestId, packageId: pkg.id, filePath: request.filePath, archiveType: pkg.archiveType },
    "Processing extract request"
  );

  const tempDir = path.join(config.tempDir, `extract_${requestId}`);

  try {
    await mkdir(tempDir, { recursive: true });

    // Wrap the entire TDLib session in the mutex so no other TDLib
    // operation can run concurrently (TDLib is single-session).
    await withTdlibMutex("extract", async () => {
      const accounts = await getActiveAccounts();
      if (accounts.length === 0) {
        throw new Error("No authenticated Telegram accounts available");
      }

      const account = accounts[0];
      const client = await createTdlibClient({ id: account.id, phone: account.phone });

      try {
        // Load chat list so TDLib can find the dest channel
        try {
          await client.invoke({
            _: "getChats",
            chat_list: { _: "chatListMain" },
            limit: 1000,
          });
        } catch {
          // May already be loaded
        }

        // Get the dest channel telegram ID
        const destChannel = await db.telegramChannel.findUnique({
          where: { id: pkg.destChannelId! },
          select: { telegramId: true },
        });

        if (!destChannel) {
          throw new Error("Destination channel not found in DB");
        }

        const chatId = Number(destChannel.telegramId);
        const messageId = Number(pkg.destMessageId);

        // Get the file_id from the destination message
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const message = await client.invoke({
          _: "getMessage",
          chat_id: chatId,
          message_id: messageId,
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
        }) as any;

        const doc = message?.content?.document;
        if (!doc?.document?.id) {
          throw new Error("Could not find document in destination message");
        }

        const fileId = String(doc.document.id);
        const fileName = doc.file_name || pkg.fileName;
        const archivePath = path.join(tempDir, fileName);

        log.info(
          { requestId, fileName, fileId, chatId, messageId },
          "Downloading archive for extraction"
        );

        await downloadFile(
          client,
          fileId,
          archivePath,
          pkg.fileSize,
          fileName
        );

        // Extract the requested image using the existing CLI-based extractor.
        // This pipes the file to stdout (no temp files needed for the extracted image).
        const imageData = await extractPreviewImage(
          archivePath,
          pkg.archiveType as "ZIP" | "RAR" | "SEVEN_Z" | "DOCUMENT",
          request.filePath
        );

        if (!imageData) {
          throw new Error(`Could not extract "${request.filePath}" from archive`);
        }

        // Cap at 5MB for safety
        if (imageData.length > 5 * 1024 * 1024) {
          throw new Error(`Extracted image is too large (${(imageData.length / 1024 / 1024).toFixed(1)}MB)`);
        }

        const contentType = getImageMimeType(request.filePath);

        await db.archiveExtractRequest.update({
          where: { id: requestId },
          data: {
            status: "COMPLETED",
            imageData: new Uint8Array(imageData),
            contentType,
          },
        });

        log.info(
          { requestId, filePath: request.filePath, bytes: imageData.length },
          "Image extracted successfully"
        );
      } finally {
        await closeTdlibClient(client).catch(() => {});
      }
    });
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err);
    log.error({ err, requestId }, "Extract request failed");
    await db.archiveExtractRequest.update({
      where: { id: requestId },
      data: { status: "FAILED", error: errMsg },
    }).catch(() => {});
  } finally {
    await rm(tempDir, { recursive: true, force: true }).catch(() => {});
  }
}
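
For context on how a request reaches processExtractRequest: the listener below LISTENs on the archive_extract channel and treats the pg_notify payload as the request id. A sketch of the producing side, assuming an ArchiveExtractRequest row was already created with status PENDING (the web app's actual call site is not part of this diff):

  import { pool } from "./db/client.js";

  async function notifyArchiveExtract(requestId: string): Promise<void> {
    // The worker's fetch listener receives this and runs processExtractRequest(requestId).
    await pool.query("SELECT pg_notify('archive_extract', $1)", [requestId]);
  }
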
@@ -3,7 +3,9 @@ import { pool } from "./db/client.js";
 import { childLogger } from "./util/logger.js";
 import { withTdlibMutex } from "./util/mutex.js";
 import { processFetchRequest } from "./worker.js";
-import { generateInviteLink, createSupergroup } from "./tdlib/chats.js";
+import { processExtractRequest } from "./extract-listener.js";
+import { rebuildPackageDatabase } from "./rebuild.js";
+import { generateInviteLink, createSupergroup, searchPublicChat } from "./tdlib/chats.js";
 import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
 import { triggerImmediateCycle } from "./scheduler.js";
 import {

@@ -13,6 +15,7 @@ import {
   getActiveAccounts,
   upsertChannel,
   ensureAccountChannelLink,
+  updateFetchRequestStatus,
 } from "./db/queries.js";
 
 const log = childLogger("fetch-listener");

@@ -31,6 +34,8 @@ const RECONNECT_DELAY_MS = 5_000;
  * - `generate_invite` — payload = channelId → generate invite link for destination
  * - `create_destination` — payload = JSON { requestId, title } → create supergroup via TDLib
  * - `ingestion_trigger` — trigger an immediate ingestion cycle
+ * - `join_channel` — payload = JSON { requestId, input, accountId } → join/lookup channel by link/username
+ * - `rebuild_packages` — payload = requestId → rebuild package DB from destination channel
  *
  * If the underlying connection is lost, the listener automatically reconnects
  * so that pg_notify signals are never silently dropped.

@@ -47,6 +52,9 @@ async function connectListener(): Promise<void> {
     await pgClient.query("LISTEN generate_invite");
     await pgClient.query("LISTEN create_destination");
     await pgClient.query("LISTEN ingestion_trigger");
+    await pgClient.query("LISTEN join_channel");
+    await pgClient.query("LISTEN archive_extract");
+    await pgClient.query("LISTEN rebuild_packages");
 
     pgClient.on("notification", (msg) => {
       if (msg.channel === "channel_fetch" && msg.payload) {

@@ -57,6 +65,12 @@ async function connectListener(): Promise<void> {
         handleCreateDestination(msg.payload);
       } else if (msg.channel === "ingestion_trigger") {
         handleIngestionTrigger();
+      } else if (msg.channel === "join_channel" && msg.payload) {
+        handleJoinChannel(msg.payload);
+      } else if (msg.channel === "archive_extract" && msg.payload) {
+        handleArchiveExtract(msg.payload);
+      } else if (msg.channel === "rebuild_packages" && msg.payload) {
+        handleRebuildPackages(msg.payload);
       }
     });
 

@@ -82,7 +96,7 @@ async function connectListener(): Promise<void> {
       }
     });
 
-    log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger)");
+    log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages)");
   } catch (err) {
     log.error({ err }, "Failed to start fetch listener — retrying");
     scheduleReconnect();
@@ -260,6 +274,217 @@ function handleCreateDestination(payload: string): void {
   });
 }
 
+// ── Join channel handler ──
+
+/**
+ * Parse a Telegram link/username into its type and identifier.
+ *
+ * Supported formats:
+ * - @username or username → public chat search
+ * - https://t.me/username → public chat search
+ * - https://t.me/+INVITE_HASH → join by invite link
+ * - https://t.me/joinchat/INVITE_HASH → join by invite link (legacy)
+ */
+function parseTelegramInput(input: string): { type: "username"; username: string } | { type: "invite"; link: string } | null {
+  const trimmed = input.trim();
+
+  // Invite link patterns
+  const invitePatterns = [
+    /^https?:\/\/t\.me\/\+([a-zA-Z0-9_-]+)$/,
+    /^https?:\/\/t\.me\/joinchat\/([a-zA-Z0-9_-]+)$/,
+    /^https?:\/\/telegram\.me\/\+([a-zA-Z0-9_-]+)$/,
+    /^https?:\/\/telegram\.me\/joinchat\/([a-zA-Z0-9_-]+)$/,
+  ];
+
+  for (const pattern of invitePatterns) {
+    if (pattern.test(trimmed)) {
+      return { type: "invite", link: trimmed };
+    }
+  }
+
+  // Public link: https://t.me/username
+  const publicLinkMatch = trimmed.match(/^https?:\/\/(?:t\.me|telegram\.me)\/([a-zA-Z][a-zA-Z0-9_]{3,31})$/);
+  if (publicLinkMatch) {
+    return { type: "username", username: publicLinkMatch[1] };
+  }
+
+  // @username or bare username
+  const usernameMatch = trimmed.match(/^@?([a-zA-Z][a-zA-Z0-9_]{3,31})$/);
+  if (usernameMatch) {
+    return { type: "username", username: usernameMatch[1] };
+  }
+
+  return null;
+}
+
+function handleJoinChannel(payload: string): void {
+  fetchQueue = fetchQueue.then(async () => {
+    let requestId: string | undefined;
+    try {
+      const parsed = JSON.parse(payload) as { requestId: string; input: string; accountId: string };
+      requestId = parsed.requestId;
+
+      await withTdlibMutex("join-channel", async () => {
+        await updateFetchRequestStatus(requestId!, "IN_PROGRESS");
+
+        const accounts = await getActiveAccounts();
+        const account = accounts.find((a) => a.id === parsed.accountId) ?? accounts[0];
+        if (!account) {
+          throw new Error("No authenticated accounts available");
+        }
+
+        const client = await createTdlibClient({ id: account.id, phone: account.phone });
+
+        try {
+          const linkInfo = parseTelegramInput(parsed.input);
+          if (!linkInfo) {
+            throw new Error(
+              "Invalid input. Use a t.me link (e.g. https://t.me/channel_name), " +
+              "an invite link (e.g. https://t.me/+abc123), or a @username."
+            );
+          }
+
+          let chatInfo: { chatId: bigint; title: string; type: string; isForum: boolean };
+
+          if (linkInfo.type === "username") {
+            // Public chat: search by username
+            const result = await searchPublicChat(client, linkInfo.username);
+            if (!result) {
+              throw new Error(`Public channel "@${linkInfo.username}" not found. Check the username and try again.`);
+            }
+            if (result.type !== "channel" && result.type !== "supergroup") {
+              throw new Error(`"@${linkInfo.username}" is a ${result.type}, not a channel or group. Only channels and supergroups are supported.`);
+            }
+            chatInfo = { chatId: result.chatId, title: result.title, type: result.type, isForum: result.isForum };
+          } else {
+            // Private/invite link: join first, then get chat info
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            let joinResult: any;
+            try {
+              joinResult = await client.invoke({
+                _: "joinChatByInviteLink",
+                invite_link: linkInfo.link,
+              });
+            } catch (joinErr: unknown) {
+              const msg = joinErr instanceof Error ? joinErr.message : String(joinErr);
+              // "INVITE_REQUEST_SENT" means the chat requires admin approval
+              if (msg.includes("INVITE_REQUEST_SENT")) {
+                throw new Error("Join request sent. An admin of that channel must approve it before it can be added.");
+              }
+              // Already a member is fine
+              if (!msg.includes("USER_ALREADY_PARTICIPANT") && !msg.includes("INVITE_HASH_EXPIRED")) {
+                throw new Error(`Failed to join via invite link: ${msg}`);
+              }
+              // If already a participant, we need to get chat info from the link
+              // Try checkChatInviteLink to get the chat id
+              try {
+                // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                const checkResult = (await client.invoke({
+                  _: "checkChatInviteLink",
+                  invite_link: linkInfo.link,
+                })) as any;
+                if (checkResult.chat_id) {
+                  joinResult = { id: checkResult.chat_id };
+                } else {
+                  throw joinErr;
+                }
+              } catch {
+                throw joinErr;
+              }
+            }
+
+            // Get full chat info
+            const chatId = joinResult?.id ?? joinResult?.chat_id;
+            if (!chatId) {
+              throw new Error("Joined channel but could not determine chat ID.");
+            }
+
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const chat = (await client.invoke({ _: "getChat", chat_id: chatId })) as any;
+            let type: string = "other";
+            let isForum = false;
+
+            if (chat.type?._ === "chatTypeSupergroup") {
+              try {
+                // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                const sg = (await client.invoke({
+                  _: "getSupergroup",
+                  supergroup_id: chat.type.supergroup_id,
+                })) as any;
+                type = sg.is_channel ? "channel" : "supergroup";
+                isForum = sg.is_forum ?? false;
+              } catch {
+                type = "supergroup";
+              }
+            } else if (chat.type?._ === "chatTypeBasicGroup") {
+              type = "group";
+            }
+
+            if (type !== "channel" && type !== "supergroup") {
+              throw new Error(`The joined chat is a ${type}, not a channel or group. Only channels and supergroups are supported.`);
+            }
+
+            chatInfo = { chatId: BigInt(chatId), title: chat.title ?? "Unknown", type, isForum };
+          }
+
+          // Upsert channel in DB (active as source by default since user explicitly added it)
+          const channel = await upsertChannel({
+            telegramId: chatInfo.chatId,
+            title: chatInfo.title,
+            type: "SOURCE",
+            isForum: chatInfo.isForum,
+            isActive: true,
+          });
+
+          // Link the account as READER
+          await ensureAccountChannelLink(account.id, channel.id, "READER");
+
+          log.info(
+            { channelId: channel.id, telegramId: chatInfo.chatId.toString(), title: chatInfo.title },
+            "Channel joined and added"
+          );
+
+          await updateFetchRequestStatus(requestId!, "COMPLETED", {
+            resultJson: JSON.stringify({
+              channelId: channel.id,
+              telegramId: chatInfo.chatId.toString(),
+              title: chatInfo.title,
+              type: chatInfo.type,
+              isForum: chatInfo.isForum,
+            }),
+          });
+        } finally {
+          await closeTdlibClient(client);
+        }
+      });
+    } catch (err) {
+      log.error({ err, payload }, "Failed to join channel");
+      if (requestId) {
+        try {
+          await updateFetchRequestStatus(requestId, "FAILED", {
+            error: err instanceof Error ? err.message : String(err),
+          });
+        } catch {
+          // Best-effort
+        }
+      }
+    }
+  });
+}
+
+// ── Archive extract handler ──
+
+function handleArchiveExtract(requestId: string): void {
+  fetchQueue = fetchQueue.then(async () => {
+    try {
+      log.info({ requestId }, "Archive extract request received");
+      await processExtractRequest(requestId);
+    } catch (err) {
+      log.error({ err, requestId }, "Failed to process archive extract request");
+    }
+  });
+}
+
 // ── Ingestion trigger handler ──
 
 function handleIngestionTrigger(): void {

@@ -272,3 +497,17 @@ function handleIngestionTrigger(): void {
     }
   });
 }
+
+// ── Package database rebuild handler ──
+
+function handleRebuildPackages(requestId: string): void {
+  fetchQueue = fetchQueue.then(async () => {
+    try {
+      await withTdlibMutex("rebuild-packages", () =>
+        rebuildPackageDatabase(requestId)
+      );
+    } catch (err) {
+      log.error({ err, requestId }, "Failed to rebuild package database");
+    }
+  });
+}
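
To make the parsing rules concrete, a few example inputs and what the parseTelegramInput helper added above returns for them (channel names are hypothetical; behavior follows its regexes):

  parseTelegramInput("@dragons_stash");
  // → { type: "username", username: "dragons_stash" }

  parseTelegramInput("https://t.me/dragons_stash");
  // → { type: "username", username: "dragons_stash" }

  parseTelegramInput("https://t.me/+AbC123xyz");
  // → { type: "invite", link: "https://t.me/+AbC123xyz" }

  parseTelegramInput("https://t.me/joinchat/AbC123xyz");
  // → { type: "invite", link: "https://t.me/joinchat/AbC123xyz" } (legacy form)

  parseTelegramInput("not a link!");
  // → null, so handleJoinChannel fails the request with a usage hint
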
@@ -3,6 +3,7 @@ import { config } from "./util/config.js";
 import { logger } from "./util/logger.js";
 import { markStaleRunsAsFailed } from "./db/queries.js";
 import { cleanupTempDir } from "./worker.js";
+import { recoverIncompleteUploads } from "./recovery.js";
 import { startScheduler, stopScheduler } from "./scheduler.js";
 import { startFetchListener, stopFetchListener } from "./fetch-listener.js";
 import { db, pool } from "./db/client.js";

@@ -26,6 +27,10 @@ async function main(): Promise<void> {
   await cleanupTempDir();
   await markStaleRunsAsFailed();
 
+  // Verify destination messages exist for all "uploaded" packages.
+  // Resets any packages whose dest message is missing so they get re-processed.
+  await recoverIncompleteUploads();
+
   // Start the fetch listener (pg_notify for on-demand channel fetching)
   await startFetchListener();
 
worker/src/preview/extract.ts (new file, 111 lines)
@@ -0,0 +1,111 @@
import { execFile } from "child_process";
import { promisify } from "util";
import { childLogger } from "../util/logger.js";
import type { FileEntry } from "../archive/zip-reader.js";

const execFileAsync = promisify(execFile);
const log = childLogger("preview-extract");

/** Max bytes we'll accept for an extracted preview image (2MB). */
const MAX_PREVIEW_BYTES = 2 * 1024 * 1024;

/** Image extensions we consider valid previews, in priority order. */
const IMAGE_EXTENSIONS = new Set(["jpg", "jpeg", "png"]);

/**
 * Pick the best preview image from the file entries list.
 *
 * Prefers files that look like dedicated preview images (01.jpg, insta.jpg,
 * preview.jpg) over arbitrary images buried in subdirectories.
 * Skips images that are suspiciously large (>2MB uncompressed).
 */
export function pickPreviewFile(entries: FileEntry[]): FileEntry | null {
  const candidates = entries.filter((e) => {
    if (!e.extension || !IMAGE_EXTENSIONS.has(e.extension.toLowerCase())) return false;
    // Skip very large images — they're probably textures, not previews
    if (e.uncompressedSize > BigInt(MAX_PREVIEW_BYTES)) return false;
    return true;
  });

  if (candidates.length === 0) return null;

  // Score candidates: lower depth + preview-like names win
  const scored = candidates.map((entry) => {
    const depth = entry.path.split("/").length - 1;
    const nameLower = entry.fileName.toLowerCase();

    let nameScore = 10; // default
    // Known preview-like names get priority
    if (/^(preview|thumb|cover|insta)\b/i.test(nameLower)) {
      nameScore = 0;
    } else if (/^0*[1-2]\.(jpe?g|png)$/i.test(nameLower)) {
      // 01.jpg, 1.jpg, 02.jpg — common preview filenames
      nameScore = 1;
    } else if (/^0*[3-9]\.(jpe?g|png)$/i.test(nameLower)) {
      nameScore = 2;
    }

    return { entry, score: nameScore + depth };
  });

  scored.sort((a, b) => a.score - b.score);
  return scored[0].entry;
}

/**
 * Extract a single file from an archive and return its contents as a Buffer.
 *
 * Uses the appropriate CLI tool based on archive type:
 * - ZIP: unzip -p
 * - RAR: unrar p -inul
 * - 7Z: 7z e -so
 */
export async function extractPreviewImage(
  archivePath: string,
  archiveType: "ZIP" | "RAR" | "SEVEN_Z" | "DOCUMENT",
  filePath: string
): Promise<Buffer | null> {
  if (archiveType === "DOCUMENT") return null;

  try {
    let stdout: Buffer;

    if (archiveType === "ZIP") {
      const result = await execFileAsync("unzip", ["-p", archivePath, filePath], {
        timeout: 15000,
        maxBuffer: MAX_PREVIEW_BYTES,
        encoding: "buffer",
      });
      stdout = result.stdout as unknown as Buffer;
    } else if (archiveType === "RAR") {
      const result = await execFileAsync("unrar", ["p", "-inul", archivePath, filePath], {
        timeout: 15000,
        maxBuffer: MAX_PREVIEW_BYTES,
        encoding: "buffer",
      });
      stdout = result.stdout as unknown as Buffer;
    } else {
      // SEVEN_Z
      const result = await execFileAsync("7z", ["e", "-so", archivePath, filePath], {
        timeout: 15000,
        maxBuffer: MAX_PREVIEW_BYTES,
        encoding: "buffer",
      });
      stdout = result.stdout as unknown as Buffer;
    }

    if (stdout.length === 0) {
      log.warn({ archivePath, filePath }, "Extracted preview image is empty");
      return null;
    }

    log.debug(
      { archivePath, filePath, bytes: stdout.length },
      "Extracted preview image from archive"
    );
    return stdout;
  } catch (err) {
    log.warn({ err, archivePath, filePath }, "Failed to extract preview image from archive");
    return null;
  }
}
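
To make the scoring concrete, a small sketch with fabricated entries. The FileEntry fields used here (path, fileName, extension, uncompressedSize) are the ones pickPreviewFile reads; the rest of the type is glossed over with a cast:

  import { pickPreviewFile } from "./preview/extract.js";
  import type { FileEntry } from "./archive/zip-reader.js";

  const entry = (path: string, size: number) =>
    ({
      path,
      fileName: path.split("/").pop()!,
      extension: path.split(".").pop()!,
      uncompressedSize: BigInt(size),
    } as FileEntry);

  const best = pickPreviewFile([
    entry("textures/wood.jpg", 900_000), // nameScore 10 + depth 1 = 11
    entry("renders/04.png", 400_000),    // nameScore 2 + depth 1 = 3
    entry("01.jpg", 300_000),            // nameScore 1 + depth 0 = 1
    entry("insta.jpg", 250_000),         // nameScore 0 + depth 0 = 0, so it wins
    entry("huge.png", 5_000_000),        // filtered out up front: over the 2MB cap
  ]);
  // best?.path === "insta.jpg"
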
worker/src/rebuild.ts (new file, 411 lines)
@@ -0,0 +1,411 @@
import type { Client } from "tdl";
import { config } from "./util/config.js";
import { childLogger } from "./util/logger.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { invokeWithTimeout, MAX_SCAN_PAGES } from "./tdlib/download.js";
import { isArchiveAttachment } from "./archive/detect.js";
import { extractCreatorFromFileName } from "./archive/creator.js";
import { groupArchiveSets } from "./archive/multipart.js";
import type { TelegramMessage } from "./archive/multipart.js";
import {
  getActiveAccounts,
  getGlobalDestinationChannel,
} from "./db/queries.js";
import { db } from "./db/client.js";

const log = childLogger("rebuild");

export interface RebuildProgress {
  status: "PENDING" | "IN_PROGRESS" | "COMPLETED" | "FAILED";
  messagesScanned: number;
  documentsFound: number;
  packagesCreated: number;
  packagesSkipped: number;
  error?: string;
}

/**
 * Scan the destination channel for uploaded archive files and rebuild
 * the package database from what's actually there.
 *
 * Uses searchChatMessages (not getChatHistory) because the destination
 * channel may be a hidden-history supergroup.
 *
 * For each document found:
 * 1. Check if a Package record with that destMessageId already exists -> skip
 * 2. Try to match by fileName to an existing package without destMessageId -> update it
 * 3. Otherwise create a minimal Package record (no file listing, no content hash)
 *
 * This is a "best-effort" rebuild. It restores the mapping between destination
 * messages and package records so that the bot can deliver files. It does NOT
 * re-download archives or rebuild file listings (those require the source channel).
 */
export async function rebuildPackageDatabase(
  requestId: string
): Promise<void> {
  log.info({ requestId }, "Starting package database rebuild");

  try {
    await db.channelFetchRequest.update({
      where: { id: requestId },
      data: { status: "IN_PROGRESS" },
    });

    // Get an authenticated account for TDLib
    const accounts = await getActiveAccounts();
    if (accounts.length === 0) {
      throw new Error("No authenticated accounts available");
    }

    const destChannel = await getGlobalDestinationChannel();
    if (!destChannel) {
      throw new Error("No destination channel configured");
    }

    const account = accounts[0];
    const client = await createTdlibClient({
      id: account.id,
      phone: account.phone,
    });

    try {
      const progress: RebuildProgress = {
        status: "IN_PROGRESS",
        messagesScanned: 0,
        documentsFound: 0,
        packagesCreated: 0,
        packagesSkipped: 0,
      };

      // Write initial progress
      await updateRebuildProgress(requestId, progress);

      // Scan the destination channel for all document messages
      const archiveMessages = await scanDestinationChannel(
        client,
        destChannel.telegramId,
        async (scanned) => {
          progress.messagesScanned = scanned;
          await updateRebuildProgress(requestId, progress);
        }
      );

      progress.documentsFound = archiveMessages.length;
      await updateRebuildProgress(requestId, progress);

      log.info(
        {
          messagesScanned: progress.messagesScanned,
          documentsFound: archiveMessages.length,
        },
        "Destination channel scan complete"
      );

      // Group into archive sets (handles multipart)
      const archiveSets = groupArchiveSets(archiveMessages);

      log.info(
        { archiveSets: archiveSets.length, totalMessages: archiveMessages.length },
        "Grouped into archive sets"
      );

      // Get ALL source channels so we can try to match
      const sourceChannels = await db.telegramChannel.findMany({
        where: { type: "SOURCE" },
        select: { id: true, title: true },
      });
      // Use the first source channel as a fallback for unmatched packages
      const fallbackSourceId = sourceChannels[0]?.id ?? null;

      // Process each archive set
      for (const archiveSet of archiveSets) {
        const firstPart = archiveSet.parts[0];
        const fileName = firstPart.fileName;
        const destMessageId = firstPart.id;
        const totalSize = archiveSet.parts.reduce(
          (sum, p) => sum + p.fileSize,
          0n
        );

        // 1. Check if a package with this destMessageId already exists
        const existingByDest = await db.package.findFirst({
          where: {
            destChannelId: destChannel.id,
            destMessageId,
          },
          select: { id: true },
        });

        if (existingByDest) {
          progress.packagesSkipped++;
          await updateRebuildProgress(requestId, progress);
          continue;
        }

        // 2. Try to match by fileName to an existing package without destMessageId
        const existingByName = await db.package.findFirst({
          where: {
            fileName,
            destMessageId: null,
          },
          select: { id: true },
        });

        if (existingByName) {
          // Update existing record with destination info
          await db.package.update({
            where: { id: existingByName.id },
            data: {
              destChannelId: destChannel.id,
              destMessageId,
              isMultipart: archiveSet.parts.length > 1,
              partCount: archiveSet.parts.length,
            },
          });
          progress.packagesCreated++;
          log.debug({ fileName, destMessageId: Number(destMessageId) }, "Updated existing package with dest info");
          await updateRebuildProgress(requestId, progress);
          continue;
        }

        // 3. Create a new minimal Package record
        // We don't have the source message or content hash, so generate a placeholder hash
        const placeholderHash = `rebuild:${destChannel.id}:${destMessageId}`;
        const creator = extractCreatorFromFileName(fileName) ?? null;
        const archiveType = archiveSet.type;

        // We need a sourceChannelId (required FK). Use fallback if available.
        if (!fallbackSourceId) {
          log.warn(
            { fileName },
            "No source channels exist — cannot create package record without a source channel"
          );
          progress.packagesSkipped++;
          await updateRebuildProgress(requestId, progress);
          continue;
        }

        try {
          await db.package.create({
            data: {
              contentHash: placeholderHash,
              fileName,
              fileSize: totalSize,
              archiveType,
              sourceChannelId: fallbackSourceId,
              sourceMessageId: 0n, // Unknown — rebuilt from destination
              destChannelId: destChannel.id,
              destMessageId,
              isMultipart: archiveSet.parts.length > 1,
              partCount: archiveSet.parts.length,
              fileCount: 0,
              creator,
            },
          });
          progress.packagesCreated++;
          log.debug(
            { fileName, destMessageId: Number(destMessageId), creator },
            "Created new package from destination"
          );
        } catch (err) {
          // Unique constraint on contentHash — might be a race or duplicate
          if (err instanceof Error && err.message.includes("Unique constraint")) {
            log.debug({ fileName, placeholderHash }, "Package already exists (hash conflict), skipping");
            progress.packagesSkipped++;
          } else {
            throw err;
          }
        }

        await updateRebuildProgress(requestId, progress);
      }

      // Done
      progress.status = "COMPLETED";
      await updateRebuildProgress(requestId, progress);

      await db.channelFetchRequest.update({
        where: { id: requestId },
        data: {
          status: "COMPLETED",
          resultJson: JSON.stringify(progress),
        },
      });

      log.info(
        {
          messagesScanned: progress.messagesScanned,
          documentsFound: progress.documentsFound,
          packagesCreated: progress.packagesCreated,
          packagesSkipped: progress.packagesSkipped,
        },
        "Package database rebuild complete"
      );
    } finally {
      await closeTdlibClient(client);
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    log.error({ err, requestId }, "Package database rebuild failed");

    await db.channelFetchRequest.update({
      where: { id: requestId },
      data: {
        status: "FAILED",
        error: message,
        resultJson: JSON.stringify({
          status: "FAILED",
          error: message,
        }),
      },
    });
  }
}

/**
 * Scan the destination channel for document messages using searchChatMessages.
 * Returns archive messages in chronological order (oldest first).
 */
async function scanDestinationChannel(
  client: Client,
  chatId: bigint,
  onProgress?: (messagesScanned: number) => Promise<void>
): Promise<TelegramMessage[]> {
  const archives: TelegramMessage[] = [];
  let currentFromId = 0;
  let totalScanned = 0;
  let pageCount = 0;
  let lastProgressUpdate = 0;

  // eslint-disable-next-line no-constant-condition
  while (true) {
    if (pageCount >= MAX_SCAN_PAGES) {
      log.warn(
        { chatId: chatId.toString(), pageCount, totalScanned },
        "Hit max page limit for destination scan, stopping"
      );
      break;
    }
    pageCount++;

    const previousFromId = currentFromId;

    const result = await invokeWithTimeout<{
      messages?: {
        id: number;
        date: number;
        content: {
          _: string;
          document?: {
            file_name?: string;
            document?: {
              id: number;
              size: number;
            };
          };
        };
      }[];
    }>(client, {
      _: "searchChatMessages",
      chat_id: Number(chatId),
      query: "",
      from_message_id: currentFromId,
      offset: 0,
      limit: 100,
      filter: { _: "searchMessagesFilterDocument" },
      sender_id: null,
      message_thread_id: 0,
      saved_messages_topic_id: 0,
    });

    if (!result.messages || result.messages.length === 0) break;

    totalScanned += result.messages.length;

    for (const msg of result.messages) {
      const doc = msg.content?.document;
      if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
        archives.push({
          id: BigInt(msg.id),
          fileName: doc.file_name,
          fileId: String(doc.document.id),
          fileSize: BigInt(doc.document.size),
          date: new Date(msg.date * 1000),
        });
      }
    }

    // Throttle progress updates to every 2 seconds
    const now = Date.now();
    if (onProgress && now - lastProgressUpdate >= 2000) {
      lastProgressUpdate = now;
      await onProgress(totalScanned);
    }

    currentFromId = result.messages[result.messages.length - 1].id;

    // Stuck detection
    if (currentFromId === previousFromId) {
      log.warn(
        { chatId: chatId.toString(), currentFromId, totalScanned },
        "Pagination stuck, breaking"
      );
      break;
    }

    if (result.messages.length < 100) break;

    await sleep(config.apiDelayMs);
  }

  // Final progress update
  if (onProgress) {
    await onProgress(totalScanned);
  }

  log.info(
    {
      chatId: chatId.toString(),
      archives: archives.length,
      totalScanned,
      pages: pageCount,
    },
    "Destination channel scan complete"
  );

  // Reverse to chronological order (oldest first)
  return archives.reverse();
}

/**
 * Update the rebuild progress in the fetch request's resultJson field.
 * Throttled to avoid excessive DB writes.
 */
let lastUpdateTime = 0;
async function updateRebuildProgress(
  requestId: string,
  progress: RebuildProgress
): Promise<void> {
  const now = Date.now();
  // Throttle to every 2 seconds, but always write for status changes
  if (
    progress.status !== "IN_PROGRESS" ||
    now - lastUpdateTime >= 2000
  ) {
    lastUpdateTime = now;
    try {
      await db.channelFetchRequest.update({
        where: { id: requestId },
        data: {
          resultJson: JSON.stringify(progress),
        },
      });
    } catch {
      // Best-effort
    }
  }
}

function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
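
One detail worth spelling out from the code above: the placeholder contentHash keys each rebuilt record to its destination message, so re-running a rebuild is effectively idempotent; step 1 skips existing records and the unique-constraint catch backstops duplicates. A tiny illustration (ids fabricated):

  function placeholderHashFor(destChannelId: string, destMessageId: bigint): string {
    // Same format as rebuild.ts uses; unique per destination message.
    return `rebuild:${destChannelId}:${destMessageId}`;
  }

  placeholderHashFor("ch_9f2a", 12345678n); // "rebuild:ch_9f2a:12345678"
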
worker/src/recovery.ts (new file, 187 lines)
@@ -0,0 +1,187 @@
import { childLogger } from "./util/logger.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { withFloodWait } from "./util/retry.js";
import {
  getActiveAccounts,
  getPackagesWithDestMessage,
  resetPackageDestination,
  getGlobalDestinationChannel,
} from "./db/queries.js";
import type { Client } from "tdl";

const log = childLogger("recovery");

/**
 * Verify that destination messages still exist in Telegram for all
 * packages that claim to be uploaded. If a message is missing (deleted
 * or never actually committed), reset the package so the next ingestion
 * run will re-download and re-upload it.
 *
 * This handles the case where the worker crashed mid-upload: TDLib may
 * have returned a temporary message ID that was stored as destMessageId
 * but the upload never completed server-side, or the message was later
 * deleted from the destination channel.
 *
 * Called once on worker startup, before the scheduler begins.
 */
export async function recoverIncompleteUploads(): Promise<void> {
  const packages = await getPackagesWithDestMessage();
  if (packages.length === 0) {
    log.debug("No packages with destination messages to verify");
    return;
  }

  // We need a TDLib client to verify messages. Use the first active account.
  const accounts = await getActiveAccounts();
  if (accounts.length === 0) {
    log.info("No active accounts available for upload verification, skipping recovery");
    return;
  }

  const destChannel = await getGlobalDestinationChannel();
  if (!destChannel) {
    log.info("No destination channel configured, skipping recovery");
    return;
  }

  // Group packages by destChannelId for efficient verification
  const byChannel = new Map<string, typeof packages>();
  for (const pkg of packages) {
    const channelId = pkg.destChannelId!;
    if (!byChannel.has(channelId)) {
      byChannel.set(channelId, []);
    }
    byChannel.get(channelId)!.push(pkg);
  }

  log.info(
    { totalPackages: packages.length, channels: byChannel.size },
    "Verifying destination messages exist in Telegram"
  );

  const account = accounts[0];
  let client: Client | undefined;

  try {
    client = await createTdlibClient({ id: account.id, phone: account.phone });

    // Load the chat list so TDLib can resolve chat IDs
    try {
      await client.invoke({
        _: "getChats",
        chat_list: { _: "chatListMain" },
        limit: 1000,
      });
    } catch {
      // May already be loaded
    }

    let resetCount = 0;
    let verifiedCount = 0;

    for (const [, channelPackages] of byChannel) {
      for (const pkg of channelPackages) {
        const exists = await verifyMessageExists(
          client,
          destChannel.telegramId,
          pkg.destMessageId!
        );

        if (exists) {
          verifiedCount++;
        } else {
          log.warn(
            {
              packageId: pkg.id,
              fileName: pkg.fileName,
              destMessageId: Number(pkg.destMessageId),
            },
            "Destination message missing in Telegram, resetting package for re-upload"
          );
          await resetPackageDestination(pkg.id);
          resetCount++;
        }
      }
    }

    if (resetCount > 0) {
      log.info(
        { resetCount, verifiedCount, totalChecked: packages.length },
        "Upload recovery complete — packages reset for re-processing"
      );
    } else {
      log.info(
        { verifiedCount, totalChecked: packages.length },
        "Upload recovery complete — all destination messages verified"
      );
    }
  } catch (err) {
    log.error({ err }, "Upload recovery failed (non-fatal, will retry next startup)");
  } finally {
    if (client) {
      await closeTdlibClient(client);
    }
  }
}

/**
 * Check whether a message exists in a Telegram chat.
 * Returns false if the message was deleted or never existed.
 */
async function verifyMessageExists(
  client: Client,
  chatTelegramId: bigint,
  messageId: bigint
): Promise<boolean> {
  try {
    const result = await withFloodWait(
      () =>
        client.invoke({
          _: "getMessage",
          chat_id: Number(chatTelegramId),
          message_id: Number(messageId),
        }),
      "getMessage:verify"
    );

    // TDLib returns the message object if it exists.
    // A deleted message may return with content type "messageChatDeleteMessage"
    // or the call may throw. Check that we got a real message with content.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const msg = result as any;
    if (!msg || !msg.content) {
      return false;
    }

    // Check that the message has document content (our uploads are documents)
    // A message that exists but has no document content was likely cleared/replaced
    if (msg.content._ !== "messageDocument") {
      log.debug(
        {
          messageId: Number(messageId),
          contentType: msg.content._,
        },
        "Destination message exists but is not a document"
      );
      return false;
    }

    return true;
  } catch (err) {
    // TDLib throws "Message not found" (error code 404) for deleted messages
    const message = err instanceof Error ? err.message : String(err);
    const code = (err as { code?: number })?.code;

    if (code === 404 || message.includes("not found") || message.includes("Not Found")) {
      return false;
    }

    // For other errors (network issues, etc.), assume the message exists
    // to avoid incorrectly resetting packages due to transient failures
    log.warn(
      { err, messageId: Number(messageId) },
      "Could not verify message (assuming it exists)"
    );
    return true;
  }
}
@@ -176,6 +176,63 @@ export async function joinChatByInviteLink(
   log.info({ inviteLink }, "Joined chat by invite link");
 }
 
+/**
+ * Search for a public chat by username.
+ * Returns the chat info if found, or null if not found.
+ */
+export async function searchPublicChat(
+  client: Client,
+  username: string
+): Promise<TelegramChatInfo | null> {
+  try {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const chat = (await withFloodWait(
+      () => client.invoke({
+        _: "searchPublicChat",
+        username,
+      }),
+      "searchPublicChat"
+    )) as any;
+
+    const chatType = chat.type?._;
+    let type: TelegramChatInfo["type"] = "other";
+    let isForum = false;
+
+    if (chatType === "chatTypeSupergroup") {
+      try {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const sg = (await withFloodWait(
+          () => client.invoke({
+            _: "getSupergroup",
+            supergroup_id: chat.type.supergroup_id,
+          }),
+          "getSupergroup"
+        )) as any;
+
+        type = sg.is_channel ? "channel" : "supergroup";
+        isForum = sg.is_forum ?? false;
+      } catch {
+        type = "supergroup";
+      }
+    } else if (chatType === "chatTypeBasicGroup") {
+      type = "group";
+    } else if (chatType === "chatTypePrivate" || chatType === "chatTypeSecret") {
+      type = "private";
+    }
+
+    log.info({ username, chatId: chat.id, type }, "Found public chat");
+    return {
+      chatId: BigInt(chat.id),
+      title: chat.title ?? username,
+      type,
+      isForum,
+    };
+  } catch (err) {
+    log.warn({ username, err }, "Public chat not found");
+    return null;
+  }
+}
+
 function sleep(ms: number): Promise<void> {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
@@ -34,6 +34,7 @@ import { getChannelMessages, downloadFile, downloadPhotoThumbnail } from "./tdli
 import type { DownloadProgress, ChannelScanResult } from "./tdlib/download.js";
 import { isChatForum, getForumTopicList, getTopicMessages } from "./tdlib/topics.js";
 import { matchPreviewToArchive } from "./preview/match.js";
+import { pickPreviewFile, extractPreviewImage } from "./preview/extract.js";
 import { groupArchiveSets } from "./archive/multipart.js";
 import type { ArchiveSet } from "./archive/multipart.js";
 import { extractCreatorFromFileName, extractCreatorFromChannelTitle } from "./archive/creator.js";

@@ -971,6 +972,23 @@ async function processOneArchiveSet(
     previewMsgId = matchedPhoto.id;
   }
 
+  // ── Fallback: extract preview image from inside the archive ──
+  if (!previewData && entries.length > 0 && archiveSet.type !== "DOCUMENT") {
+    const previewEntry = pickPreviewFile(entries);
+    if (previewEntry) {
+      accountLog.debug(
+        { fileName: archiveName, previewFile: previewEntry.path },
+        "Attempting to extract preview image from archive"
+      );
+      const archiveTypeForExtract = archiveSet.type === "7Z" ? "SEVEN_Z" as const : archiveSet.type as "ZIP" | "RAR";
+      previewData = await extractPreviewImage(
+        tempPaths[0],
+        archiveTypeForExtract,
+        previewEntry.path
+      );
+    }
+  }
+
   // ── Resolve creator: topic name > filename extraction > channel title > null ──
   const creator = topicCreator
     ?? extractCreatorFromFileName(archiveName)