Fix worker getting stuck during sync: add timeouts, stuck detection, and safety limits

- Add invokeWithTimeout wrapper for TDLib API calls (2min timeout per call)
- Add stuck detection to getChannelMessages: break if from_message_id doesn't advance
- Add stuck detection to getTopicMessages: same protection for topic scanning
- Add stuck detection to getForumTopicList: break if pagination offsets don't advance
- Add max page limit (5000) to all scanning loops to prevent infinite pagination
- Add mutex wait timeout (30min) to prevent indefinite blocking when holder hangs
- Add cycle timeout (4h default, configurable via WORKER_CYCLE_TIMEOUT_MINUTES)
- Fix end-of-page detection to use actual limit value instead of hardcoded 100

Co-authored-by: xCyanGrizzly <53275238+xCyanGrizzly@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-03-05 13:14:53 +00:00
parent ad71346468
commit 9adbdb2a77
73 changed files with 3945 additions and 40 deletions

31
worker/dist/tdlib/chats.d.ts vendored Normal file
View File

@@ -0,0 +1,31 @@
import type { Client } from "tdl";
/**
 * Summary of one Telegram chat, as produced by `getAccountChats`.
 */
export interface TelegramChatInfo {
/** TDLib chat identifier, carried as a bigint. */
chatId: bigint;
/** Chat title. */
title: string;
/** Coarse chat category. */
type: "channel" | "supergroup" | "group" | "private" | "other";
/** Whether the chat is a forum (topics enabled). */
isForum: boolean;
/**
 * Optional member count.
 * NOTE(review): the compiled `getAccountChats` shipped next to this
 * declaration never sets this field — confirm whether any producer does.
 */
memberCount?: number;
}
/**
 * Fetch all chats the account is a member of.
 * Uses TDLib's getChats to load the chat list, then getChat for details.
 * Filters to channels and supergroups only (groups/privates are not useful for ingestion).
 *
 * @param client TDLib client used to issue the requests.
 * @returns The channels and supergroups that were found.
 */
export declare function getAccountChats(client: Client): Promise<TelegramChatInfo[]>;
/**
 * Generate an invite link for a chat. The account must be an admin or have
 * invite link permissions.
 *
 * @param client TDLib client used to issue the request.
 * @param chatId Identifier of the chat the link is created for.
 * @returns The invite link reported by TDLib.
 */
export declare function generateInviteLink(client: Client, chatId: bigint): Promise<string>;
/**
 * Create a new supergroup (private group) via TDLib.
 * Returns the chat ID and title.
 *
 * @param client TDLib client used to issue the request.
 * @param title Title requested for the new supergroup.
 * @returns The new chat's id and the title TDLib reported (the requested
 *          title is used when TDLib reports none).
 */
export declare function createSupergroup(client: Client, title: string): Promise<{
chatId: bigint;
title: string;
}>;
/**
 * Join a chat using an invite link.
 *
 * @param client TDLib client used to issue the request.
 * @param inviteLink Invite link passed to TDLib unchanged.
 */
export declare function joinChatByInviteLink(client: Client, inviteLink: string): Promise<void>;

124
worker/dist/tdlib/chats.js vendored Normal file
View File

@@ -0,0 +1,124 @@
import { childLogger } from "../util/logger.js";
import { config } from "../util/config.js";
// Module-level logger created via childLogger with the name "chats".
const log = childLogger("chats");
/**
* Fetch all chats the account is a member of.
* Uses TDLib's getChats to load the chat list, then getChat for details.
* Filters to channels and supergroups only (groups/privates are not useful for ingestion).
*/
export async function getAccountChats(client) {
const chats = [];
// Load main chat list — TDLib loads in batches
let offsetOrder = "9223372036854775807"; // max int64 as string
let offsetChatId = 0;
let hasMore = true;
while (hasMore) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const result = (await client.invoke({
_: "getChats",
chat_list: { _: "chatListMain" },
limit: 100,
}));
if (!result.chat_ids || result.chat_ids.length === 0) {
break;
}
for (const chatId of result.chat_ids) {
try {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const chat = (await client.invoke({
_: "getChat",
chat_id: chatId,
}));
const chatType = chat.type?._;
let type = "other";
let isForum = false;
if (chatType === "chatTypeSupergroup") {
// Get supergroup details to check if it's a channel or group
try {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const sg = (await client.invoke({
_: "getSupergroup",
supergroup_id: chat.type.supergroup_id,
}));
type = sg.is_channel ? "channel" : "supergroup";
isForum = sg.is_forum ?? false;
}
catch {
type = "supergroup";
}
}
else if (chatType === "chatTypeBasicGroup") {
type = "group";
}
else if (chatType === "chatTypePrivate" || chatType === "chatTypeSecret") {
type = "private";
}
// Only include channels and supergroups
if (type === "channel" || type === "supergroup") {
chats.push({
chatId: BigInt(chatId),
title: chat.title ?? `Chat ${chatId}`,
type,
isForum,
});
}
}
catch (err) {
log.warn({ chatId, err }, "Failed to get chat details, skipping");
}
}
// getChats with chatListMain returns all chats at once in newer TDLib versions
// So we break after the first batch
hasMore = false;
await sleep(config.apiDelayMs);
}
log.info({ total: chats.length }, "Fetched channels/supergroups from Telegram");
return chats;
}
/**
* Generate an invite link for a chat. The account must be an admin or have
* invite link permissions.
*/
export async function generateInviteLink(client, chatId) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const result = (await client.invoke({
_: "createChatInviteLink",
chat_id: Number(chatId),
name: "DragonsStash Auto-Join",
creates_join_request: false,
}));
const link = result.invite_link;
log.info({ chatId: chatId.toString(), link }, "Generated invite link");
return link;
}
/**
* Create a new supergroup (private group) via TDLib.
* Returns the chat ID and title.
*/
export async function createSupergroup(client, title) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const result = (await client.invoke({
_: "createNewSupergroupChat",
title,
is_forum: false,
is_channel: false,
description: "DragonsStash archive destination — all accounts write here",
}));
const chatId = BigInt(result.id);
log.info({ chatId: chatId.toString(), title }, "Created new supergroup");
return { chatId, title: result.title ?? title };
}
/**
* Join a chat using an invite link.
*/
export async function joinChatByInviteLink(client, inviteLink) {
await client.invoke({
_: "joinChatByInviteLink",
invite_link: inviteLink,
});
log.info({ inviteLink }, "Joined chat by invite link");
}
function sleep(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
//# sourceMappingURL=chats.js.map

1
worker/dist/tdlib/chats.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"chats.js","sourceRoot":"","sources":["../../src/tdlib/chats.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,MAAM,GAAG,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;AAUjC;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,MAAc;IAEd,MAAM,KAAK,GAAuB,EAAE,CAAC;IAErC,+CAA+C;IAC/C,IAAI,WAAW,GAAG,qBAAqB,CAAC,CAAC,sBAAsB;IAC/D,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,OAAO,GAAG,IAAI,CAAC;IAEnB,OAAO,OAAO,EAAE,CAAC;QACf,8DAA8D;QAC9D,MAAM,MAAM,GAAG,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC;YAClC,CAAC,EAAE,UAAU;YACb,SAAS,EAAE,EAAE,CAAC,EAAE,cAAc,EAAE;YAChC,KAAK,EAAE,GAAG;SACX,CAAC,CAA2B,CAAC;QAE9B,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrD,MAAM;QACR,CAAC;QAED,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACrC,IAAI,CAAC;gBACH,8DAA8D;gBAC9D,MAAM,IAAI,GAAG,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC;oBAChC,CAAC,EAAE,SAAS;oBACZ,OAAO,EAAE,MAAM;iBAChB,CAAC,CAAQ,CAAC;gBAEX,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;gBAC9B,IAAI,IAAI,GAA6B,OAAO,CAAC;gBAC7C,IAAI,OAAO,GAAG,KAAK,CAAC;gBAEpB,IAAI,QAAQ,KAAK,oBAAoB,EAAE,CAAC;oBACtC,6DAA6D;oBAC7D,IAAI,CAAC;wBACH,8DAA8D;wBAC9D,MAAM,EAAE,GAAG,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC;4BAC9B,CAAC,EAAE,eAAe;4BAClB,aAAa,EAAE,IAAI,CAAC,IAAI,CAAC,aAAa;yBACvC,CAAC,CAAQ,CAAC;wBAEX,IAAI,GAAG,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC;wBAChD,OAAO,GAAG,EAAE,CAAC,QAAQ,IAAI,KAAK,CAAC;oBACjC,CAAC;oBAAC,MAAM,CAAC;wBACP,IAAI,GAAG,YAAY,CAAC;oBACtB,CAAC;gBACH,CAAC;qBAAM,IAAI,QAAQ,KAAK,oBAAoB,EAAE,CAAC;oBAC7C,IAAI,GAAG,OAAO,CAAC;gBACjB,CAAC;qBAAM,IAAI,QAAQ,KAAK,iBAAiB,IAAI,QAAQ,KAAK,gBAAgB,EAAE,CAAC;oBAC3E,IAAI,GAAG,SAAS,CAAC;gBACnB,CAAC;gBAED,wCAAwC;gBACxC,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,YAAY,EAAE,CAAC;oBAChD,KAAK,CAAC,IAAI,CAAC;wBACT,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;wBACtB,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,QAAQ,MAAM,EAAE;wBACrC,IAAI;wBACJ,OAAO;qBACR,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,EAAE,sCAAsC,CAAC,CAAC;YACpE,CA
AC;QACH,CAAC;QAED,+EAA+E;QAC/E,oCAAoC;QACpC,OAAO,GAAG,KAAK,CAAC;QAEhB,MAAM,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;IACjC,CAAC;IAED,GAAG,CAAC,IAAI,CACN,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAE,EACvB,4CAA4C,CAC7C,CAAC;IAEF,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,MAAc,EACd,MAAc;IAEd,8DAA8D;IAC9D,MAAM,MAAM,GAAG,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC;QAClC,CAAC,EAAE,sBAAsB;QACzB,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC;QACvB,IAAI,EAAE,wBAAwB;QAC9B,oBAAoB,EAAE,KAAK;KAC5B,CAAC,CAAQ,CAAC;IAEX,MAAM,IAAI,GAAG,MAAM,CAAC,WAAqB,CAAC;IAC1C,GAAG,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,QAAQ,EAAE,EAAE,IAAI,EAAE,EAAE,uBAAuB,CAAC,CAAC;IACvE,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,MAAc,EACd,KAAa;IAEb,8DAA8D;IAC9D,MAAM,MAAM,GAAG,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC;QAClC,CAAC,EAAE,yBAAyB;QAC5B,KAAK;QACL,QAAQ,EAAE,KAAK;QACf,UAAU,EAAE,KAAK;QACjB,WAAW,EAAE,4DAA4D;KAC1E,CAAC,CAAQ,CAAC;IAEX,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACjC,GAAG,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,EAAE,wBAAwB,CAAC,CAAC;IACzE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,KAAK,EAAE,CAAC;AAClD,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,MAAc,EACd,UAAkB;IAElB,MAAM,MAAM,CAAC,MAAM,CAAC;QAClB,CAAC,EAAE,sBAAsB;QACzB,WAAW,EAAE,UAAU;KACxB,CAAC,CAAC;IACH,GAAG,CAAC,IAAI,CAAC,EAAE,UAAU,EAAE,EAAE,4BAA4B,CAAC,CAAC;AACzD,CAAC;AAED,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC"}

18
worker/dist/tdlib/client.d.ts vendored Normal file
View File

@@ -0,0 +1,18 @@
import { type Client } from "tdl";
/** Minimal account description needed to create a TDLib client. */
interface AccountConfig {
/** Account identifier; names the per-account TDLib state directory and keys the auth-state updates. */
id: string;
/** Phone number handed to TDLib when it asks for one during login. */
phone: string;
}
/**
 * Create and authenticate a TDLib client for a Telegram account.
 * Authentication flow communicates with the admin UI via the database:
 * - Worker sets authState to AWAITING_CODE when TDLib asks for phone code
 * - Admin enters the code via UI, which writes it to authCode field
 * - Worker polls DB for the code and feeds it to TDLib
 *
 * @param account Account id and phone number to log in with.
 * @returns The client once login has completed.
 * @throws Rethrows the login error after the account has been marked EXPIRED.
 */
export declare function createTdlibClient(account: AccountConfig): Promise<Client>;
/**
 * Close a TDLib client gracefully.
 * Errors raised while closing are logged as warnings and not rethrown.
 *
 * @param client Client to close.
 */
export declare function closeTdlibClient(client: Client): Promise<void>;
export {};

96
worker/dist/tdlib/client.js vendored Normal file
View File

@@ -0,0 +1,96 @@
import tdl, { createClient } from "tdl";
import { getTdjson } from "prebuilt-tdlib";
import path from "path";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import { updateAccountAuthState, getAccountAuthCode, } from "../db/queries.js";
// Module-level logger created via childLogger with the name "tdlib-client".
const log = childLogger("tdlib-client");
// Configure tdl to use the prebuilt tdjson shared library.
// This runs once, as a side effect of loading this module.
tdl.configure({ tdjson: getTdjson() });
/**
* Create and authenticate a TDLib client for a Telegram account.
* Authentication flow communicates with the admin UI via the database:
* - Worker sets authState to AWAITING_CODE when TDLib asks for phone code
* - Admin enters the code via UI, which writes it to authCode field
* - Worker polls DB for the code and feeds it to TDLib
*/
export async function createTdlibClient(account) {
const dbPath = path.join(config.tdlibStateDir, account.id);
const client = createClient({
apiId: config.telegramApiId,
apiHash: config.telegramApiHash,
databaseDirectory: dbPath,
filesDirectory: path.join(dbPath, "files"),
});
client.on("error", (err) => {
log.error({ err, accountId: account.id }, "TDLib client error");
});
try {
await client.login(() => ({
getPhoneNumber: async () => {
log.info({ accountId: account.id }, "TDLib requesting phone number");
return account.phone;
},
getAuthCode: async () => {
log.info({ accountId: account.id }, "TDLib requesting auth code");
await updateAccountAuthState(account.id, "AWAITING_CODE");
// Poll database for the code entered via admin UI
const code = await pollForAuthCode(account.id);
if (!code) {
throw new Error("Auth code not provided within timeout");
}
// Clear the code after reading
await updateAccountAuthState(account.id, "AUTHENTICATED", null);
return code;
},
getPassword: async () => {
log.info({ accountId: account.id }, "TDLib requesting 2FA password");
await updateAccountAuthState(account.id, "AWAITING_PASSWORD");
// Poll database for the password entered via admin UI
const code = await pollForAuthCode(account.id);
if (!code) {
throw new Error("2FA password not provided within timeout");
}
await updateAccountAuthState(account.id, "AUTHENTICATED", null);
return code;
},
}));
await updateAccountAuthState(account.id, "AUTHENTICATED");
log.info({ accountId: account.id }, "TDLib client authenticated");
return client;
}
catch (err) {
log.error({ err, accountId: account.id }, "TDLib authentication failed");
await updateAccountAuthState(account.id, "EXPIRED");
throw err;
}
}
/**
* Poll the database every 5 seconds for an auth code, up to 5 minutes.
*/
async function pollForAuthCode(accountId, timeoutMs = 300_000) {
const start = Date.now();
while (Date.now() - start < timeoutMs) {
const result = await getAccountAuthCode(accountId);
if (result?.authCode) {
return result.authCode;
}
await sleep(5000);
}
return null;
}
function sleep(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
* Close a TDLib client gracefully.
*/
export async function closeTdlibClient(client) {
try {
await client.close();
}
catch (err) {
log.warn({ err }, "Error closing TDLib client");
}
}
//# sourceMappingURL=client.js.map

1
worker/dist/tdlib/client.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/tdlib/client.ts"],"names":[],"mappings":"AAAA,OAAO,GAAG,EAAE,EAAE,YAAY,EAAe,MAAM,KAAK,CAAC;AACrD,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAC3C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EACL,sBAAsB,EACtB,kBAAkB,GACnB,MAAM,kBAAkB,CAAC;AAE1B,MAAM,GAAG,GAAG,WAAW,CAAC,cAAc,CAAC,CAAC;AAExC,0DAA0D;AAC1D,GAAG,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC;AAOvC;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,OAAsB;IAEtB,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAG,YAAY,CAAC;QAC1B,KAAK,EAAE,MAAM,CAAC,aAAa;QAC3B,OAAO,EAAE,MAAM,CAAC,eAAe;QAC/B,iBAAiB,EAAE,MAAM;QACzB,cAAc,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC;KAC3C,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;QACzB,GAAG,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,EAAE,oBAAoB,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,MAAM,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC;YACxB,cAAc,EAAE,KAAK,IAAI,EAAE;gBACzB,GAAG,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,EAAE,+BAA+B,CAAC,CAAC;gBACrE,OAAO,OAAO,CAAC,KAAK,CAAC;YACvB,CAAC;YACD,WAAW,EAAE,KAAK,IAAI,EAAE;gBACtB,GAAG,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,EAAE,4BAA4B,CAAC,CAAC;gBAClE,MAAM,sBAAsB,CAAC,OAAO,CAAC,EAAE,EAAE,eAAe,CAAC,CAAC;gBAE1D,kDAAkD;gBAClD,MAAM,IAAI,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;gBAC/C,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;gBAC3D,CAAC;gBAED,+BAA+B;gBAC/B,MAAM,sBAAsB,CAAC,OAAO,CAAC,EAAE,EAAE,eAAe,EAAE,IAAI,CAAC,CAAC;gBAChE,OAAO,IAAI,CAAC;YACd,CAAC;YACD,WAAW,EAAE,KAAK,IAAI,EAAE;gBACtB,GAAG,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,EAAE,+BAA+B,CAAC,CAAC;gBACrE,MAAM,sBAAsB,CAAC,OAAO,CAAC,EAAE,EAAE,mBAAmB,CAAC,CAAC;gBAE9D,sDAAsD;gBACtD,MAAM,IAAI,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;gBAC/C,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAA
C,CAAC;gBAC9D,CAAC;gBAED,MAAM,sBAAsB,CAAC,OAAO,CAAC,EAAE,EAAE,eAAe,EAAE,IAAI,CAAC,CAAC;gBAChE,OAAO,IAAI,CAAC;YACd,CAAC;SACF,CAAC,CAAC,CAAC;QAEJ,MAAM,sBAAsB,CAAC,OAAO,CAAC,EAAE,EAAE,eAAe,CAAC,CAAC;QAC1D,GAAG,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,EAAE,4BAA4B,CAAC,CAAC;QAClE,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,EAAE,6BAA6B,CAAC,CAAC;QACzE,MAAM,sBAAsB,CAAC,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC,CAAC;QACpD,MAAM,GAAG,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,eAAe,CAC5B,SAAiB,EACjB,SAAS,GAAG,OAAO;IAEnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,OAAO,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,GAAG,SAAS,EAAE,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,SAAS,CAAC,CAAC;QACnD,IAAI,MAAM,EAAE,QAAQ,EAAE,CAAC;YACrB,OAAO,MAAM,CAAC,QAAQ,CAAC;QACzB,CAAC;QACD,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;IACpB,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,MAAc;IACnD,IAAI,CAAC;QACH,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,EAAE,4BAA4B,CAAC,CAAC;IAClD,CAAC;AACH,CAAC"}

67
worker/dist/tdlib/download.d.ts vendored Normal file
View File

@@ -0,0 +1,67 @@
import type { Client } from "tdl";
import type { TelegramMessage } from "../archive/multipart.js";
import type { TelegramPhoto } from "../preview/match.js";
/** Maximum number of pages to scan per channel/topic to prevent infinite loops */
export declare const MAX_SCAN_PAGES = 5000;
/** Timeout for a single TDLib API call, in milliseconds (120000 ms = 2 minutes) */
export declare const INVOKE_TIMEOUT_MS = 120000;
/** Outcome of scanning a channel's (or topic's) message history. */
export interface ChannelScanResult {
/** Messages whose document attachment was recognised as an archive. */
archives: TelegramMessage[];
/** Photo messages collected as potential previews. */
photos: TelegramPhoto[];
/** Total number of messages examined during the scan. */
totalScanned: number;
}
/**
 * Progress hook for message scans. `getChannelMessages` calls it after each
 * page with the running total of messages scanned so far.
 */
export type ScanProgressCallback = (messagesScanned: number) => void;
/**
 * Invoke a TDLib method with a timeout to prevent indefinite hangs.
 * If TDLib does not respond within the timeout, the promise rejects.
 *
 * @param client TDLib client whose `invoke` is called.
 * @param request TDLib request object; its `_` field names the method and is
 *                quoted in the timeout error message.
 * @param timeoutMs Time budget in milliseconds; defaults to INVOKE_TIMEOUT_MS.
 * @returns Whatever `client.invoke(request)` resolves to.
 */
export declare function invokeWithTimeout<T>(client: Client, request: Record<string, any>, timeoutMs?: number): Promise<T>;
/**
 * Fetch messages from a channel, stopping once we've scanned past the
 * last-processed boundary (with one page of lookback for multipart safety).
 * Collects both archive attachments AND photo messages (for preview matching).
 * Returns messages in chronological order (oldest first).
 *
 * When `lastProcessedMessageId` is null (first run), scans everything.
 * The worker applies a post-grouping filter to skip fully-processed sets,
 * and keeps `packageExistsBySourceMessage` as a safety net.
 *
 * Safety features:
 * - Max page limit to prevent infinite loops
 * - Stuck detection: breaks if from_message_id stops advancing
 * - Timeout on each TDLib API call
 *
 * @param client TDLib client used to issue the requests.
 * @param chatId Chat to scan.
 * @param lastProcessedMessageId Boundary message id, or null/omitted to scan everything.
 * @param limit Page size per request; values above 100 are capped at 100. Defaults to 100.
 * @param onProgress Optional hook called after each page with the running total.
 */
export declare function getChannelMessages(client: Client, chatId: bigint, lastProcessedMessageId?: bigint | null, limit?: number, onProgress?: ScanProgressCallback): Promise<ChannelScanResult>;
/**
 * Download a photo thumbnail from Telegram and return its raw bytes.
 * Uses synchronous download (photos are small, typically < 100KB).
 * Returns null if download fails (non-critical).
 *
 * @param client TDLib client used to issue the request.
 * @param fileId TDLib file id as a decimal string (it is parsed with parseInt).
 * @returns The file contents, or null when the download did not complete or failed.
 */
export declare function downloadPhotoThumbnail(client: Client, fileId: string): Promise<Buffer | null>;
/** Snapshot of a file download, passed to a ProgressCallback. */
export interface DownloadProgress {
/** File id string as given to `downloadFile`. */
fileId: string;
/** File name as given to `downloadFile`. */
fileName: string;
/** Bytes downloaded so far, as reported by TDLib. */
downloadedBytes: number;
/** Expected total size in bytes. */
totalBytes: number;
/** Rounded percentage of totalBytes downloaded; 0 when totalBytes is not positive. */
percent: number;
/** True once TDLib reports the download as completed. */
isComplete: boolean;
}
/**
 * Download progress hook. `downloadFile` calls it once with 0% before the
 * download starts and again for every file update it receives for the file.
 */
export type ProgressCallback = (progress: DownloadProgress) => void;
/**
 * Download a file from Telegram to a local path with progress tracking
 * and integrity verification.
 *
 * Progress flow:
 * 1. Starts async download via TDLib
 * 2. Listens for `updateFile` events to track download progress
 * 3. Logs progress at every 10% increment
 * 4. Once complete, verifies the local file size matches the expected size
 * 5. Moves the file from TDLib's cache to the destination path
 *
 * Verification:
 * - Compares actual file size on disk to the expected size from Telegram
 * - Throws on mismatch (partial/corrupt download)
 * - Throws on timeout (configurable, scales with file size)
 * - Throws if download stops without completing (network error, etc.)
 *
 * @param client TDLib client used for the request and for update events.
 * @param fileId TDLib file id as a decimal string.
 * @param destPath Path the verified file is moved to.
 * @param expectedSize Expected size in bytes; a value of 0 disables the size check.
 * @param fileName Name used in log and error messages.
 * @param onProgress Optional progress hook.
 */
export declare function downloadFile(client: Client, fileId: string, destPath: string, expectedSize: bigint, fileName: string, onProgress?: ProgressCallback): Promise<void>;

307
worker/dist/tdlib/download.js vendored Normal file
View File

@@ -0,0 +1,307 @@
import { readFile, rename, copyFile, unlink, stat } from "fs/promises";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import { isArchiveAttachment } from "../archive/detect.js";
// Module-level logger created via childLogger with the name "download".
const log = childLogger("download");
/**
 * Maximum number of pages to scan per channel/topic to prevent infinite loops.
 * Exported because the topic scanner (topics.js) imports it as well.
 */
export const MAX_SCAN_PAGES = 5000;
/** Timeout for a single TDLib API call (ms); default for invokeWithTimeout */
export const INVOKE_TIMEOUT_MS = 120_000; // 2 minutes
/**
* Invoke a TDLib method with a timeout to prevent indefinite hangs.
* If TDLib does not respond within the timeout, the promise rejects.
*/
export async function invokeWithTimeout(client,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
request, timeoutMs = INVOKE_TIMEOUT_MS) {
return new Promise((resolve, reject) => {
const timer = setTimeout(() => {
reject(new Error(`TDLib invoke timed out after ${timeoutMs}ms for ${request._}`));
}, timeoutMs);
client.invoke(request)
.then((result) => {
clearTimeout(timer);
resolve(result);
})
.catch((err) => {
clearTimeout(timer);
reject(err);
});
});
}
/**
* Fetch messages from a channel, stopping once we've scanned past the
* last-processed boundary (with one page of lookback for multipart safety).
* Collects both archive attachments AND photo messages (for preview matching).
* Returns messages in chronological order (oldest first).
*
* When `lastProcessedMessageId` is null (first run), scans everything.
* The worker applies a post-grouping filter to skip fully-processed sets,
* and keeps `packageExistsBySourceMessage` as a safety net.
*
* Safety features:
* - Max page limit to prevent infinite loops
* - Stuck detection: breaks if from_message_id stops advancing
* - Timeout on each TDLib API call
*/
export async function getChannelMessages(client, chatId, lastProcessedMessageId, limit = 100, onProgress) {
const archives = [];
const photos = [];
const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
let currentFromId = 0;
let totalScanned = 0;
let pageCount = 0;
// eslint-disable-next-line no-constant-condition
while (true) {
if (pageCount >= MAX_SCAN_PAGES) {
log.warn({ chatId: chatId.toString(), pageCount, totalScanned }, "Hit max page limit for channel scan, stopping");
break;
}
pageCount++;
const previousFromId = currentFromId;
const result = await invokeWithTimeout(client, {
_: "getChatHistory",
chat_id: Number(chatId),
from_message_id: currentFromId,
offset: 0,
limit: Math.min(limit, 100),
only_local: false,
});
if (!result.messages || result.messages.length === 0)
break;
totalScanned += result.messages.length;
for (const msg of result.messages) {
// Check for archive documents
const doc = msg.content?.document;
if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
archives.push({
id: BigInt(msg.id),
fileName: doc.file_name,
fileId: String(doc.document.id),
fileSize: BigInt(doc.document.size),
date: new Date(msg.date * 1000),
});
continue;
}
// Check for photo messages (potential previews)
const photo = msg.content?.photo;
const caption = msg.content?.caption?.text ?? "";
if (photo?.sizes && photo.sizes.length > 0) {
const smallest = photo.sizes[0];
photos.push({
id: BigInt(msg.id),
date: new Date(msg.date * 1000),
caption,
fileId: String(smallest.photo.id),
fileSize: smallest.photo.size || smallest.photo.expected_size,
});
}
}
// Report scanning progress after each page
onProgress?.(totalScanned);
currentFromId = result.messages[result.messages.length - 1].id;
// Stuck detection: if from_message_id didn't advance, break to prevent infinite loop
if (currentFromId === previousFromId) {
log.warn({ chatId: chatId.toString(), currentFromId, totalScanned }, "Pagination stuck (from_message_id not advancing), breaking");
break;
}
// Stop scanning once we've gone past the boundary (this page is the lookback)
if (boundary && currentFromId < boundary)
break;
if (result.messages.length < Math.min(limit, 100))
break;
// Rate limit delay
await sleep(config.apiDelayMs);
}
log.info({ chatId: chatId.toString(), archives: archives.length, photos: photos.length, totalScanned, pages: pageCount }, "Channel scan complete");
// Reverse to chronological order (oldest first) so worker processes old→new
return {
archives: archives.reverse(),
photos: photos.reverse(),
totalScanned,
};
}
/**
* Download a photo thumbnail from Telegram and return its raw bytes.
* Uses synchronous download (photos are small, typically < 100KB).
* Returns null if download fails (non-critical).
*/
export async function downloadPhotoThumbnail(client, fileId) {
const numericId = parseInt(fileId, 10);
try {
const result = (await client.invoke({
_: "downloadFile",
file_id: numericId,
priority: 1, // Low priority — thumbnails are nice-to-have
offset: 0,
limit: 0,
synchronous: true, // Small file — wait for it
}));
if (result?.local?.is_downloading_completed && result.local.path) {
const data = await readFile(result.local.path);
log.debug({ fileId, bytes: data.length }, "Downloaded photo thumbnail");
return data;
}
}
catch (err) {
log.warn({ fileId, err }, "Failed to download photo thumbnail");
}
return null;
}
/**
 * Download a file from Telegram to a local path with progress tracking
 * and integrity verification.
 *
 * Progress flow:
 * 1. Starts async download via TDLib
 * 2. Listens for `updateFile` events to track download progress
 * 3. Logs progress at every 10% increment
 * 4. Once complete, verifies the local file size matches the expected size
 * 5. Moves the file from TDLib's cache to the destination path
 *
 * Verification:
 * - Compares actual file size on disk to the expected size from Telegram
 * - Throws on mismatch (partial/corrupt download)
 * - Throws on timeout (10 minutes per GB of expected size, minimum 5 minutes)
 * - Throws if download stops without completing (network error, etc.)
 *
 * The returned promise settles exactly once: completion can be observed via an
 * `updateFile` event, via the result of the `downloadFile` request itself
 * (file already cached), or pre-empted by the timeout; the `settled` flag
 * makes the first of these win. On timeout only the listener and timer are
 * removed — no cancel request is sent to TDLib from here.
 *
 * @param client TDLib client used for the request and for update events.
 * @param fileId TDLib file id as a decimal string.
 * @param destPath Path the verified file is moved to.
 * @param expectedSize Expected size in bytes (bigint); 0 disables the size check.
 * @param fileName Name used in log and error messages.
 * @param onProgress Optional hook; called once with 0% up front and again for
 *                   every matching `updateFile` event.
 */
export async function downloadFile(client, fileId, destPath, expectedSize, fileName, onProgress) {
const numericId = parseInt(fileId, 10);
const totalBytes = Number(expectedSize);
log.info({ fileId, fileName, destPath, totalBytes }, "Starting file download");
// Report initial progress
onProgress?.({
fileId,
fileName,
downloadedBytes: 0,
totalBytes,
percent: 0,
isComplete: false,
});
return new Promise((resolve, reject) => {
let lastLoggedPercent = 0;
// Set by whichever path (update event, invoke result, timeout, error) finishes first.
let settled = false;
// Timeout: 10 minutes per GB, minimum 5 minutes
const timeoutMs = Math.max(5 * 60_000, (totalBytes / (1024 * 1024 * 1024)) * 10 * 60_000);
const timer = setTimeout(() => {
if (!settled) {
settled = true;
cleanup();
reject(new Error(`Download timed out after ${Math.round(timeoutMs / 60_000)}min for ${fileName}`));
}
}, timeoutMs);
// Listen for file update events to track progress
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const handleUpdate = (update) => {
// Ignore every update that is not an updateFile for the file being downloaded.
if (update?._ !== "updateFile")
return;
const file = update.file;
if (!file || file.id !== numericId)
return;
const downloaded = file.local.downloaded_size;
const percent = totalBytes > 0 ? Math.round((downloaded / totalBytes) * 100) : 0;
// Log at every 10% increment
if (percent >= lastLoggedPercent + 10) {
// Round down to a multiple of 10 so the next log fires at the next decile.
lastLoggedPercent = percent - (percent % 10);
log.info({ fileId, fileName, downloaded, totalBytes, percent: `${percent}%` }, "Download progress");
}
// Report to callback
onProgress?.({
fileId,
fileName,
downloadedBytes: downloaded,
totalBytes,
percent,
isComplete: file.local.is_downloading_completed,
});
// Download finished
if (file.local.is_downloading_completed) {
if (!settled) {
settled = true;
cleanup();
verifyAndMove(file.local.path, destPath, totalBytes, fileName, fileId)
.then(resolve)
.catch(reject);
}
}
// Download stopped without completing (network error, cancelled, etc.)
if (!file.local.is_downloading_active &&
!file.local.is_downloading_completed) {
if (!settled) {
settled = true;
cleanup();
reject(new Error(`Download stopped unexpectedly for ${fileName} ` +
`(${downloaded}/${totalBytes} bytes, ${percent}%)`));
}
}
};
// Stops the timeout timer and detaches the update listener.
const cleanup = () => {
clearTimeout(timer);
client.off("update", handleUpdate);
};
// Subscribe to updates BEFORE starting download
client.on("update", handleUpdate);
// Start async download (non-blocking — progress via updateFile events)
client
.invoke({
_: "downloadFile",
file_id: numericId,
priority: 32,
offset: 0,
limit: 0,
synchronous: false,
})
.then((result) => {
// If the file was already cached locally, invoke returns immediately
const file = result;
if (file?.local?.is_downloading_completed && !settled) {
settled = true;
cleanup();
verifyAndMove(file.local.path, destPath, totalBytes, fileName, fileId)
.then(resolve)
.catch(reject);
}
})
.catch((err) => {
// The download request itself failed.
if (!settled) {
settled = true;
cleanup();
reject(err);
}
});
});
}
/**
* Verify the downloaded file's size matches the expected size,
* then move it to the destination path.
*/
async function verifyAndMove(localPath, destPath, expectedBytes, fileName, fileId) {
const stats = await stat(localPath);
const actualBytes = stats.size;
if (expectedBytes > 0 && actualBytes !== expectedBytes) {
log.error({ fileId, fileName, expectedBytes, actualBytes }, "Download size mismatch — file is incomplete or corrupted");
throw new Error(`Download verification failed for ${fileName}: ` +
`expected ${expectedBytes} bytes, got ${actualBytes} bytes`);
}
log.info({ fileId, fileName, bytes: actualBytes, destPath }, "File verified and complete");
// Move from TDLib's cache to our temp directory.
// Use rename first (fast, same filesystem), fall back to copy+delete
// when source and destination are on different filesystems (EXDEV).
try {
await rename(localPath, destPath);
}
catch (err) {
if (err.code === "EXDEV") {
log.debug({ fileId, fileName }, "Cross-device rename — falling back to copy + unlink");
await copyFile(localPath, destPath);
await unlink(localPath);
}
else {
throw err;
}
}
}
function sleep(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
//# sourceMappingURL=download.js.map

1
worker/dist/tdlib/download.js.map vendored Normal file

File diff suppressed because one or more lines are too long

32
worker/dist/tdlib/topics.d.ts vendored Normal file
View File

@@ -0,0 +1,32 @@
import type { Client } from "tdl";
import type { ChannelScanResult, ScanProgressCallback } from "./download.js";
/** One forum topic, as listed by `getForumTopicList`. */
export interface ForumTopic {
/** The topic's message thread id, as a bigint. */
topicId: bigint;
/** Topic name; "Unnamed" is used when TDLib reports none. */
name: string;
}
/**
 * Check if a chat is a forum supergroup (topics enabled).
 *
 * @param client TDLib client used to issue the requests.
 * @param chatId Chat to inspect.
 * @returns True for a forum supergroup; false otherwise, including when the
 *          lookup fails (the failure is logged, not thrown).
 */
export declare function isChatForum(client: Client, chatId: bigint): Promise<boolean>;
/**
 * Get all forum topics in a supergroup.
 * Includes stuck detection and timeout protection on API calls.
 * The "General" topic and entries without a message thread id are left out.
 *
 * @param client TDLib client used to issue the requests.
 * @param chatId Forum supergroup whose topics are listed.
 */
export declare function getForumTopicList(client: Client, chatId: bigint): Promise<ForumTopic[]>;
/**
 * Fetch messages from a specific forum topic (thread), stopping once
 * we've scanned past the last-processed boundary (with one page of lookback).
 * Uses searchChatMessages with message_thread_id to scan within a topic.
 *
 * Returns messages in chronological order (oldest first).
 *
 * When `lastProcessedMessageId` is null (first run), scans everything.
 * The worker applies a post-grouping filter to skip fully-processed sets,
 * and keeps `packageExistsBySourceMessage` as a safety net.
 *
 * Safety features:
 * - Max page limit to prevent infinite loops
 * - Stuck detection: breaks if from_message_id stops advancing
 * - Timeout on each TDLib API call
 *
 * @param client TDLib client used to issue the requests.
 * @param chatId Forum supergroup containing the topic.
 * @param topicId Topic (message thread) to scan.
 * @param lastProcessedMessageId Boundary message id, or null/omitted to scan everything.
 * @param limit NOTE(review): presumably the per-request page size, as in
 *              `getChannelMessages` — confirm against the implementation.
 * @param onProgress Optional progress hook.
 */
export declare function getTopicMessages(client: Client, chatId: bigint, topicId: bigint, lastProcessedMessageId?: bigint | null, limit?: number, onProgress?: ScanProgressCallback): Promise<ChannelScanResult>;

196
worker/dist/tdlib/topics.js vendored Normal file
View File

@@ -0,0 +1,196 @@
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import { isArchiveAttachment } from "../archive/detect.js";
import { invokeWithTimeout, MAX_SCAN_PAGES } from "./download.js";
// Module-level logger created via childLogger with the name "topics".
const log = childLogger("topics");
/**
 * Report whether the given chat is a supergroup with forum topics enabled.
 *
 * The chat is looked up with getChat first; if its type does not carry a
 * forum flag, the supergroup record is fetched as a second source (kept for
 * older TDLib versions). Any failure is logged and reported as "not a forum".
 */
export async function isChatForum(client, chatId) {
    try {
        const chat = await invokeWithTimeout(client, {
            _: "getChat",
            chat_id: Number(chatId),
        });
        const chatType = chat.type;
        // Only supergroups can be forums; every other chat type is rejected up front.
        if (chatType?._ !== "chatTypeSupergroup") {
            return false;
        }
        if (chatType.is_forum) {
            return true;
        }
        // Without a supergroup id there is nothing further to query.
        if (!chatType.supergroup_id) {
            return false;
        }
        // Fallback for older TDLib versions: read the flag from the supergroup record.
        const supergroup = await invokeWithTimeout(client, {
            _: "getSupergroup",
            supergroup_id: chatType.supergroup_id,
        });
        return supergroup.is_forum === true;
    }
    catch (err) {
        // Best effort: a failed lookup is logged and treated as "not a forum".
        log.warn({ err, chatId: chatId.toString() }, "Failed to check if chat is forum");
        return false;
    }
}
/**
 * Enumerate the forum topics of a supergroup, one page of up to 100 at a time.
 *
 * Entries without a thread id and the "General" topic are left out.
 * Paging ends on an empty page, when TDLib returns no next offsets, when the
 * offsets stop changing between pages (stuck detection), or once
 * MAX_SCAN_PAGES requests have been made. Every request goes through
 * invokeWithTimeout; its errors are not caught here.
 */
export async function getForumTopicList(client, chatId) {
    const found = [];
    // Paging cursor sent with each request; starts at all zeros.
    const cursor = { date: 0, messageId: 0, threadId: 0 };
    let requests = 0;
    for (;;) {
        if (requests >= MAX_SCAN_PAGES) {
            log.warn({ chatId: chatId.toString(), pageCount: requests, topicCount: found.length }, "Hit max page limit for topic enumeration, stopping");
            break;
        }
        requests += 1;
        const page = await invokeWithTimeout(client, {
            _: "getForumTopics",
            chat_id: Number(chatId),
            query: "",
            offset_date: cursor.date,
            offset_message_id: cursor.messageId,
            offset_message_thread_id: cursor.threadId,
            limit: 100,
        });
        const entries = page.topics;
        if (!entries || entries.length === 0) {
            break;
        }
        for (const entry of entries) {
            // Skip entries without a thread id, and the "General" topic,
            // which is not creator-specific.
            if (!entry.info?.message_thread_id || entry.info.is_general) {
                continue;
            }
            found.push({
                topicId: BigInt(entry.info.message_thread_id),
                name: entry.info.name ?? "Unnamed",
            });
        }
        const nextDate = page.next_offset_date;
        const nextMessageId = page.next_offset_message_id;
        const nextThreadId = page.next_offset_message_thread_id;
        // No next offsets at all means this was the last page.
        const hasMore = Boolean(nextDate || nextMessageId || nextThreadId);
        if (!hasMore) {
            break;
        }
        const upcoming = {
            date: nextDate ?? 0,
            messageId: nextMessageId ?? 0,
            threadId: nextThreadId ?? 0,
        };
        // Stuck detection: an unchanged cursor would request the same page forever.
        const unchanged = upcoming.date === cursor.date &&
            upcoming.messageId === cursor.messageId &&
            upcoming.threadId === cursor.threadId;
        if (unchanged) {
            log.warn({ chatId: chatId.toString(), topicCount: found.length }, "Topic pagination stuck (offsets not advancing), breaking");
            break;
        }
        Object.assign(cursor, upcoming);
        await sleep(config.apiDelayMs);
    }
    log.info({ chatId: chatId.toString(), topicCount: found.length }, "Enumerated forum topics");
    return found;
}
/**
 * Fetch messages from a specific forum topic (thread), stopping once
 * we've scanned past the last-processed boundary (with one page of lookback).
 * Uses searchChatMessages with message_thread_id to scan within a topic.
 *
 * Returns messages in chronological order (oldest first).
 *
 * When `lastProcessedMessageId` is null (first run), scans everything.
 * The worker applies a post-grouping filter to skip fully-processed sets,
 * and keeps `packageExistsBySourceMessage` as a safety net.
 *
 * Scanning stops when any of the following holds:
 * - TDLib returns an empty page, or reports `next_from_message_id === 0`
 * - the last message id of a page is below the boundary (that page is the lookback)
 * - from_message_id stops advancing (stuck detection)
 * - MAX_SCAN_PAGES requests have been made
 *
 * A page shorter than the requested limit is deliberately NOT treated as the
 * end of the topic: TDLib documents that searchChatMessages may return fewer
 * messages than `limit` for performance reasons, so a short page can still be
 * followed by older messages.
 *
 * Each TDLib call goes through invokeWithTimeout; its errors are not caught here.
 *
 * @param client TDLib client used to issue the search requests.
 * @param chatId Identifier of the forum supergroup.
 * @param topicId Thread identifier of the topic to scan.
 * @param lastProcessedMessageId Boundary message id, or null/undefined to scan everything.
 * @param limit Requested page size; capped at 100 per request.
 * @param onProgress Optional callback invoked after each page with the running scanned count.
 * @returns `{ archives, photos, totalScanned }` with both lists ordered oldest first.
 */
export async function getTopicMessages(client, chatId, topicId, lastProcessedMessageId, limit = 100, onProgress) {
    const archives = [];
    const photos = [];
    const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
    // Per-request page size, capped at 100; computed once since it never changes between pages.
    const pageSize = Math.min(limit, 100);
    let currentFromId = 0;
    let totalScanned = 0;
    let pageCount = 0;
    // eslint-disable-next-line no-constant-condition
    while (true) {
        if (pageCount >= MAX_SCAN_PAGES) {
            log.warn({ chatId: chatId.toString(), topicId: topicId.toString(), pageCount, totalScanned }, "Hit max page limit for topic scan, stopping");
            break;
        }
        pageCount++;
        const previousFromId = currentFromId;
        const result = await invokeWithTimeout(client, {
            _: "searchChatMessages",
            chat_id: Number(chatId),
            query: "",
            message_thread_id: Number(topicId),
            from_message_id: currentFromId,
            offset: 0,
            limit: pageSize,
            filter: null,
            sender_id: null,
            saved_messages_topic_id: 0,
        });
        // An empty page is the definitive end-of-results signal.
        if (!result.messages || result.messages.length === 0)
            break;
        totalScanned += result.messages.length;
        for (const msg of result.messages) {
            // Check for archive documents
            const doc = msg.content?.document;
            if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
                archives.push({
                    id: BigInt(msg.id),
                    fileName: doc.file_name,
                    fileId: String(doc.document.id),
                    fileSize: BigInt(doc.document.size),
                    date: new Date(msg.date * 1000),
                });
                continue;
            }
            // Check for photo messages (potential previews)
            const photo = msg.content?.photo;
            const caption = msg.content?.caption?.text ?? "";
            if (photo?.sizes && photo.sizes.length > 0) {
                // The first size entry is the one recorded (treated as the smallest).
                const smallest = photo.sizes[0];
                photos.push({
                    id: BigInt(msg.id),
                    date: new Date(msg.date * 1000),
                    caption,
                    fileId: String(smallest.photo.id),
                    fileSize: smallest.photo.size || smallest.photo.expected_size,
                });
            }
        }
        // Report scanning progress after each page
        onProgress?.(totalScanned);
        // The next request continues from the last message id of this page.
        currentFromId = result.messages[result.messages.length - 1].id;
        // Stuck detection: if from_message_id didn't advance, break to prevent infinite loop
        if (currentFromId === previousFromId) {
            log.warn({ chatId: chatId.toString(), topicId: topicId.toString(), currentFromId, totalScanned }, "Topic pagination stuck (from_message_id not advancing), breaking");
            break;
        }
        // Stop scanning once we've gone past the boundary (this page is the lookback)
        if (boundary && currentFromId < boundary)
            break;
        // End-of-results signal from TDLib: a next offset of 0 means nothing is left.
        // NOTE(review): assumes the response is TDLib's foundChatMessages object; when
        // the field is absent the loop simply continues until an empty page comes back.
        if (result.next_from_message_id === 0)
            break;
        await sleep(config.apiDelayMs);
    }
    log.info({ chatId: chatId.toString(), topicId: topicId.toString(), archives: archives.length, photos: photos.length, totalScanned, pages: pageCount }, "Topic scan complete");
    // Reverse to chronological order (oldest first) so worker processes old→new
    return {
        archives: archives.reverse(),
        photos: photos.reverse(),
        totalScanned,
    };
}
/**
 * Return a promise that resolves (with no value) after `ms` milliseconds.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
//# sourceMappingURL=topics.js.map

1
worker/dist/tdlib/topics.js.map vendored Normal file

File diff suppressed because one or more lines are too long