Fix worker getting stuck during sync: add timeouts, stuck detection, and safety limits

- Add invokeWithTimeout wrapper for TDLib API calls (2min timeout per call)
- Add stuck detection to getChannelMessages: break if from_message_id doesn't advance
- Add stuck detection to getTopicMessages: same protection for topic scanning
- Add stuck detection to getForumTopicList: break if pagination offsets don't advance
- Add max page limit (5000) to all scanning loops to prevent infinite pagination
- Add mutex wait timeout (30min) to prevent indefinite blocking when holder hangs
- Add cycle timeout (4h default, configurable via WORKER_CYCLE_TIMEOUT_MINUTES)
- Fix end-of-page detection to compare against the requested page size (min(limit, 100)) instead of a hardcoded 100

Co-authored-by: xCyanGrizzly <53275238+xCyanGrizzly@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-03-05 13:14:53 +00:00
parent ad71346468
commit 9adbdb2a77
73 changed files with 3945 additions and 40 deletions

View File

@@ -8,6 +8,12 @@ import type { TelegramPhoto } from "../preview/match.js";
const log = childLogger("download");
/**
 * Maximum number of pages to scan per channel/topic to prevent infinite loops.
 * Scan loops stop with a warning once this many pages have been requested.
 */
export const MAX_SCAN_PAGES = 5000;
/** Timeout for a single TDLib API call (ms); used as the default for `invokeWithTimeout`. */
export const INVOKE_TIMEOUT_MS = 120_000; // 2 minutes
interface TdPhotoSize {
type: string;
photo: {
@@ -71,6 +77,33 @@ export interface ChannelScanResult {
export type ScanProgressCallback = (messagesScanned: number) => void;
/**
 * Invoke a TDLib method with a timeout to prevent indefinite hangs.
 *
 * The request is raced against a timer. Whichever settles first decides the
 * outcome, and the timer is always released afterwards, including when
 * `client.invoke` throws synchronously, so no stray timer keeps the event
 * loop alive.
 *
 * Note: on timeout only the wait is abandoned; nothing here cancels the
 * underlying request.
 *
 * @param client - TDLib client whose `invoke` method is called.
 * @param request - Request object; its `_` field is used in the timeout message.
 * @param timeoutMs - Maximum time to wait in milliseconds (defaults to `INVOKE_TIMEOUT_MS`).
 * @returns The value the invoke call resolves with.
 * @throws Error if no response arrives within `timeoutMs`; otherwise rethrows
 *   whatever `client.invoke` throws or rejects with.
 */
export async function invokeWithTimeout<T>(
  client: Client,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  request: Record<string, any>,
  timeoutMs = INVOKE_TIMEOUT_MS
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  // Never resolves; it only rejects if the timer fires before the request settles.
  const timeout = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`TDLib invoke timed out after ${timeoutMs}ms for ${request._}`));
    }, timeoutMs);
  });

  try {
    // Promise.race subscribes to both promises, so a late rejection from the
    // request after a timeout is still handled and does not surface as unhandled.
    return await Promise.race([client.invoke(request) as Promise<T>, timeout]);
  } finally {
    // Runs on success, rejection, timeout, and synchronous throw alike.
    clearTimeout(timer);
  }
}
/**
* Fetch messages from a channel, stopping once we've scanned past the
* last-processed boundary (with one page of lookback for multipart safety).
@@ -80,6 +113,11 @@ export type ScanProgressCallback = (messagesScanned: number) => void;
* When `lastProcessedMessageId` is null (first run), scans everything.
* The worker applies a post-grouping filter to skip fully-processed sets,
* and keeps `packageExistsBySourceMessage` as a safety net.
*
* Safety features:
* - Max page limit to prevent infinite loops
* - Stuck detection: breaks if from_message_id stops advancing
* - Timeout on each TDLib API call
*/
export async function getChannelMessages(
client: Client,
@@ -94,17 +132,29 @@ export async function getChannelMessages(
let currentFromId = 0;
let totalScanned = 0;
let pageCount = 0;
// eslint-disable-next-line no-constant-condition
while (true) {
const result = (await client.invoke({
if (pageCount >= MAX_SCAN_PAGES) {
log.warn(
{ chatId: chatId.toString(), pageCount, totalScanned },
"Hit max page limit for channel scan, stopping"
);
break;
}
pageCount++;
const previousFromId = currentFromId;
const result = await invokeWithTimeout<{ messages: TdMessage[] }>(client, {
_: "getChatHistory",
chat_id: Number(chatId),
from_message_id: currentFromId,
offset: 0,
limit: Math.min(limit, 100),
only_local: false,
})) as { messages: TdMessage[] };
});
if (!result.messages || result.messages.length === 0) break;
@@ -144,17 +194,26 @@ export async function getChannelMessages(
currentFromId = result.messages[result.messages.length - 1].id;
// Stuck detection: if from_message_id didn't advance, break to prevent infinite loop
if (currentFromId === previousFromId) {
log.warn(
{ chatId: chatId.toString(), currentFromId, totalScanned },
"Pagination stuck (from_message_id not advancing), breaking"
);
break;
}
// Stop scanning once we've gone past the boundary (this page is the lookback)
if (boundary && currentFromId < boundary) break;
if (result.messages.length < 100) break;
if (result.messages.length < Math.min(limit, 100)) break;
// Rate limit delay
await sleep(config.apiDelayMs);
}
log.info(
{ chatId: chatId.toString(), archives: archives.length, photos: photos.length, totalScanned },
{ chatId: chatId.toString(), archives: archives.length, photos: photos.length, totalScanned, pages: pageCount },
"Channel scan complete"
);