Fix worker getting stuck during sync: add timeouts, stuck detection, and safety limits

- Add invokeWithTimeout wrapper for TDLib API calls (2min timeout per call)
- Add stuck detection to getChannelMessages: break if from_message_id doesn't advance
- Add stuck detection to getTopicMessages: same protection for topic scanning
- Add stuck detection to getForumTopicList: break if pagination offsets don't advance
- Add max page limit (5000) to all scanning loops to prevent infinite pagination
- Add mutex wait timeout (30min) to prevent indefinite blocking when holder hangs
- Add cycle timeout (4h default, configurable via WORKER_CYCLE_TIMEOUT_MINUTES)
- Fix end-of-page detection to use actual limit value instead of hardcoded 100

Co-authored-by: xCyanGrizzly <53275238+xCyanGrizzly@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-03-05 13:14:53 +00:00
parent ad71346468
commit 9adbdb2a77
73 changed files with 3945 additions and 40 deletions

307
worker/dist/tdlib/download.js vendored Normal file
View File

@@ -0,0 +1,307 @@
import { readFile, rename, copyFile, unlink, stat } from "fs/promises";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import { isArchiveAttachment } from "../archive/detect.js";
// Module-scoped logger created via childLogger with the name "download".
const log = childLogger("download");
/**
 * Maximum number of pages to scan per channel/topic to prevent infinite loops.
 * getChannelMessages checks its page counter against this value before each
 * request and stops with a warning once the cap is reached.
 */
export const MAX_SCAN_PAGES = 5000;
/**
 * Timeout for a single TDLib API call, in milliseconds.
 * Used as the default `timeoutMs` of invokeWithTimeout.
 */
export const INVOKE_TIMEOUT_MS = 120_000; // 2 minutes
/**
 * Invoke a TDLib method with a timeout to prevent indefinite hangs.
 *
 * The call to `client.invoke(request)` is raced against a timer. Whichever
 * settles first decides the outcome, and the timer is always cleared
 * afterwards — including when `client.invoke` throws synchronously — so no
 * stray timer keeps the event loop alive after the call has finished.
 *
 * Note: a timeout only rejects the returned promise; nothing here cancels
 * the underlying TDLib request.
 *
 * @param client    Object exposing `invoke(request)`.
 * @param request   TDLib request object; its `_` field (the method name) is
 *                  included in the timeout error message.
 * @param timeoutMs Maximum time to wait, in milliseconds
 *                  (defaults to INVOKE_TIMEOUT_MS).
 * @returns The value `client.invoke(request)` resolves with.
 * @throws {Error} When the timer fires first; otherwise whatever
 *                 `client.invoke` throws or rejects with.
 */
export async function invokeWithTimeout(client,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
request, timeoutMs = INVOKE_TIMEOUT_MS) {
    let timer;
    // Rejects when the deadline passes; never resolves on its own.
    const deadline = new Promise((_resolve, reject) => {
        timer = setTimeout(() => {
            reject(new Error(`TDLib invoke timed out after ${timeoutMs}ms for ${request._}`));
        }, timeoutMs);
    });
    try {
        return await Promise.race([client.invoke(request), deadline]);
    }
    finally {
        // Runs on success, rejection, timeout and synchronous throw alike.
        clearTimeout(timer);
    }
}
/**
 * Fetch messages from a channel, stopping once we've scanned past the
 * last-processed boundary (with one page of lookback for multipart safety).
 * Collects both archive attachments AND photo messages (for preview matching).
 * Returns messages in chronological order (oldest first).
 *
 * When `lastProcessedMessageId` is null (first run), scans everything.
 * The worker applies a post-grouping filter to skip fully-processed sets,
 * and keeps `packageExistsBySourceMessage` as a safety net.
 *
 * End of history is detected ONLY by an empty page. TDLib documents that
 * getChatHistory may return fewer messages than `limit` for performance
 * reasons, so a short page does not mean the history is exhausted and must
 * not end the scan.
 *
 * Safety features:
 * - Max page limit (MAX_SCAN_PAGES) to prevent infinite loops
 * - Stuck detection: breaks if from_message_id stops advancing
 * - Timeout on each TDLib API call (via invokeWithTimeout)
 *
 * @param client                 TDLib client passed through to invokeWithTimeout.
 * @param chatId                 Chat identifier; converted with Number() for the request.
 * @param lastProcessedMessageId Id of the last processed message, or null/0 to scan everything.
 * @param limit                  Requested page size; capped at 100 per request.
 * @param onProgress             Optional callback invoked after each page with the
 *                               running count of scanned messages.
 * @returns `{ archives, photos, totalScanned }` with both lists oldest-first.
 */
export async function getChannelMessages(client, chatId, lastProcessedMessageId, limit = 100, onProgress) {
    const archives = [];
    const photos = [];
    const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
    // Loop-invariant: the request never asks for more than 100 messages.
    const pageSize = Math.min(limit, 100);
    let currentFromId = 0;
    let totalScanned = 0;
    let pageCount = 0;
    // eslint-disable-next-line no-constant-condition
    while (true) {
        if (pageCount >= MAX_SCAN_PAGES) {
            log.warn({ chatId: chatId.toString(), pageCount, totalScanned }, "Hit max page limit for channel scan, stopping");
            break;
        }
        pageCount++;
        const previousFromId = currentFromId;
        const result = await invokeWithTimeout(client, {
            _: "getChatHistory",
            chat_id: Number(chatId),
            from_message_id: currentFromId,
            offset: 0,
            limit: pageSize,
            only_local: false,
        });
        // An empty (or missing) page is the only reliable end-of-history signal.
        if (!result.messages || result.messages.length === 0)
            break;
        totalScanned += result.messages.length;
        for (const msg of result.messages) {
            // Check for archive documents
            const doc = msg.content?.document;
            if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
                archives.push({
                    id: BigInt(msg.id),
                    fileName: doc.file_name,
                    fileId: String(doc.document.id),
                    fileSize: BigInt(doc.document.size),
                    date: new Date(msg.date * 1000),
                });
                continue;
            }
            // Check for photo messages (potential previews)
            const photo = msg.content?.photo;
            const caption = msg.content?.caption?.text ?? "";
            if (photo?.sizes && photo.sizes.length > 0) {
                // First entry of `sizes` — presumably the smallest variant; verify against TDLib.
                const smallest = photo.sizes[0];
                photos.push({
                    id: BigInt(msg.id),
                    date: new Date(msg.date * 1000),
                    caption,
                    fileId: String(smallest.photo.id),
                    fileSize: smallest.photo.size || smallest.photo.expected_size,
                });
            }
        }
        // Report scanning progress after each page
        onProgress?.(totalScanned);
        // The last message of the page becomes the cursor for the next request.
        currentFromId = result.messages[result.messages.length - 1].id;
        // Stuck detection: if from_message_id didn't advance, break to prevent infinite loop
        if (currentFromId === previousFromId) {
            log.warn({ chatId: chatId.toString(), currentFromId, totalScanned }, "Pagination stuck (from_message_id not advancing), breaking");
            break;
        }
        // Stop scanning once we've gone past the boundary (this page is the lookback)
        if (boundary && currentFromId < boundary)
            break;
        // Rate limit delay
        await sleep(config.apiDelayMs);
    }
    log.info({ chatId: chatId.toString(), archives: archives.length, photos: photos.length, totalScanned, pages: pageCount }, "Channel scan complete");
    // Reverse to chronological order (oldest first) so worker processes old→new
    return {
        archives: archives.reverse(),
        photos: photos.reverse(),
        totalScanned,
    };
}
/**
 * Download a photo thumbnail from Telegram and return its raw bytes.
 * Uses synchronous download (photos are small, typically < 100KB).
 * Returns null if download fails (non-critical).
 *
 * The TDLib call goes through invokeWithTimeout so that a hung TDLib cannot
 * block the caller forever; a timeout is treated like any other failure
 * (logged as a warning, null returned).
 *
 * @param client TDLib client.
 * @param fileId TDLib file id as a decimal string.
 * @returns The file contents read from TDLib's local path, or null when the
 *          download did not complete or any step failed.
 */
export async function downloadPhotoThumbnail(client, fileId) {
    const numericId = parseInt(fileId, 10);
    try {
        const result = await invokeWithTimeout(client, {
            _: "downloadFile",
            file_id: numericId,
            priority: 1, // Low priority — thumbnails are nice-to-have
            offset: 0,
            limit: 0,
            synchronous: true, // Small file — wait for it
        });
        if (result?.local?.is_downloading_completed && result.local.path) {
            const data = await readFile(result.local.path);
            log.debug({ fileId, bytes: data.length }, "Downloaded photo thumbnail");
            return data;
        }
    }
    catch (err) {
        // Best-effort: thumbnails are optional, so failures are only logged.
        log.warn({ fileId, err }, "Failed to download photo thumbnail");
    }
    return null;
}
/**
 * Download a Telegram file to `destPath`, reporting progress and verifying
 * the result.
 *
 * How it works:
 *  1. An initial 0% progress report is sent to `onProgress`.
 *  2. A timeout is armed (10 minutes per GB, never less than 5 minutes).
 *  3. An "update" listener is attached to the client BEFORE the download is
 *     requested, so no `updateFile` event can be missed.
 *  4. An asynchronous `downloadFile` request is issued to TDLib.
 *  5. Every matching `updateFile` event is forwarded to `onProgress`, and
 *     progress is logged each time another 10% has been reached.
 *  6. On completion the file is handed to verifyAndMove, whose outcome
 *     settles the returned promise.
 *
 * The promise settles exactly once; settling clears the timeout and detaches
 * the listener.
 *
 * Rejects when:
 *  - the timeout elapses,
 *  - an update reports the download as neither active nor completed,
 *  - the TDLib request itself fails,
 *  - verifyAndMove rejects.
 *
 * @param client       TDLib client (`on`, `off`, `invoke`).
 * @param fileId       TDLib file id as a decimal string.
 * @param destPath     Final location of the downloaded file.
 * @param expectedSize Expected size in bytes; converted with Number().
 * @param fileName     Name used in logs, progress reports and error messages.
 * @param onProgress   Optional progress callback.
 */
export async function downloadFile(client, fileId, destPath, expectedSize, fileName, onProgress) {
    const numericId = parseInt(fileId, 10);
    const totalBytes = Number(expectedSize);
    log.info({ fileId, fileName, destPath, totalBytes }, "Starting file download");
    // Let the caller show a 0% state before the first TDLib event arrives.
    onProgress?.({ fileId, fileName, downloadedBytes: 0, totalBytes, percent: 0, isComplete: false });
    return new Promise((resolve, reject) => {
        const BYTES_PER_GB = 1024 * 1024 * 1024;
        // 10 minutes per GB, minimum 5 minutes.
        const timeoutMs = Math.max(5 * 60_000, (totalBytes / BYTES_PER_GB) * 10 * 60_000);
        let lastLoggedPercent = 0;
        let settled = false;
        let timer;
        // Runs `outcome` at most once, after tearing down the timer and listener.
        const settleOnce = (outcome) => {
            if (settled)
                return;
            settled = true;
            clearTimeout(timer);
            client.off("update", onUpdate);
            outcome();
        };
        // Completion path shared by the event listener and the invoke result.
        const finishWithFile = (local) => settleOnce(() => {
            verifyAndMove(local.path, destPath, totalBytes, fileName, fileId)
                .then(resolve)
                .catch(reject);
        });
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        function onUpdate(update) {
            if (update?._ !== "updateFile")
                return;
            const { file } = update;
            // Ignore updates that belong to other files.
            if (!file || file.id !== numericId)
                return;
            const { local } = file;
            const downloaded = local.downloaded_size;
            const percent = totalBytes > 0 ? Math.round((downloaded / totalBytes) * 100) : 0;
            // Log once per 10% step, snapping the marker down to the step reached.
            if (percent >= lastLoggedPercent + 10) {
                lastLoggedPercent = percent - (percent % 10);
                log.info({ fileId, fileName, downloaded, totalBytes, percent: `${percent}%` }, "Download progress");
            }
            onProgress?.({
                fileId,
                fileName,
                downloadedBytes: downloaded,
                totalBytes,
                percent,
                isComplete: local.is_downloading_completed,
            });
            if (local.is_downloading_completed) {
                finishWithFile(local);
                return;
            }
            // Neither completed nor active: the download stopped (network error, cancelled, etc.).
            if (!local.is_downloading_active) {
                settleOnce(() => {
                    reject(new Error(`Download stopped unexpectedly for ${fileName} ` +
                        `(${downloaded}/${totalBytes} bytes, ${percent}%)`));
                });
            }
        }
        timer = setTimeout(() => {
            settleOnce(() => {
                reject(new Error(`Download timed out after ${Math.round(timeoutMs / 60_000)}min for ${fileName}`));
            });
        }, timeoutMs);
        // Subscribe first, then start the non-blocking download.
        client.on("update", onUpdate);
        client
            .invoke({
                _: "downloadFile",
                file_id: numericId,
                priority: 32,
                offset: 0,
                limit: 0,
                synchronous: false,
            })
            .then((result) => {
                // A file already present in TDLib's cache comes back completed right away.
                if (result?.local?.is_downloading_completed) {
                    finishWithFile(result.local);
                }
            })
            .catch((err) => {
                settleOnce(() => reject(err));
            });
    });
}
/**
 * Check that the file at `localPath` has the expected size, then relocate it
 * to `destPath`.
 *
 * The size check is skipped when `expectedBytes` is not greater than zero.
 * Relocation tries a plain rename first; if that fails with EXDEV (source
 * and destination on different filesystems) the file is copied and the
 * original removed. Any other rename error is rethrown.
 *
 * @param localPath     Current location of the downloaded file.
 * @param destPath      Where the file should end up.
 * @param expectedBytes Expected size in bytes.
 * @param fileName      Name used in logs and the error message.
 * @param fileId        File id used in logs.
 * @throws {Error} When the on-disk size differs from `expectedBytes`.
 */
async function verifyAndMove(localPath, destPath, expectedBytes, fileName, fileId) {
    const { size: actualBytes } = await stat(localPath);
    const sizeIsKnown = expectedBytes > 0;
    if (sizeIsKnown && actualBytes !== expectedBytes) {
        log.error({ fileId, fileName, expectedBytes, actualBytes }, "Download size mismatch — file is incomplete or corrupted");
        const reason = `Download verification failed for ${fileName}: expected ${expectedBytes} bytes, got ${actualBytes} bytes`;
        throw new Error(reason);
    }
    log.info({ fileId, fileName, bytes: actualBytes, destPath }, "File verified and complete");
    // Fast path: rename works when both paths live on the same filesystem.
    try {
        await rename(localPath, destPath);
        return;
    }
    catch (err) {
        // Only a cross-device rename is recoverable; everything else propagates.
        if (err.code !== "EXDEV") {
            throw err;
        }
    }
    log.debug({ fileId, fileName }, "Cross-device rename — falling back to copy + unlink");
    await copyFile(localPath, destPath);
    await unlink(localPath);
}
/**
 * Pause for `ms` milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(() => wake(), ms);
    });
}
//# sourceMappingURL=download.js.map