Fix worker getting stuck during sync: add timeouts, stuck detection, and safety limits

- Add invokeWithTimeout wrapper for TDLib API calls (2min timeout per call)
- Add stuck detection to getChannelMessages: break if from_message_id doesn't advance
- Add stuck detection to getTopicMessages: same protection for topic scanning
- Add stuck detection to getForumTopicList: break if pagination offsets don't advance
- Add max page limit (5000) to all scanning loops to prevent infinite pagination
- Add mutex wait timeout (30min) so a queued waiter is rejected instead of blocking indefinitely when the current holder hangs
- Add cycle timeout (4h default, configurable via WORKER_CYCLE_TIMEOUT_MINUTES)
- Fix end-of-page detection to use actual limit value instead of hardcoded 100

Co-authored-by: xCyanGrizzly <53275238+xCyanGrizzly@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-03-05 13:14:53 +00:00
parent ad71346468
commit 9adbdb2a77
73 changed files with 3945 additions and 40 deletions

View File

@@ -4,12 +4,21 @@ const log = childLogger("mutex");
// Set to true once a caller proceeds past the wait; callers that find it set wait in `queue`.
let locked = false;
// Label reported as the current holder in the "waiting" log line and the timeout error.
// NOTE(review): where it is assigned is not visible in this excerpt — confirm.
let holder = "";
// Callers waiting for the mutex; new waiters are appended with push().
// NOTE(review): two declarations of `queue` appear here because this view shows a diff's
// removed and added lines together; the second one (with `reject`) matches the entries
// built by the wait-timeout code.
const queue: Array<{ resolve: () => void; label: string }> = [];
const queue: Array<{ resolve: () => void; reject: (err: Error) => void; label: string }> = [];
/**
* Maximum time a caller may wait in the queue for the TDLib mutex, in milliseconds.
* If the timer fires while the caller is still queued, its entry is removed from the
* queue and its wait is rejected with an Error naming the waiter and the current holder.
* The timeout covers only the wait; nothing visible here interrupts the current holder.
* Value: 30 minutes (long enough for large downloads, short enough to detect hangs).
*/
const MUTEX_WAIT_TIMEOUT_MS = 30 * 60 * 1000;
/**
* Ensures only one TDLib client runs at a time across the entire worker process.
* Both the scheduler (auth, ingestion) and the fetch listener acquire this
* before creating any TDLib client.
*
* Includes a wait timeout to prevent indefinite blocking if the current holder hangs.
*/
export async function withTdlibMutex<T>(
label: string,
@@ -17,7 +26,29 @@ export async function withTdlibMutex<T>(
): Promise<T> {
if (locked) {
log.info({ waiting: label, holder }, "Waiting for TDLib mutex");
await new Promise<void>((resolve) => queue.push({ resolve, label }));
await new Promise<void>((resolve, reject) => {
const entry = { resolve, reject, label };
queue.push(entry);
// Timeout: reject if we've been waiting too long
const timer = setTimeout(() => {
const idx = queue.indexOf(entry);
if (idx !== -1) {
queue.splice(idx, 1);
reject(new Error(
`TDLib mutex wait timeout after ${MUTEX_WAIT_TIMEOUT_MS / 60_000}min ` +
`(waiting: ${label}, holder: ${holder})`
));
}
}, MUTEX_WAIT_TIMEOUT_MS);
// Wrap resolve to clear the timer
const origResolve = entry.resolve;
entry.resolve = () => {
clearTimeout(timer);
origResolve();
};
});
}
locked = true;