mirror of
https://github.com/xCyanGrizzly/DragonsStash.git
synced 2026-05-11 06:11:15 +00:00
fix: auto-recover from TDLib upload stalls by recreating client
When TDLib's event stream degrades, uploads complete (bytes sent) but confirmations never arrive. Previously the worker retried 3x with the same broken client, wasting 60+ min per archive and holding the mutex.

- Add UploadStallError class to distinguish stalls from other failures
- Reduce stall detection timeout from 5min to 3min (faster detection)
- Recreate TDLib client after consecutive upload stalls instead of retrying on the same degraded connection
- Add forceReleaseMutex() to prevent cascade failures when one account blocks others via stuck mutex after cycle timeout

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import { config } from "./util/config.js";
|
||||
import { childLogger } from "./util/logger.js";
|
||||
import { withTdlibMutex } from "./util/mutex.js";
|
||||
import { withTdlibMutex, forceReleaseMutex } from "./util/mutex.js";
|
||||
import { getActiveAccounts, getPendingAccounts } from "./db/queries.js";
|
||||
import { runWorkerForAccount, authenticateAccount } from "./worker.js";
|
||||
import { runIntegrityAudit } from "./audit.js";
|
||||
@@ -90,10 +90,18 @@ async function runCycle(): Promise<void> {
|
||||
|
||||
for (let i = 0; i < results.length; i++) {
|
||||
if (results[i].status === "rejected") {
|
||||
const reason = (results[i] as PromiseRejectedResult).reason;
|
||||
log.error(
|
||||
{ phone: accounts[i].phone, err: (results[i] as PromiseRejectedResult).reason },
|
||||
{ phone: accounts[i].phone, err: reason },
|
||||
"Account ingestion failed"
|
||||
);
|
||||
// If the cycle timed out, force-release the mutex so the next cycle
|
||||
// (or other operations like fetch-channels) can proceed immediately
|
||||
// instead of waiting 30 minutes for the mutex timeout.
|
||||
const errMsg = reason instanceof Error ? reason.message : String(reason);
|
||||
if (errMsg.includes("timed out") || errMsg.includes("mutex wait timeout")) {
|
||||
forceReleaseMutex(accounts[i].phone);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user