mirror of
https://github.com/xCyanGrizzly/DragonsStash.git
synced 2026-06-13 04:31:16 +00:00
Driven by a real production case: a secondary account was attached to 17
source channels but ingesting only ~2-3 archives per cycle. Log analysis
showed five distinct issues that this commit addresses.
1. Auto-retry cap (WORKER_MAX_SKIP_ATTEMPTS, default 5)
processArchiveSets now filters out SkippedPackage rows whose
attemptCount has reached the cap. Removing them from the working
list means they are not tracked in minFailedId, so the watermark
cap from d99a506 does not pin progress below them anymore. A bad
file no longer blocks the rest of the channel forever; the user
can manually retry via the UI to reset the count.
2. Account phone in error messages
Every SkippedPackage row and SystemNotification produced from a
failure is now prefixed with [<phone>] in errorMessage / message,
and the JSON context includes accountPhone. When two accounts
share a source channel and only one is failing, the UI tells you
which one.
3. Explicit getChat for destination at run start
loadChats only loads main/archive/folder chat lists. If an account
archived or moved the destination chat, sendMessage failed silently
per-archive. Now we getChat the destination once per cycle; on
failure we record a SystemNotification and skip the account's
entire ingestion cycle (no point downloading what we can't upload).
4. Retry on transient Telegram server errors
The "Turnbase Delivery Folder.7z" failure on the secondary and
"10. Kingdom of the Depth.part1.rar" on the main were both
"Internal Server Error during file upload" — a TG-side hiccup, not
a stall or FLOOD_WAIT. These now retry up to MAX_UPLOAD_RETRIES
with linear backoff (15s, 30s, 45s + jitter) before giving up.
5. Channel-access-lost notification
"Iridium 2 w/ Add-ons [Completed]" has been throwing
"Can't access the chat" every cycle for the secondary. The worker
now surfaces a CHANNEL_ACCESS_LOST notification (deduped to once per
24h per channel/account) so the admin sees it and can re-join or
unlink the channel instead of just losing visibility into the loop.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
370 lines
12 KiB
TypeScript
370 lines
12 KiB
TypeScript
import path from "path";
|
|
import { stat } from "fs/promises";
|
|
import type { Client } from "tdl";
|
|
import { config } from "../util/config.js";
|
|
import { childLogger } from "../util/logger.js";
|
|
import { withFloodWait, extractFloodWaitSeconds } from "../util/retry.js";
|
|
|
|
// Module-level logger shared by every function in this file; created with the "upload" label.
const log = childLogger("upload");
|
|
|
|
/**
 * Error type raised when an upload stalls or times out.
 *
 * It exists so callers can tell a stall apart from every other failure
 * (via `instanceof` or the `name` property) and react to it, for example
 * by recreating the TDLib client whose event stream may have degraded.
 */
export class UploadStallError extends Error {
  /**
   * @param message - Human-readable description of the stall.
   */
  constructor(message: string) {
    super(message);
    // Override the default "Error" name so logs and serialised errors identify the subtype.
    this.name = "UploadStallError";
  }
}
|
|
|
|
/** Outcome of a completed upload of one or more files. */
export interface UploadResult {
  /** Server-assigned ID of the first uploaded message. */
  messageId: bigint;
  /** Server-assigned IDs of every uploaded message, in upload order. */
  messageIds: Array<bigint>;
}
|
|
|
|
/**
 * Upload one or more files to a destination Telegram channel, one message
 * per file (multipart archives therefore produce several messages).
 *
 * Files are sent sequentially. Each send is delegated to `sendWithRetry`,
 * which only resolves once Telegram has confirmed the upload, so by the time
 * this function returns it is safe to delete temp files or close the client.
 * A pause of `config.apiDelayMs` is inserted between consecutive files.
 *
 * @param client - TDLib client used for the uploads.
 * @param chatId - Destination chat ID.
 * @param filePaths - Local paths of the files to upload, in order.
 * @param caption - Optional caption; attached to the first file only.
 * @returns The server-assigned ID of the first message plus the IDs of all
 *   uploaded messages.
 * @throws Error("Upload failed: no messages sent") when `filePaths` is empty;
 *   otherwise propagates whatever `sendWithRetry` throws.
 */
export async function uploadToChannel(
  client: Client,
  chatId: bigint,
  filePaths: string[],
  caption?: string
): Promise<UploadResult> {
  const allMessageIds: bigint[] = [];

  for (const [index, filePath] of filePaths.entries()) {
    // Only the first part of a set carries the caption.
    const fileCaption = index === 0 ? caption || undefined : undefined;
    const fileName = path.basename(filePath);

    // Size is used for logging and timeout scaling; a failed stat leaves it at 0.
    let fileSizeMB = 0;
    try {
      const { size } = await stat(filePath);
      fileSizeMB = Math.round(size / (1024 * 1024));
    } catch {
      // Non-critical
    }

    log.info(
      {
        chatId: Number(chatId),
        fileName,
        sizeMB: fileSizeMB,
        part: index + 1,
        total: filePaths.length,
      },
      "Uploading file to channel"
    );

    const confirmedId = await sendWithRetry(
      client,
      chatId,
      filePath,
      fileCaption,
      fileName,
      fileSizeMB
    );
    allMessageIds.push(confirmedId);

    // Rate-limit pause, skipped after the final file.
    if (index < filePaths.length - 1) {
      await sleep(config.apiDelayMs);
    }
  }

  if (allMessageIds.length === 0) {
    throw new Error("Upload failed: no messages sent");
  }

  const firstMessageId = allMessageIds[0];

  log.info(
    { chatId: Number(chatId), messageId: Number(firstMessageId), files: filePaths.length },
    "All uploads confirmed by Telegram"
  );

  return { messageId: firstMessageId, messageIds: allMessageIds };
}
|
|
|
|
/** Maximum number of retries after the initial attempt of a single file upload. */
const MAX_UPLOAD_RETRIES = 3;

/**
 * Retry wrapper for sendAndWaitForUpload.
 *
 * Failure handling, in order of precedence:
 * - Rate limit (a wait time is reported by `extractFloodWaitSeconds`):
 *   sleep for that time plus 1-5s of jitter, then retry.
 * - Stall / timeout (message contains "stalled" or "timed out"): NOT retried.
 *   An `UploadStallError` is thrown immediately so the caller can recreate
 *   the TDLib client instead of waiting on a degraded event stream again.
 * - Transient Telegram server error (5xx-style messages): retry with linear
 *   backoff of 15s per attempt plus up to 5s of jitter.
 * - Anything else, or any retryable error on the last attempt: rethrown.
 *
 * @returns The final server-assigned message ID.
 * @throws UploadStallError on stall or timeout; otherwise the original error.
 */
async function sendWithRetry(
  client: Client,
  chatId: bigint,
  filePath: string,
  caption: string | undefined,
  fileName: string,
  fileSizeMB: number
): Promise<bigint> {
  const stallMarkers = ["stalled", "timed out"];
  const transientMarkers = [
    "internal server error",
    "internal error",
    "server error",
    "bad gateway",
    "service unavailable",
    "gateway timeout",
  ];

  let attempt = 0;
  while (attempt <= MAX_UPLOAD_RETRIES) {
    try {
      return await sendAndWaitForUpload(client, chatId, filePath, caption, fileName, fileSizeMB);
    } catch (err) {
      const canRetry = attempt < MAX_UPLOAD_RETRIES;
      const attemptNumber = attempt + 1;

      // Rate limit from Telegram (429 / FLOOD_WAIT / "retry after N")
      const waitSeconds = extractFloodWaitSeconds(err);
      if (canRetry && waitSeconds !== null) {
        const jitter = 1000 + Math.random() * 4000;
        const waitMs = waitSeconds * 1000 + jitter;
        log.warn(
          { fileName, attempt: attemptNumber, maxRetries: MAX_UPLOAD_RETRIES, waitSeconds },
          `Upload rate-limited — sleeping ${waitSeconds}s before retry`
        );
        await sleep(waitMs);
        attempt += 1;
        continue;
      }

      // Stall or timeout: the missing send-success events are a client-level
      // problem, so another attempt on the same client would only burn the
      // full timeout again. Fail fast and let the caller use a fresh client.
      const errMsg = err instanceof Error ? err.message : "";
      const looksStalled = stallMarkers.some((marker) => errMsg.includes(marker));
      if (looksStalled) {
        log.warn(
          { fileName, attempt: attemptNumber },
          "Upload stalled — failing fast so caller can recreate TDLib client"
        );
        throw new UploadStallError(`Upload stalled for ${fileName}: ${errMsg}`);
      }

      // Transient server-side error reported through updateMessageSendFailed.
      // Neither a FLOOD_WAIT nor a stall; usually clears after a short backoff.
      const lowerMsg = errMsg.toLowerCase();
      const isTransientServerError = transientMarkers.some((marker) => lowerMsg.includes(marker));
      if (canRetry && isTransientServerError) {
        const backoffMs = 15_000 * attemptNumber + Math.random() * 5_000;
        log.warn(
          {
            fileName,
            attempt: attemptNumber,
            maxRetries: MAX_UPLOAD_RETRIES,
            backoffMs: Math.round(backoffMs),
          },
          `Transient Telegram server error — retrying after backoff`
        );
        await sleep(backoffMs);
        attempt += 1;
        continue;
      }

      throw err;
    }
  }

  // Not reachable in practice (the last attempt either returns or throws above);
  // kept so every code path yields a value or an error.
  throw new Error(`Upload failed after ${MAX_UPLOAD_RETRIES} retries for ${fileName}`);
}
|
|
|
|
/**
 * Send a single file message and wait for Telegram to confirm the upload.
 *
 * `sendMessage` resolves immediately with a *temporary* message; the upload
 * itself continues asynchronously. This function resolves only once an
 * `updateMessageSendSucceeded` whose `old_message_id` equals that temporary
 * ID has been seen.
 *
 * The update listener is attached BEFORE the message is sent so that a fast
 * upload cannot complete unobserved. Outcome events that arrive before the
 * temporary ID is known are buffered per `old_message_id` and looked up once
 * `sendMessage` resolves. Buffering is keyed by ID (not a single slot) so an
 * event for some other message cannot overwrite, and thereby lose, ours.
 *
 * @param client - TDLib client used to invoke `sendMessage` and to receive updates.
 * @param chatId - Destination chat ID (converted with `Number()` for the request).
 * @param filePath - Local path of the file, sent as `inputFileLocal`.
 * @param caption - Optional caption; left out of the request when falsy.
 * @param fileName - Used in log and error messages only.
 * @param fileSizeMB - File size in MiB; scales the overall timeout.
 * @returns The final server-assigned message ID.
 * @throws Error "Upload timed out ..." when the overall timeout elapses,
 *   "Upload stalled ..." when no bytes advance for 3 minutes after the upload
 *   started, "Upload failed for ..." (with a `code` property) on
 *   `updateMessageSendFailed`, or whatever the `sendMessage` call rejects with.
 */
async function sendAndWaitForUpload(
  client: Client,
  chatId: bigint,
  filePath: string,
  caption: string | undefined,
  fileName: string,
  fileSizeMB: number
): Promise<bigint> {
  return new Promise<bigint>((resolve, reject) => {
    let settled = false;
    let lastLoggedPercent = 0;
    let tempMsgId: number | null = null;
    let uploadStarted = false;
    let lastProgressBytes = 0;
    let lastProgressTime = Date.now();

    // Outcome events can arrive before `sendMessage` resolves. Keep every one
    // seen in that window, keyed by old_message_id, so unrelated messages
    // cannot clobber the entry that belongs to this upload.
    const earlySuccesses = new Map<number, number>();
    const earlyFailures = new Map<number, { errorMsg: string; code?: number }>();

    // Timeout: 20 minutes per GB, minimum 15 minutes
    const timeoutMs = Math.max(15 * 60_000, (fileSizeMB / 1024) * 20 * 60_000);

    const timer = setTimeout(() => {
      if (!settled) {
        settled = true;
        cleanup();
        reject(
          new Error(
            `Upload timed out after ${Math.round(timeoutMs / 60_000)}min for ${fileName}`
          )
        );
      }
    }, timeoutMs);

    // Stall detection: once the upload has started, 3 minutes without any
    // byte progress is treated as a degraded TDLib event stream. Checked
    // every 30 seconds.
    const STALL_TIMEOUT_MS = 3 * 60_000;
    const stallChecker = setInterval(() => {
      if (settled || !uploadStarted) return;
      const stallMs = Date.now() - lastProgressTime;
      if (stallMs >= STALL_TIMEOUT_MS) {
        settled = true;
        cleanup();
        reject(
          new Error(
            `Upload stalled for ${fileName} — no progress for ${Math.round(stallMs / 60_000)}min`
          )
        );
      }
    }, 30_000);

    const completeWithSuccess = (finalId: number) => {
      if (settled) return;
      settled = true;
      cleanup();
      log.info({ fileName, tempMsgId, finalMsgId: finalId }, "Upload confirmed by Telegram");
      resolve(BigInt(finalId));
    };

    const completeWithFailure = (errorMsg: string, code?: number) => {
      if (settled) return;
      settled = true;
      cleanup();
      const error = new Error(`Upload failed for ${fileName}: ${errorMsg}`);
      // Expose the TDLib error code so callers can classify the failure.
      (error as Error & { code?: number }).code = code;
      reject(error);
    };

    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const handleUpdate = (update: any) => {
      // Track upload progress via updateFile events
      if (update?._ === "updateFile") {
        const file = update.file;
        if (file?.remote?.is_uploading_active && file.expected_size > 0) {
          uploadStarted = true;

          const uploaded = file.remote.uploaded_size ?? 0;

          // Only reset the stall timer when bytes actually advance
          if (uploaded > lastProgressBytes) {
            lastProgressBytes = uploaded;
            lastProgressTime = Date.now();
          }

          const total = file.expected_size;
          const percent = Math.round((uploaded / total) * 100);
          // Log roughly every 20% to keep the log readable.
          if (percent >= lastLoggedPercent + 20) {
            lastLoggedPercent = percent - (percent % 20);
            log.info({ fileName, uploaded, total, percent: `${percent}%` }, "Upload progress");
          }
        }
      }

      // Upload succeeded: this event carries the final server message ID
      if (update?._ === "updateMessageSendSucceeded") {
        const msg = update.message;
        const oldMsgId: number = update.old_message_id;
        if (tempMsgId === null) {
          // Arrived before `sendMessage` resolved; remember it under its own ID.
          earlySuccesses.set(oldMsgId, msg.id);
          return;
        }
        if (oldMsgId === tempMsgId) {
          completeWithSuccess(msg.id);
        }
      }

      // Upload failed
      if (update?._ === "updateMessageSendFailed") {
        const oldMsgId: number = update.old_message_id;
        const errorMsg: string = update.error?.message ?? "Unknown upload error";
        const code: number | undefined = update.error?.code;
        if (tempMsgId === null) {
          earlyFailures.set(oldMsgId, { errorMsg, code });
          return;
        }
        if (oldMsgId === tempMsgId) {
          completeWithFailure(errorMsg, code);
        }
      }
    };

    const cleanup = () => {
      clearTimeout(timer);
      clearInterval(stallChecker);
      client.off("update", handleUpdate);
      earlySuccesses.clear();
      earlyFailures.clear();
    };

    // Attach listener BEFORE sending to avoid missing fast completions
    client.on("update", handleUpdate);

    // Send the message; this returns a temporary message immediately.
    // Wrapped in withFloodWait to handle Telegram rate limits on upload.
    withFloodWait(
      () =>
        client.invoke({
          _: "sendMessage",
          chat_id: Number(chatId),
          input_message_content: {
            _: "inputMessageDocument",
            document: {
              _: "inputFileLocal",
              path: filePath,
            },
            caption: caption
              ? {
                  _: "formattedText",
                  text: caption,
                }
              : undefined,
          },
        }),
      "sendMessage:upload"
    )
      .then((result) => {
        const tempMsg = result as { id: number };
        tempMsgId = tempMsg.id;
        log.debug({ fileName, tempMsgId }, "Message queued, waiting for upload confirmation");

        // Replay an outcome for OUR message that arrived before the ID was
        // known. Success takes precedence over failure.
        const earlyFinalId = earlySuccesses.get(tempMsg.id);
        const earlyFailure = earlyFailures.get(tempMsg.id);
        // From here on events are matched live, so the buffers are no longer needed.
        earlySuccesses.clear();
        earlyFailures.clear();

        if (earlyFinalId !== undefined) {
          completeWithSuccess(earlyFinalId);
        } else if (earlyFailure !== undefined) {
          completeWithFailure(earlyFailure.errorMsg, earlyFailure.code);
        }
      })
      .catch((err) => {
        if (!settled) {
          settled = true;
          cleanup();
          reject(err);
        }
      });
  });
}
|
|
|
|
/**
 * Pause for the given duration.
 *
 * @param ms - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|