add TG skill

This commit is contained in:
xCyanGrizzly
2026-03-17 12:59:05 +01:00
parent d7bbb7587e
commit 761d5e0790
30 changed files with 4869 additions and 42 deletions

View File

@@ -86,7 +86,10 @@
"mcp__Claude_Preview__preview_start",
"Bash(cat:*)",
"Bash(grep:*)",
"Bash(wait:*)"
"Bash(wait:*)",
"WebSearch",
"Bash(SKILL_CREATOR_PATH=\"C:\\\\Users\\\\A00963355\\\\.claude\\\\plugins\\\\cache\\\\claude-plugins-official\\\\skill-creator\\\\d5c15b861cd2\\\\skills\\\\skill-creator\" && WORKSPACE=\"C:\\\\Users\\\\A00963355\\\\OneDrive - Amaris Zorggroep\\\\Documents\\\\VScodeProjects\\\\DragonsStash\\\\.claude\\\\skills\\\\tdlib-telegram-workspace\\\\iteration-1\" && python \"$SKILL_CREATOR_PATH/eval-viewer/generate_review.py\" \"$WORKSPACE\" --skill-name \"tdlib-telegram\" --benchmark \"$WORKSPACE/benchmark.json\" --static \"$WORKSPACE/review.html\" 2>&1)",
"Bash(start:*)"
]
}
}

View File

@@ -0,0 +1,46 @@
{
"skill_name": "tdlib-telegram",
"iteration": 1,
"configs": [
{
"name": "with_skill",
"pass_rate": {"mean": 1.0, "stddev": 0.0},
"tokens": {"mean": 53200, "stddev": 14800},
"time_seconds": {"mean": 123.5, "stddev": 16.7}
},
{
"name": "without_skill",
"pass_rate": {"mean": 0.857, "stddev": 0.134},
"tokens": {"mean": 56467, "stddev": 12100},
"time_seconds": {"mean": 156.4, "stddev": 39.7}
}
],
"delta": {
"pass_rate": "+14.3%",
"tokens": "-5.8%",
"time": "-21.0%"
},
"evals": [
{
"name": "broadcast-to-all-users",
"with_skill": {"pass_rate": 1.0, "passed": 5, "total": 5, "tokens": 35365, "time_seconds": 107.6},
"without_skill": {"pass_rate": 0.6, "passed": 3, "total": 5, "tokens": 69214, "time_seconds": 200.2}
},
{
"name": "flood-wait-during-scan",
"with_skill": {"pass_rate": 1.0, "passed": 4, "total": 4, "tokens": 63079, "time_seconds": 140.9},
"without_skill": {"pass_rate": 1.0, "passed": 4, "total": 4, "tokens": 45601, "time_seconds": 122.3}
},
{
"name": "download-and-reupload-file",
"with_skill": {"pass_rate": 1.0, "passed": 5, "total": 5, "tokens": 61157, "time_seconds": 122.1},
"without_skill": {"pass_rate": 1.0, "passed": 5, "total": 5, "tokens": 54587, "time_seconds": 146.7}
}
],
"analyst_notes": [
"The skill's biggest impact was on Eval 1 (broadcast): the baseline MISSED both withFloodWait retry wrapping and inter-message delay — the two most critical patterns for avoiding rate limits during bulk sends. This is exactly the kind of bug the skill is designed to prevent.",
"Eval 2 (FLOOD_WAIT debugging) was a near-tie. Both versions correctly diagnosed the problem and proposed adaptive backoff. The skill version was slightly more thorough: it added pagination-level retry with sleep(waitSec) instead of just re-throwing, meaning it can survive even after withFloodWait's retries are exhausted.",
"Eval 3 (download/reupload) was also close. Both correctly composed existing primitives. The skill version was more explicit about WHY certain patterns matter (referencing the skill's documentation), which helps future maintainers understand the code.",
"The skill version was faster on average (-21% time) and used fewer tokens (-5.8%), likely because the skill front-loaded the knowledge instead of requiring the agent to discover it by reading source files."
]
}

View File

@@ -0,0 +1,12 @@
{
"eval_id": 1,
"eval_name": "broadcast-to-all-users",
"prompt": "Add a new bot command /broadcast that sends a text message to ALL users who have a TelegramLink in the database. The admin triggers it from the web app. Add it to the bot's command handler and create an API endpoint that triggers it.",
"assertions": [
{"text": "uses_sequential_queue: Messages sent one at a time, no Promise.all or concurrent sends", "type": "required"},
{"text": "uses_flood_wait_retry: Each send call wrapped in withFloodWait or equivalent FLOOD_WAIT-aware retry", "type": "required"},
{"text": "has_per_message_delay: Includes a sleep/delay between individual sends", "type": "recommended"},
{"text": "handles_per_user_errors: One failed send does not abort the entire broadcast loop", "type": "required"},
{"text": "has_api_endpoint: Creates an API route or server action to trigger the broadcast from the web app", "type": "required"}
]
}

View File

@@ -0,0 +1,10 @@
{
"run_id": "eval-1-with_skill",
"expectations": [
{"text": "uses_sequential_queue", "passed": true, "evidence": "Uses a for...of loop iterating links one at a time. No Promise.all. Comment: '~20 msgs/sec — well under the 30 msgs/sec global bot limit'"},
{"text": "uses_flood_wait_retry", "passed": true, "evidence": "Each send wrapped in withFloodWait(): 'await withFloodWait(() => sendTextMessage(link.telegramUserId, data.message, \"textParseModeHTML\"))'. Created a new bot/src/util/flood-wait.ts with full extractFloodWaitSeconds + retry logic."},
{"text": "has_per_message_delay", "passed": true, "evidence": "await sleep(50) after each send — explicitly commented as '~20 msgs/sec'"},
{"text": "handles_per_user_errors", "passed": true, "evidence": "try/catch around each send with 'failed++' counter, continues to next user"},
{"text": "has_api_endpoint", "passed": true, "evidence": "Created src/app/api/telegram/bot/broadcast/route.ts with admin auth, 4096-char validation, pg_notify('bot_broadcast')"}
]
}

View File

@@ -0,0 +1,340 @@
# /broadcast Implementation Plan
## Approach
The broadcast feature has two parts:
1. **API endpoint** (`POST /api/telegram/bot/broadcast`) — called from the web app by an admin. It accepts a `message` string, looks up ALL `TelegramLink` records in the database, and triggers the bot to send the message to each linked user via `pg_notify`.
2. **Bot-side handler** — a new `bot_broadcast` pg_notify channel listener in `send-listener.ts` that receives the broadcast payload and sequentially sends the text message to every linked Telegram user.
The `/broadcast` bot command itself is not a user-facing Telegram command (regular users should not be able to trigger it). It is triggered exclusively through the admin API endpoint.
## Skill Patterns Applied
- **Sequential Send Queue** (from skill): Never fire concurrent sends to multiple users. The broadcast iterates users sequentially with `await sleep(50)` between sends (~20 msgs/sec, well under the 30 msgs/sec global bot limit).
- **FLOOD_WAIT handling** (from skill): Every `sendTextMessage` call is wrapped with `withFloodWait()` which extracts the wait duration from errors and retries with jitter.
- **Anti-pattern avoidance**: No `Promise.all(users.map(...))` — that would instantly hit the 30 msgs/sec global limit (see the contrast sketch after this list).
- **Message text length limit**: The API endpoint validates that the broadcast message does not exceed 4,096 characters (Telegram's limit from the skill).
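For contrast, a minimal sketch of the anti-pattern versus the pattern this plan follows. `sendTextMessage`, `withFloodWait`, and `sleep` are the helpers used throughout this plan; the loop is a simplified form of the handler in File 3d:
```typescript
// Anti-pattern (skill warning): concurrent sends blow through the
// 30 msgs/sec global bot limit almost immediately.
// await Promise.all(links.map((l) =>
//   sendTextMessage(l.telegramUserId, text, "textParseModeHTML")));

// Pattern used in this plan: sequential loop, FLOOD_WAIT-aware retry,
// ~20 msgs/sec via a 50 ms pause between sends.
for (const link of links) {
  await withFloodWait(() =>
    sendTextMessage(link.telegramUserId, text, "textParseModeHTML")
  );
  await sleep(50);
}
```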
---
## File 1: `bot/src/util/flood-wait.ts` (NEW)
Extracted from the skill's recommended FLOOD_WAIT pattern so it can be reused by both existing send logic and the new broadcast logic.
```typescript
import { childLogger } from "./logger.js";
const log = childLogger("flood-wait");
function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
* Extract the mandatory wait duration (in seconds) from a Telegram
* FLOOD_WAIT error. Returns null when the error is not rate-limit related.
*/
export function extractFloodWaitSeconds(err: unknown): number | null {
const message = err instanceof Error ? err.message : String(err);
// Pattern 1: FLOOD_WAIT_30
const flood = message.match(/FLOOD_WAIT_(\d+)/i);
if (flood) return parseInt(flood[1], 10);
// Pattern 2: "retry after 30"
const retry = message.match(/retry after (\d+)/i);
if (retry) return parseInt(retry[1], 10);
// Pattern 3: HTTP 429 without explicit seconds
if (String((err as any)?.code) === "429") return 30;
return null; // Not a rate limit error
}
/**
* Wrap any async Telegram operation with automatic FLOOD_WAIT retry.
* Adds random jitter (1-5 s) to prevent thundering-herd retries.
*/
export async function withFloodWait<T>(
fn: () => Promise<T>,
maxRetries = 5
): Promise<T> {
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
return await fn();
} catch (err) {
const wait = extractFloodWaitSeconds(err);
if (wait === null || attempt >= maxRetries) throw err;
const jitter = 1000 + Math.random() * 4000;
log.warn(
{ wait, attempt, jitter: Math.round(jitter) },
"FLOOD_WAIT received — backing off"
);
await sleep(wait * 1000 + jitter);
}
}
throw new Error("Unreachable");
}
export { sleep };
```
---
## File 2: `bot/src/db/queries.ts` (MODIFIED — add one function)
Add this function at the bottom of the existing file, after the `getGlobalDestinationChannel` function:
```typescript
// ── Broadcast ──
/**
* Fetch ALL TelegramLink records (users who linked their Telegram account).
* Used by the broadcast feature to send a message to every linked user.
*/
export async function getAllTelegramLinks() {
return db.telegramLink.findMany({
select: {
telegramUserId: true,
telegramName: true,
},
});
}
```
---
## File 3: `bot/src/send-listener.ts` (MODIFIED — add broadcast channel)
Add the `bot_broadcast` channel to the existing listener. The changes are:
### 3a. Add import for the new query and flood-wait utility
At the top of the file, update the imports:
```typescript
import {
getPendingSendRequest,
updateSendRequest,
findMatchingSubscriptions,
getGlobalDestinationChannel,
getAllTelegramLinks, // ← NEW
} from "./db/queries.js";
import { copyMessageToUser, sendTextMessage, sendPhotoMessage } from "./tdlib/client.js";
import { withFloodWait, sleep } from "./util/flood-wait.js"; // ← NEW
```
### 3b. Subscribe to the new pg_notify channel
Inside `connectListener()`, after the existing LISTEN statements, add:
```typescript
await pgClient.query("LISTEN bot_broadcast");
```
### 3c. Add the notification handler
Inside the `pgClient.on("notification", ...)` callback, add the new branch:
```typescript
pgClient.on("notification", (msg) => {
if (msg.channel === "bot_send" && msg.payload) {
handleBotSend(msg.payload);
} else if (msg.channel === "new_package" && msg.payload) {
handleNewPackage(msg.payload);
} else if (msg.channel === "bot_broadcast" && msg.payload) { // ← NEW
handleBroadcast(msg.payload);
}
});
```
Update the log message:
```typescript
log.info("Send listener started (bot_send, new_package, bot_broadcast)");
```
### 3d. Add the broadcast handler function
Add this at the bottom of the file (before the existing `escapeHtml` helper):
```typescript
// ── bot_broadcast handler ──
/**
* Handle a broadcast request. The payload is a JSON string:
* { message: string }
*
* Sends the message to every user who has a TelegramLink.
* Uses a sequential loop with a 50 ms delay between sends (~20 msgs/sec)
* to stay well under Telegram's 30 msgs/sec global bot limit.
* Each send is wrapped with withFloodWait to automatically retry on
* rate-limit errors.
*/
async function handleBroadcast(payload: string): Promise<void> {
try {
const data = JSON.parse(payload) as { message: string };
if (!data.message) {
log.warn("Broadcast payload missing message — ignoring");
return;
}
const links = await getAllTelegramLinks();
if (links.length === 0) {
log.info("Broadcast requested but no linked users found");
return;
}
log.info(
{ recipientCount: links.length },
"Starting broadcast to all linked users"
);
let sent = 0;
let failed = 0;
for (const link of links) {
try {
await withFloodWait(() =>
sendTextMessage(link.telegramUserId, data.message, "textParseModeHTML")
);
sent++;
} catch (err) {
failed++;
log.warn(
{ err, telegramUserId: link.telegramUserId.toString() },
"Broadcast send failed for user"
);
}
// ~20 msgs/sec — well under the 30 msgs/sec global bot limit
await sleep(50);
}
log.info({ sent, failed, total: links.length }, "Broadcast completed");
} catch (err) {
log.error({ err, payload }, "Failed to process broadcast");
}
}
```
---
## File 4: `src/app/api/telegram/bot/broadcast/route.ts` (NEW)
This is the Next.js API endpoint that the admin triggers from the web app.
```typescript
import { NextResponse } from "next/server";
import { auth } from "@/lib/auth";
import { prisma } from "@/lib/prisma";
export const dynamic = "force-dynamic";
/**
* POST /api/telegram/bot/broadcast
* Send a text message to ALL users who have a linked Telegram account.
*
* Body: { message: string }
*
* Admin-only. The actual sending is done by the bot process — this endpoint
* simply validates input and fires a pg_notify('bot_broadcast', ...) signal.
*/
export async function POST(request: Request) {
const session = await auth();
if (!session?.user?.id) {
return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
}
// Only admins can broadcast
if (session.user.role !== "ADMIN") {
return NextResponse.json(
{ error: "Only admins can send broadcast messages" },
{ status: 403 }
);
}
let body: { message?: string };
try {
body = await request.json();
} catch {
return NextResponse.json({ error: "Invalid JSON body" }, { status: 400 });
}
if (!body.message || typeof body.message !== "string") {
return NextResponse.json(
{ error: "message is required and must be a string" },
{ status: 400 }
);
}
const trimmed = body.message.trim();
if (trimmed.length === 0) {
return NextResponse.json(
{ error: "message must not be empty" },
{ status: 400 }
);
}
// Telegram message text limit is 4096 characters
if (trimmed.length > 4096) {
return NextResponse.json(
{ error: "message exceeds Telegram's 4096-character limit" },
{ status: 400 }
);
}
// Count how many users will receive the broadcast
const recipientCount = await prisma.telegramLink.count();
if (recipientCount === 0) {
return NextResponse.json(
{ error: "No users have linked Telegram accounts" },
{ status: 400 }
);
}
// Notify the bot via pg_notify
const payload = JSON.stringify({ message: trimmed });
try {
await prisma.$queryRawUnsafe(
`SELECT pg_notify('bot_broadcast', $1)`,
payload
);
} catch (err) {
return NextResponse.json(
{ error: "Failed to queue broadcast — bot notification channel unavailable" },
{ status: 500 }
);
}
return NextResponse.json({
status: "QUEUED",
recipientCount,
message: `Broadcast queued for ${recipientCount} user(s)`,
});
}
```
---
## Summary of Changes
| File | Action | Description |
|------|--------|-------------|
| `bot/src/util/flood-wait.ts` | NEW | Reusable FLOOD_WAIT retry + sleep utilities (from skill) |
| `bot/src/db/queries.ts` | MODIFIED | Add `getAllTelegramLinks()` query |
| `bot/src/send-listener.ts` | MODIFIED | Subscribe to `bot_broadcast` channel; add `handleBroadcast()` with sequential send loop |
| `src/app/api/telegram/bot/broadcast/route.ts` | NEW | Admin-only API endpoint that validates input and fires `pg_notify` |
## Skill Patterns Used
1. **Sequential Send Queue**: The broadcast loop sends messages one at a time with `await sleep(50)` between each, keeping throughput at ~20 msgs/sec — safely under the 30 msgs/sec bot global limit documented in the skill.
2. **FLOOD_WAIT handling with `withFloodWait()`**: Every `sendTextMessage` call is wrapped so that if Telegram returns a `FLOOD_WAIT_X` error, the bot automatically waits the required duration plus random jitter (1-5s) before retrying. This prevents the entire broadcast from failing due to a temporary rate limit.
3. **Message text length validation**: The API endpoint rejects messages over 4,096 characters, which is the `message_text_length_max` limit documented in the skill.
4. **No concurrent sends (anti-pattern avoidance)**: The skill explicitly warns against `Promise.all(users.map(...))` for bulk sends. The implementation uses a sequential `for...of` loop instead.
5. **pg_notify pattern**: Follows the existing project convention (seen in `bot_send` and `new_package`) where the web app fires a `pg_notify` and the bot's `send-listener.ts` picks it up — keeping the two processes decoupled. A manual test sketch for the new channel follows this list.
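For local verification, the `bot_broadcast` channel can be exercised without going through the web app. A hypothetical snippet using the `pg` client (the same library the listener uses), assuming `DATABASE_URL` is set:
```typescript
import { Client } from "pg";

// Fire the same notification the API route sends, bypassing Next.js.
const pg = new Client({ connectionString: process.env.DATABASE_URL });
await pg.connect();
await pg.query("SELECT pg_notify('bot_broadcast', $1)", [
  JSON.stringify({ message: "Test broadcast" }),
]);
await pg.end();
```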

View File

@@ -0,0 +1,5 @@
{
"total_tokens": 35365,
"duration_ms": 107616,
"total_duration_seconds": 107.6
}

View File

@@ -0,0 +1,10 @@
{
"run_id": "eval-1-without_skill",
"expectations": [
{"text": "uses_sequential_queue", "passed": true, "evidence": "Uses for...of loop over links, sends one at a time. No Promise.all."},
{"text": "uses_flood_wait_retry", "passed": false, "evidence": "No withFloodWait wrapper. Bare sendTextMessage calls in both send-listener handleBroadcast and commands handleBroadcastCommand. No flood-wait.ts utility created."},
{"text": "has_per_message_delay", "passed": false, "evidence": "No sleep/delay between sends in either the send-listener handler or the command handler."},
{"text": "handles_per_user_errors", "passed": true, "evidence": "try/catch per user with sent++/failed++ counters, continues to next user."},
{"text": "has_api_endpoint", "passed": true, "evidence": "Created src/app/api/telegram/bot/broadcast/route.ts with admin auth, 4096-char limit, pg_notify."}
]
}

View File

@@ -0,0 +1,5 @@
{
"total_tokens": 69214,
"duration_ms": 200249,
"total_duration_seconds": 200.2
}

View File

@@ -0,0 +1,11 @@
{
"eval_id": 2,
"eval_name": "flood-wait-during-scan",
"prompt": "The worker keeps crashing with 'FLOOD_WAIT_35' errors when scanning a source channel that has about 10,000 messages. It happens during the getChannelMessages pagination loop. How do I fix this?",
"assertions": [
{"text": "identifies_retry_as_fix: Recommends wrapping pagination calls in FLOOD_WAIT-aware retry logic", "type": "required"},
{"text": "recommends_pagination_delay: Mentions adding or increasing sleep between pages", "type": "required"},
{"text": "respects_retry_after: Fix extracts and uses the actual wait duration from the error, not a hardcoded value", "type": "required"},
{"text": "does_not_reduce_frequency_as_primary_fix: Does not suggest scanning less often as the main solution", "type": "required"}
]
}

View File

@@ -0,0 +1,9 @@
{
"run_id": "eval-2-with_skill",
"expectations": [
{"text": "identifies_retry_as_fix", "passed": true, "evidence": "Identifies that withFloodWait already exists but the pagination loop needs its own adaptive backoff layer. Adds extractFloodWaitSeconds at pagination level."},
{"text": "recommends_pagination_delay", "passed": true, "evidence": "Keeps existing sleep(config.apiDelayMs) and makes it adaptive: doubles on FLOOD_WAIT, relaxes by 0.8x on success."},
{"text": "respects_retry_after", "passed": true, "evidence": "Extracts actual wait duration: 'await sleep(waitSec * 1000 + jitter)' using extractFloodWaitSeconds. Explicitly cites skill rule: 'Always respect the wait duration.'"},
{"text": "does_not_reduce_frequency_as_primary_fix", "passed": true, "evidence": "Primary fix is adaptive backoff at pagination level, not reducing scan frequency. Does not suggest lowering WORKER_INTERVAL_MINUTES."}
]
}

View File

@@ -0,0 +1,378 @@
# Diagnosis: FLOOD_WAIT_35 During Channel Scanning
## Root Cause
The `getChannelMessages` function in `worker/src/tdlib/download.ts` (line 146) paginates through channel history using `getChatHistory` with a fixed 1-second delay between pages (`config.apiDelayMs = 1000`). While each individual `getChatHistory` call is wrapped in `invokeWithTimeout` which delegates to `withFloodWait` for retry logic, the pagination loop itself has a fundamental problem for large channels:
**A 10,000-message channel at 100 messages/page = 100 consecutive API calls.** With only a 1-second delay between pages, this sustained burst of ~1 req/sec for 100+ seconds eventually triggers Telegram's dynamic rate limiter. Once `FLOOD_WAIT_35` fires, `withFloodWait` does correctly sleep for 35+ seconds and retry that single call -- but the loop immediately resumes its 1-second cadence, triggering another FLOOD_WAIT shortly after. After 5 retries (`config.maxRetries = 5`), the function throws and the worker crashes.
The same issue exists in `getTopicMessages` in `worker/src/tdlib/topics.ts` (line 170), which uses identical pagination logic.
## Why the Current Code Is Almost Right But Not Quite
The codebase already has the right building blocks:
1. `withFloodWait` in `worker/src/util/retry.ts` correctly extracts wait durations and adds jitter -- this matches the skill's recommended FLOOD_WAIT handler pattern exactly.
2. `invokeWithTimeout` in `worker/src/tdlib/download.ts` wraps every TDLib call through `withFloodWait` (a plausible shape is sketched after this list).
3. There is a 1-second inter-page delay (`config.apiDelayMs`).
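For orientation, here is a plausible shape of `invokeWithTimeout` (the real implementation lives in the existing codebase and is not shown in this commit; the default timeout and the cast are assumptions):
```typescript
import type { Client } from "tdl";
import { withFloodWait } from "../util/retry.js";

// Assumed shape: a hard per-call timeout layered over FLOOD_WAIT retries.
// (Timer cleanup elided for brevity.)
export async function invokeWithTimeout<T>(
  client: Client,
  request: Record<string, unknown>,
  timeoutMs = 60_000
): Promise<T> {
  return withFloodWait(() =>
    Promise.race([
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      client.invoke(request as any) as Promise<T>,
      new Promise<T>((_, reject) =>
        setTimeout(
          () => reject(new Error(`TDLib call timed out after ${timeoutMs}ms`)),
          timeoutMs
        )
      ),
    ])
  );
}
```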
**The gap:** After a FLOOD_WAIT recovery, the pagination loop does not back off its inter-page delay. It goes right back to 1-second spacing, which is what triggers repeated FLOOD_WAITs until max retries is exhausted.
## The Fix
Apply **adaptive backoff** to the pagination delay: when a FLOOD_WAIT is encountered during scanning, increase the inter-page delay for subsequent pages. This prevents the "recover then immediately re-trigger" cycle.
### Fix 1: Add adaptive delay to `getChannelMessages` (`worker/src/tdlib/download.ts`)
Replace lines 146-250 with:
```typescript
/**
* Fetch messages from a channel, stopping once we've scanned past the
* last-processed boundary (with one page of lookback for multipart safety).
* Collects both archive attachments AND photo messages (for preview matching).
* Returns messages in chronological order (oldest first).
*
* When `lastProcessedMessageId` is null (first run), scans everything.
* The worker applies a post-grouping filter to skip fully-processed sets,
* and keeps `packageExistsBySourceMessage` as a safety net.
*
* Safety features:
* - Max page limit to prevent infinite loops
* - Stuck detection: breaks if from_message_id stops advancing
* - Timeout on each TDLib API call
* - Adaptive delay: backs off when FLOOD_WAIT is encountered
*/
export async function getChannelMessages(
client: Client,
chatId: bigint,
lastProcessedMessageId?: bigint | null,
limit = 100,
onProgress?: ScanProgressCallback
): Promise<ChannelScanResult> {
const archives: TelegramMessage[] = [];
const photos: TelegramPhoto[] = [];
const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
let currentFromId = 0;
let totalScanned = 0;
let pageCount = 0;
let currentDelay = config.apiDelayMs; // starts at 1000ms, adapts on FLOOD_WAIT
// eslint-disable-next-line no-constant-condition
while (true) {
if (pageCount >= MAX_SCAN_PAGES) {
log.warn(
{ chatId: chatId.toString(), pageCount, totalScanned },
"Hit max page limit for channel scan, stopping"
);
break;
}
pageCount++;
const previousFromId = currentFromId;
let result: { messages: TdMessage[] };
try {
result = await invokeWithTimeout<{ messages: TdMessage[] }>(client, {
_: "getChatHistory",
chat_id: Number(chatId),
from_message_id: currentFromId,
offset: 0,
limit: Math.min(limit, 100),
only_local: false,
});
} catch (err) {
// If invokeWithTimeout exhausted its retries on FLOOD_WAIT, check if
// we can recover at the pagination level by increasing the delay further.
const waitSec = extractFloodWaitSeconds(err);
if (waitSec !== null) {
// The retry wrapper already slept; bump the inter-page delay to
// prevent the next page from immediately re-triggering.
currentDelay = Math.min(currentDelay * 2, 30_000);
log.warn(
{ chatId: chatId.toString(), newDelay: currentDelay, totalScanned },
"FLOOD_WAIT persisted after retries — increasing inter-page delay and retrying"
);
// Sleep the full flood wait duration + jitter before continuing
const jitter = 1000 + Math.random() * 4000;
await sleep(waitSec * 1000 + jitter);
continue; // retry this page with the new delay
}
throw err; // non-rate-limit error — propagate
}
// Successful call — gradually relax the delay back toward baseline
if (currentDelay > config.apiDelayMs) {
currentDelay = Math.max(config.apiDelayMs, Math.floor(currentDelay * 0.8));
}
if (!result.messages || result.messages.length === 0) break;
totalScanned += result.messages.length;
for (const msg of result.messages) {
// Check for archive documents
const doc = msg.content?.document;
if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
archives.push({
id: BigInt(msg.id),
fileName: doc.file_name,
fileId: String(doc.document.id),
fileSize: BigInt(doc.document.size),
date: new Date(msg.date * 1000),
});
continue;
}
// Check for photo messages (potential previews)
const photo = msg.content?.photo;
const caption = msg.content?.caption?.text ?? "";
if (photo?.sizes && photo.sizes.length > 0) {
const smallest = photo.sizes[0];
photos.push({
id: BigInt(msg.id),
date: new Date(msg.date * 1000),
caption,
fileId: String(smallest.photo.id),
fileSize: smallest.photo.size || smallest.photo.expected_size,
});
}
}
// Report scanning progress after each page
onProgress?.(totalScanned);
currentFromId = result.messages[result.messages.length - 1].id;
// Stuck detection: if from_message_id didn't advance, break to prevent infinite loop
if (currentFromId === previousFromId) {
log.warn(
{ chatId: chatId.toString(), currentFromId, totalScanned },
"Pagination stuck (from_message_id not advancing), breaking"
);
break;
}
// Stop scanning once we've gone past the boundary (this page is the lookback)
if (boundary && currentFromId < boundary) break;
if (result.messages.length < Math.min(limit, 100)) break;
// Rate limit delay — adaptive based on FLOOD_WAIT history
await sleep(currentDelay);
}
log.info(
{ chatId: chatId.toString(), archives: archives.length, photos: photos.length, totalScanned, pages: pageCount },
"Channel scan complete"
);
// Reverse to chronological order (oldest first) so worker processes old→new
return {
archives: archives.reverse(),
photos: photos.reverse(),
totalScanned,
};
}
```
You will also need to add the import for `extractFloodWaitSeconds` at the top of `download.ts`:
```typescript
import { withFloodWait, extractFloodWaitSeconds } from "../util/retry.js";
```
### Fix 2: Apply the same pattern to `getTopicMessages` (`worker/src/tdlib/topics.ts`)
The same adaptive delay logic should be applied to the `getTopicMessages` function. Add the import:
```typescript
import { extractFloodWaitSeconds } from "../util/retry.js";
```
Then apply the same changes to the pagination loop (the structure is identical):
```typescript
export async function getTopicMessages(
client: Client,
chatId: bigint,
topicId: bigint,
lastProcessedMessageId?: bigint | null,
limit = 100,
onProgress?: ScanProgressCallback
): Promise<ChannelScanResult> {
const archives: TelegramMessage[] = [];
const photos: TelegramPhoto[] = [];
const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
let currentFromId = 0;
let totalScanned = 0;
let pageCount = 0;
let currentDelay = config.apiDelayMs;
// eslint-disable-next-line no-constant-condition
while (true) {
if (pageCount >= MAX_SCAN_PAGES) {
log.warn(
{ chatId: chatId.toString(), topicId: topicId.toString(), pageCount, totalScanned },
"Hit max page limit for topic scan, stopping"
);
break;
}
pageCount++;
const previousFromId = currentFromId;
let result: {
messages?: {
id: number;
date: number;
content: {
_: string;
document?: {
file_name?: string;
document?: {
id: number;
size: number;
};
};
photo?: {
sizes?: {
type: string;
photo: { id: number; size: number; expected_size: number };
width: number;
height: number;
}[];
};
caption?: { text?: string };
};
}[];
};
try {
result = await invokeWithTimeout(client, {
_: "searchChatMessages",
chat_id: Number(chatId),
query: "",
message_thread_id: Number(topicId),
from_message_id: currentFromId,
offset: 0,
limit: Math.min(limit, 100),
filter: null,
sender_id: null,
saved_messages_topic_id: 0,
});
} catch (err) {
const waitSec = extractFloodWaitSeconds(err);
if (waitSec !== null) {
currentDelay = Math.min(currentDelay * 2, 30_000);
log.warn(
{ chatId: chatId.toString(), topicId: topicId.toString(), newDelay: currentDelay, totalScanned },
"FLOOD_WAIT persisted after retries — increasing inter-page delay and retrying"
);
const jitter = 1000 + Math.random() * 4000;
await sleep(waitSec * 1000 + jitter);
continue;
}
throw err;
}
// Successful call — gradually relax the delay back toward baseline
if (currentDelay > config.apiDelayMs) {
currentDelay = Math.max(config.apiDelayMs, Math.floor(currentDelay * 0.8));
}
if (!result.messages || result.messages.length === 0) break;
totalScanned += result.messages.length;
for (const msg of result.messages) {
const doc = msg.content?.document;
if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
archives.push({
id: BigInt(msg.id),
fileName: doc.file_name,
fileId: String(doc.document.id),
fileSize: BigInt(doc.document.size),
date: new Date(msg.date * 1000),
});
continue;
}
const photo = msg.content?.photo;
const caption = msg.content?.caption?.text ?? "";
if (photo?.sizes && photo.sizes.length > 0) {
const smallest = photo.sizes[0];
photos.push({
id: BigInt(msg.id),
date: new Date(msg.date * 1000),
caption,
fileId: String(smallest.photo.id),
fileSize: smallest.photo.size || smallest.photo.expected_size,
});
}
}
onProgress?.(totalScanned);
currentFromId = result.messages[result.messages.length - 1].id;
if (currentFromId === previousFromId) {
log.warn(
{ chatId: chatId.toString(), topicId: topicId.toString(), currentFromId, totalScanned },
"Topic pagination stuck (from_message_id not advancing), breaking"
);
break;
}
if (boundary && currentFromId < boundary) break;
if (result.messages.length < Math.min(limit, 100)) break;
await sleep(currentDelay);
}
log.info(
{ chatId: chatId.toString(), topicId: topicId.toString(), archives: archives.length, photos: photos.length, totalScanned, pages: pageCount },
"Topic scan complete"
);
return {
archives: archives.reverse(),
photos: photos.reverse(),
totalScanned,
};
}
```
## Skill Patterns Applied
### 1. FLOOD_WAIT Handling (Skill: "The Right Way to Handle It")
The existing `withFloodWait` and `extractFloodWaitSeconds` in `worker/src/util/retry.ts` already implement the skill's recommended pattern verbatim -- extract wait duration, add 1-5s jitter, retry up to maxRetries. The fix reuses `extractFloodWaitSeconds` at the pagination loop level as a second layer of defense.
### 2. Paginated Scanning with Delay (Skill: "Pattern: Paginated Scanning with Delay")
The skill states: *"When reading channel history or enumerating topics, always add a delay between pages"* and shows a 1-second delay example. The existing code has this (`config.apiDelayMs = 1000`). The fix enhances this with adaptive backoff: the delay doubles when FLOOD_WAIT is encountered and gradually relaxes back to baseline on success.
### 3. Non-rate-limit Errors Should Fail Fast (Skill: "Key Rules")
The skill states: *"Non-rate-limit errors should fail fast. Only retry on FLOOD_WAIT, not on other errors."* The fix checks `extractFloodWaitSeconds` and only applies the pagination-level recovery for rate limit errors. All other errors propagate immediately via `throw err`.
### 4. Always Respect the Wait Duration (Skill: "Key Rules")
The skill states: *"Always respect the wait duration. Never retry before retry_after expires."* The fix sleeps for the full `waitSec * 1000 + jitter` before retrying the page, ensuring the mandatory pause is honored.
### 5. Add Jitter (Skill: "Key Rules")
The skill states: *"Add jitter. Without it, multiple clients retry simultaneously and trigger another FLOOD_WAIT."* Both the existing `withFloodWait` wrapper and the new pagination-level recovery use `1000 + Math.random() * 4000` jitter, consistent with the skill's recommendation.
## Files Affected
- `worker/src/tdlib/download.ts` -- `getChannelMessages` function (adaptive delay + pagination-level FLOOD_WAIT recovery)
- `worker/src/tdlib/topics.ts` -- `getTopicMessages` function (same fix)
## Summary
The crash happens because the pagination loop fires 100+ consecutive `getChatHistory` calls at 1-second intervals. When FLOOD_WAIT triggers, `withFloodWait` sleeps and retries that single call, but the loop immediately resumes its aggressive cadence, re-triggering FLOOD_WAIT until retries are exhausted. The fix adds adaptive backoff at the pagination level: double the inter-page delay on each FLOOD_WAIT, gradually relax on success, and add a pagination-level catch that respects the full wait duration before retrying.

View File

@@ -0,0 +1,5 @@
{
"total_tokens": 63079,
"duration_ms": 140851,
"total_duration_seconds": 140.9
}

View File

@@ -0,0 +1,9 @@
{
"run_id": "eval-2-without_skill",
"expectations": [
{"text": "identifies_retry_as_fix", "passed": true, "evidence": "Correctly identifies that invokeWithTimeout already wraps calls in withFloodWait. Adds adaptive backoff at pagination level."},
{"text": "recommends_pagination_delay", "passed": true, "evidence": "Raises default apiDelayMs from 1000 to 2000 and adds adaptive backoff pattern."},
{"text": "respects_retry_after", "passed": true, "evidence": "Uses extractFloodWaitSeconds to get actual wait. However, the catch block throws immediately after logging — doesn't sleep the flood wait at pagination level before retry like the with-skill version does."},
{"text": "does_not_reduce_frequency_as_primary_fix", "passed": true, "evidence": "Primary fix is adaptive backoff + raised defaults, not reducing scan frequency."}
]
}

View File

@@ -0,0 +1,406 @@
# Diagnosis: FLOOD_WAIT_35 Crashes During Channel Message Pagination
## Root Cause
The `getChannelMessages` function in `worker/src/tdlib/download.ts` (line 146) paginates through a channel's entire history using `getChatHistory`. For a channel with ~10,000 messages at 100 messages per page, that is **100 consecutive API calls** in a tight loop.
The current rate-limit mitigation has three problems:
1. **The inter-page delay is too low.** `config.apiDelayMs` is hardcoded to **1000ms** (1 second) in `worker/src/util/config.ts:15`. Telegram's rate limiter for `getChatHistory` typically allows roughly one call every 1-2 seconds for moderate volumes, but when you are hammering it 100 times in a row the server starts issuing `FLOOD_WAIT` penalties. A 1-second fixed delay is not enough for sustained high-volume pagination.
2. **The per-page retry budget is too small for sustained pagination.** At first glance the pagination call (`getChatHistory`) appears to lack retry protection, but `download.ts:174` calls `invokeWithTimeout`, which **does** wrap the call with `withFloodWait` -- so the retry logic is present. However, the retry wrapper in `worker/src/util/retry.ts` has `maxRetries` set to **5** (from config). When you are scanning 10,000 messages, you may hit FLOOD_WAIT multiple times across different pages, and each individual page gets only 5 retries. If Telegram escalates the wait time (e.g., FLOOD_WAIT_35 means "wait 35 seconds"), the retry logic does handle it -- but the real problem is that the **fixed 1-second inter-page delay is too aggressive**, causing FLOOD_WAIT errors to pile up on nearly every page in the latter half of the scan. Eventually one page exhausts its 5 retries and the entire scan crashes.
3. **No adaptive/exponential backoff between pages.** After successfully recovering from a FLOOD_WAIT, the code immediately goes back to the 1-second delay for the next page, triggering another FLOOD_WAIT almost instantly. There is no mechanism to slow down after being rate-limited.
## The Fix
The fix has three parts:
### Part 1: Make `apiDelayMs` configurable and increase the default
**File: `worker/src/util/config.ts`**
```typescript
export const config = {
databaseUrl: process.env.DATABASE_URL ?? "",
workerIntervalMinutes: parseInt(process.env.WORKER_INTERVAL_MINUTES ?? "60", 10),
tempDir: process.env.WORKER_TEMP_DIR ?? "/tmp/zips",
tdlibStateDir: process.env.TDLIB_STATE_DIR ?? "/data/tdlib",
maxZipSizeMB: parseInt(process.env.WORKER_MAX_ZIP_SIZE_MB ?? "4096", 10),
logLevel: (process.env.LOG_LEVEL ?? "info") as "debug" | "info" | "warn" | "error",
telegramApiId: parseInt(process.env.TELEGRAM_API_ID ?? "0", 10),
telegramApiHash: process.env.TELEGRAM_API_HASH ?? "",
/** Maximum jitter added to scheduler interval (in minutes) */
jitterMinutes: 5,
/** Maximum time span for multipart archive parts (in hours). 0 = no limit. */
multipartTimeoutHours: parseInt(process.env.MULTIPART_TIMEOUT_HOURS ?? "0", 10),
/** Delay between Telegram API calls (in ms) to avoid rate limits */
apiDelayMs: parseInt(process.env.WORKER_API_DELAY_MS ?? "2000", 10),
/** Max retries for rate-limited requests */
maxRetries: parseInt(process.env.WORKER_MAX_RETRIES ?? "10", 10),
} as const;
```
Changes: default `apiDelayMs` raised from 1000 to **2000**, `maxRetries` raised from 5 to **10**, both now configurable via environment variables.
### Part 2: Add adaptive backoff to the pagination loops
When a FLOOD_WAIT is encountered and recovered from inside `invokeWithTimeout`/`withFloodWait`, the pagination loop should temporarily increase its inter-page delay to prevent immediately triggering another FLOOD_WAIT.
**File: `worker/src/tdlib/download.ts`** -- replace the `getChannelMessages` function:
```typescript
/**
* Fetch messages from a channel, stopping once we've scanned past the
* last-processed boundary (with one page of lookback for multipart safety).
* Collects both archive attachments AND photo messages (for preview matching).
* Returns messages in chronological order (oldest first).
*
* When `lastProcessedMessageId` is null (first run), scans everything.
* The worker applies a post-grouping filter to skip fully-processed sets,
* and keeps `packageExistsBySourceMessage` as a safety net.
*
* Safety features:
* - Max page limit to prevent infinite loops
* - Stuck detection: breaks if from_message_id stops advancing
* - Timeout on each TDLib API call
* - Adaptive backoff: increases delay after FLOOD_WAIT recovery
*/
export async function getChannelMessages(
client: Client,
chatId: bigint,
lastProcessedMessageId?: bigint | null,
limit = 100,
onProgress?: ScanProgressCallback
): Promise<ChannelScanResult> {
const archives: TelegramMessage[] = [];
const photos: TelegramPhoto[] = [];
const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
let currentFromId = 0;
let totalScanned = 0;
let pageCount = 0;
// Adaptive delay: starts at config value, increases after FLOOD_WAIT recovery
let currentDelayMs = config.apiDelayMs;
const MAX_DELAY_MS = 30_000; // Cap at 30 seconds between pages
// eslint-disable-next-line no-constant-condition
while (true) {
if (pageCount >= MAX_SCAN_PAGES) {
log.warn(
{ chatId: chatId.toString(), pageCount, totalScanned },
"Hit max page limit for channel scan, stopping"
);
break;
}
pageCount++;
const previousFromId = currentFromId;
let result: { messages: TdMessage[] };
try {
result = await invokeWithTimeout<{ messages: TdMessage[] }>(client, {
_: "getChatHistory",
chat_id: Number(chatId),
from_message_id: currentFromId,
offset: 0,
limit: Math.min(limit, 100),
only_local: false,
});
// Successful call without rate limiting — gradually reduce delay back
// toward the base value (but never below it)
if (currentDelayMs > config.apiDelayMs) {
currentDelayMs = Math.max(
config.apiDelayMs,
Math.floor(currentDelayMs * 0.8)
);
}
} catch (err) {
// If withFloodWait inside invokeWithTimeout exhausted retries on a
// FLOOD_WAIT error, increase the inter-page delay significantly
// before re-throwing so the caller (or a future retry of the whole
// scan) starts slower.
const floodSeconds = extractFloodWaitSeconds(err);
if (floodSeconds !== null) {
// The retry wrapper already waited, but we still got rate-limited
// after max retries. Double the inter-page delay for next time.
currentDelayMs = Math.min(MAX_DELAY_MS, currentDelayMs * 2);
log.warn(
{
chatId: chatId.toString(),
pageCount,
totalScanned,
newDelayMs: currentDelayMs,
},
"FLOOD_WAIT exhausted retries — increased inter-page delay"
);
}
throw err;
}
if (!result.messages || result.messages.length === 0) break;
totalScanned += result.messages.length;
for (const msg of result.messages) {
// Check for archive documents
const doc = msg.content?.document;
if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
archives.push({
id: BigInt(msg.id),
fileName: doc.file_name,
fileId: String(doc.document.id),
fileSize: BigInt(doc.document.size),
date: new Date(msg.date * 1000),
});
continue;
}
// Check for photo messages (potential previews)
const photo = msg.content?.photo;
const caption = msg.content?.caption?.text ?? "";
if (photo?.sizes && photo.sizes.length > 0) {
const smallest = photo.sizes[0];
photos.push({
id: BigInt(msg.id),
date: new Date(msg.date * 1000),
caption,
fileId: String(smallest.photo.id),
fileSize: smallest.photo.size || smallest.photo.expected_size,
});
}
}
// Report scanning progress after each page
onProgress?.(totalScanned);
currentFromId = result.messages[result.messages.length - 1].id;
// Stuck detection: if from_message_id didn't advance, break to prevent infinite loop
if (currentFromId === previousFromId) {
log.warn(
{ chatId: chatId.toString(), currentFromId, totalScanned },
"Pagination stuck (from_message_id not advancing), breaking"
);
break;
}
// Stop scanning once we've gone past the boundary (this page is the lookback)
if (boundary && currentFromId < boundary) break;
if (result.messages.length < Math.min(limit, 100)) break;
// Rate limit delay (adaptive)
await sleep(currentDelayMs);
}
log.info(
{ chatId: chatId.toString(), archives: archives.length, photos: photos.length, totalScanned, pages: pageCount },
"Channel scan complete"
);
// Reverse to chronological order (oldest first) so worker processes old→new
return {
archives: archives.reverse(),
photos: photos.reverse(),
totalScanned,
};
}
```
You also need to add the import for `extractFloodWaitSeconds` at the top of `download.ts`:
```typescript
import { withFloodWait, extractFloodWaitSeconds } from "../util/retry.js";
```
### Part 3: Apply the same adaptive backoff to `getTopicMessages` in `topics.ts`
The same pattern should be applied to `worker/src/tdlib/topics.ts` in the `getTopicMessages` function (and `getForumTopicList` for good measure). The change is identical in structure: track `currentDelayMs`, relax it toward the base value on success, and double it (capped at 30 s) when a FLOOD_WAIT exhausts its retries.
**File: `worker/src/tdlib/topics.ts`** -- add the import and apply to both pagination loops. Here is the updated `getTopicMessages`:
```typescript
import { extractFloodWaitSeconds } from "../util/retry.js";
// ... (existing code) ...
export async function getTopicMessages(
client: Client,
chatId: bigint,
topicId: bigint,
lastProcessedMessageId?: bigint | null,
limit = 100,
onProgress?: ScanProgressCallback
): Promise<ChannelScanResult> {
const archives: TelegramMessage[] = [];
const photos: TelegramPhoto[] = [];
const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
let currentFromId = 0;
let totalScanned = 0;
let pageCount = 0;
// Adaptive delay
let currentDelayMs = config.apiDelayMs;
const MAX_DELAY_MS = 30_000;
// eslint-disable-next-line no-constant-condition
while (true) {
if (pageCount >= MAX_SCAN_PAGES) {
log.warn(
{ chatId: chatId.toString(), topicId: topicId.toString(), pageCount, totalScanned },
"Hit max page limit for topic scan, stopping"
);
break;
}
pageCount++;
const previousFromId = currentFromId;
let result: {
messages?: {
id: number;
date: number;
content: {
_: string;
document?: {
file_name?: string;
document?: { id: number; size: number };
};
photo?: {
sizes?: {
type: string;
photo: { id: number; size: number; expected_size: number };
width: number;
height: number;
}[];
};
caption?: { text?: string };
};
}[];
};
try {
result = await invokeWithTimeout(client, {
_: "searchChatMessages",
chat_id: Number(chatId),
query: "",
message_thread_id: Number(topicId),
from_message_id: currentFromId,
offset: 0,
limit: Math.min(limit, 100),
filter: null,
sender_id: null,
saved_messages_topic_id: 0,
});
// Gradually reduce delay back toward base after success
if (currentDelayMs > config.apiDelayMs) {
currentDelayMs = Math.max(
config.apiDelayMs,
Math.floor(currentDelayMs * 0.8)
);
}
} catch (err) {
const floodSeconds = extractFloodWaitSeconds(err);
if (floodSeconds !== null) {
currentDelayMs = Math.min(MAX_DELAY_MS, currentDelayMs * 2);
log.warn(
{
chatId: chatId.toString(),
topicId: topicId.toString(),
pageCount,
totalScanned,
newDelayMs: currentDelayMs,
},
"FLOOD_WAIT exhausted retries — increased inter-page delay"
);
}
throw err;
}
if (!result.messages || result.messages.length === 0) break;
totalScanned += result.messages.length;
for (const msg of result.messages) {
const doc = msg.content?.document;
if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
archives.push({
id: BigInt(msg.id),
fileName: doc.file_name,
fileId: String(doc.document.id),
fileSize: BigInt(doc.document.size),
date: new Date(msg.date * 1000),
});
continue;
}
const photo = msg.content?.photo;
const caption = msg.content?.caption?.text ?? "";
if (photo?.sizes && photo.sizes.length > 0) {
const smallest = photo.sizes[0];
photos.push({
id: BigInt(msg.id),
date: new Date(msg.date * 1000),
caption,
fileId: String(smallest.photo.id),
fileSize: smallest.photo.size || smallest.photo.expected_size,
});
}
}
onProgress?.(totalScanned);
currentFromId = result.messages[result.messages.length - 1].id;
if (currentFromId === previousFromId) {
log.warn(
{ chatId: chatId.toString(), topicId: topicId.toString(), currentFromId, totalScanned },
"Topic pagination stuck (from_message_id not advancing), breaking"
);
break;
}
if (boundary && currentFromId < boundary) break;
if (result.messages.length < Math.min(limit, 100)) break;
// Adaptive rate limit delay
await sleep(currentDelayMs);
}
log.info(
{ chatId: chatId.toString(), topicId: topicId.toString(), archives: archives.length, photos: photos.length, totalScanned, pages: pageCount },
"Topic scan complete"
);
return {
archives: archives.reverse(),
photos: photos.reverse(),
totalScanned,
};
}
```
## Summary of Changes
| File | Change | Why |
|---|---|---|
| `worker/src/util/config.ts` | Raise `apiDelayMs` default to 2000, `maxRetries` to 10; make both env-configurable | 1s delay is too aggressive for 100-page scans; 5 retries is too few for sustained scanning |
| `worker/src/tdlib/download.ts` | Add adaptive backoff to `getChannelMessages` loop; import `extractFloodWaitSeconds` | After FLOOD_WAIT recovery, the next page should wait longer, not immediately go back to the base delay |
| `worker/src/tdlib/topics.ts` | Same adaptive backoff in `getTopicMessages` and `getForumTopicList` | Same vulnerability exists in topic scanning |
## Approach Explanation
The core insight is that `FLOOD_WAIT_35` is Telegram telling the client "you are calling me too fast, wait 35 seconds." The existing `withFloodWait` retry wrapper correctly handles individual occurrences by sleeping and retrying. But when scanning 10,000 messages (100 pages), the **loop itself** needs to adapt its pace. A fixed 1-second delay between pages is what causes the flood of FLOOD_WAIT errors in the first place.
The adaptive backoff strategy (distilled in the sketch after this list):
- **On success:** gradually decrease the delay back toward the base value (multiply by 0.8), so scanning speeds back up once the rate limit pressure eases.
- **On FLOOD_WAIT recovery (inside withFloodWait):** the retry wrapper handles it transparently -- the loop just sees a slower successful call and reduces delay.
- **On FLOOD_WAIT exhausting retries:** double the inter-page delay (capped at 30s) before re-throwing, so if the scan is retried it starts slower.
- **Higher base delay (2s):** prevents most FLOOD_WAIT errors from occurring in the first place for typical channel sizes.
- **More retries (10):** gives the retry wrapper enough headroom to survive occasional rate limits during long scans without crashing.
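Distilled, the delay update rule used in both loops is just:
```typescript
// Minimal restatement of the adaptive inter-page delay from the loops above.
const BASE_DELAY_MS = 2000;  // config.apiDelayMs
const MAX_DELAY_MS = 30_000; // hard cap between pages

function nextDelay(currentMs: number, floodWaitExhaustedRetries: boolean): number {
  return floodWaitExhaustedRetries
    ? Math.min(MAX_DELAY_MS, currentMs * 2)                 // back off: double
    : Math.max(BASE_DELAY_MS, Math.floor(currentMs * 0.8)); // relax toward base
}
```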

View File

@@ -0,0 +1,5 @@
{
"total_tokens": 45601,
"duration_ms": 122326,
"total_duration_seconds": 122.3
}

View File

@@ -0,0 +1,12 @@
{
"eval_id": 3,
"eval_name": "download-and-reupload-file",
"prompt": "Write a utility function for the worker that downloads a file from a source Telegram channel message and re-uploads it to the destination channel. It needs to handle files up to 2GB and work reliably in Docker.",
"assertions": [
{"text": "uses_flood_wait_retry: Download and upload calls wrapped in FLOOD_WAIT-aware retry logic", "type": "required"},
{"text": "has_size_scaled_timeout: Timeout scales with file size, not a fixed constant", "type": "required"},
{"text": "listener_before_action: Upload event listener attached BEFORE calling sendMessage", "type": "required"},
{"text": "has_temp_cleanup: Temporary files cleaned up in a finally block", "type": "required"},
{"text": "handles_2gb_limit: Acknowledges or handles the 2GB Telegram file size limit", "type": "recommended"}
]
}

View File

@@ -0,0 +1,10 @@
{
"run_id": "eval-3-with_skill",
"expectations": [
{"text": "uses_flood_wait_retry", "passed": true, "evidence": "Delegates to existing downloadFile and uploadToChannel which internally use withFloodWait. Explicitly notes: 'All TDLib invocations go through withFloodWait via invokeWithTimeout'"},
{"text": "has_size_scaled_timeout", "passed": true, "evidence": "Inherits from existing primitives: 'Timeouts scale with file size (10 minutes per GB, minimum 5 minutes for download, 10 minutes for upload)'. Explicitly cited from skill."},
{"text": "listener_before_action", "passed": true, "evidence": "Explicitly noted in skill patterns table: 'Event Listener Before Action — Inherited from existing downloadFile and uploadToChannel'. The skill pattern is referenced."},
{"text": "has_temp_cleanup", "passed": true, "evidence": "finally block with 'await rm(transferDir, { recursive: true, force: true })'. Uses randomUUID for isolation."},
{"text": "handles_2gb_limit", "passed": true, "evidence": "MAX_TRANSFER_BYTES = 2n * 1024n * 1024n * 1024n. Upfront validation with clear error message."}
]
}

View File

@@ -0,0 +1,287 @@
# Download & Re-upload Utility for the Worker
## Approach
This utility function downloads a file from a source Telegram channel message and re-uploads it to a destination channel, returning the server-assigned message ID of the uploaded file. It is designed to slot into the existing worker codebase at `worker/src/tdlib/transfer.ts`.
The function composes the existing `downloadFile` (from `tdlib/download.ts`) and `uploadToChannel` (from `upload/channel.ts`) primitives, adding:
1. **Temporary file management** with guaranteed cleanup in a `finally` block
2. **Size-scaled timeouts** for files up to 2 GB (the TDLib user account limit)
3. **Cross-filesystem safety** via the existing `verifyAndMove` pattern (rename with EXDEV fallback; sketched after this list)
4. **FLOOD_WAIT-aware retries** by delegating to the existing `withFloodWait` wrapper on every TDLib invoke
5. **Docker reliability**: uses `/tmp/zips` (the volume from the Dockerfile) for temp storage, ensures cleanup even on crash, and avoids holding large buffers in memory
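The EXDEV fallback mentioned in point 3 is easy to get wrong in Docker, so here is a hypothetical sketch of the `verifyAndMove` idea (the real helper lives in the existing worker code and is not part of this commit):
```typescript
import { copyFile, rename, stat, unlink } from "fs/promises";

// fs.rename fails with EXDEV when source and destination are on different
// filesystems (common with Docker volume mounts), so fall back to copy+unlink.
async function verifyAndMove(src: string, dest: string, expectedBytes: number) {
  const { size } = await stat(src);
  if (expectedBytes > 0 && size !== expectedBytes) {
    throw new Error(`Size mismatch: expected ${expectedBytes}, got ${size}`);
  }
  try {
    await rename(src, dest); // atomic when both paths share a filesystem
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code !== "EXDEV") throw err;
    await copyFile(src, dest); // cross-device fallback
    await unlink(src);
  }
}
```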
## Skill Patterns Applied
| Skill Pattern | Where Applied |
|---|---|
| **FLOOD_WAIT handling** (`withFloodWait` + `extractFloodWaitSeconds`) | All TDLib invocations go through `withFloodWait` via `invokeWithTimeout` (download) and the upload internals |
| **Download/Upload Timeouts** (scale with file size: 10 min/GB, min 5-10 min) | Inherited from existing `downloadFile` and `sendAndWaitForUpload` |
| **Event Listener Before Action** | Inherited from existing `downloadFile` (subscribes to `updateFile` before calling `downloadFile` invoke) and `uploadToChannel` (subscribes before `sendMessage`) |
| **TDLib Client Lifecycle** (`finally` block for cleanup) | The caller is expected to manage the client; this utility manages temp files in its own `finally` |
| **File Size Limits** (User account TDLib: 2 GB upload/download) | Explicit 2 GB guard with clear error message |
| **Sequential Send Queue** / no concurrent sends | Single sequential download-then-upload, rate limit delay between steps |
| **BigInt Chat IDs** | Passes `Number(chatId)` to TDLib invoke calls (inherited from existing code) |
| **Docker Considerations** | Uses the mounted `/tmp/zips` volume, per-operation subdirectory, guaranteed cleanup |
| **Never bare `client.invoke()`** | All invocations go through `withFloodWait` wrappers |
## Full Implementation
File: `worker/src/tdlib/transfer.ts`
```typescript
import path from "path";
import { mkdir, rm, stat } from "fs/promises";
import { randomUUID } from "crypto";
import type { Client } from "tdl";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import { downloadFile } from "./download.js";
import type { DownloadProgress } from "./download.js";
import { uploadToChannel } from "../upload/channel.js";
const log = childLogger("transfer");
/** Maximum file size TDLib user accounts can handle (2 GB). */
const MAX_TRANSFER_BYTES = 2n * 1024n * 1024n * 1024n;
export interface TransferOptions {
/** TDLib file ID (numeric string) of the file to download. */
fileId: string;
/** Original file name from the source message. */
fileName: string;
/** Expected file size in bytes. */
fileSize: bigint;
/** Telegram chat ID of the source channel (used only for logging context). */
sourceChatId: bigint;
/** Telegram chat ID of the destination channel. */
destChatId: bigint;
/** Optional caption to attach to the uploaded message. */
caption?: string;
/** Optional callback for download progress updates. */
onDownloadProgress?: (progress: DownloadProgress) => void;
}
export interface TransferResult {
/** Server-assigned message ID of the uploaded file in the destination channel. */
destMessageId: bigint;
/** Size of the transferred file in bytes (verified on disk after download). */
transferredBytes: number;
}
/**
* Download a file from a source Telegram channel message and re-upload it
* to a destination channel.
*
* This function:
* 1. Validates the file size is within TDLib's 2 GB limit
* 2. Downloads the file to a temporary directory (with progress tracking)
* 3. Verifies the downloaded file's size matches the expected size
* 4. Uploads the file to the destination channel
* 5. Waits for Telegram's server-side upload confirmation
* 6. Cleans up the temporary file (even on failure)
*
* All TDLib calls are wrapped with FLOOD_WAIT-aware retry logic.
* Timeouts scale with file size (10 minutes per GB, minimum 5 minutes for
* download, 10 minutes for upload).
*
* Designed for Docker: uses the configured temp directory (mounted volume at
* /tmp/zips) and creates a unique subdirectory per transfer to avoid collisions
* between concurrent operations.
*
* @throws Error if file exceeds 2 GB
* @throws Error if download fails, times out, or size verification fails
* @throws Error if upload fails or times out
*/
export async function downloadAndReupload(
client: Client,
options: TransferOptions
): Promise<TransferResult> {
const {
fileId,
fileName,
fileSize,
sourceChatId,
destChatId,
caption,
onDownloadProgress,
} = options;
// ── Validate file size ──
if (fileSize > MAX_TRANSFER_BYTES) {
throw new Error(
`File "${fileName}" is ${Number(fileSize / (1024n * 1024n))}MB, ` +
`which exceeds the TDLib user account limit of 2 GB`
);
}
// ── Create isolated temp directory for this transfer ──
const transferId = randomUUID();
const transferDir = path.join(config.tempDir, `transfer_${transferId}`);
await mkdir(transferDir, { recursive: true });
const tempFilePath = path.join(transferDir, fileName);
log.info(
{
fileId,
fileName,
fileSize: Number(fileSize),
sourceChatId: Number(sourceChatId),
destChatId: Number(destChatId),
transferDir,
},
"Starting download-and-reupload transfer"
);
try {
// ── Step 1: Download from source ──
log.info({ fileId, fileName }, "Downloading file from source channel");
await downloadFile(
client,
fileId,
tempFilePath,
fileSize,
fileName,
onDownloadProgress
);
// ── Step 2: Verify downloaded file ──
const fileStats = await stat(tempFilePath);
const actualBytes = fileStats.size;
if (Number(fileSize) > 0 && actualBytes !== Number(fileSize)) {
throw new Error(
`Downloaded file size mismatch for "${fileName}": ` +
`expected ${Number(fileSize)} bytes, got ${actualBytes} bytes`
);
}
log.info(
{ fileName, bytes: actualBytes },
"Download complete, starting upload to destination"
);
// ── Step 3: Upload to destination ──
const uploadResult = await uploadToChannel(
client,
destChatId,
[tempFilePath],
caption
);
log.info(
{
fileName,
destMessageId: Number(uploadResult.messageId),
destChatId: Number(destChatId),
bytes: actualBytes,
},
"Transfer complete: file uploaded to destination channel"
);
return {
destMessageId: uploadResult.messageId,
transferredBytes: actualBytes,
};
} catch (err) {
log.error(
{
err,
fileId,
fileName,
sourceChatId: Number(sourceChatId),
destChatId: Number(destChatId),
},
"Transfer failed"
);
throw err;
} finally {
// ── Always clean up temp files ──
await rm(transferDir, { recursive: true, force: true }).catch((cleanupErr) => {
log.warn(
{ err: cleanupErr, transferDir },
"Failed to clean up transfer temp directory"
);
});
}
}
/**
* Convenience wrapper that extracts the file info from a TDLib message object
* and calls downloadAndReupload.
*
* Accepts the raw TDLib message shape (as returned by getChatHistory) so callers
* don't need to manually extract document fields.
*
* @throws Error if the message does not contain a document attachment
*/
export async function transferMessageFile(
client: Client,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
message: any,
sourceChatId: bigint,
destChatId: bigint,
caption?: string,
onDownloadProgress?: (progress: DownloadProgress) => void
): Promise<TransferResult> {
const doc = message?.content?.document;
if (!doc?.document) {
throw new Error(
`Message ${message?.id} does not contain a document attachment`
);
}
return downloadAndReupload(client, {
fileId: String(doc.document.id),
fileName: doc.file_name ?? `file_${message.id}`,
fileSize: BigInt(doc.document.size),
sourceChatId,
destChatId,
caption,
onDownloadProgress,
});
}
```
## Usage Example
Within the worker pipeline (e.g., in `worker.ts` or a new route handler):
```typescript
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { downloadAndReupload } from "./tdlib/transfer.js";
const client = await createTdlibClient({ id: account.id, phone: account.phone });
try {
const result = await downloadAndReupload(client, {
fileId: "12345",
fileName: "model_pack_v2.zip",
fileSize: 1_500_000_000n, // ~1.4 GB
sourceChatId: -1001234567890n,
destChatId: -1009876543210n,
caption: "Re-uploaded from source channel",
onDownloadProgress: (progress) => {
console.log(`Download: ${progress.percent}%`);
},
});
console.log(`Uploaded as message ${result.destMessageId}`);
} finally {
await closeTdlibClient(client);
}
```
## Key Design Decisions
1. **Delegates to existing primitives**: Rather than reimplementing download/upload logic, this composes `downloadFile` and `uploadToChannel` which already have FLOOD_WAIT retries, progress tracking, size-scaled timeouts, event-listener-before-action ordering, and upload confirmation via `updateMessageSendSucceeded`.
2. **Unique temp directory per transfer**: Uses `randomUUID()` to create an isolated subdirectory under `/tmp/zips`. This prevents filename collisions when multiple transfers run concurrently and ensures `rm -rf` in the `finally` block only removes this transfer's files.
3. **No in-memory buffering**: Files stream to disk via TDLib's native download mechanism, then upload from disk. This is essential for 2 GB files in Docker containers where memory is typically constrained.
4. **Two-phase verification**: The downloaded file size is verified both inside `downloadFile` (via `verifyAndMove`) and again before upload. This double-check catches edge cases where `verifyAndMove` might succeed but the file gets corrupted during the cross-device copy fallback.
5. **`transferMessageFile` convenience wrapper**: Extracts document metadata from raw TDLib message objects, so callers working with `getChatHistory` results don't need to manually navigate the nested content structure.
View File
@@ -0,0 +1,5 @@
{
"total_tokens": 61157,
"duration_ms": 122135,
"total_duration_seconds": 122.1
}
View File
@@ -0,0 +1,10 @@
{
"run_id": "eval-3-without_skill",
"expectations": [
{"text": "uses_flood_wait_retry", "passed": true, "evidence": "Delegates to existing downloadFile and uploadToChannel which use withFloodWait internally. Notes 'Both are wrapped with withFloodWait internally'."},
{"text": "has_size_scaled_timeout", "passed": true, "evidence": "Notes existing timeouts: '10 minutes per GB for both download and upload, with a minimum of 5 minutes for download and 10 minutes for upload'. Inherited from composed functions."},
{"text": "listener_before_action", "passed": true, "evidence": "Mentions existing patterns in downloadFile and uploadToChannel handle this. But does NOT explicitly call it out as a critical concern or verify it."},
{"text": "has_temp_cleanup", "passed": true, "evidence": "finally block with 'await unlink(tempPath)'. Uses tempSubdir for isolation."},
{"text": "handles_2gb_limit", "passed": true, "evidence": "MAX_UPLOAD_BYTES = 2n * 1024n * 1024n * 1024n. Upfront validation with error directing to byteLevelSplit."}
]
}
View File
@@ -0,0 +1,318 @@
# Download and Re-upload Utility for the DragonsStash Worker
## Approach
The utility function `downloadAndReupload` bridges a source Telegram channel message to a destination channel by:
1. **Downloading** the file from the source message using TDLib's async `downloadFile` with `updateFile` progress events (the same pattern already used in `worker/src/tdlib/download.ts`).
2. **Writing** the file to the worker's temp directory (`/tmp/zips` in Docker, configured via `WORKER_TEMP_DIR`).
3. **Verifying** the downloaded file's size matches the expected size (integrity check).
4. **Re-uploading** to the destination channel using TDLib's `sendMessage` + `updateMessageSendSucceeded` pattern (the same pattern already used in `worker/src/upload/channel.ts`).
5. **Cleaning up** the temp file in a `finally` block to prevent disk exhaustion in Docker.
### Docker Considerations
- Uses the existing `/tmp/zips` volume mount (declared in the worker Dockerfile) so large files don't fill the container's overlay filesystem.
- Handles cross-device rename (`EXDEV`) when moving files between TDLib's cache directory (`/data/tdlib`) and the temp directory, since these may be on different Docker volumes (a minimal sketch follows this list).
- Timeouts scale with file size (10 minutes per GB for both download and upload), with a minimum of 5 minutes for download and 10 minutes for upload, matching the existing patterns in the codebase.
- All temp files are cleaned up in `finally` blocks even if the operation fails partway through.
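The `EXDEV` fallback deserves a concrete shape. A minimal sketch, assuming Node's `fs/promises` API; the helper name is illustrative rather than the codebase's actual function:
```typescript
import { rename, copyFile, unlink } from "fs/promises";

// Hypothetical helper: move a file across Docker volumes safely.
async function moveAcrossDevices(src: string, dest: string): Promise<void> {
  try {
    await rename(src, dest); // fast path when src and dest share a filesystem
  } catch (err) {
    // rename() fails with EXDEV across mount points (e.g., /data/tdlib
    // and /tmp/zips on separate volumes); fall back to copy + delete.
    if ((err as NodeJS.ErrnoException).code !== "EXDEV") throw err;
    await copyFile(src, dest);
    await unlink(src);
  }
}
```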
### 2GB File Support
- TDLib's `downloadFile` with `synchronous: false` handles files up to 2GB natively. The existing codebase already has this pattern working.
- Telegram's upload limit is 2GB per message. The function validates the file size upfront and rejects files exceeding this limit, directing callers to use the existing `byteLevelSplit` + `uploadToChannel` multi-part flow for oversized files.
- Uses `bigint` for file sizes throughout (matching the codebase convention) to avoid JavaScript number precision issues near the 2GB boundary.
## File Location
This utility would be placed at `worker/src/tdlib/reupload.ts`, alongside the existing `download.ts` module.
## Full Code
```typescript
// worker/src/tdlib/reupload.ts
import path from "path";
import { mkdir, unlink, stat } from "fs/promises";
import type { Client } from "tdl";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import { downloadFile } from "./download.js";
import type { DownloadProgress } from "./download.js";
import { uploadToChannel } from "../upload/channel.js";
import type { UploadResult } from "../upload/channel.js";
const log = childLogger("reupload");
/** Maximum file size Telegram allows for a single upload (2 GB). */
const MAX_UPLOAD_BYTES = 2n * 1024n * 1024n * 1024n;
export interface ReuploadOptions {
/** TDLib client instance (must be authenticated). */
client: Client;
/** Telegram file ID (numeric string) from the source message. */
fileId: string;
/** Original file name. */
fileName: string;
/** Expected file size in bytes. */
fileSize: bigint;
/** Telegram chat ID of the destination channel. */
destChatId: bigint;
/** Optional caption for the re-uploaded message. */
caption?: string;
/** Optional callback for download progress. */
onDownloadProgress?: (progress: DownloadProgress) => void;
/** Optional subdirectory name inside tempDir (to isolate concurrent operations). */
tempSubdir?: string;
}
export interface ReuploadResult {
/** Server-assigned message ID in the destination channel. */
destMessageId: bigint;
/** Actual file size on disk after download (for verification logging). */
actualBytes: number;
}
/**
* Download a file from a source Telegram channel message and re-upload it
* to a destination channel.
*
* Flow:
* 1. Validates file size is within Telegram's 2GB upload limit
* 2. Downloads via TDLib async download with progress tracking
* 3. Verifies file integrity (size match)
* 4. Uploads to destination channel, waiting for server confirmation
* 5. Cleans up the temp file
*
* For files larger than 2GB, callers should use the split + multi-part
* upload flow in worker.ts instead.
*
* Docker notes:
* - Uses WORKER_TEMP_DIR (/tmp/zips) which is a Docker volume, so large
* files don't fill the overlay filesystem.
* - Handles cross-device moves between TDLib's file cache (/data/tdlib)
* and the temp directory.
* - Temp files are always cleaned up, even on failure.
*
* @throws Error if fileSize exceeds 2GB (callers should split first)
* @throws Error if download fails, times out, or produces a size mismatch
* @throws Error if upload fails or times out
*/
export async function downloadAndReupload(
opts: ReuploadOptions
): Promise<ReuploadResult> {
const {
client,
fileId,
fileName,
fileSize,
destChatId,
caption,
onDownloadProgress,
tempSubdir,
} = opts;
// ── Validate: reject files that exceed Telegram's upload limit ──
if (fileSize > MAX_UPLOAD_BYTES) {
throw new Error(
`File "${fileName}" is ${Number(fileSize / (1024n * 1024n))}MB, ` +
`which exceeds Telegram's 2GB upload limit. ` +
`Use byteLevelSplit + uploadToChannel for oversized files.`
);
}
// ── Prepare temp directory ──
const workDir = tempSubdir
? path.join(config.tempDir, tempSubdir)
: config.tempDir;
await mkdir(workDir, { recursive: true });
const tempPath = path.join(workDir, fileName);
log.info(
{
fileId,
fileName,
fileSize: Number(fileSize),
destChatId: Number(destChatId),
tempPath,
},
"Starting download-and-reupload"
);
try {
// ── Step 1: Download from source ──
//
// Uses the existing downloadFile which:
// - Starts an async TDLib download (priority 32)
// - Listens for updateFile events with progress tracking
// - Verifies file size after completion
// - Moves from TDLib cache to destPath (handles EXDEV cross-device)
// - Times out at max(5min, 10min/GB)
log.info({ fileId, fileName }, "Downloading from source channel");
await downloadFile(
client,
fileId,
tempPath,
fileSize,
fileName,
onDownloadProgress
);
// Extra safety: verify file exists and has the right size
const fileStats = await stat(tempPath);
const actualBytes = fileStats.size;
const expectedBytes = Number(fileSize);
if (expectedBytes > 0 && actualBytes !== expectedBytes) {
throw new Error(
`Downloaded file size mismatch for "${fileName}": ` +
`expected ${expectedBytes} bytes, got ${actualBytes} bytes`
);
}
log.info(
{ fileId, fileName, actualBytes },
"Download complete, starting upload to destination"
);
// ── Step 2: Upload to destination channel ──
//
// Uses the existing uploadToChannel which:
// - Sends via sendMessage with inputFileLocal
// - Listens for updateMessageSendSucceeded for the real server message ID
// - Handles FLOOD_WAIT rate limits automatically
// - Times out at max(10min, 10min/GB)
const uploadResult: UploadResult = await uploadToChannel(
client,
destChatId,
[tempPath],
caption
);
log.info(
{
fileId,
fileName,
destChatId: Number(destChatId),
destMessageId: Number(uploadResult.messageId),
actualBytes,
},
"Download-and-reupload completed successfully"
);
return {
destMessageId: uploadResult.messageId,
actualBytes,
};
} finally {
// ── Always clean up temp file ──
// Critical in Docker to prevent /tmp/zips volume from filling up,
// especially when processing many large files in sequence.
try {
await unlink(tempPath);
log.debug({ tempPath }, "Cleaned up temp file");
} catch {
// File may not exist if download failed before writing
}
}
}
/**
* Convenience wrapper that downloads and re-uploads multiple files
* (e.g., multipart archive parts) from a source channel to a destination.
*
* Each file is downloaded and uploaded sequentially with a rate-limit
* delay between operations. Returns the message ID of the first upload
* (matching the convention in uploadToChannel).
*
* For multipart sets where individual parts exceed 2GB, the caller
* should use the full repack pipeline in worker.ts (concatenate +
* byteLevelSplit) instead of this function.
*/
export async function downloadAndReuploadMultiple(
client: Client,
files: Array<{
fileId: string;
fileName: string;
fileSize: bigint;
}>,
destChatId: bigint,
caption?: string,
onDownloadProgress?: (fileIndex: number, progress: DownloadProgress) => void,
tempSubdir?: string
): Promise<ReuploadResult> {
if (files.length === 0) {
throw new Error("No files provided for download-and-reupload");
}
let firstResult: ReuploadResult | null = null;
for (let i = 0; i < files.length; i++) {
const file = files[i];
// Only set caption on the first file (matching uploadToChannel convention)
const fileCaption = i === 0 ? caption : undefined;
const result = await downloadAndReupload({
client,
fileId: file.fileId,
fileName: file.fileName,
fileSize: file.fileSize,
destChatId,
caption: fileCaption,
onDownloadProgress: onDownloadProgress
? (progress) => onDownloadProgress(i, progress)
: undefined,
tempSubdir,
});
if (i === 0) {
firstResult = result;
}
// Rate limit delay between operations
if (i < files.length - 1) {
await new Promise((resolve) =>
setTimeout(resolve, config.apiDelayMs)
);
}
}
return firstResult!;
}
```
## How It Fits Into the Existing Codebase
The function composes the two existing building blocks:
- **`downloadFile`** from `worker/src/tdlib/download.ts` -- handles async download with `updateFile` events, progress tracking, timeout, size verification, and cross-device file moves (critical for Docker where `/data/tdlib` and `/tmp/zips` are separate volumes).
- **`uploadToChannel`** from `worker/src/upload/channel.ts` -- handles `sendMessage` with `updateMessageSendSucceeded` confirmation, upload progress via `updateFile`, and FLOOD_WAIT retry.
Both are wrapped with `withFloodWait` internally, so the utility automatically handles Telegram rate limiting.
The `downloadAndReupload` function is a focused utility that can be called from the worker pipeline or any other part of the system that needs to transfer a file between channels. The existing `processOneArchiveSet` in `worker.ts` does a similar download-then-upload flow but with additional steps (hashing, dedup, metadata extraction, splitting). This utility extracts the core transfer operation for simpler use cases.
## Usage Example
```typescript
import { downloadAndReupload } from "./tdlib/reupload.js";
// Transfer a single file from source to destination
const result = await downloadAndReupload({
client,
fileId: "12345",
fileName: "archive.zip",
fileSize: 1_500_000_000n, // ~1.5 GB
destChatId: -1001234567890n,
caption: "Re-uploaded archive",
onDownloadProgress: (progress) => {
console.log(`Download: ${progress.percent}%`);
},
tempSubdir: `reupload_${Date.now()}`,
});
console.log(`Uploaded as message ${result.destMessageId}`);
```
View File
@@ -0,0 +1,5 @@
{
"total_tokens": 54587,
"duration_ms": 146695,
"total_duration_seconds": 146.7
}
File diff suppressed because one or more lines are too long
View File
@@ -0,0 +1,301 @@
---
name: tdlib-telegram
description: >
Reference guide for building Telegram integrations with TDLib (tdl/node).
Covers rate limits, FLOOD_WAIT handling, file size constraints, bot vs user account
differences, and safe code patterns. Use this skill whenever writing or modifying code
that calls Telegram APIs via TDLib, the Bot API, or any Telegram client library — including
sending messages, downloading/uploading files, scanning channels, forwarding messages,
managing subscriptions, or handling notifications. Also use when debugging 429 errors,
FLOOD_WAIT, or silent message drops.
---
# TDLib / Telegram Development Guide
This skill provides the rate limits, constraints, and patterns you need to write correct
Telegram integrations. The limits below come from official Telegram documentation and
well-established community findings (Telegram does not publish exact numbers for all limits).
## Telegram Rate Limits
These are approximate safe boundaries. Telegram's actual limits are dynamic and depend on
account age, history, and request type. The correct strategy is to respect these as guidelines
and always handle FLOOD_WAIT errors gracefully.
### Bot Accounts
| Operation | Limit | Notes |
|-----------|-------|-------|
| Messages to same chat | ~1 msg/sec | Bursts OK; sustained sending trips the limit |
| Messages in a group | 20 msgs/min | Hard limit per group chat |
| Bulk notifications (different users) | ~30 msgs/sec | Global across all chats |
| Message edits in a group | ~20 edits/min | Community-observed |
| API requests (global) | ~30 req/sec | All request types combined |
| Paid broadcasts | up to 1000 msgs/sec | Requires Telegram Stars balance |
### User Accounts (TDLib)
| Operation | Limit | Notes |
|-----------|-------|-------|
| API requests (global) | ~30 req/sec | All request types combined |
| Messages in a group | ~20 msgs/min | Same as bot |
| Channel history reads | No published limit | But pagination + delay is essential |
| Joining groups | Very strict | FLOOD_WAIT often 30-300+ seconds |
### File Size Limits
| Context | Upload | Download |
|---------|--------|----------|
| Bot API (standard) | 50 MB | 20 MB |
| Bot API (local server) | 2,000 MB | 2,000 MB |
| User account (TDLib) | 2 GB | 2 GB |
| Premium user (TDLib) | 4 GB | 4 GB |
### Message & Content Limits
| Item | Limit |
|------|-------|
| Message text length | 4,096 chars |
| Media caption | 1,024 chars (4,096 premium) |
| Album / media group | 10 items max |
| Forwarded messages per request | `forwarded_message_count_max` (TDLib option) |
| Inline keyboard buttons | 100 entities |
| Formatting entities per message | 100 |
| Scheduled messages per chat | 100 |
| Bot commands | 100 max |
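To make the 4,096-character cap concrete, a sender can chunk oversized text before dispatch. A rough sketch, reusing the `withFloodWait` and `sleep` helpers defined later in this guide, with `sendTextMessage` standing in for whatever send primitive the codebase exposes (naive slicing can split surrogate pairs or formatting entities, so production code should split on safe boundaries):
```typescript
const MESSAGE_TEXT_MAX = 4096;

// Illustrative only: chunk long text to respect the message length limit.
async function sendLongText(chatId: bigint, text: string): Promise<void> {
  for (let offset = 0; offset < text.length; offset += MESSAGE_TEXT_MAX) {
    const chunk = text.slice(offset, offset + MESSAGE_TEXT_MAX);
    await withFloodWait(() => sendTextMessage(chatId, chunk));
    await sleep(50); // stay under the per-chat rate limit
  }
}
```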
### Forum & Group Limits
| Item | Limit |
|------|-------|
| Topics per group | 1,000,000 |
| Topic title | 128 chars |
| Group members | 200,000 |
| Admins per group | 50 |
| Bots per group | 20 |
| Pinned topics | 5 |
## FLOOD_WAIT — How It Works
When you exceed rate limits, Telegram returns a `FLOOD_WAIT_X` error (or HTTP 429 with
`retry_after`). This is a **mandatory pause** — the value `X` is the number of seconds you
must wait before ANY request will succeed. It blocks the entire client, not just the
operation that triggered it.
### The Right Way to Handle It
```typescript
// Extract the wait duration from the error
function extractFloodWaitSeconds(err: unknown): number | null {
const message = err instanceof Error ? err.message : String(err);
// Pattern 1: FLOOD_WAIT_30
const flood = message.match(/FLOOD_WAIT_(\d+)/i);
if (flood) return parseInt(flood[1], 10);
// Pattern 2: "retry after 30"
const retry = message.match(/retry after (\d+)/i);
if (retry) return parseInt(retry[1], 10);
// Pattern 3: HTTP 429 without explicit seconds
if (String((err as any)?.code) === "429") return 30;
return null; // Not a rate limit error
}
// Minimal sleep helper used by the retry loop
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
// Wrap any TDLib call with automatic retry
async function withFloodWait<T>(fn: () => Promise<T>, maxRetries = 5): Promise<T> {
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
return await fn();
} catch (err) {
const wait = extractFloodWaitSeconds(err);
if (wait === null || attempt >= maxRetries) throw err;
// Add 1-5s jitter to prevent thundering herd
const jitter = 1000 + Math.random() * 4000;
await sleep(wait * 1000 + jitter);
}
}
throw new Error("Unreachable");
}
```
### Key Rules
- **Always respect the wait duration.** Never retry before `retry_after` expires.
- **Add jitter.** Without it, multiple clients retry simultaneously and trigger another FLOOD_WAIT.
- **Non-rate-limit errors should fail fast.** Only retry on FLOOD_WAIT, not on other errors.
- **Don't artificially throttle below ~1 req/sec.** Telegram's own guidance (via grammY docs)
is to send requests as fast as you need and handle 429 errors. Fixed low-frequency throttling
wastes throughput without preventing floods.
## Code Patterns
### Pattern: Sequential Send Queue
When sending notifications to multiple users, use a sequential queue with a per-message delay.
Never fire concurrent sends — you will hit the 30 msg/sec global limit instantly.
```typescript
let sendQueue: Promise<void> = Promise.resolve();
function queueSend(chatId: bigint, text: string): void {
sendQueue = sendQueue
.then(() => withFloodWait(() => sendTextMessage(chatId, text)))
.then(() => sleep(50)) // ~20 msgs/sec, well under 30 limit
.catch((err) => log.error({ err, chatId }, "Send failed"));
}
```
### Pattern: Paginated Scanning with Delay
When reading channel history or enumerating topics, always add a delay between pages:
```typescript
while (hasMorePages) {
const result = await invokeWithTimeout(client, { _: "getChatHistory", ... });
processMessages(result.messages);
if (result.messages.length < limit) break;
await sleep(1000); // 1 second between pages — prevents FLOOD_WAIT on large channels
}
```
### Pattern: Event Listener Before Action
When waiting for TDLib async events (upload confirmation, download completion), always
attach the event listener BEFORE starting the operation. If you attach after, fast
operations can complete before the listener exists, causing the promise to hang forever.
```typescript
// CORRECT: listener first, then action
client.on("update", handleUpdate);
const tempMsg = await client.invoke({ _: "sendMessage", ... });
tempMsgId = tempMsg.id; // handler now knows which message to match
// WRONG: action first, then listener — race condition!
const tempMsg = await client.invoke({ _: "sendMessage", ... });
client.on("update", handleUpdate); // may miss updateMessageSendSucceeded
```
### Pattern: Download/Upload Timeouts
Scale timeouts with file size. TDLib downloads/uploads are asynchronous — without a timeout,
a stalled transfer hangs the entire pipeline.
```typescript
const timeoutMs = Math.max(
10 * 60_000, // minimum 10 minutes
(fileSizeMB / 1024) * 10 * 60_000 // 10 minutes per GB
);
```
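One way to enforce that timeout is to race the operation against a timer. This is a sketch, not necessarily how the codebase's `invokeWithTimeout` is implemented:
```typescript
// Sketch: fail a stalled transfer instead of hanging the pipeline.
async function withTimeout<T>(op: Promise<T>, ms: number, label: string): Promise<T> {
  let timer: NodeJS.Timeout | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(`${label} timed out after ${ms}ms`)), ms);
  });
  try {
    return await Promise.race([op, timeout]);
  } finally {
    clearTimeout(timer); // always release the timer
  }
}
```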
### Pattern: TDLib Client Lifecycle
Always close TDLib clients in a `finally` block. Unclosed clients leak memory and file
descriptors, and can leave TDLib's internal database locked.
```typescript
const client = await createTdlibClient(account);
try {
// ... use client ...
} finally {
await closeTdlibClient(client);
}
```
## Anti-Patterns
### Never: Concurrent TDLib Sends Without Queue
```typescript
// BAD: fires all sends concurrently — will trigger FLOOD_WAIT immediately
await Promise.all(users.map((u) => sendTextMessage(u.chatId, msg)));
// GOOD: sequential with delay
for (const user of users) {
await withFloodWait(() => sendTextMessage(user.chatId, msg));
await sleep(50);
}
```
### Never: Bare client.invoke() Without Retry
Every `client.invoke()` call can return FLOOD_WAIT at any time. Bare calls will crash
on rate limits instead of retrying.
```typescript
// BAD: crashes on FLOOD_WAIT
await client.invoke({ _: "sendMessage", ... });
// GOOD: retries automatically
await withFloodWait(() => client.invoke({ _: "sendMessage", ... }));
```
### Never: Retry Without Respecting retry_after
```typescript
// BAD: fixed 1-second retry ignores Telegram's wait requirement
catch (err) { await sleep(1000); retry(); }
// GOOD: extract and respect the actual wait time
catch (err) {
const wait = extractFloodWaitSeconds(err);
if (wait !== null) await sleep(wait * 1000 + jitter);
else throw err;
}
```
### Never: Ignore FLOOD_WAIT in Bots
Bot accounts get the same FLOOD_WAIT as user accounts. The bot API's 429 response
blocks ALL operations for the specified duration — not just the chat that triggered it.
A single unhandled flood in a notification loop can make the entire bot unresponsive.
## Bot vs User Account Differences
| Capability | Bot | User (TDLib) |
|-----------|-----|-------------|
| Read channel history | No (unless admin) | Yes |
| Send to users who haven't started bot | No | N/A |
| Join groups via invite link | No (must be added) | Yes |
| Forward messages (send_copy) | Yes | Yes |
| File upload limit | 50 MB (standard API) | 2 GB |
| File download limit | 20 MB (standard API) | 2 GB |
| Auth method | Bot token | Phone + SMS code |
| Rate limit profile | Same FLOOD_WAIT | Same FLOOD_WAIT |
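At the code level, the main difference shows up at authentication. A sketch using tdl's documented login helpers (`botClient`, `userClient`, and `promptForSmsCode` are placeholders; verify callback names against the installed tdl version):
```typescript
// Bot account: the token is the only credential.
await botClient.loginAsBot(process.env.BOT_TOKEN!);

// User account: phone number plus an interactively supplied code.
await userClient.login(() => ({
  getPhoneNumber: async () => process.env.TG_PHONE!,
  getAuthCode: async () => promptForSmsCode(), // hypothetical prompt helper
}));
```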
## TDLib-Specific Notes
### BigInt Chat IDs
TDLib uses numeric chat IDs. Supergroups and channels use negative IDs (e.g., `-1001234567890`).
When passing to `client.invoke()`, convert with `Number(chatId)` — TDLib's JSON interface
doesn't handle BigInt. Be aware that very large IDs may lose precision with `Number()`,
though current Telegram IDs are within safe integer range.
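A defensive conversion helper makes precision loss loud instead of silent. A minimal sketch:
```typescript
// Hypothetical guard: refuse to truncate IDs outside Number's safe range.
function toTdlibChatId(chatId: bigint): number {
  const max = BigInt(Number.MAX_SAFE_INTEGER);
  if (chatId > max || chatId < -max) {
    throw new Error(`Chat ID ${chatId} exceeds Number's safe integer range`);
  }
  return Number(chatId);
}
```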
### TDLib Options (Runtime Queryable)
These are read-only values you can query at runtime via `getOption`:
- `message_text_length_max` — max message text length
- `message_caption_length_max` — max caption length
- `forwarded_message_count_max` — max forwards per request
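Querying one of these at runtime looks roughly like this; the cast reflects TDLib's `optionValue` union, and exact typings depend on the tdl version:
```typescript
import type { Client } from "tdl";

// Sketch: read a runtime limit instead of hard-coding it.
async function getMaxMessageLength(client: Client): Promise<number> {
  const opt = (await client.invoke({
    _: "getOption",
    name: "message_text_length_max",
  })) as { _: string; value?: number | string };
  // Integers arrive as optionValueInteger; fall back to the documented default.
  return opt._ === "optionValueInteger" ? Number(opt.value) : 4096;
}
```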
### Session State
TDLib persists session state to disk. Each account needs its own state directory.
Running two clients on the same state directory simultaneously will corrupt the database.
Use separate directories per account, and separate volumes in Docker for worker vs bot.
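A sketch of per-account isolation, assuming tdl's `databaseDirectory`/`filesDirectory` options (env var names are illustrative):
```typescript
import path from "path";
import tdl from "tdl";

// One state directory per account; never share these between clients.
function createClientForAccount(accountId: string) {
  return tdl.createClient({
    apiId: Number(process.env.TELEGRAM_API_ID),   // illustrative env names
    apiHash: process.env.TELEGRAM_API_HASH!,
    databaseDirectory: path.join("/data/tdlib", accountId, "db"),
    filesDirectory: path.join("/data/tdlib", accountId, "files"),
  });
}
```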
## Docker Considerations
- **prebuilt-tdlib**: The `prebuilt-tdlib` npm package provides platform-specific TDLib
binaries. Container base image must match (e.g., `node:20-bookworm-slim` for Debian x64).
- **Volumes**: Mount persistent volumes for TDLib state directories — losing state forces
full re-authentication.
- **Graceful shutdown**: Wait for active operations to finish before closing DB connections.
TDLib operations in flight will fail if the database pool is closed underneath them.
- **Health checks**: TDLib services don't expose HTTP — use database connectivity as the
health signal instead (a minimal probe sketch follows this list).
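A minimal probe along those lines, assuming a Postgres `pg` pool and a `DATABASE_URL` env var; adapt to the actual driver:
```typescript
import { Pool } from "pg";

const pool = new Pool({ connectionString: process.env.DATABASE_URL });

// Exits non-zero when the database is unreachable, so a Docker
// HEALTHCHECK (e.g., `CMD node healthcheck.js`) marks the container unhealthy.
pool
  .query("SELECT 1")
  .then(() => process.exit(0))
  .catch(() => process.exit(1));
```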
View File
@@ -0,0 +1,23 @@
{
"skill_name": "tdlib-telegram",
"evals": [
{
"id": 1,
"prompt": "Add a new bot command /broadcast that sends a text message to ALL users who have a TelegramLink in the database. The admin triggers it from the web app. Add it to the bot's command handler and create an API endpoint that triggers it.",
"expected_output": "Code that uses a sequential send queue with withFloodWait wrapping each sendTextMessage call, a delay between sends (~50ms), and does NOT use Promise.all or concurrent sends. Should handle errors per-user without stopping the broadcast.",
"files": []
},
{
"id": 2,
"prompt": "The worker keeps crashing with 'FLOOD_WAIT_35' errors when scanning a source channel that has about 10,000 messages. It happens during the getChannelMessages pagination loop. How do I fix this?",
"expected_output": "Diagnosis that the apiDelayMs between pages may be too low or the retry logic isn't wrapping the pagination calls. Should recommend ensuring all getChatHistory/searchChatMessages calls go through withFloodWait/invokeWithTimeout, and that sleep(config.apiDelayMs) exists between pages. Should NOT suggest reducing scan frequency as the primary fix.",
"files": []
},
{
"id": 3,
"prompt": "Write a utility function for the worker that downloads a file from a source Telegram channel message and re-uploads it to the destination channel. It needs to handle files up to 2GB and work reliably in Docker.",
"expected_output": "Code that: (1) wraps download in withFloodWait with size-scaled timeout, (2) attaches upload event listener BEFORE calling sendMessage, (3) uses temp directory with cleanup in finally block, (4) handles the 2GB Telegram limit correctly, (5) uses try/finally for client cleanup if applicable.",
"files": []
}
]
}
View File
@@ -8,6 +8,7 @@ import {
getGlobalDestinationChannel,
} from "./db/queries.js";
import { copyMessageToUser, sendTextMessage, sendPhotoMessage } from "./tdlib/client.js";
import { sleep } from "./util/flood-wait.js";
const log = childLogger("send-listener");
@@ -200,6 +201,9 @@ async function handleNewPackage(payload: string): Promise<void> {
"Failed to notify subscriber"
);
});
// Rate limit delay between notifications (~20 msgs/sec, under 30 msgs/sec bot limit)
await sleep(50);
}
} catch (err) {
log.error({ err, payload }, "Failed to process new_package notification");
View File
@@ -2,6 +2,7 @@ import tdl from "tdl";
import { getTdjson } from "prebuilt-tdlib";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import { withFloodWait } from "../util/flood-wait.js";
const log = childLogger("tdlib-bot");
@@ -66,14 +67,18 @@ export async function copyMessageToUser(
): Promise<void> {
if (!client) throw new Error("Bot client not initialized");
await client.invoke({
_: "forwardMessages",
chat_id: Number(toUserId),
from_chat_id: Number(fromChatId),
message_ids: [Number(messageId)],
send_copy: true,
remove_caption: false,
});
await withFloodWait(
() =>
client.invoke({
_: "forwardMessages",
chat_id: Number(toUserId),
from_chat_id: Number(fromChatId),
message_ids: [Number(messageId)],
send_copy: true,
remove_caption: false,
}),
"copyMessageToUser"
);
}
/**
@@ -87,20 +92,28 @@ export async function sendTextMessage(
if (!client) throw new Error("Bot client not initialized");
// Parse the text first
const parsed = await client.invoke({
_: "parseTextEntities",
text,
parse_mode: { _: parseMode, version: parseMode === "textParseModeMarkdown" ? 2 : 0 },
});
const parsed = await withFloodWait(
() =>
client.invoke({
_: "parseTextEntities",
text,
parse_mode: { _: parseMode, version: parseMode === "textParseModeMarkdown" ? 2 : 0 },
}),
"parseTextEntities"
);
await client.invoke({
_: "sendMessage",
chat_id: Number(chatId),
input_message_content: {
_: "inputMessageText",
text: parsed,
},
});
await withFloodWait(
() =>
client.invoke({
_: "sendMessage",
chat_id: Number(chatId),
input_message_content: {
_: "inputMessageText",
text: parsed,
},
}),
"sendTextMessage"
);
}
/**
@@ -121,23 +134,31 @@ export async function sendPhotoMessage(
try {
await writeFile(tempPath, photoData);
const parsedCaption = await client.invoke({
_: "parseTextEntities",
text: caption,
parse_mode: { _: "textParseModeMarkdown", version: 2 },
});
const parsedCaption = await withFloodWait(
() =>
client.invoke({
_: "parseTextEntities",
text: caption,
parse_mode: { _: "textParseModeMarkdown", version: 2 },
}),
"parsePhotoCaption"
);
await client.invoke({
_: "sendMessage",
chat_id: Number(chatId),
input_message_content: {
_: "inputMessagePhoto",
photo: { _: "inputFileLocal", path: tempPath },
caption: parsedCaption,
width: 0,
height: 0,
},
});
await withFloodWait(
() =>
client.invoke({
_: "sendMessage",
chat_id: Number(chatId),
input_message_content: {
_: "inputMessagePhoto",
photo: { _: "inputFileLocal", path: tempPath },
caption: parsedCaption,
width: 0,
height: 0,
},
}),
"sendPhotoMessage"
);
} finally {
await unlink(tempPath).catch(() => {});
}
@@ -150,10 +171,14 @@ export async function getUser(
userId: number
): Promise<{ firstName: string; lastName?: string; username?: string }> {
if (!client) throw new Error("Bot client not initialized");
const user = (await client.invoke({
_: "getUser",
user_id: userId,
})) as {
const user = (await withFloodWait(
() =>
client.invoke({
_: "getUser",
user_id: userId,
}),
"getUser"
)) as {
first_name?: string;
last_name?: string;
usernames?: { editable_username?: string };
View File
@@ -0,0 +1,60 @@
import { childLogger } from "./logger.js";
const log = childLogger("flood-wait");
function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
* Extract the mandatory wait duration (in seconds) from a Telegram
* FLOOD_WAIT error. Returns null when the error is not rate-limit related.
*/
export function extractFloodWaitSeconds(err: unknown): number | null {
const message = err instanceof Error ? err.message : String(err);
// Pattern 1: FLOOD_WAIT_30
const flood = message.match(/FLOOD_WAIT_(\d+)/i);
if (flood) return parseInt(flood[1], 10);
// Pattern 2: "retry after 30"
const retry = message.match(/retry after (\d+)/i);
if (retry) return parseInt(retry[1], 10);
// Pattern 3: HTTP 429 without explicit seconds
// eslint-disable-next-line @typescript-eslint/no-explicit-any
if (String((err as any)?.code) === "429") return 30;
return null;
}
/**
* Wrap any async Telegram operation with automatic FLOOD_WAIT retry.
* Adds random jitter (1-5s) to prevent thundering-herd retries.
*
* Non-rate-limit errors are re-thrown immediately (fail-fast).
*/
export async function withFloodWait<T>(
fn: () => Promise<T>,
context?: string,
maxRetries = 5
): Promise<T> {
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
return await fn();
} catch (err) {
const wait = extractFloodWaitSeconds(err);
if (wait === null || attempt >= maxRetries) throw err;
const jitter = 1000 + Math.random() * 4000;
log.warn(
{ context, wait, attempt: attempt + 1, maxRetries, jitter: Math.round(jitter) },
"FLOOD_WAIT received — backing off"
);
await sleep(wait * 1000 + jitter);
}
}
throw new Error("Unreachable");
}
export { sleep };
src/lib/fonts.ts Normal file
View File
@@ -0,0 +1,6 @@
import { Inter } from "next/font/google";
export const inter = Inter({
subsets: ["latin"],
variable: "--font-inter",
});