mirror of
https://github.com/xCyanGrizzly/DragonsStash.git
synced 2026-05-10 22:01:16 +00:00
perf: set watermarks even when no archives found to prevent re-scanning
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
Previously, channels/topics with no new archives never had their watermark updated. This meant every cycle re-scanned all messages from scratch just to discover nothing new — especially costly for the 1079-topic Model Printing Emporium forum. - Add maxScannedMessageId to ChannelScanResult (highest msg ID seen) - Set channel watermark to scan boundary when no archives are found - Set topic watermark to scan boundary when no archives are found - Fall back to scan watermark when archive processing doesn't advance it After one full cycle, subsequent cycles will skip already-scanned messages via the early-exit boundary check, dramatically reducing TDLib API calls on channels with mostly non-archive content. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -79,6 +79,8 @@ export interface ChannelScanResult {
|
|||||||
archives: TelegramMessage[];
|
archives: TelegramMessage[];
|
||||||
photos: TelegramPhoto[];
|
photos: TelegramPhoto[];
|
||||||
totalScanned: number;
|
totalScanned: number;
|
||||||
|
/** Highest message ID seen during scan (for watermark, even when no archives found). */
|
||||||
|
maxScannedMessageId: bigint | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export type ScanProgressCallback = (messagesScanned: number) => void;
|
export type ScanProgressCallback = (messagesScanned: number) => void;
|
||||||
@@ -158,6 +160,7 @@ export async function getChannelMessages(
|
|||||||
const archives: TelegramMessage[] = [];
|
const archives: TelegramMessage[] = [];
|
||||||
const photos: TelegramPhoto[] = [];
|
const photos: TelegramPhoto[] = [];
|
||||||
const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
|
const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
|
||||||
|
let maxScannedMessageId: bigint | null = null;
|
||||||
|
|
||||||
// Open the chat so TDLib can access it
|
// Open the chat so TDLib can access it
|
||||||
try {
|
try {
|
||||||
@@ -204,6 +207,12 @@ export async function getChannelMessages(
|
|||||||
|
|
||||||
totalScanned += result.messages.length;
|
totalScanned += result.messages.length;
|
||||||
|
|
||||||
|
// Track highest message ID (first message in batch = newest, since results are newest-first)
|
||||||
|
const batchMaxId = BigInt(result.messages[0].id);
|
||||||
|
if (maxScannedMessageId === null || batchMaxId > maxScannedMessageId) {
|
||||||
|
maxScannedMessageId = batchMaxId;
|
||||||
|
}
|
||||||
|
|
||||||
for (const msg of result.messages) {
|
for (const msg of result.messages) {
|
||||||
// Check for archive documents
|
// Check for archive documents
|
||||||
const doc = msg.content?.document;
|
const doc = msg.content?.document;
|
||||||
@@ -271,6 +280,7 @@ export async function getChannelMessages(
|
|||||||
archives: archives.reverse(),
|
archives: archives.reverse(),
|
||||||
photos: photos.reverse(),
|
photos: photos.reverse(),
|
||||||
totalScanned,
|
totalScanned,
|
||||||
|
maxScannedMessageId,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -178,6 +178,7 @@ export async function getTopicMessages(
|
|||||||
const archives: TelegramMessage[] = [];
|
const archives: TelegramMessage[] = [];
|
||||||
const photos: TelegramPhoto[] = [];
|
const photos: TelegramPhoto[] = [];
|
||||||
const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
|
const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
|
||||||
|
let maxScannedMessageId: bigint | null = null;
|
||||||
|
|
||||||
let currentFromId = 0;
|
let currentFromId = 0;
|
||||||
let totalScanned = 0;
|
let totalScanned = 0;
|
||||||
@@ -239,6 +240,12 @@ export async function getTopicMessages(
|
|||||||
|
|
||||||
totalScanned += result.messages.length;
|
totalScanned += result.messages.length;
|
||||||
|
|
||||||
|
// Track highest message ID (first message = newest, since results are newest-first)
|
||||||
|
const batchMaxId = BigInt(result.messages[0].id);
|
||||||
|
if (maxScannedMessageId === null || batchMaxId > maxScannedMessageId) {
|
||||||
|
maxScannedMessageId = batchMaxId;
|
||||||
|
}
|
||||||
|
|
||||||
for (const msg of result.messages) {
|
for (const msg of result.messages) {
|
||||||
// Check for archive documents
|
// Check for archive documents
|
||||||
const doc = msg.content?.document;
|
const doc = msg.content?.document;
|
||||||
@@ -302,6 +309,7 @@ export async function getTopicMessages(
|
|||||||
archives: archives.reverse(),
|
archives: archives.reverse(),
|
||||||
photos: photos.reverse(),
|
photos: photos.reverse(),
|
||||||
totalScanned,
|
totalScanned,
|
||||||
|
maxScannedMessageId,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -538,6 +538,15 @@ export async function runWorkerForAccount(
|
|||||||
{ channelId: channel.id, topic: topic.name, totalScanned: scanResult.totalScanned },
|
{ channelId: channel.id, topic: topic.name, totalScanned: scanResult.totalScanned },
|
||||||
"No new archives in topic"
|
"No new archives in topic"
|
||||||
);
|
);
|
||||||
|
// Still advance topic watermark so we don't re-scan these messages next cycle
|
||||||
|
if (scanResult.maxScannedMessageId) {
|
||||||
|
await upsertTopicProgress(
|
||||||
|
mapping.id,
|
||||||
|
topic.topicId,
|
||||||
|
topic.name,
|
||||||
|
scanResult.maxScannedMessageId
|
||||||
|
);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -555,13 +564,14 @@ export async function runWorkerForAccount(
|
|||||||
// Sync client back in case it was recreated during upload stall recovery
|
// Sync client back in case it was recreated during upload stall recovery
|
||||||
client = pipelineCtx.client;
|
client = pipelineCtx.client;
|
||||||
|
|
||||||
// Only advance progress to the highest successfully processed message
|
// Advance progress: use archive watermark if available, fall back to scan watermark
|
||||||
if (maxProcessedId) {
|
const topicWatermark = maxProcessedId ?? scanResult.maxScannedMessageId;
|
||||||
|
if (topicWatermark) {
|
||||||
await upsertTopicProgress(
|
await upsertTopicProgress(
|
||||||
mapping.id,
|
mapping.id,
|
||||||
topic.topicId,
|
topic.topicId,
|
||||||
topic.name,
|
topic.name,
|
||||||
maxProcessedId
|
topicWatermark
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} catch (topicErr) {
|
} catch (topicErr) {
|
||||||
@@ -611,6 +621,11 @@ export async function runWorkerForAccount(
|
|||||||
|
|
||||||
if (scanResult.archives.length === 0) {
|
if (scanResult.archives.length === 0) {
|
||||||
accountLog.info({ channelId: channel.id, title: channel.title, totalScanned: scanResult.totalScanned }, "No new archives in channel");
|
accountLog.info({ channelId: channel.id, title: channel.title, totalScanned: scanResult.totalScanned }, "No new archives in channel");
|
||||||
|
// Still advance watermark to highest scanned message so we don't
|
||||||
|
// re-scan these messages next cycle
|
||||||
|
if (scanResult.maxScannedMessageId) {
|
||||||
|
await updateLastProcessedMessage(mapping.id, scanResult.maxScannedMessageId);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -628,9 +643,10 @@ export async function runWorkerForAccount(
|
|||||||
// Sync client back in case it was recreated during upload stall recovery
|
// Sync client back in case it was recreated during upload stall recovery
|
||||||
client = pipelineCtx.client;
|
client = pipelineCtx.client;
|
||||||
|
|
||||||
// Only advance progress to the highest successfully processed message
|
// Advance progress: use archive watermark if available, fall back to scan watermark
|
||||||
if (maxProcessedId) {
|
const channelWatermark = maxProcessedId ?? scanResult.maxScannedMessageId;
|
||||||
await updateLastProcessedMessage(mapping.id, maxProcessedId);
|
if (channelWatermark) {
|
||||||
|
await updateLastProcessedMessage(mapping.id, channelWatermark);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (channelErr) {
|
} catch (channelErr) {
|
||||||
|
|||||||
Reference in New Issue
Block a user