mirror of
https://github.com/xCyanGrizzly/DragonsStash.git
synced 2026-05-11 14:21:15 +00:00
feat: add Telegram integration with forum topic support and creator tracking
Adds full Telegram ZIP ingestion pipeline: TDLib worker service scans source channels for archive files, deduplicates by content hash, extracts metadata, uploads to archive channel, and indexes in Postgres. Forum supergroups are scanned per-topic with topic names used as creator. Filename-based creator extraction (e.g. "Mammoth Factory - 2026-01.zip") serves as fallback. Includes admin UI for managing accounts/channels, simplified account setup (API credentials via env vars), auth code/password submission dialog, package browser with creator column, and live ingestion activity tracking. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
86
worker/src/preview/match.ts
Normal file
86
worker/src/preview/match.ts
Normal file
@@ -0,0 +1,86 @@
|
||||
import { childLogger } from "../util/logger.js";
|
||||
|
||||
const log = childLogger("preview-match");
|
||||
|
||||
/**
 * A photo message considered as a possible preview image for an archive.
 */
export interface TelegramPhoto {
  /** Identifier of the photo; reported as `photoId` in the match log. */
  id: bigint;
  /** Timestamp of the photo; compared against the archive's first message date. */
  date: Date;
  /** Caption text on the photo message (if any). */
  caption: string;
  /** The smallest photo size available — used as thumbnail. */
  fileId: string;
  /** Size of the file referenced by `fileId` (presumably in bytes — confirm with the producer). */
  fileSize: number;
}
|
||||
|
||||
/**
 * Reference to an archive that a preview photo may be matched against.
 */
export interface ArchiveRef {
  /** Archive name used for caption matching and as the key of the result map. */
  baseName: string;
  /** Identifier of the archive's first message (not consulted by the matcher in this file). */
  firstMessageId: bigint;
  /** Date of the archive's first message; the reference point of the match time window. */
  firstMessageDate: Date;
}
|
||||
|
||||
/**
 * Match preview photos to archives.
 *
 * A photo is a candidate for an archive when both conditions hold:
 *   1. It was posted within ±6 hours of the archive's first message.
 *   2. After normalization, its caption contains the archive base name, or
 *      the archive base name contains its caption.
 *
 * Among the candidates the photo closest in time wins; on an exact tie the
 * photo that appears first in `photos` is kept. Archives whose normalized
 * base name is empty, and photos whose normalized caption is empty, are
 * skipped.
 *
 * NOTE: only the time window is enforced here; no message-count proximity
 * check is applied.
 *
 * @param photos - Candidate preview photos.
 * @param archives - Archives to find a preview for.
 * @returns Map from `archive.baseName` to its best-matching photo. Archives
 *   without a match are absent; archives sharing a `baseName` share one entry
 *   (the last one processed wins).
 */
export function matchPreviewToArchive(
  photos: TelegramPhoto[],
  archives: ArchiveRef[]
): Map<string, TelegramPhoto> {
  const results = new Map<string, TelegramPhoto>();
  const TIME_WINDOW_MS = 6 * 60 * 60 * 1000; // 6 hours

  // Caption normalization does not depend on the archive, so compute it at
  // most once per photo instead of once per (archive, photo) pair.
  const captionCache = new Map<TelegramPhoto, string>();
  const normalizedCaptionOf = (photo: TelegramPhoto): string => {
    let cached = captionCache.get(photo);
    if (cached === undefined) {
      cached = normalizeForMatch(photo.caption);
      captionCache.set(photo, cached);
    }
    return cached;
  };

  for (const archive of archives) {
    // Normalize the archive base name for matching
    const normalizedBase = normalizeForMatch(archive.baseName);
    if (!normalizedBase) continue;

    const archiveTime = archive.firstMessageDate.getTime();

    let bestMatch: TelegramPhoto | null = null;
    let bestTimeDiff = Infinity;

    for (const photo of photos) {
      const timeDiff = Math.abs(photo.date.getTime() - archiveTime);

      // Must be within time window
      if (timeDiff > TIME_WINDOW_MS) continue;

      // Check if the photo caption contains the archive base name
      const normalizedCaption = normalizedCaptionOf(photo);
      if (!normalizedCaption) continue;

      const matches =
        normalizedCaption.includes(normalizedBase) ||
        normalizedBase.includes(normalizedCaption);

      // Strict `<` keeps the earliest-listed photo when two are equally close.
      if (matches && timeDiff < bestTimeDiff) {
        bestMatch = photo;
        bestTimeDiff = timeDiff;
      }
    }

    if (bestMatch) {
      log.debug(
        { baseName: archive.baseName, photoId: bestMatch.id.toString() },
        "Matched preview photo to archive"
      );
      results.set(archive.baseName, bestMatch);
    }
  }

  return results;
}
|
||||
|
||||
/**
 * Normalize a file name or caption for fuzzy comparison.
 *
 * Steps, in order:
 *   1. Trim surrounding whitespace, so a trailing space or newline cannot
 *      hide a file extension from the end-anchored pattern below.
 *   2. Lower-case the text.
 *   3. Drop one trailing extension-like suffix: a dot followed by 1–5
 *      alphanumerics at the very end (note this also removes endings such
 *      as ".5" in "v1.5").
 *   4. Collapse every run of `_`, `-`, `.` and whitespace into one space.
 *   5. Trim again, since leading/trailing separators have become spaces.
 *
 * Other punctuation is left untouched.
 *
 * @param input - Raw archive name or caption text.
 * @returns The normalized text; empty when nothing but separators remained.
 */
function normalizeForMatch(input: string): string {
  return input
    .trim()
    .toLowerCase()
    .replace(/\.[a-z0-9]{1,5}$/i, "") // strip extension
    .replace(/[_\-.\s]+/g, " ") // normalize separators
    .trim();
}
|
||||
Reference in New Issue
Block a user