feat: add Telegram integration with forum topic support and creator tracking

Adds full Telegram ZIP ingestion pipeline: TDLib worker service scans source
channels for archive files, deduplicates by content hash, extracts metadata,
uploads to archive channel, and indexes in Postgres. Forum supergroups are
scanned per-topic with topic names used as creator. Filename-based creator
extraction (e.g. "Mammoth Factory - 2026-01.zip") serves as fallback.

Includes admin UI for managing accounts/channels, simplified account setup
(API credentials via env vars), auth code/password submission dialog,
package browser with creator column, and live ingestion activity tracking.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
xCyanGrizzly
2026-02-24 16:02:06 +01:00
parent beb9cfb312
commit b427193d17
70 changed files with 8627 additions and 2 deletions

View File

@@ -0,0 +1,86 @@
import { childLogger } from "../util/logger.js";
// Module-scoped logger for this file, created via childLogger under the "preview-match" name.
const log = childLogger("preview-match");
/**
 * A photo message that may serve as the preview image for an archive.
 */
export interface TelegramPhoto {
/** Identifier of the photo (presumably the Telegram message id — confirm); logged as a string when a match is found. */
id: bigint;
/** Timestamp of the photo; compared against an archive's `firstMessageDate` when matching. */
date: Date;
/** Caption text on the photo message (if any). A caption that normalizes to an empty string never matches. */
caption: string;
/** The smallest photo size available — used as thumbnail. */
fileId: string;
/** NOTE(review): presumably the size in bytes of the file behind `fileId` — confirm against the producer. */
fileSize: number;
}
/**
 * Minimal description of an archive that preview photos are matched against.
 */
export interface ArchiveRef {
/** Archive name; normalized for comparison with photo captions and used verbatim as the key of the match result. */
baseName: string;
/** NOTE(review): presumably the id of the first message carrying the archive; not read by matchPreviewToArchive — confirm usage elsewhere. */
firstMessageId: bigint;
/** Timestamp the ±6 hour matching window is centered on. */
firstMessageDate: Date;
}
/**
 * Pair each archive with the preview photo that most likely belongs to it.
 *
 * A photo is a candidate for an archive when both hold:
 *  1. It was posted within ±6 hours of the archive's first message.
 *  2. After normalization, its caption contains the archive base name or the
 *     base name contains the caption.
 *
 * Among the candidates the photo closest in time wins; on an exact tie the
 * photo that appears first in `photos` is kept. Photos whose caption
 * normalizes to an empty string, and archives whose base name normalizes to
 * an empty string, are ignored. Message-id distance is not considered, and
 * the same photo may be selected for more than one archive.
 *
 * @param photos - Photo messages to consider as previews.
 * @param archives - Archives to find a preview for.
 * @returns A map from `archive.baseName` to the best-matching photo. Archives
 *   without a match have no entry. If several archives share a base name,
 *   the last one that found a match determines the entry.
 */
export function matchPreviewToArchive(
  photos: TelegramPhoto[],
  archives: ArchiveRef[]
): Map<string, TelegramPhoto> {
  const results = new Map<string, TelegramPhoto>();
  if (photos.length === 0 || archives.length === 0) return results;

  const TIME_WINDOW_MS = 6 * 60 * 60 * 1000; // 6 hours

  // Captions and timestamps do not depend on the archive being examined, so
  // normalize each photo once instead of once per (archive, photo) pair.
  // Input order is preserved so that time ties resolve to the earliest entry.
  const candidates = photos
    .map((photo) => ({
      photo,
      postedAt: photo.date.getTime(),
      caption: normalizeForMatch(photo.caption),
    }))
    .filter((candidate) => candidate.caption !== "");

  for (const archive of archives) {
    // Normalize the archive base name for matching
    const normalizedBase = normalizeForMatch(archive.baseName);
    if (!normalizedBase) continue;

    const archiveTime = archive.firstMessageDate.getTime();
    let bestMatch: TelegramPhoto | null = null;
    let bestTimeDiff = Infinity;

    for (const { photo, postedAt, caption } of candidates) {
      const timeDiff = Math.abs(postedAt - archiveTime);
      // Must be within time window
      if (timeDiff > TIME_WINDOW_MS) continue;

      // Accept containment in either direction: captions are often longer
      // than the file name, but may also be a shortened form of it.
      const matches =
        caption.includes(normalizedBase) || normalizedBase.includes(caption);

      // Strict `<` keeps the first photo when two are equally close in time.
      if (matches && timeDiff < bestTimeDiff) {
        bestMatch = photo;
        bestTimeDiff = timeDiff;
      }
    }

    if (bestMatch) {
      log.debug(
        { baseName: archive.baseName, photoId: bestMatch.id.toString() },
        "Matched preview photo to archive"
      );
      results.set(archive.baseName, bestMatch);
    }
  }

  return results;
}
/**
 * Reduce a file name or caption to a canonical form for fuzzy comparison.
 *
 * Steps, in order:
 *  1. Lower-case and trim surrounding whitespace.
 *  2. Drop one trailing extension-like suffix (a dot followed by 1–5 letters
 *     or digits).
 *  3. Collapse every run of `_`, `-`, `.` and whitespace into a single space.
 *  4. Trim again, since step 3 can leave a leading or trailing space.
 *
 * @param input - Raw archive base name or photo caption.
 * @returns The normalized text; an empty string when nothing but separators
 *   and/or an extension remains.
 */
function normalizeForMatch(input: string): string {
  return input
    .toLowerCase()
    // Trim before stripping: the extension pattern is anchored with `$`, so a
    // trailing space or newline would otherwise leave the extension in place.
    .trim()
    .replace(/\.[a-z0-9]{1,5}$/, "") // strip extension (input is already lower-case)
    .replace(/[_\-.\s]+/g, " ") // normalize separators
    .trim();
}