mirror of
https://github.com/xCyanGrizzly/DragonsStash.git
synced 2026-05-11 14:21:15 +00:00
feat: add Telegram integration with forum topic support and creator tracking
Adds full Telegram ZIP ingestion pipeline: TDLib worker service scans source channels for archive files, deduplicates by content hash, extracts metadata, uploads to archive channel, and indexes in Postgres. Forum supergroups are scanned per-topic with topic names used as creator. Filename-based creator extraction (e.g. "Mammoth Factory - 2026-01.zip") serves as fallback. Includes admin UI for managing accounts/channels, simplified account setup (API credentials via env vars), auth code/password submission dialog, package browser with creator column, and live ingestion activity tracking. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
100
worker/src/archive/multipart.ts
Normal file
100
worker/src/archive/multipart.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
import { detectArchive, type ArchiveFormat, type MultipartInfo } from "./detect.js";
|
||||
import { config } from "../util/config.js";
|
||||
import { childLogger } from "../util/logger.js";
|
||||
|
||||
const log = childLogger("multipart");
|
||||
|
||||
export interface TelegramMessage {
|
||||
id: bigint;
|
||||
fileName: string;
|
||||
fileId: string;
|
||||
fileSize: bigint;
|
||||
date: Date;
|
||||
}
|
||||
|
||||
export interface ArchiveSet {
|
||||
type: ArchiveFormat;
|
||||
baseName: string;
|
||||
parts: TelegramMessage[];
|
||||
isMultipart: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Group messages into archive sets (single files + multipart groups).
|
||||
* Messages should be pre-filtered to only include archive attachments.
|
||||
*/
|
||||
export function groupArchiveSets(messages: TelegramMessage[]): ArchiveSet[] {
|
||||
// Detect and annotate each message
|
||||
const annotated: { msg: TelegramMessage; info: MultipartInfo }[] = [];
|
||||
for (const msg of messages) {
|
||||
const info = detectArchive(msg.fileName);
|
||||
if (info) {
|
||||
annotated.push({ msg, info });
|
||||
}
|
||||
}
|
||||
|
||||
// Group by baseName + format
|
||||
const groups = new Map<string, { msg: TelegramMessage; info: MultipartInfo }[]>();
|
||||
for (const item of annotated) {
|
||||
const key = `${item.info.format}:${item.info.baseName.toLowerCase()}`;
|
||||
const group = groups.get(key) ?? [];
|
||||
group.push(item);
|
||||
groups.set(key, group);
|
||||
}
|
||||
|
||||
const results: ArchiveSet[] = [];
|
||||
|
||||
for (const [, group] of groups) {
|
||||
const format = group[0].info.format;
|
||||
const baseName = group[0].info.baseName;
|
||||
|
||||
// Separate explicit multipart entries from potential singles
|
||||
const multipartEntries = group.filter((g) => g.info.pattern !== "SINGLE");
|
||||
const singleEntries = group.filter((g) => g.info.pattern === "SINGLE");
|
||||
|
||||
if (multipartEntries.length > 0) {
|
||||
// This is a multipart set
|
||||
// Check if any single entry is the "final part" of a legacy split
|
||||
const allEntries = [...multipartEntries, ...singleEntries];
|
||||
|
||||
// Check time span — skip if parts span too long
|
||||
const dates = allEntries.map((e) => e.msg.date.getTime());
|
||||
const span = Math.max(...dates) - Math.min(...dates);
|
||||
const maxSpanMs = config.multipartTimeoutHours * 60 * 60 * 1000;
|
||||
|
||||
if (span > maxSpanMs) {
|
||||
log.warn(
|
||||
{ baseName, format, span: span / 3600000 },
|
||||
"Multipart set spans too long, skipping"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Sort by part number (singles get a very high number so they come last — they're the final part)
|
||||
allEntries.sort((a, b) => {
|
||||
const aNum = a.info.partNumber === -1 ? 999999 : a.info.partNumber;
|
||||
const bNum = b.info.partNumber === -1 ? 999999 : b.info.partNumber;
|
||||
return aNum - bNum;
|
||||
});
|
||||
|
||||
results.push({
|
||||
type: format,
|
||||
baseName,
|
||||
parts: allEntries.map((e) => e.msg),
|
||||
isMultipart: true,
|
||||
});
|
||||
} else {
|
||||
// All entries are singles — each is its own archive set
|
||||
for (const entry of singleEntries) {
|
||||
results.push({
|
||||
type: format,
|
||||
baseName: entry.info.baseName,
|
||||
parts: [entry.msg],
|
||||
isMultipart: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
Reference in New Issue
Block a user