mirror of
https://github.com/xCyanGrizzly/DragonsStash.git
synced 2026-05-11 06:11:15 +00:00
feat: add Telegram integration with forum topic support and creator tracking
Adds full Telegram ZIP ingestion pipeline: TDLib worker service scans source channels for archive files, deduplicates by content hash, extracts metadata, uploads to archive channel, and indexes in Postgres. Forum supergroups are scanned per-topic with topic names used as creator. Filename-based creator extraction (e.g. "Mammoth Factory - 2026-01.zip") serves as fallback. Includes admin UI for managing accounts/channels, simplified account setup (API credentials via env vars), auth code/password submission dialog, package browser with creator column, and live ingestion activity tracking. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
96
worker/src/archive/detect.ts
Normal file
96
worker/src/archive/detect.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
/** Archive container formats that this module recognises from a file name. */
export type ArchiveFormat = "ZIP" | "RAR";
|
||||
|
||||
/**
 * Result of classifying an archive file name: which container format it is,
 * which naming scheme it follows, and where it sits inside a split set.
 */
export interface MultipartInfo {
  /** Container format implied by the file name. */
  format: ArchiveFormat;

  /**
   * Naming scheme the file name matched:
   * - `ZIP_NUMBERED` — `pack.zip.001`, `pack.zip.002`
   * - `ZIP_LEGACY`   — `pack.z01`, `pack.z02` (the final part is `pack.zip`)
   * - `RAR_PART`     — `pack.part1.rar`, `pack.part2.rar`
   * - `RAR_LEGACY`   — `pack.r00`, `pack.r01` (the final part is `pack.rar`)
   * - `SINGLE`       — a plain `.zip` / `.rar` file
   */
  pattern: "ZIP_NUMBERED" | "ZIP_LEGACY" | "RAR_PART" | "RAR_LEGACY" | "SINGLE";

  /**
   * File name with the part suffix removed. For `ZIP_NUMBERED` the `.zip`
   * extension is kept (`pack.zip`); for every other pattern it is the name
   * without the extension (`pack`).
   */
  baseName: string;

  /**
   * Part index parsed from the file name as a base-10 integer, or `-1` for
   * `SINGLE`, where the file may be standalone or the final part of a
   * legacy split set.
   */
  partNumber: number;
}
|
||||
|
||||
/**
 * Returns capture group `index` of `match`, throwing if that group did not
 * participate in the match.
 *
 * Every regex in `patterns` makes its groups mandatory, so the throw is an
 * invariant check rather than an expected path. Narrowing here keeps the
 * table type-safe when `noUncheckedIndexedAccess` is enabled, where
 * `match[n]` is typed `string | undefined`.
 */
function requireGroup(match: RegExpMatchArray, index: number): string {
  const value = match[index];
  if (value === undefined) {
    throw new Error(`Archive pattern is missing capture group ${index}`);
  }
  return value;
}

/**
 * Multipart naming schemes, listed in the order they are tried.
 *
 * Each entry pairs a case-insensitive regex that must match the whole file
 * name with the format/pattern it implies and two extractors that read the
 * base name and the base-10 part number out of the match.
 */
const patterns: readonly {
  regex: RegExp;
  format: ArchiveFormat;
  pattern: MultipartInfo["pattern"];
  getBaseName: (match: RegExpMatchArray) => string;
  getPartNumber: (match: RegExpMatchArray) => number;
}[] = [
  // pack.zip.001, pack.zip.002 — the base name keeps the ".zip" extension
  {
    regex: /^(.+\.zip)\.(\d{3,})$/i,
    format: "ZIP",
    pattern: "ZIP_NUMBERED",
    getBaseName: (m) => requireGroup(m, 1),
    getPartNumber: (m) => parseInt(requireGroup(m, 2), 10),
  },
  // pack.z01, pack.z02 (legacy split — final part is pack.zip)
  {
    regex: /^(.+)\.z(\d{2,})$/i,
    format: "ZIP",
    pattern: "ZIP_LEGACY",
    getBaseName: (m) => requireGroup(m, 1),
    getPartNumber: (m) => parseInt(requireGroup(m, 2), 10),
  },
  // pack.part1.rar, pack.part2.rar
  {
    regex: /^(.+)\.part(\d+)\.rar$/i,
    format: "RAR",
    pattern: "RAR_PART",
    getBaseName: (m) => requireGroup(m, 1),
    getPartNumber: (m) => parseInt(requireGroup(m, 2), 10),
  },
  // pack.r00, pack.r01 (legacy split — final part is pack.rar)
  {
    regex: /^(.+)\.r(\d{2,})$/i,
    format: "RAR",
    pattern: "RAR_LEGACY",
    getBaseName: (m) => requireGroup(m, 1),
    getPartNumber: (m) => parseInt(requireGroup(m, 2), 10),
  },
];
|
||||
|
||||
/**
 * Classify a file name as an archive and, where applicable, as one part of a
 * split set.
 *
 * The multipart table is consulted before the plain-extension check, so a
 * name such as `pack.part1.rar` is reported as `RAR_PART` and not `SINGLE`.
 *
 * @param fileName File name to classify.
 * @returns The detected archive info, or `null` when the name matches no
 *   multipart pattern and does not end in `.zip` or `.rar`.
 */
export function detectArchive(fileName: string): MultipartInfo | null {
  // Multipart naming schemes take priority over the plain-extension check.
  for (const candidate of patterns) {
    const hit = fileName.match(candidate.regex);
    if (hit === null) {
      continue;
    }
    const baseName = candidate.getBaseName(hit);
    const partNumber = candidate.getPartNumber(hit);
    return { baseName, partNumber, format: candidate.format, pattern: candidate.pattern };
  }

  // A bare .zip / .rar is either standalone or the final part of a legacy
  // split set (ZIP_LEGACY / RAR_LEGACY); ZIP is checked before RAR.
  const singleFileSuffixes = [
    [/\.zip$/i, "ZIP"],
    [/\.rar$/i, "RAR"],
  ] as const;

  for (const [suffix, format] of singleFileSuffixes) {
    if (!suffix.test(fileName)) {
      continue;
    }
    return {
      baseName: fileName.replace(suffix, ""),
      partNumber: -1, // -1 signals "could be single or final legacy part"
      format,
      pattern: "SINGLE",
    };
  }

  return null;
}
|
||||
|
||||
/**
 * Tell whether a file name looks like an archive attachment worth processing.
 *
 * @param fileName File name to test.
 * @returns `true` exactly when `detectArchive` returns a non-null result for
 *   the name.
 */
export function isArchiveAttachment(fileName: string): boolean {
  const detected = detectArchive(fileName);
  return detected !== null;
}
|
||||
Reference in New Issue
Block a user