Mirror of https://github.com/xCyanGrizzly/DragonsStash.git (synced 2026-05-11 06:11:15 +00:00)
feat: add Telegram integration with forum topic support and creator tracking
Adds the full Telegram ZIP ingestion pipeline: a TDLib worker service scans source channels for archive files, deduplicates them by content hash, extracts metadata, uploads them to an archive channel, and indexes them in Postgres. Forum supergroups are scanned per topic, with the topic name used as the creator; filename-based creator extraction (e.g. "Mammoth Factory - 2026-01.zip") serves as the fallback. Also includes an admin UI for managing accounts and channels, simplified account setup (API credentials via env vars), an auth code/password submission dialog, a package browser with a creator column, and live ingestion activity tracking.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
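The modules below fit together roughly as follows for each archive set. The orchestrating worker.ts is not part of this diff, so this is a sketch based on the modules shown: runArchiveSet, localPaths, and ctx are hypothetical names, the upload/destination fields are omitted, only the ZIP metadata path is shown (a RAR set would go through readRarContents), and it assumes the Prisma ArchiveType enum carries the same "ZIP"/"RAR" values as ArchiveFormat.

import { hashParts } from "./archive/hash.js";
import { readZipCentralDirectory } from "./archive/zip-reader.js";
import { extractCreatorFromFileName } from "./archive/creator.js";
import { packageExistsByHash, createPackageWithFiles } from "./db/queries.js";
import type { ArchiveSet } from "./archive/multipart.js";

// Hypothetical per-set flow; worker.ts (the real orchestrator) is not in this diff.
async function runArchiveSet(
  set: ArchiveSet,
  localPaths: string[], // downloaded parts, in part order
  ctx: { sourceChannelId: string; ingestionRunId: string; topicName?: string }
): Promise<void> {
  const contentHash = await hashParts(localPaths); // dedupe key
  if (await packageExistsByHash(contentHash)) return; // already ingested

  // ZIP metadata path shown; a RAR set would use readRarContents instead.
  const files = await readZipCentralDirectory(localPaths);

  // Creator priority: forum topic name first, filename extraction as fallback.
  const creator =
    ctx.topicName ?? extractCreatorFromFileName(set.parts[0].fileName);

  await createPackageWithFiles({
    contentHash,
    fileName: set.parts[0].fileName,
    fileSize: set.parts.reduce((sum, p) => sum + p.fileSize, 0n),
    archiveType: set.type, // assumes Prisma's ArchiveType has ZIP/RAR values
    sourceChannelId: ctx.sourceChannelId,
    sourceMessageId: set.parts[0].id,
    isMultipart: set.isMultipart,
    partCount: set.parts.length,
    ingestionRunId: ctx.ingestionRunId,
    creator,
    files,
  });
}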
worker/src/archive/creator.ts (new file, 21 lines)
@@ -0,0 +1,21 @@
/**
 * Extract a creator name from common archive file naming patterns.
 *
 * Priority in the worker: topic name > filename extraction.
 * This is the fallback when no forum topic name is available.
 *
 * Patterns handled (split on ` - `):
 *   "Mammoth Factory - 2026-01.zip"        → "Mammoth Factory"
 *   "Artist Name - Pack Title.part01.rar"  → "Artist Name"
 *   "some_random_file.zip"                 → null
 */
export function extractCreatorFromFileName(fileName: string): string | null {
  // Strip archive extensions (.zip, .rar, .part01.rar, .z01, etc.)
  const bare = fileName.replace(/(\.(part\d+\.rar|z\d{2}|zip|rar))+$/i, "");

  const idx = bare.indexOf(" - ");
  if (idx <= 0) return null;

  const creator = bare.slice(0, idx).trim();
  return creator.length > 0 ? creator : null;
}
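A few illustrative calls (not part of the diff) showing the fallback behavior:

import { extractCreatorFromFileName } from "./creator.js";

extractCreatorFromFileName("Mammoth Factory - 2026-01.zip");       // "Mammoth Factory"
extractCreatorFromFileName("Artist Name - Pack Title.part01.rar"); // "Artist Name"
extractCreatorFromFileName(" - orphan.zip");                       // null (separator at position 0)
extractCreatorFromFileName("some_random_file.zip");                // null (no " - " separator)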
worker/src/archive/detect.ts (new file, 96 lines)
@@ -0,0 +1,96 @@
export type ArchiveFormat = "ZIP" | "RAR";

export interface MultipartInfo {
  baseName: string;
  partNumber: number;
  format: ArchiveFormat;
  pattern: "ZIP_NUMBERED" | "ZIP_LEGACY" | "RAR_PART" | "RAR_LEGACY" | "SINGLE";
}

const patterns: {
  regex: RegExp;
  format: ArchiveFormat;
  pattern: MultipartInfo["pattern"];
  getBaseName: (match: RegExpMatchArray) => string;
  getPartNumber: (match: RegExpMatchArray) => number;
}[] = [
  // pack.zip.001, pack.zip.002
  {
    regex: /^(.+\.zip)\.(\d{3,})$/i,
    format: "ZIP",
    pattern: "ZIP_NUMBERED",
    getBaseName: (m) => m[1],
    getPartNumber: (m) => parseInt(m[2], 10),
  },
  // pack.z01, pack.z02 (legacy split — final part is pack.zip)
  {
    regex: /^(.+)\.z(\d{2,})$/i,
    format: "ZIP",
    pattern: "ZIP_LEGACY",
    getBaseName: (m) => m[1],
    getPartNumber: (m) => parseInt(m[2], 10),
  },
  // pack.part1.rar, pack.part2.rar
  {
    regex: /^(.+)\.part(\d+)\.rar$/i,
    format: "RAR",
    pattern: "RAR_PART",
    getBaseName: (m) => m[1],
    getPartNumber: (m) => parseInt(m[2], 10),
  },
  // pack.r00, pack.r01 (legacy split — final part is pack.rar)
  {
    regex: /^(.+)\.r(\d{2,})$/i,
    format: "RAR",
    pattern: "RAR_LEGACY",
    getBaseName: (m) => m[1],
    getPartNumber: (m) => parseInt(m[2], 10),
  },
];

/**
 * Detect if a filename is an archive and extract multipart info.
 */
export function detectArchive(fileName: string): MultipartInfo | null {
  // Check multipart patterns first
  for (const p of patterns) {
    const match = fileName.match(p.regex);
    if (match) {
      return {
        baseName: p.getBaseName(match),
        partNumber: p.getPartNumber(match),
        format: p.format,
        pattern: p.pattern,
      };
    }
  }

  // Single .zip file — could be a standalone or the final part of a ZIP_LEGACY set
  if (/\.zip$/i.test(fileName)) {
    return {
      baseName: fileName.replace(/\.zip$/i, ""),
      partNumber: -1, // -1 signals "could be single or final legacy part"
      format: "ZIP",
      pattern: "SINGLE",
    };
  }

  // Single .rar file — could be standalone or final part of RAR_LEGACY set
  if (/\.rar$/i.test(fileName)) {
    return {
      baseName: fileName.replace(/\.rar$/i, ""),
      partNumber: -1,
      format: "RAR",
      pattern: "SINGLE",
    };
  }

  return null;
}

/**
 * Check if a filename looks like any archive attachment we should process.
 */
export function isArchiveAttachment(fileName: string): boolean {
  return detectArchive(fileName) !== null;
}
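Illustrative outputs (not part of the diff); note that ZIP_NUMBERED keeps the .zip in its baseName, since its regex captures up to and including the extension:

import { detectArchive } from "./detect.js";

detectArchive("pack.zip.003");
// → { baseName: "pack.zip", partNumber: 3, format: "ZIP", pattern: "ZIP_NUMBERED" }
detectArchive("pack.part2.rar");
// → { baseName: "pack", partNumber: 2, format: "RAR", pattern: "RAR_PART" }
detectArchive("pack.zip");
// → { baseName: "pack", partNumber: -1, format: "ZIP", pattern: "SINGLE" }
detectArchive("notes.txt");
// → null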
worker/src/archive/hash.ts (new file, 25 lines)
@@ -0,0 +1,25 @@
import { createReadStream } from "fs";
import { createHash } from "crypto";
import { pipeline } from "stream/promises";
import { PassThrough } from "stream";

/**
 * Compute SHA-256 hash of one or more files by streaming them in order.
 * Memory usage: O(1) — reads in 64KB chunks regardless of total size.
 * For multipart archives, pass all parts sorted by part number.
 */
export async function hashParts(filePaths: string[]): Promise<string> {
  const hash = createHash("sha256");
  for (const filePath of filePaths) {
    await pipeline(
      createReadStream(filePath),
      new PassThrough({
        transform(chunk, _encoding, callback) {
          hash.update(chunk);
          callback();
        },
      })
    );
  }
  return hash.digest("hex");
}
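Usage is order-sensitive — the digest is computed over the concatenated byte stream, so parts must be passed in part order (sketch with illustrative paths, called from an async context):

import { hashParts } from "./hash.js";

const contentHash = await hashParts([
  "/tmp/zips/pack.zip.001",
  "/tmp/zips/pack.zip.002",
  "/tmp/zips/pack.zip.003",
]);
// 64-character lowercase hex digest — used as the Package.contentHash dedupe key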
worker/src/archive/multipart.ts (new file, 100 lines)
@@ -0,0 +1,100 @@
import { detectArchive, type ArchiveFormat, type MultipartInfo } from "./detect.js";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";

const log = childLogger("multipart");

export interface TelegramMessage {
  id: bigint;
  fileName: string;
  fileId: string;
  fileSize: bigint;
  date: Date;
}

export interface ArchiveSet {
  type: ArchiveFormat;
  baseName: string;
  parts: TelegramMessage[];
  isMultipart: boolean;
}

/**
 * Group messages into archive sets (single files + multipart groups).
 * Messages should be pre-filtered to only include archive attachments.
 */
export function groupArchiveSets(messages: TelegramMessage[]): ArchiveSet[] {
  // Detect and annotate each message
  const annotated: { msg: TelegramMessage; info: MultipartInfo }[] = [];
  for (const msg of messages) {
    const info = detectArchive(msg.fileName);
    if (info) {
      annotated.push({ msg, info });
    }
  }

  // Group by baseName + format
  const groups = new Map<string, { msg: TelegramMessage; info: MultipartInfo }[]>();
  for (const item of annotated) {
    const key = `${item.info.format}:${item.info.baseName.toLowerCase()}`;
    const group = groups.get(key) ?? [];
    group.push(item);
    groups.set(key, group);
  }

  const results: ArchiveSet[] = [];

  for (const [, group] of groups) {
    const format = group[0].info.format;
    const baseName = group[0].info.baseName;

    // Separate explicit multipart entries from potential singles
    const multipartEntries = group.filter((g) => g.info.pattern !== "SINGLE");
    const singleEntries = group.filter((g) => g.info.pattern === "SINGLE");

    if (multipartEntries.length > 0) {
      // This is a multipart set. Any single entry in the group is treated
      // as the "final part" of a legacy split and merged in.
      const allEntries = [...multipartEntries, ...singleEntries];

      // Check the time span — skip if the parts span too long
      const dates = allEntries.map((e) => e.msg.date.getTime());
      const span = Math.max(...dates) - Math.min(...dates);
      const maxSpanMs = config.multipartTimeoutHours * 60 * 60 * 1000;

      if (span > maxSpanMs) {
        log.warn(
          { baseName, format, span: span / 3600000 },
          "Multipart set spans too long, skipping"
        );
        continue;
      }

      // Sort by part number (singles get a very high number so they come last — they're the final part)
      allEntries.sort((a, b) => {
        const aNum = a.info.partNumber === -1 ? 999999 : a.info.partNumber;
        const bNum = b.info.partNumber === -1 ? 999999 : b.info.partNumber;
        return aNum - bNum;
      });

      results.push({
        type: format,
        baseName,
        parts: allEntries.map((e) => e.msg),
        isMultipart: true,
      });
    } else {
      // All entries are singles — each is its own archive set
      for (const entry of singleEntries) {
        results.push({
          type: format,
          baseName: entry.info.baseName,
          parts: [entry.msg],
          isMultipart: false,
        });
      }
    }
  }

  return results;
}
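For example, a legacy ZIP split plus its bare .zip final part collapses into one set (illustrative data; assumes the configured multipart timeout covers the ten-minute span):

import { groupArchiveSets, type TelegramMessage } from "./multipart.js";

const msgs: TelegramMessage[] = [
  { id: 1n, fileName: "pack.z01", fileId: "a", fileSize: 100n, date: new Date("2026-01-01T00:00:00Z") },
  { id: 2n, fileName: "pack.z02", fileId: "b", fileSize: 100n, date: new Date("2026-01-01T00:05:00Z") },
  { id: 3n, fileName: "pack.zip", fileId: "c", fileSize: 40n, date: new Date("2026-01-01T00:10:00Z") },
];

const sets = groupArchiveSets(msgs);
// One set: { type: "ZIP", baseName: "pack", isMultipart: true,
//   parts: [pack.z01, pack.z02, pack.zip] } — the bare .zip (partNumber -1) sorts last.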
worker/src/archive/rar-reader.ts (new file, 90 lines)
@@ -0,0 +1,90 @@
import { execFile } from "child_process";
import { promisify } from "util";
import path from "path";
import { childLogger } from "../util/logger.js";
import type { FileEntry } from "./zip-reader.js";

const execFileAsync = promisify(execFile);
const log = childLogger("rar-reader");

/**
 * Parse the output of `unrar l -v <file>` to extract file metadata.
 * unrar automatically discovers sibling parts when they're co-located.
 */
export async function readRarContents(
  firstPartPath: string
): Promise<FileEntry[]> {
  try {
    const { stdout } = await execFileAsync("unrar", ["l", "-v", firstPartPath], {
      timeout: 30000,
      maxBuffer: 10 * 1024 * 1024, // 10MB for very large archives
    });

    return parseUnrarOutput(stdout);
  } catch (err) {
    log.warn({ err, file: firstPartPath }, "Failed to read RAR contents");
    return []; // Fallback: return empty on error
  }
}

/**
 * Parse the tabular output of `unrar l -v`.
 *
 * Example output format:
 *   Archive: test.rar
 *   Details: RAR 5
 *
 *   Attributes      Size    Packed Ratio    Date    Time   CRC-32  Name
 *   ----------- --------- --------- ----- -------- ----- -------- ----
 *      ...A....     12345     10234   83%  2024-01-15 10:30 DEADBEEF folder/file.stl
 *   ----------- --------- --------- ----- -------- ----- -------- ----
 */
function parseUnrarOutput(output: string): FileEntry[] {
  const entries: FileEntry[] = [];
  const lines = output.split("\n");

  let inFileList = false;
  let separatorCount = 0;

  for (const line of lines) {
    const trimmed = line.trim();

    // Detect separator lines (------- pattern)
    if (/^-{5,}/.test(trimmed)) {
      separatorCount++;
      if (separatorCount === 1) {
        inFileList = true;
      } else if (separatorCount >= 2) {
        inFileList = false;
      }
      continue;
    }

    if (!inFileList) continue;

    // Parse a file entry line
    // Format: Attributes Size Packed Ratio Date Time CRC Name
    const match = trimmed.match(
      /^\S+\s+(\d+)\s+(\d+)\s+\d+%\s+\S+\s+\S+\s+([0-9A-Fa-f]+)\s+(.+)$/
    );

    if (match) {
      const [, uncompressedStr, compressedStr, crc32, filePath] = match;

      // Skip directory entries (they end with / or \)
      if (filePath.endsWith("/") || filePath.endsWith("\\")) continue;

      const ext = path.extname(filePath).toLowerCase();
      entries.push({
        path: filePath,
        fileName: path.basename(filePath),
        extension: ext ? ext.slice(1) : null,
        compressedSize: BigInt(compressedStr),
        uncompressedSize: BigInt(uncompressedStr),
        crc32: crc32.toLowerCase(),
      });
    }
  }

  return entries;
}
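Given the sample listing in the comment above, the parser yields one entry like this (sketch, assuming unrar is on the PATH; the archive path is illustrative):

import { readRarContents } from "./rar-reader.js";

const entries = await readRarContents("/tmp/zips/test.part1.rar");
// [ { path: "folder/file.stl", fileName: "file.stl", extension: "stl",
//     compressedSize: 10234n, uncompressedSize: 12345n, crc32: "deadbeef" } ]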
worker/src/archive/split.ts (new file, 48 lines)
@@ -0,0 +1,48 @@
import { createReadStream, createWriteStream } from "fs";
import { stat } from "fs/promises";
import path from "path";
import { pipeline } from "stream/promises";
import { childLogger } from "../util/logger.js";

const log = childLogger("split");

/** 2GB in bytes — Telegram's file size limit */
const MAX_PART_SIZE = 2n * 1024n * 1024n * 1024n;

/**
 * Split a file into ≤2GB parts using byte-level splitting.
 * Returns paths to the split parts. If the file is already ≤2GB, returns the original path.
 */
export async function byteLevelSplit(filePath: string): Promise<string[]> {
  const stats = await stat(filePath);
  const fileSize = BigInt(stats.size);

  if (fileSize <= MAX_PART_SIZE) {
    return [filePath];
  }

  const dir = path.dirname(filePath);
  const baseName = path.basename(filePath);
  const partSize = Number(MAX_PART_SIZE);
  const totalParts = Math.ceil(Number(fileSize) / partSize);
  const parts: string[] = [];

  log.info({ filePath, fileSize: Number(fileSize), totalParts }, "Splitting file");

  for (let i = 0; i < totalParts; i++) {
    const partNum = String(i + 1).padStart(3, "0");
    const partPath = path.join(dir, `${baseName}.${partNum}`);
    const start = i * partSize;
    const end = Math.min(start + partSize - 1, Number(fileSize) - 1); // `end` is inclusive for fs streams

    await pipeline(
      createReadStream(filePath, { start, end }),
      createWriteStream(partPath)
    );

    parts.push(partPath);
  }

  log.info({ filePath, parts: parts.length }, "File split complete");
  return parts;
}
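Because the split is byte-level, the parts are not standalone archives — the receiving side must concatenate them back before extracting. A sketch with an illustrative path:

import { byteLevelSplit } from "./split.js";

// A 5 GB input yields pack.zip.001 (2 GB), pack.zip.002 (2 GB), pack.zip.003 (1 GB).
const parts = await byteLevelSplit("/tmp/zips/pack.zip");
// To reconstruct on the receiving side:
//   cat pack.zip.001 pack.zip.002 pack.zip.003 > pack.zip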
worker/src/archive/zip-reader.ts (new file, 61 lines)
@@ -0,0 +1,61 @@
import yauzl from "yauzl";
import path from "path";
import { childLogger } from "../util/logger.js";

const log = childLogger("zip-reader");

export interface FileEntry {
  path: string;
  fileName: string;
  extension: string | null;
  compressedSize: bigint;
  uncompressedSize: bigint;
  crc32: string | null;
}

/**
 * Read the central directory of a ZIP file without extracting any contents.
 * For multipart ZIPs, pass the paths sorted by part order.
 * We attempt to read from the last part first (central directory is at the end).
 */
export async function readZipCentralDirectory(
  filePaths: string[]
): Promise<FileEntry[]> {
  // The central directory lives at the end of the last file
  const targetFile = filePaths[filePaths.length - 1];

  return new Promise((resolve, reject) => {
    yauzl.open(targetFile, { lazyEntries: true, autoClose: true }, (err, zipFile) => {
      if (err) {
        log.warn({ err, file: targetFile }, "Failed to open ZIP for reading");
        resolve([]); // Fallback: return empty on error
        return;
      }

      const entries: FileEntry[] = [];

      zipFile.readEntry();
      zipFile.on("entry", (entry: yauzl.Entry) => {
        // Skip directories
        if (!entry.fileName.endsWith("/")) {
          const ext = path.extname(entry.fileName).toLowerCase();
          entries.push({
            path: entry.fileName,
            fileName: path.basename(entry.fileName),
            extension: ext ? ext.slice(1) : null, // Remove leading dot
            compressedSize: BigInt(entry.compressedSize),
            uncompressedSize: BigInt(entry.uncompressedSize),
            crc32: entry.crc32 !== 0 ? entry.crc32.toString(16).padStart(8, "0") : null,
          });
        }
        zipFile.readEntry();
      });

      zipFile.on("end", () => resolve(entries));
      zipFile.on("error", (error) => {
        log.warn({ error, file: targetFile }, "Error reading ZIP entries");
        resolve(entries); // Return whatever we got
      });
    });
  });
}
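Only the last path is actually opened — yauzl locates the end-of-central-directory record at the tail of that file — so for a legacy split the bare .zip goes last (sketch, paths illustrative):

import { readZipCentralDirectory } from "./zip-reader.js";

const files = await readZipCentralDirectory([
  "/tmp/zips/pack.z01",
  "/tmp/zips/pack.z02",
  "/tmp/zips/pack.zip", // final part — holds the central directory
]);
// FileEntry[] with per-file sizes and CRCs; no bytes are extracted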
worker/src/db/client.ts (new file, 14 lines)
@@ -0,0 +1,14 @@
import { PrismaClient } from "@prisma/client";
import { PrismaPg } from "@prisma/adapter-pg";
import pg from "pg";
import { config } from "../util/config.js";

const pool = new pg.Pool({
  connectionString: config.databaseUrl,
  max: 5,
});

const adapter = new PrismaPg(pool);
export const db = new PrismaClient({ adapter });

export { pool };
worker/src/db/locks.ts (new file, 56 lines)
@@ -0,0 +1,56 @@
import { pool } from "./client.js";
import type { PoolClient } from "pg";
import { childLogger } from "../util/logger.js";

const log = childLogger("locks");

// Advisory locks are session-scoped: the unlock must run on the same
// connection that acquired the lock. We therefore pin the pool client
// for each held lock instead of returning it to the pool.
const heldClients = new Map<string, PoolClient>();

/**
 * Derive a stable 32-bit integer lock ID from an account ID string.
 * PostgreSQL advisory locks take bigint keys; staying within 32 bits
 * keeps the ID safely inside that range.
 */
function hashToLockId(accountId: string): number {
  let hash = 0;
  for (let i = 0; i < accountId.length; i++) {
    const char = accountId.charCodeAt(i);
    hash = (hash << 5) - hash + char;
    hash |= 0; // Convert to 32-bit integer
  }
  return Math.abs(hash);
}

/**
 * Try to acquire a PostgreSQL advisory lock for an account.
 * Returns true if acquired, false if already held by another session.
 */
export async function tryAcquireLock(accountId: string): Promise<boolean> {
  const lockId = hashToLockId(accountId);
  const client = await pool.connect();
  try {
    const result = await client.query<{ pg_try_advisory_lock: boolean }>(
      "SELECT pg_try_advisory_lock($1)",
      [lockId]
    );
    const acquired = result.rows[0]?.pg_try_advisory_lock ?? false;
    if (acquired) {
      // Keep this connection checked out until releaseLock — the lock
      // would otherwise silently persist on a pooled connection.
      heldClients.set(accountId, client);
      log.debug({ accountId, lockId }, "Advisory lock acquired");
    } else {
      client.release();
      log.debug({ accountId, lockId }, "Advisory lock already held");
    }
    return acquired;
  } catch (err) {
    client.release();
    throw err;
  }
}

/**
 * Release the advisory lock for an account on the connection that holds it.
 */
export async function releaseLock(accountId: string): Promise<void> {
  const lockId = hashToLockId(accountId);
  const client = heldClients.get(accountId);
  if (!client) {
    log.warn({ accountId, lockId }, "No held advisory lock to release");
    return;
  }
  try {
    await client.query("SELECT pg_advisory_unlock($1)", [lockId]);
    log.debug({ accountId, lockId }, "Advisory lock released");
  } finally {
    heldClients.delete(accountId);
    client.release();
  }
}
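The intended usage pattern pairs the two calls in try/finally so a failure mid-run cannot leave the account locked (sketch; the real caller is worker.ts, which is not in this diff):

import { tryAcquireLock, releaseLock } from "./locks.js";

async function withAccountLock(accountId: string, work: () => Promise<void>): Promise<void> {
  if (!(await tryAcquireLock(accountId))) {
    return; // another worker session is already processing this account
  }
  try {
    await work();
  } finally {
    await releaseLock(accountId); // always release, even if work() throws
  }
}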
worker/src/db/queries.ts (new file, 270 lines)
@@ -0,0 +1,270 @@
import { db } from "./client.js";
import type { ArchiveType } from "@prisma/client";

export async function getActiveAccounts() {
  return db.telegramAccount.findMany({
    where: { isActive: true, authState: "AUTHENTICATED" },
  });
}

export async function getSourceChannelMappings(accountId: string) {
  return db.accountChannelMap.findMany({
    where: {
      accountId,
      role: "READER",
      channel: { type: "SOURCE", isActive: true },
    },
    include: { channel: true },
  });
}

export async function getDestinationChannel(accountId: string) {
  const mapping = await db.accountChannelMap.findFirst({
    where: {
      accountId,
      role: "WRITER",
      channel: { type: "DESTINATION", isActive: true },
    },
    include: { channel: true },
  });
  return mapping?.channel ?? null;
}

export async function packageExistsByHash(contentHash: string) {
  const pkg = await db.package.findUnique({
    where: { contentHash },
    select: { id: true },
  });
  return pkg !== null;
}

export interface CreatePackageInput {
  contentHash: string;
  fileName: string;
  fileSize: bigint;
  archiveType: ArchiveType;
  sourceChannelId: string;
  sourceMessageId: bigint;
  sourceTopicId?: bigint | null;
  destChannelId?: string;
  destMessageId?: bigint;
  isMultipart: boolean;
  partCount: number;
  ingestionRunId: string;
  creator?: string | null;
  previewData?: Buffer | null;
  previewMsgId?: bigint | null;
  files: {
    path: string;
    fileName: string;
    extension: string | null;
    compressedSize: bigint;
    uncompressedSize: bigint;
    crc32: string | null;
  }[];
}

export async function createPackageWithFiles(input: CreatePackageInput) {
  return db.package.create({
    data: {
      contentHash: input.contentHash,
      fileName: input.fileName,
      fileSize: input.fileSize,
      archiveType: input.archiveType,
      sourceChannelId: input.sourceChannelId,
      sourceMessageId: input.sourceMessageId,
      sourceTopicId: input.sourceTopicId ?? undefined,
      destChannelId: input.destChannelId,
      destMessageId: input.destMessageId,
      isMultipart: input.isMultipart,
      partCount: input.partCount,
      fileCount: input.files.length,
      ingestionRunId: input.ingestionRunId,
      creator: input.creator ?? undefined,
      previewData: input.previewData ? new Uint8Array(input.previewData) : undefined,
      previewMsgId: input.previewMsgId ?? undefined,
      files: {
        create: input.files,
      },
    },
  });
}

export async function createIngestionRun(accountId: string) {
  return db.ingestionRun.create({
    data: {
      accountId,
      status: "RUNNING",
      currentActivity: "Starting ingestion run",
      currentStep: "initializing",
      lastActivityAt: new Date(),
    },
  });
}

export interface ActivityUpdate {
  currentActivity: string;
  currentStep: string;
  currentChannel?: string | null;
  currentFile?: string | null;
  currentFileNum?: number | null;
  totalFiles?: number | null;
  downloadedBytes?: bigint | null;
  totalBytes?: bigint | null;
  downloadPercent?: number | null;
  messagesScanned?: number;
  zipsFound?: number;
  zipsDuplicate?: number;
  zipsIngested?: number;
}

export async function updateRunActivity(
  runId: string,
  activity: ActivityUpdate
) {
  return db.ingestionRun.update({
    where: { id: runId },
    data: {
      currentActivity: activity.currentActivity,
      currentStep: activity.currentStep,
      currentChannel: activity.currentChannel ?? undefined,
      currentFile: activity.currentFile ?? undefined,
      currentFileNum: activity.currentFileNum ?? undefined,
      totalFiles: activity.totalFiles ?? undefined,
      downloadedBytes: activity.downloadedBytes ?? undefined,
      totalBytes: activity.totalBytes ?? undefined,
      downloadPercent: activity.downloadPercent ?? undefined,
      lastActivityAt: new Date(),
      ...(activity.messagesScanned !== undefined && { messagesScanned: activity.messagesScanned }),
      ...(activity.zipsFound !== undefined && { zipsFound: activity.zipsFound }),
      ...(activity.zipsDuplicate !== undefined && { zipsDuplicate: activity.zipsDuplicate }),
      ...(activity.zipsIngested !== undefined && { zipsIngested: activity.zipsIngested }),
    },
  });
}

// Built per call so lastActivityAt reflects the actual completion time,
// not the module load time.
const clearActivity = () => ({
  currentActivity: null,
  currentStep: null,
  currentChannel: null,
  currentFile: null,
  currentFileNum: null,
  totalFiles: null,
  downloadedBytes: null,
  totalBytes: null,
  downloadPercent: null,
  lastActivityAt: new Date(),
});

export async function completeIngestionRun(
  runId: string,
  counters: {
    messagesScanned: number;
    zipsFound: number;
    zipsDuplicate: number;
    zipsIngested: number;
  }
) {
  return db.ingestionRun.update({
    where: { id: runId },
    data: {
      status: "COMPLETED",
      finishedAt: new Date(),
      ...counters,
      ...clearActivity(),
    },
  });
}

export async function failIngestionRun(runId: string, errorMessage: string) {
  return db.ingestionRun.update({
    where: { id: runId },
    data: {
      status: "FAILED",
      finishedAt: new Date(),
      errorMessage,
      ...clearActivity(),
    },
  });
}

export async function updateLastProcessedMessage(
  mappingId: string,
  messageId: bigint
) {
  return db.accountChannelMap.update({
    where: { id: mappingId },
    data: { lastProcessedMessageId: messageId },
  });
}

export async function markStaleRunsAsFailed() {
  return db.ingestionRun.updateMany({
    where: { status: "RUNNING" },
    data: {
      status: "FAILED",
      finishedAt: new Date(),
      errorMessage: "Worker restarted — run was still marked as RUNNING",
    },
  });
}

export async function updateAccountAuthState(
  accountId: string,
  authState: "PENDING" | "AWAITING_CODE" | "AWAITING_PASSWORD" | "AUTHENTICATED" | "EXPIRED",
  authCode?: string | null
) {
  return db.telegramAccount.update({
    where: { id: accountId },
    data: { authState, authCode, lastSeenAt: authState === "AUTHENTICATED" ? new Date() : undefined },
  });
}

export async function getAccountAuthCode(accountId: string) {
  const account = await db.telegramAccount.findUnique({
    where: { id: accountId },
    select: { authCode: true, authState: true },
  });
  return account;
}

// ── Forum / Topic progress ──

export async function setChannelForum(channelId: string, isForum: boolean) {
  return db.telegramChannel.update({
    where: { id: channelId },
    data: { isForum },
  });
}

export async function getTopicProgress(mappingId: string) {
  return db.topicProgress.findMany({
    where: { accountChannelMapId: mappingId },
  });
}

export async function upsertTopicProgress(
  mappingId: string,
  topicId: bigint,
  topicName: string | null,
  lastProcessedMessageId: bigint
) {
  return db.topicProgress.upsert({
    where: {
      accountChannelMapId_topicId: {
        accountChannelMapId: mappingId,
        topicId,
      },
    },
    create: {
      accountChannelMapId: mappingId,
      topicId,
      topicName,
      lastProcessedMessageId,
    },
    update: {
      topicName,
      lastProcessedMessageId,
    },
  });
}
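A typical run lifecycle ties these together (sketch, not part of the diff; the counter values and channel name are placeholders):

import {
  createIngestionRun,
  updateRunActivity,
  completeIngestionRun,
  failIngestionRun,
} from "./queries.js";

async function exampleRun(accountId: string): Promise<void> {
  const run = await createIngestionRun(accountId);
  try {
    await updateRunActivity(run.id, {
      currentActivity: "Scanning source channel",
      currentStep: "scanning",
      currentChannel: "Example Channel", // placeholder
    });
    // ... scan, download, upload, index ...
    await completeIngestionRun(run.id, {
      messagesScanned: 120, zipsFound: 4, zipsDuplicate: 1, zipsIngested: 3,
    });
  } catch (err) {
    await failIngestionRun(run.id, err instanceof Error ? err.message : String(err));
  }
}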
worker/src/index.ts (new file, 50 lines)
@@ -0,0 +1,50 @@
import { mkdir } from "fs/promises";
import { config } from "./util/config.js";
import { logger } from "./util/logger.js";
import { markStaleRunsAsFailed } from "./db/queries.js";
import { cleanupTempDir } from "./worker.js";
import { startScheduler, stopScheduler } from "./scheduler.js";
import { db, pool } from "./db/client.js";

const log = logger.child({ module: "main" });

async function main(): Promise<void> {
  log.info("DragonsStash Telegram Worker starting");
  log.info({ config: { ...config, databaseUrl: "***" } }, "Configuration loaded");

  // Ensure the temp and TDLib state directories exist
  await mkdir(config.tempDir, { recursive: true });
  await mkdir(config.tdlibStateDir, { recursive: true });

  // Clean up stale state
  await cleanupTempDir();
  await markStaleRunsAsFailed();

  // Start the scheduler
  await startScheduler();
}

// Graceful shutdown
function shutdown(signal: string): void {
  log.info({ signal }, "Shutdown signal received");
  stopScheduler();

  // Close DB connections
  Promise.all([db.$disconnect(), pool.end()])
    .then(() => {
      log.info("Shutdown complete");
      process.exit(0);
    })
    .catch((err) => {
      log.error({ err }, "Error during shutdown");
      process.exit(1);
    });
}

process.on("SIGTERM", () => shutdown("SIGTERM"));
process.on("SIGINT", () => shutdown("SIGINT"));

main().catch((err) => {
  log.fatal({ err }, "Worker failed to start");
  process.exit(1);
});
worker/src/preview/match.ts (new file, 86 lines)
@@ -0,0 +1,86 @@
import { childLogger } from "../util/logger.js";

const log = childLogger("preview-match");

export interface TelegramPhoto {
  id: bigint;
  date: Date;
  /** Caption text on the photo message (if any). */
  caption: string;
  /** The smallest photo size available — used as thumbnail. */
  fileId: string;
  fileSize: number;
}

export interface ArchiveRef {
  baseName: string;
  firstMessageId: bigint;
  firstMessageDate: Date;
}

/**
 * Try to match a photo message to an archive by requiring both:
 *   1. the photo caption contains the archive baseName (without extension), and
 *   2. the photo was posted within ±6 hours of the archive's first message.
 *
 * For each archive, the closest match in time (if any) is recorded in the
 * returned map, keyed by the archive's baseName.
 */
export function matchPreviewToArchive(
  photos: TelegramPhoto[],
  archives: ArchiveRef[]
): Map<string, TelegramPhoto> {
  const results = new Map<string, TelegramPhoto>();
  const TIME_WINDOW_MS = 6 * 60 * 60 * 1000; // 6 hours

  for (const archive of archives) {
    // Normalize the archive base name for matching
    const normalizedBase = normalizeForMatch(archive.baseName);
    if (!normalizedBase) continue;

    let bestMatch: TelegramPhoto | null = null;
    let bestTimeDiff = Infinity;

    for (const photo of photos) {
      const timeDiff = Math.abs(
        photo.date.getTime() - archive.firstMessageDate.getTime()
      );

      // Must be within the time window
      if (timeDiff > TIME_WINDOW_MS) continue;

      // Check if the photo caption contains the archive base name
      const normalizedCaption = normalizeForMatch(photo.caption);
      if (!normalizedCaption) continue;

      const matches =
        normalizedCaption.includes(normalizedBase) ||
        normalizedBase.includes(normalizedCaption);

      if (matches && timeDiff < bestTimeDiff) {
        bestMatch = photo;
        bestTimeDiff = timeDiff;
      }
    }

    if (bestMatch) {
      log.debug(
        { baseName: archive.baseName, photoId: bestMatch.id.toString() },
        "Matched preview photo to archive"
      );
      results.set(archive.baseName, bestMatch);
    }
  }

  return results;
}

/**
 * Strip extension and punctuation, and normalize separators, for fuzzy matching.
 */
function normalizeForMatch(input: string): string {
  return input
    .toLowerCase()
    .replace(/\.[a-z0-9]{1,5}$/i, "") // strip extension
    .replace(/[_\-.\s]+/g, " ") // normalize separators
    .trim();
}
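An illustrative match (not part of the diff): the caption and base name normalize to "mammoth factory 2026 01 preview" and "mammoth factory 2026 01", the caption contains the base name, and the 30-minute gap is inside the 6-hour window:

import { matchPreviewToArchive, type TelegramPhoto, type ArchiveRef } from "./match.js";

const photos: TelegramPhoto[] = [
  { id: 10n, date: new Date("2026-01-01T12:00:00Z"),
    caption: "Mammoth Factory - 2026-01 preview", fileId: "p1", fileSize: 42_000 },
];
const archives: ArchiveRef[] = [
  { baseName: "Mammoth Factory - 2026-01",
    firstMessageId: 11n, firstMessageDate: new Date("2026-01-01T12:30:00Z") },
];

matchPreviewToArchive(photos, archives).get("Mammoth Factory - 2026-01");
// → the photo above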
worker/src/scheduler.ts (new file, 92 lines)
@@ -0,0 +1,92 @@
import { config } from "./util/config.js";
import { childLogger } from "./util/logger.js";
import { getActiveAccounts } from "./db/queries.js";
import { runWorkerForAccount } from "./worker.js";

const log = childLogger("scheduler");

let running = false;
let timer: ReturnType<typeof setTimeout> | null = null;

/**
 * Run one ingestion cycle: process all active, authenticated accounts sequentially.
 */
async function runCycle(): Promise<void> {
  if (running) {
    log.warn("Previous cycle still running, skipping");
    return;
  }

  running = true;
  log.info("Starting ingestion cycle");

  try {
    const accounts = await getActiveAccounts();

    if (accounts.length === 0) {
      log.info("No active authenticated accounts, nothing to do");
      return;
    }

    log.info({ accountCount: accounts.length }, "Processing accounts");

    for (const account of accounts) {
      await runWorkerForAccount(account);
    }

    log.info("Ingestion cycle complete");
  } catch (err) {
    log.error({ err }, "Ingestion cycle failed");
  } finally {
    running = false;
  }
}

/**
 * Schedule the next cycle with jitter.
 */
function scheduleNext(): void {
  const intervalMs = config.workerIntervalMinutes * 60 * 1000;
  const jitterMs = Math.random() * config.jitterMinutes * 60 * 1000;
  const delay = intervalMs + jitterMs;

  log.info(
    { nextRunInMinutes: Math.round(delay / 60000) },
    "Next cycle scheduled"
  );

  timer = setTimeout(async () => {
    await runCycle();
    scheduleNext();
  }, delay);
}

/**
 * Start the scheduler. Runs an immediate first cycle, then schedules subsequent ones.
 */
export async function startScheduler(): Promise<void> {
  log.info(
    {
      intervalMinutes: config.workerIntervalMinutes,
      jitterMinutes: config.jitterMinutes,
    },
    "Scheduler starting"
  );

  // Run immediately on start
  await runCycle();

  // Then schedule recurring cycles
  scheduleNext();
}

/**
 * Stop the scheduler gracefully.
 */
export function stopScheduler(): void {
  if (timer) {
    clearTimeout(timer);
    timer = null;
  }
  log.info("Scheduler stopped");
}
worker/src/tdlib/client.ts (new file, 120 lines)
@@ -0,0 +1,120 @@
import tdl, { createClient, type Client } from "tdl";
import { getTdjson } from "prebuilt-tdlib";
import path from "path";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import {
  updateAccountAuthState,
  getAccountAuthCode,
} from "../db/queries.js";

const log = childLogger("tdlib-client");

// Configure tdl to use the prebuilt tdjson shared library
tdl.configure({ tdjson: getTdjson() });

interface AccountConfig {
  id: string;
  phone: string;
}

/**
 * Create and authenticate a TDLib client for a Telegram account.
 * The authentication flow communicates with the admin UI via the database:
 * - The worker sets authState to AWAITING_CODE when TDLib asks for a phone code
 * - The admin enters the code via the UI, which writes it to the authCode field
 * - The worker polls the DB for the code and feeds it to TDLib
 */
export async function createTdlibClient(
  account: AccountConfig
): Promise<Client> {
  const dbPath = path.join(config.tdlibStateDir, account.id);

  const client = createClient({
    apiId: config.telegramApiId,
    apiHash: config.telegramApiHash,
    databaseDirectory: dbPath,
    filesDirectory: path.join(dbPath, "files"),
  });

  client.on("error", (err) => {
    log.error({ err, accountId: account.id }, "TDLib client error");
  });

  try {
    await client.login(() => ({
      getPhoneNumber: async () => {
        log.info({ accountId: account.id }, "TDLib requesting phone number");
        return account.phone;
      },
      getAuthCode: async () => {
        log.info({ accountId: account.id }, "TDLib requesting auth code");
        await updateAccountAuthState(account.id, "AWAITING_CODE");

        // Poll the database for the code entered via the admin UI
        const code = await pollForAuthCode(account.id);
        if (!code) {
          throw new Error("Auth code not provided within timeout");
        }

        // Clear the code after reading it
        await updateAccountAuthState(account.id, "AUTHENTICATED", null);
        return code;
      },
      getPassword: async () => {
        log.info({ accountId: account.id }, "TDLib requesting 2FA password");
        await updateAccountAuthState(account.id, "AWAITING_PASSWORD");

        // Poll the database for the password entered via the admin UI
        // (submitted through the same authCode field as the code)
        const password = await pollForAuthCode(account.id);
        if (!password) {
          throw new Error("2FA password not provided within timeout");
        }

        await updateAccountAuthState(account.id, "AUTHENTICATED", null);
        return password;
      },
    }));

    await updateAccountAuthState(account.id, "AUTHENTICATED");
    log.info({ accountId: account.id }, "TDLib client authenticated");
    return client;
  } catch (err) {
    log.error({ err, accountId: account.id }, "TDLib authentication failed");
    await updateAccountAuthState(account.id, "EXPIRED");
    throw err;
  }
}

/**
 * Poll the database every 5 seconds for an auth code, up to 5 minutes.
 */
async function pollForAuthCode(
  accountId: string,
  timeoutMs = 300_000
): Promise<string | null> {
  const start = Date.now();
  while (Date.now() - start < timeoutMs) {
    const result = await getAccountAuthCode(accountId);
    if (result?.authCode) {
      return result.authCode;
    }
    await sleep(5000);
  }
  return null;
}

function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Close a TDLib client gracefully.
 */
export async function closeTdlibClient(client: Client): Promise<void> {
  try {
    await client.close();
  } catch (err) {
    log.warn({ err }, "Error closing TDLib client");
  }
}
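Per-account client lifetime would then look like this (sketch; the id and phone are placeholders, and the actual call site is worker.ts, not shown here):

import { createTdlibClient, closeTdlibClient } from "./client.js";

const client = await createTdlibClient({ id: "acct_123", phone: "+15550100" });
try {
  // ... scan source channels, download archives, upload to the destination ...
} finally {
  await closeTdlibClient(client); // always close so TDLib can flush its state
}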
worker/src/tdlib/download.ts (new file, 389 lines)
@@ -0,0 +1,389 @@
import type { Client } from "tdl";
import { readFile, rename, stat } from "fs/promises";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import { isArchiveAttachment } from "../archive/detect.js";
import type { TelegramMessage } from "../archive/multipart.js";
import type { TelegramPhoto } from "../preview/match.js";

const log = childLogger("download");

interface TdPhotoSize {
  type: string;
  photo: {
    id: number;
    size: number;
    expected_size: number;
    local?: {
      path?: string;
      is_downloading_active?: boolean;
      is_downloading_completed?: boolean;
      downloaded_size?: number;
    };
  };
  width: number;
  height: number;
}

interface TdMessage {
  id: number;
  date: number;
  content: {
    _: string;
    document?: {
      file_name?: string;
      document?: {
        id: number;
        size: number;
        local?: {
          path?: string;
          is_downloading_completed?: boolean;
        };
      };
    };
    photo?: {
      sizes?: TdPhotoSize[];
    };
    caption?: {
      text?: string;
    };
  };
}

interface TdFile {
  id: number;
  size: number;
  expected_size: number;
  local: {
    path: string;
    is_downloading_active: boolean;
    is_downloading_completed: boolean;
    downloaded_size: number;
    download_offset: number;
  };
}

export interface ChannelScanResult {
  archives: TelegramMessage[];
  photos: TelegramPhoto[];
}

/**
 * Fetch messages from a channel since a given message ID.
 * Collects both archive attachments AND photo messages (for preview matching).
 * Returns messages in chronological order (oldest first).
 */
export async function getChannelMessages(
  client: Client,
  chatId: bigint,
  fromMessageId?: bigint | null,
  limit = 100
): Promise<ChannelScanResult> {
  const archives: TelegramMessage[] = [];
  const photos: TelegramPhoto[] = [];
  let currentFromId = fromMessageId ? Number(fromMessageId) : 0;

  // eslint-disable-next-line no-constant-condition
  while (true) {
    const result = (await client.invoke({
      _: "getChatHistory",
      chat_id: Number(chatId),
      from_message_id: currentFromId,
      offset: 0,
      limit: Math.min(limit, 100),
      only_local: false,
    })) as { messages: TdMessage[] };

    if (!result.messages || result.messages.length === 0) break;

    for (const msg of result.messages) {
      // Check for archive documents
      const doc = msg.content?.document;
      if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
        archives.push({
          id: BigInt(msg.id),
          fileName: doc.file_name,
          fileId: String(doc.document.id),
          fileSize: BigInt(doc.document.size),
          date: new Date(msg.date * 1000),
        });
        continue;
      }

      // Check for photo messages (potential previews)
      const photo = msg.content?.photo;
      const caption = msg.content?.caption?.text ?? "";
      if (photo?.sizes && photo.sizes.length > 0) {
        // Pick the smallest size for thumbnail (type "s" or "m")
        // TDLib photo sizes are ordered from smallest to largest
        const smallest = photo.sizes[0];
        photos.push({
          id: BigInt(msg.id),
          date: new Date(msg.date * 1000),
          caption,
          fileId: String(smallest.photo.id),
          fileSize: smallest.photo.size || smallest.photo.expected_size,
        });
      }
    }

    currentFromId = result.messages[result.messages.length - 1].id;
    if (result.messages.length < 100) break;

    // Rate limit delay
    await sleep(config.apiDelayMs);
  }

  // Return in chronological order (oldest first)
  return {
    archives: archives.reverse(),
    photos: photos.reverse(),
  };
}

/**
 * Download a photo thumbnail from Telegram and return its raw bytes.
 * Uses synchronous download (photos are small, typically < 100KB).
 * Returns null if download fails (non-critical).
 */
export async function downloadPhotoThumbnail(
  client: Client,
  fileId: string
): Promise<Buffer | null> {
  const numericId = parseInt(fileId, 10);

  try {
    const result = (await client.invoke({
      _: "downloadFile",
      file_id: numericId,
      priority: 1, // Low priority — thumbnails are nice-to-have
      offset: 0,
      limit: 0,
      synchronous: true, // Small file — wait for it
    })) as TdFile;

    if (result?.local?.is_downloading_completed && result.local.path) {
      const data = await readFile(result.local.path);
      log.debug(
        { fileId, bytes: data.length },
        "Downloaded photo thumbnail"
      );
      return data;
    }
  } catch (err) {
    log.warn({ fileId, err }, "Failed to download photo thumbnail");
  }

  return null;
}

export interface DownloadProgress {
  fileId: string;
  fileName: string;
  downloadedBytes: number;
  totalBytes: number;
  percent: number;
  isComplete: boolean;
}

export type ProgressCallback = (progress: DownloadProgress) => void;

/**
 * Download a file from Telegram to a local path with progress tracking
 * and integrity verification.
 *
 * Progress flow:
 * 1. Starts an async download via TDLib
 * 2. Listens for `updateFile` events to track download progress
 * 3. Logs progress at every 10% increment
 * 4. Once complete, verifies the local file size matches the expected size
 * 5. Moves the file from TDLib's cache to the destination path
 *
 * Verification:
 * - Compares the actual file size on disk to the expected size from Telegram
 * - Throws on mismatch (partial/corrupt download)
 * - Throws on timeout (configurable, scales with file size)
 * - Throws if the download stops without completing (network error, etc.)
 */
export async function downloadFile(
  client: Client,
  fileId: string,
  destPath: string,
  expectedSize: bigint,
  fileName: string,
  onProgress?: ProgressCallback
): Promise<void> {
  const numericId = parseInt(fileId, 10);
  const totalBytes = Number(expectedSize);

  log.info(
    { fileId, fileName, destPath, totalBytes },
    "Starting file download"
  );

  // Report initial progress
  onProgress?.({
    fileId,
    fileName,
    downloadedBytes: 0,
    totalBytes,
    percent: 0,
    isComplete: false,
  });

  return new Promise<void>((resolve, reject) => {
    let lastLoggedPercent = 0;
    let settled = false;

    // Timeout: 10 minutes per GB, minimum 5 minutes
    const timeoutMs = Math.max(
      5 * 60_000,
      (totalBytes / (1024 * 1024 * 1024)) * 10 * 60_000
    );
    const timer = setTimeout(() => {
      if (!settled) {
        settled = true;
        cleanup();
        reject(
          new Error(
            `Download timed out after ${Math.round(timeoutMs / 60_000)}min for ${fileName}`
          )
        );
      }
    }, timeoutMs);

    // Listen for file update events to track progress
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const handleUpdate = (update: any) => {
      if (update?._ !== "updateFile") return;
      const file = update.file as TdFile | undefined;
      if (!file || file.id !== numericId) return;

      const downloaded = file.local.downloaded_size;
      const percent =
        totalBytes > 0 ? Math.round((downloaded / totalBytes) * 100) : 0;

      // Log at every 10% increment
      if (percent >= lastLoggedPercent + 10) {
        lastLoggedPercent = percent - (percent % 10);
        log.info(
          { fileId, fileName, downloaded, totalBytes, percent: `${percent}%` },
          "Download progress"
        );
      }

      // Report to callback
      onProgress?.({
        fileId,
        fileName,
        downloadedBytes: downloaded,
        totalBytes,
        percent,
        isComplete: file.local.is_downloading_completed,
      });

      // Download finished
      if (file.local.is_downloading_completed) {
        if (!settled) {
          settled = true;
          cleanup();
          verifyAndMove(file.local.path, destPath, totalBytes, fileName, fileId)
            .then(resolve)
            .catch(reject);
        }
      }

      // Download stopped without completing (network error, cancelled, etc.)
      if (
        !file.local.is_downloading_active &&
        !file.local.is_downloading_completed
      ) {
        if (!settled) {
          settled = true;
          cleanup();
          reject(
            new Error(
              `Download stopped unexpectedly for ${fileName} ` +
                `(${downloaded}/${totalBytes} bytes, ${percent}%)`
            )
          );
        }
      }
    };

    const cleanup = () => {
      clearTimeout(timer);
      client.off("update", handleUpdate);
    };

    // Subscribe to updates BEFORE starting the download
    client.on("update", handleUpdate);

    // Start the async download (non-blocking — progress arrives via updateFile events)
    client
      .invoke({
        _: "downloadFile",
        file_id: numericId,
        priority: 32,
        offset: 0,
        limit: 0,
        synchronous: false,
      })
      .then((result: unknown) => {
        // If the file was already cached locally, invoke returns immediately
        const file = result as TdFile | undefined;
        if (file?.local?.is_downloading_completed && !settled) {
          settled = true;
          cleanup();
          verifyAndMove(file.local.path, destPath, totalBytes, fileName, fileId)
            .then(resolve)
            .catch(reject);
        }
      })
      .catch((err: unknown) => {
        if (!settled) {
          settled = true;
          cleanup();
          reject(err);
        }
      });
  });
}

/**
 * Verify the downloaded file's size matches the expected size,
 * then move it to the destination path.
 */
async function verifyAndMove(
  localPath: string,
  destPath: string,
  expectedBytes: number,
  fileName: string,
  fileId: string
): Promise<void> {
  const stats = await stat(localPath);
  const actualBytes = stats.size;

  if (expectedBytes > 0 && actualBytes !== expectedBytes) {
    log.error(
      { fileId, fileName, expectedBytes, actualBytes },
      "Download size mismatch — file is incomplete or corrupted"
    );
    throw new Error(
      `Download verification failed for ${fileName}: ` +
        `expected ${expectedBytes} bytes, got ${actualBytes} bytes`
    );
  }

  log.info(
    { fileId, fileName, bytes: actualBytes, destPath },
    "File verified and complete"
  );

  // Move from TDLib's cache to our temp directory
  await rename(localPath, destPath);
}

function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
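Wiring the progress callback into the live activity row might look like this (sketch; downloadWithActivity and the 5% throttle are our own choices, not part of the diff):

import path from "path";
import type { Client } from "tdl";
import { downloadFile } from "./download.js";
import { updateRunActivity } from "../db/queries.js";
import { config } from "../util/config.js";
import type { TelegramMessage } from "../archive/multipart.js";

async function downloadWithActivity(
  client: Client,
  runId: string,
  msg: TelegramMessage
): Promise<string> {
  const destPath = path.join(config.tempDir, msg.fileName);
  let lastReported = -5; // so the very first report passes the throttle

  await downloadFile(client, msg.fileId, destPath, msg.fileSize, msg.fileName, (p) => {
    // Throttle DB writes to roughly one per 5% to avoid hammering Postgres.
    if (p.percent - lastReported < 5 && !p.isComplete) return;
    lastReported = p.percent;
    void updateRunActivity(runId, {
      currentActivity: `Downloading ${msg.fileName}`,
      currentStep: "downloading",
      currentFile: msg.fileName,
      downloadedBytes: BigInt(p.downloadedBytes),
      totalBytes: BigInt(p.totalBytes),
      downloadPercent: p.percent,
    });
  });

  return destPath;
}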
worker/src/tdlib/topics.ts (new file, 222 lines)
@@ -0,0 +1,222 @@
|
||||
import type { Client } from "tdl";
|
||||
import { config } from "../util/config.js";
|
||||
import { childLogger } from "../util/logger.js";
|
||||
import { isArchiveAttachment } from "../archive/detect.js";
|
||||
import type { TelegramMessage } from "../archive/multipart.js";
|
||||
import type { TelegramPhoto } from "../preview/match.js";
|
||||
import type { ChannelScanResult } from "./download.js";
|
||||
|
||||
const log = childLogger("topics");
|
||||
|
||||
export interface ForumTopic {
|
||||
topicId: bigint;
|
||||
name: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a chat is a forum supergroup (topics enabled).
|
||||
*/
|
||||
export async function isChatForum(
|
||||
client: Client,
|
||||
chatId: bigint
|
||||
): Promise<boolean> {
|
||||
try {
|
||||
const chat = (await client.invoke({
|
||||
_: "getChat",
|
||||
chat_id: Number(chatId),
|
||||
})) as {
|
||||
type?: {
|
||||
_: string;
|
||||
supergroup_id?: number;
|
||||
is_forum?: boolean;
|
||||
};
|
||||
};
|
||||
|
||||
if (chat.type?._ === "chatTypeSupergroup" && chat.type.is_forum) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Also check via getSupergroup for older TDLib versions
|
||||
if (chat.type?._ === "chatTypeSupergroup" && chat.type.supergroup_id) {
|
||||
const sg = (await client.invoke({
|
||||
_: "getSupergroup",
|
||||
supergroup_id: chat.type.supergroup_id,
|
||||
})) as { is_forum?: boolean };
|
||||
return sg.is_forum === true;
|
||||
}
|
||||
|
||||
return false;
|
||||
} catch (err) {
|
||||
log.warn({ err, chatId: chatId.toString() }, "Failed to check if chat is forum");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all forum topics in a supergroup.
|
||||
*/
|
||||
export async function getForumTopicList(
|
||||
client: Client,
|
||||
chatId: bigint
|
||||
): Promise<ForumTopic[]> {
|
||||
const topics: ForumTopic[] = [];
|
||||
let offsetDate = 0;
|
||||
let offsetMessageId = 0;
|
||||
let offsetMessageThreadId = 0;
|
||||
|
||||
// eslint-disable-next-line no-constant-condition
|
||||
while (true) {
|
||||
const result = (await client.invoke({
|
||||
_: "getForumTopics",
|
||||
chat_id: Number(chatId),
|
||||
query: "",
|
||||
offset_date: offsetDate,
|
||||
offset_message_id: offsetMessageId,
|
||||
offset_message_thread_id: offsetMessageThreadId,
|
||||
limit: 100,
|
||||
})) as {
|
||||
topics?: {
|
||||
info?: {
|
||||
message_thread_id?: number;
|
||||
name?: string;
|
||||
is_general?: boolean;
|
||||
};
|
||||
}[];
|
||||
next_offset_date?: number;
|
||||
next_offset_message_id?: number;
|
||||
next_offset_message_thread_id?: number;
|
||||
};
|
||||
|
||||
if (!result.topics || result.topics.length === 0) break;
|
||||
|
||||
for (const t of result.topics) {
|
||||
if (!t.info?.message_thread_id) continue;
|
||||
// Skip the "General" topic — it's not creator-specific
|
||||
if (t.info.is_general) continue;
|
||||
|
||||
topics.push({
|
||||
topicId: BigInt(t.info.message_thread_id),
|
||||
name: t.info.name ?? "Unnamed",
|
||||
});
|
||||
}
|
||||
|
||||
// Check if there are more pages
|
||||
if (
|
||||
!result.next_offset_date &&
|
||||
!result.next_offset_message_id &&
|
||||
!result.next_offset_message_thread_id
|
||||
) {
|
||||
break;
|
||||
}
|
||||
|
||||
offsetDate = result.next_offset_date ?? 0;
|
||||
offsetMessageId = result.next_offset_message_id ?? 0;
|
||||
offsetMessageThreadId = result.next_offset_message_thread_id ?? 0;
|
||||
|
||||
await sleep(config.apiDelayMs);
|
||||
}
|
||||
|
||||
log.info(
|
||||
{ chatId: chatId.toString(), topicCount: topics.length },
|
||||
"Enumerated forum topics"
|
||||
);
|
||||
|
||||
return topics;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch messages from a specific forum topic (thread).
|
||||
* Uses getMessageThreadHistory to scan within a topic.
|
||||
*/
|
||||
export async function getTopicMessages(
|
||||
client: Client,
|
||||
chatId: bigint,
|
||||
topicId: bigint,
|
||||
fromMessageId?: bigint | null,
|
||||
limit = 100
|
||||
): Promise<ChannelScanResult> {
|
||||
const archives: TelegramMessage[] = [];
|
||||
const photos: TelegramPhoto[] = [];
|
||||
let currentFromId = fromMessageId ? Number(fromMessageId) : 0;
|
||||
|
||||
// eslint-disable-next-line no-constant-condition
|
||||
while (true) {
|
||||
    const result = (await client.invoke({
      _: "getMessageThreadHistory",
      chat_id: Number(chatId),
      message_id: Number(topicId),
      from_message_id: currentFromId,
      offset: 0,
      limit: Math.min(limit, 100),
    })) as {
      messages?: {
        id: number;
        date: number;
        content: {
          _: string;
          document?: {
            file_name?: string;
            document?: {
              id: number;
              size: number;
            };
          };
          photo?: {
            sizes?: {
              type: string;
              photo: { id: number; size: number; expected_size: number };
              width: number;
              height: number;
            }[];
          };
          caption?: { text?: string };
        };
      }[];
    };

    if (!result.messages || result.messages.length === 0) break;

    for (const msg of result.messages) {
      // Check for archive documents
      const doc = msg.content?.document;
      if (doc?.file_name && doc.document && isArchiveAttachment(doc.file_name)) {
        archives.push({
          id: BigInt(msg.id),
          fileName: doc.file_name,
          fileId: String(doc.document.id),
          fileSize: BigInt(doc.document.size),
          date: new Date(msg.date * 1000),
        });
        continue;
      }

      // Check for photo messages (potential previews)
      const photo = msg.content?.photo;
      const caption = msg.content?.caption?.text ?? "";
      if (photo?.sizes && photo.sizes.length > 0) {
        const smallest = photo.sizes[0];
        photos.push({
          id: BigInt(msg.id),
          date: new Date(msg.date * 1000),
          caption,
          fileId: String(smallest.photo.id),
          fileSize: smallest.photo.size || smallest.photo.expected_size,
        });
      }
    }

    currentFromId = result.messages[result.messages.length - 1].id;
    // Compare against the actual requested page size, not a hardcoded 100,
    // so callers passing a smaller limit don't terminate after one page.
    if (result.messages.length < Math.min(limit, 100)) break;

    await sleep(config.apiDelayMs);
  }

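  // History arrives newest-first; reversing restores chronological order.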
  return {
    archives: archives.reverse(),
    photos: photos.reverse(),
  };
}

function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
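A minimal caller-side sketch of how the two helpers above compose for a per-topic scan (hypothetical; `loadTopicProgress` stands in for whatever progress lookup the caller uses):

const topics = await getForumTopicList(client, chatTelegramId);
for (const topic of topics) {
  // loadTopicProgress is an assumed lookup, not part of this file
  const progress = loadTopicProgress(topic.topicId);
  const { archives, photos } = await getTopicMessages(client, chatTelegramId, topic.topicId, progress);
  console.log(`${topic.name}: ${archives.length} archive(s), ${photos.length} photo(s)`);
}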
76
worker/src/upload/channel.ts
Normal file
@@ -0,0 +1,76 @@
import type { Client } from "tdl";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";

const log = childLogger("upload");

export interface UploadResult {
  messageId: bigint;
}

/**
 * Upload one or more files to a destination Telegram channel.
 * For multipart archives, each file is sent as a separate message.
 * Returns the message ID of the first uploaded message.
 */
export async function uploadToChannel(
  client: Client,
  chatId: bigint,
  filePaths: string[],
  caption?: string
): Promise<UploadResult> {
  let firstMessageId: bigint | null = null;

  for (let i = 0; i < filePaths.length; i++) {
    const filePath = filePaths[i];
    const fileCaption = i === 0 && caption ? caption : undefined;

    log.debug(
      { chatId: Number(chatId), filePath, part: i + 1, total: filePaths.length },
      "Uploading file to channel"
    );

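    // inputFileLocal hands TDLib a path on disk; TDLib streams the upload itself.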
    const result = (await client.invoke({
      _: "sendMessage",
      chat_id: Number(chatId),
      input_message_content: {
        _: "inputMessageDocument",
        document: {
          _: "inputFileLocal",
          path: filePath,
        },
        caption: fileCaption
          ? {
              _: "formattedText",
              text: fileCaption,
            }
          : undefined,
      },
    })) as { id: number };

    if (i === 0) {
      firstMessageId = BigInt(result.id);
    }

    // Rate limit delay between uploads
    if (i < filePaths.length - 1) {
      await sleep(config.apiDelayMs);
    }
  }

  if (firstMessageId === null) {
    throw new Error("Upload failed: no messages sent");
  }

  log.info(
    { chatId: Number(chatId), messageId: Number(firstMessageId), files: filePaths.length },
    "Upload complete"
  );

  return { messageId: firstMessageId };
}

function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
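A usage sketch for the uploader (illustrative paths and caption; only the first part carries the caption, per the loop above):

const { messageId } = await uploadToChannel(
  client,
  destChannel.telegramId,
  ["/tmp/zips/run1_55_pack.zip.001", "/tmp/zips/run1_56_pack.zip.002"],
  "Example pack (2 parts)"
);
// messageId refers to the first of the two messages sent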
18
worker/src/util/config.ts
Normal file
@@ -0,0 +1,18 @@
export const config = {
  databaseUrl: process.env.DATABASE_URL ?? "",
  workerIntervalMinutes: parseInt(process.env.WORKER_INTERVAL_MINUTES ?? "60", 10),
  tempDir: process.env.WORKER_TEMP_DIR ?? "/tmp/zips",
  tdlibStateDir: process.env.TDLIB_STATE_DIR ?? "/data/tdlib",
  maxZipSizeMB: parseInt(process.env.WORKER_MAX_ZIP_SIZE_MB ?? "4096", 10),
  logLevel: (process.env.LOG_LEVEL ?? "info") as "debug" | "info" | "warn" | "error",
  telegramApiId: parseInt(process.env.TELEGRAM_API_ID ?? "0", 10),
  telegramApiHash: process.env.TELEGRAM_API_HASH ?? "",
  /** Maximum jitter added to scheduler interval (in minutes) */
  jitterMinutes: 5,
  /** Maximum time between multipart archive parts (in hours) */
  multipartTimeoutHours: 24,
  /** Delay between Telegram API calls (in ms) to avoid rate limits */
  apiDelayMs: 1000,
  /** Max retries for rate-limited requests */
  maxRetries: 5,
} as const;
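Since the API credentials default to empty/zero values, a fail-fast guard at worker startup is worth sketching (hypothetical, not part of this commit):

if (!config.databaseUrl || !config.telegramApiId || !config.telegramApiHash) {
  // No usable defaults exist for these three; abort before touching TDLib.
  throw new Error("DATABASE_URL, TELEGRAM_API_ID and TELEGRAM_API_HASH must be set");
}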
14
worker/src/util/logger.ts
Normal file
@@ -0,0 +1,14 @@
import pino from "pino";
import { config } from "./config.js";

export const logger = pino({
  level: config.logLevel,
  transport:
    config.logLevel === "debug"
      ? { target: "pino/file", options: { destination: 1 } }
      : undefined,
});

export function childLogger(name: string, extra?: Record<string, unknown>) {
  return logger.child({ module: name, ...extra });
}
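Usage follows the standard pino child-logger pattern; the bindings merge into every log line (output shape assumed from pino defaults):

const log = childLogger("scanner", { accountId: "acc_1" });
log.info({ chatId: "42" }, "Scanning channel");
// → {"level":30,...,"module":"scanner","accountId":"acc_1","chatId":"42","msg":"Scanning channel"}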
665
worker/src/worker.ts
Normal file
@@ -0,0 +1,665 @@
import path from "path";
import { unlink, readdir } from "fs/promises";
import { config } from "./util/config.js";
import { childLogger } from "./util/logger.js";
import { tryAcquireLock, releaseLock } from "./db/locks.js";
import {
  getSourceChannelMappings,
  getDestinationChannel,
  packageExistsByHash,
  createPackageWithFiles,
  createIngestionRun,
  completeIngestionRun,
  failIngestionRun,
  updateLastProcessedMessage,
  updateRunActivity,
  setChannelForum,
  getTopicProgress,
  upsertTopicProgress,
} from "./db/queries.js";
import type { ActivityUpdate } from "./db/queries.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { getChannelMessages, downloadFile, downloadPhotoThumbnail } from "./tdlib/download.js";
import type { DownloadProgress, ChannelScanResult } from "./tdlib/download.js";
import { isChatForum, getForumTopicList, getTopicMessages } from "./tdlib/topics.js";
import { matchPreviewToArchive } from "./preview/match.js";
import { groupArchiveSets } from "./archive/multipart.js";
import type { ArchiveSet } from "./archive/multipart.js";
import { extractCreatorFromFileName } from "./archive/creator.js";
import { hashParts } from "./archive/hash.js";
import { readZipCentralDirectory } from "./archive/zip-reader.js";
import { readRarContents } from "./archive/rar-reader.js";
import { byteLevelSplit } from "./archive/split.js";
import { uploadToChannel } from "./upload/channel.js";
import type { TelegramAccount, TelegramChannel } from "@prisma/client";
import type { Client } from "tdl";

const log = childLogger("worker");

/**
 * Throttle DB writes for download progress to avoid hammering the DB.
 * Only writes if at least 2 seconds have passed since the last write.
 */
function createThrottledActivityUpdater(runId: string, minIntervalMs = 2000) {
  let lastWriteTime = 0;
  let pendingUpdate: ActivityUpdate | null = null;
  let flushTimer: ReturnType<typeof setTimeout> | null = null;

  const flush = async () => {
    if (pendingUpdate) {
      const update = pendingUpdate;
      pendingUpdate = null;
      lastWriteTime = Date.now();
      await updateRunActivity(runId, update).catch(() => {});
    }
  };

  return {
    update: (activity: ActivityUpdate) => {
      pendingUpdate = activity;
      const elapsed = Date.now() - lastWriteTime;
      if (elapsed >= minIntervalMs) {
        if (flushTimer) clearTimeout(flushTimer);
        flush();
      } else if (!flushTimer) {
        flushTimer = setTimeout(() => {
          flushTimer = null;
          flush();
        }, minIntervalMs - elapsed);
      }
    },
    flush,
  };
}
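
// Usage sketch (illustrative; onProgress is a stand-in callback source):
// bursts of progress callbacks coalesce into at most one DB write per
// interval, and the trailing timer guarantees the final update is
// persisted even if the burst stops mid-window.
//
//   const throttled = createThrottledActivityUpdater(run.id);
//   onProgress((p) => throttled.update({ downloadPercent: p.percent }));
//   await throttled.flush(); // force out the last pending state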

/** Shared context passed to the archive processing pipeline. */
interface PipelineContext {
  client: Client;
  runId: string;
  channelTitle: string;
  channel: TelegramChannel;
  destChannelTelegramId: bigint;
  destChannelId: string;
  throttled: ReturnType<typeof createThrottledActivityUpdater>;
  counters: {
    messagesScanned: number;
    zipsFound: number;
    zipsDuplicate: number;
    zipsIngested: number;
  };
  /** Creator from forum topic name (null for non-forum). */
  topicCreator: string | null;
  /** Forum topic ID (null for non-forum). */
  sourceTopicId: bigint | null;
  accountLog: ReturnType<typeof childLogger>;
}

/**
 * Run a full ingestion cycle for a single Telegram account.
 * Every step writes live activity to the DB so the admin UI can display it.
 */
export async function runWorkerForAccount(account: TelegramAccount): Promise<void> {
  const accountLog = childLogger("worker", { accountId: account.id, phone: account.phone });

  // 1. Acquire advisory lock
  const acquired = await tryAcquireLock(account.id);
  if (!acquired) {
    accountLog.info("Account already locked, skipping");
    return;
  }

  let runId: string | undefined;

  try {
    // 2. Create ingestion run
    const run = await createIngestionRun(account.id);
    runId = run.id;
    const activeRunId = runId;
    accountLog.info({ runId }, "Ingestion run started");

    const throttled = createThrottledActivityUpdater(activeRunId);

    // 3. Initialize TDLib client
    await updateRunActivity(activeRunId, {
      currentActivity: "Connecting to Telegram",
      currentStep: "connecting",
    });

    const client = await createTdlibClient({
      id: account.id,
      phone: account.phone,
    });

    const counters = {
      messagesScanned: 0,
      zipsFound: 0,
      zipsDuplicate: 0,
      zipsIngested: 0,
    };

    try {
      // 4. Get assigned source channels and destination
      const channelMappings = await getSourceChannelMappings(account.id);
      const destChannel = await getDestinationChannel(account.id);

      if (!destChannel) {
        throw new Error("No active destination channel configured");
      }

      for (const mapping of channelMappings) {
        const channel = mapping.channel;

        // ── Check if channel is a forum ──
        const forum = await isChatForum(client, channel.telegramId);
        if (forum !== channel.isForum) {
          await setChannelForum(channel.id, forum);
          accountLog.info(
            { channelId: channel.id, title: channel.title, isForum: forum },
            "Updated channel forum status"
          );
        }

        const pipelineCtx: PipelineContext = {
          client,
          runId: activeRunId,
          channelTitle: channel.title,
          channel,
          destChannelTelegramId: destChannel.telegramId,
          destChannelId: destChannel.id,
          throttled,
          counters,
          topicCreator: null,
          sourceTopicId: null,
          accountLog,
        };

        if (forum) {
          // ── Forum channel: scan per-topic ──
          await updateRunActivity(activeRunId, {
            currentActivity: `Enumerating topics in "${channel.title}"`,
            currentStep: "scanning",
            currentChannel: channel.title,
            currentFile: null,
            currentFileNum: null,
            totalFiles: null,
            downloadedBytes: null,
            totalBytes: null,
            downloadPercent: null,
          });

          const topics = await getForumTopicList(client, channel.telegramId);
          const topicProgressList = await getTopicProgress(mapping.id);

          accountLog.info(
            { channelId: channel.id, title: channel.title, topicCount: topics.length },
            "Scanning forum channel by topic"
          );

          for (const topic of topics) {
            const progress = topicProgressList.find(
              (tp) => tp.topicId === topic.topicId
            );

            await updateRunActivity(activeRunId, {
              currentActivity: `Scanning topic "${topic.name}" in "${channel.title}"`,
              currentStep: "scanning",
              currentChannel: `${channel.title} › ${topic.name}`,
              currentFile: null,
              currentFileNum: null,
              totalFiles: null,
              downloadedBytes: null,
              totalBytes: null,
              downloadPercent: null,
            });

            const scanResult = await getTopicMessages(
              client,
              channel.telegramId,
              topic.topicId,
              progress?.lastProcessedMessageId
            );

            if (scanResult.archives.length === 0) {
              accountLog.debug(
                { channelId: channel.id, topic: topic.name },
                "No new archives in topic"
              );
              continue;
            }

            accountLog.info(
              { topic: topic.name, archives: scanResult.archives.length, photos: scanResult.photos.length },
              "Found messages in topic"
            );

            // Process archives with topic creator
            pipelineCtx.topicCreator = topic.name;
            pipelineCtx.sourceTopicId = topic.topicId;
            pipelineCtx.channelTitle = `${channel.title} › ${topic.name}`;

            await processArchiveSets(pipelineCtx, scanResult, run.id);

            // Update topic progress
            const allMsgIds = [
              ...scanResult.archives.map((m) => m.id),
              ...scanResult.photos.map((p) => p.id),
            ];
            if (allMsgIds.length > 0) {
              const maxId = allMsgIds.reduce((a, b) => (a > b ? a : b));
              await upsertTopicProgress(mapping.id, topic.topicId, topic.name, maxId);
            }
          }
        } else {
          // ── Non-forum channel: flat scan (existing behavior) ──
          await updateRunActivity(activeRunId, {
            currentActivity: `Scanning "${channel.title}" for new archives`,
            currentStep: "scanning",
            currentChannel: channel.title,
            currentFile: null,
            currentFileNum: null,
            totalFiles: null,
            downloadedBytes: null,
            totalBytes: null,
            downloadPercent: null,
          });

          accountLog.info(
            { channelId: channel.id, title: channel.title },
            "Processing source channel"
          );

          const scanResult = await getChannelMessages(
            client,
            channel.telegramId,
            mapping.lastProcessedMessageId
          );

          if (scanResult.archives.length === 0) {
            accountLog.debug({ channelId: channel.id }, "No new archives");
            continue;
          }

          accountLog.info(
            { archives: scanResult.archives.length, photos: scanResult.photos.length },
            "Found messages in channel"
          );

          // For non-forum channels there is no topic creator; it is resolved
          // per-archive from the filename instead.
          pipelineCtx.topicCreator = null;
          pipelineCtx.sourceTopicId = null;
          pipelineCtx.channelTitle = channel.title;

          await processArchiveSets(pipelineCtx, scanResult, run.id);

          // Update last processed message
          const allMsgIds = [
            ...scanResult.archives.map((m) => m.id),
            ...scanResult.photos.map((p) => p.id),
          ];
          if (allMsgIds.length > 0) {
            const maxId = allMsgIds.reduce((a, b) => (a > b ? a : b));
            await updateLastProcessedMessage(mapping.id, maxId);
          }
        }
      }

      // ── Done ──
      await completeIngestionRun(activeRunId, counters);
      accountLog.info({ counters }, "Ingestion run completed");
    } finally {
      await closeTdlibClient(client);
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    accountLog.error({ err }, "Ingestion run failed");
    if (runId) {
      await failIngestionRun(runId, message).catch((e) =>
        accountLog.error({ e }, "Failed to mark run as failed")
      );
    }
  } finally {
    await releaseLock(account.id);
  }
}

/**
 * Process a scan result through the archive pipeline:
 * group → download → hash → dedup → metadata → split → upload → preview → index.
 */
async function processArchiveSets(
  ctx: PipelineContext,
  scanResult: ChannelScanResult,
  ingestionRunId: string
): Promise<void> {
  // Only these fields are used here; the rest of ctx flows to processOneArchiveSet.
  const { runId, channelTitle, counters, accountLog } = ctx;

  // Group into archive sets
  const archiveSets = groupArchiveSets(scanResult.archives);
  counters.zipsFound += archiveSets.length;

  // Match preview photos to archive sets
  const previewMatches = matchPreviewToArchive(
    scanResult.photos,
    archiveSets.map((s) => ({
      baseName: s.baseName,
      firstMessageId: s.parts[0].id,
      firstMessageDate: s.parts[0].date,
    }))
  );

  if (previewMatches.size > 0) {
    accountLog.info(
      { matched: previewMatches.size, total: archiveSets.length },
      "Matched preview photos to archives"
    );
  }

  await updateRunActivity(runId, {
    currentActivity: `Found ${archiveSets.length} archive(s) in "${channelTitle}"`,
    currentStep: "scanning",
    currentChannel: channelTitle,
    totalFiles: archiveSets.length,
    zipsFound: counters.zipsFound,
  });

  for (let setIdx = 0; setIdx < archiveSets.length; setIdx++) {
    await processOneArchiveSet(
      ctx,
      archiveSets[setIdx],
      setIdx,
      archiveSets.length,
      previewMatches,
      ingestionRunId
    );
  }
}

/**
 * Process a single archive set through the full pipeline.
 */
async function processOneArchiveSet(
  ctx: PipelineContext,
  archiveSet: ArchiveSet,
  setIdx: number,
  totalSets: number,
  previewMatches: Map<string, { id: bigint; fileId: string }>,
  ingestionRunId: string
): Promise<void> {
  const {
    client, runId, channelTitle, channel,
    destChannelTelegramId, destChannelId,
    throttled, counters, topicCreator, sourceTopicId, accountLog,
  } = ctx;

  counters.messagesScanned += archiveSet.parts.length;
  const archiveName = archiveSet.parts[0].fileName;
  const tempPaths: string[] = [];
  let splitPaths: string[] = [];

  try {
    // ── Downloading ──
    for (let partIdx = 0; partIdx < archiveSet.parts.length; partIdx++) {
      const part = archiveSet.parts[partIdx];
      const tempPath = path.join(
        config.tempDir,
        `${ingestionRunId}_${part.id}_${part.fileName}`
      );

      const partLabel = archiveSet.parts.length > 1
        ? ` (part ${partIdx + 1}/${archiveSet.parts.length})`
        : "";

      await updateRunActivity(runId, {
        currentActivity: `Downloading ${part.fileName}${partLabel}`,
        currentStep: "downloading",
        currentChannel: channelTitle,
        currentFile: part.fileName,
        currentFileNum: setIdx + 1,
        totalFiles: totalSets,
        downloadedBytes: 0n,
        totalBytes: part.fileSize,
        downloadPercent: 0,
        messagesScanned: counters.messagesScanned,
      });

      accountLog.info(
        {
          fileName: part.fileName,
          fileSize: Number(part.fileSize),
          part: partIdx + 1,
          totalParts: archiveSet.parts.length,
        },
        "Downloading archive part"
      );

      await downloadFile(
        client,
        part.fileId,
        tempPath,
        part.fileSize,
        part.fileName,
        (progress: DownloadProgress) => {
          throttled.update({
            currentActivity: `Downloading ${part.fileName}${partLabel} — ${progress.percent}%`,
            currentStep: "downloading",
            currentChannel: channelTitle,
            currentFile: part.fileName,
            currentFileNum: setIdx + 1,
            totalFiles: totalSets,
            downloadedBytes: BigInt(progress.downloadedBytes),
            totalBytes: BigInt(progress.totalBytes),
            downloadPercent: progress.percent,
          });
        }
      );
      await throttled.flush();
      tempPaths.push(tempPath);
    }

    // ── Hashing ──
    await updateRunActivity(runId, {
      currentActivity: `Computing hash for ${archiveName}`,
      currentStep: "hashing",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
      downloadedBytes: null,
      totalBytes: null,
      downloadPercent: null,
    });

    const contentHash = await hashParts(tempPaths);

    // ── Deduplicating ──
    await updateRunActivity(runId, {
      currentActivity: `Checking if ${archiveName} is a duplicate`,
      currentStep: "deduplicating",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
    });

    const exists = await packageExistsByHash(contentHash);
    if (exists) {
      counters.zipsDuplicate++;
      accountLog.debug({ contentHash }, "Duplicate archive, skipping");

      await updateRunActivity(runId, {
        currentActivity: `Skipped ${archiveName} (duplicate)`,
        currentStep: "deduplicating",
        currentChannel: channelTitle,
        currentFile: archiveName,
        currentFileNum: setIdx + 1,
        totalFiles: totalSets,
        zipsDuplicate: counters.zipsDuplicate,
      });
      return;
    }

    // ── Reading metadata ──
    await updateRunActivity(runId, {
      currentActivity: `Reading file list from ${archiveName}`,
      currentStep: "reading_metadata",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
    });

    let entries: {
      path: string;
      fileName: string;
      extension: string | null;
      compressedSize: bigint;
      uncompressedSize: bigint;
      crc32: string | null;
    }[] = [];
    try {
      if (archiveSet.type === "ZIP") {
        entries = await readZipCentralDirectory(tempPaths);
      } else {
        entries = await readRarContents(tempPaths[0]);
      }
    } catch (err) {
      accountLog.warn(
        { err, baseName: archiveSet.baseName },
        "Failed to read archive metadata, ingesting without file list"
      );
    }

    // ── Splitting (if needed) ──
    let uploadPaths = tempPaths;
    const totalSize = archiveSet.parts.reduce((sum, p) => sum + p.fileSize, 0n);

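    // 2 GiB is Telegram's per-file upload cap for standard accounts
    // (4 GiB with Premium), so oversized single archives are byte-split first.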
    if (!archiveSet.isMultipart && totalSize > 2n * 1024n * 1024n * 1024n) {
      await updateRunActivity(runId, {
        currentActivity: `Splitting ${archiveName} for upload (>2GB)`,
        currentStep: "splitting",
        currentChannel: channelTitle,
        currentFile: archiveName,
        currentFileNum: setIdx + 1,
        totalFiles: totalSets,
      });
      splitPaths = await byteLevelSplit(tempPaths[0]);
      uploadPaths = splitPaths;
    }

    // ── Uploading ──
    const uploadLabel = uploadPaths.length > 1 ? ` (${uploadPaths.length} parts)` : "";
    await updateRunActivity(runId, {
      currentActivity: `Uploading ${archiveName} to archive channel${uploadLabel}`,
      currentStep: "uploading",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
    });

    const destResult = await uploadToChannel(client, destChannelTelegramId, uploadPaths);

    // ── Preview thumbnail ──
    let previewData: Buffer | null = null;
    let previewMsgId: bigint | null = null;
    const matchedPhoto = previewMatches.get(archiveSet.baseName);
    if (matchedPhoto) {
      await updateRunActivity(runId, {
        currentActivity: `Downloading preview image for ${archiveName}`,
        currentStep: "preview",
        currentChannel: channelTitle,
        currentFile: archiveName,
        currentFileNum: setIdx + 1,
        totalFiles: totalSets,
      });
      previewData = await downloadPhotoThumbnail(client, matchedPhoto.fileId);
      previewMsgId = matchedPhoto.id;
    }

    // ── Resolve creator: topic name > filename extraction > null ──
    const creator = topicCreator ?? extractCreatorFromFileName(archiveName);

    // ── Indexing ──
    await updateRunActivity(runId, {
      currentActivity: `Saving metadata for ${archiveName} (${entries.length} files)`,
      currentStep: "indexing",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
    });

    await createPackageWithFiles({
      contentHash,
      fileName: archiveName,
      fileSize: totalSize,
      archiveType: archiveSet.type,
      sourceChannelId: channel.id,
      sourceMessageId: archiveSet.parts[0].id,
      sourceTopicId,
      destChannelId,
      destMessageId: destResult.messageId,
      isMultipart: archiveSet.parts.length > 1 || uploadPaths.length > 1,
      partCount: uploadPaths.length,
      ingestionRunId,
      creator,
      previewData,
      previewMsgId,
      files: entries,
    });

    counters.zipsIngested++;

    await updateRunActivity(runId, {
      currentActivity: `Ingested ${archiveName} (${entries.length} files indexed)`,
      currentStep: "complete",
      currentChannel: channelTitle,
      currentFile: archiveName,
      currentFileNum: setIdx + 1,
      totalFiles: totalSets,
      zipsIngested: counters.zipsIngested,
    });

    accountLog.info(
      { fileName: archiveName, contentHash, fileCount: entries.length, creator },
      "Archive ingested"
    );
  } finally {
    // ALWAYS delete temp files
    await deleteFiles([...tempPaths, ...splitPaths]);
  }
}

async function deleteFiles(paths: string[]): Promise<void> {
  for (const p of paths) {
    try {
      await unlink(p);
    } catch {
      // File may already be deleted or never created
    }
  }
}

/**
 * Clean up any leftover temp files from previous runs.
 */
export async function cleanupTempDir(): Promise<void> {
  try {
    const files = await readdir(config.tempDir);
    for (const file of files) {
      await unlink(path.join(config.tempDir, file)).catch(() => {});
    }
    if (files.length > 0) {
      log.info({ count: files.length }, "Cleaned up stale temp files");
    }
  } catch {
    // Directory might not exist yet
  }
}