Fix worker getting stuck during sync: add timeouts, stuck detection, and safety limits

- Add invokeWithTimeout wrapper for TDLib API calls (2min timeout per call)
- Add stuck detection to getChannelMessages: break if from_message_id doesn't advance
- Add stuck detection to getTopicMessages: same protection for topic scanning
- Add stuck detection to getForumTopicList: break if pagination offsets don't advance
- Add max page limit (5000) to all scanning loops to prevent infinite pagination
- Add mutex wait timeout (30min) to prevent indefinite blocking when holder hangs
- Add cycle timeout (4h default, configurable via WORKER_CYCLE_TIMEOUT_MINUTES)
- Fix end-of-page detection to use actual limit value instead of hardcoded 100

Co-authored-by: xCyanGrizzly <53275238+xCyanGrizzly@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-03-05 13:14:53 +00:00
parent ad71346468
commit 9adbdb2a77
73 changed files with 3945 additions and 40 deletions

12
worker/dist/archive/creator.d.ts vendored Normal file
View File

@@ -0,0 +1,12 @@
/**
* Extract a creator name from common archive file naming patterns.
*
* Priority in the worker: topic name > filename extraction.
* This is the fallback when no forum topic name is available.
*
* Patterns handled (split on ` - `):
* "Mammoth Factory - 2026-01.zip" → "Mammoth Factory"
* "Artist Name - Pack Title.part01.rar" → "Artist Name"
* "some_random_file.zip" → null
*/
export declare function extractCreatorFromFileName(fileName: string): string | null;

21
worker/dist/archive/creator.js vendored Normal file
View File

@@ -0,0 +1,21 @@
/**
* Extract a creator name from common archive file naming patterns.
*
* Priority in the worker: topic name > filename extraction.
* This is the fallback when no forum topic name is available.
*
* Patterns handled (split on ` - `):
* "Mammoth Factory - 2026-01.zip" → "Mammoth Factory"
* "Artist Name - Pack Title.part01.rar" → "Artist Name"
* "some_random_file.zip" → null
*/
export function extractCreatorFromFileName(fileName) {
// Strip archive extensions (.zip, .rar, .part01.rar, .z01, etc.)
const bare = fileName.replace(/(\.(part\d+\.rar|z\d{2}|zip|rar))+$/i, "");
const idx = bare.indexOf(" - ");
if (idx <= 0)
return null;
const creator = bare.slice(0, idx).trim();
return creator.length > 0 ? creator : null;
}
//# sourceMappingURL=creator.js.map

1
worker/dist/archive/creator.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"creator.js","sourceRoot":"","sources":["../../src/archive/creator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AACH,MAAM,UAAU,0BAA0B,CAAC,QAAgB;IACzD,iEAAiE;IACjE,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,sCAAsC,EAAE,EAAE,CAAC,CAAC;IAE1E,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAChC,IAAI,GAAG,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAE1B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1C,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7C,CAAC"}

15
worker/dist/archive/detect.d.ts vendored Normal file
View File

@@ -0,0 +1,15 @@
export type ArchiveFormat = "ZIP" | "RAR";
export interface MultipartInfo {
baseName: string;
partNumber: number;
format: ArchiveFormat;
pattern: "ZIP_NUMBERED" | "ZIP_LEGACY" | "RAR_PART" | "RAR_LEGACY" | "SINGLE";
}
/**
* Detect if a filename is an archive and extract multipart info.
*/
export declare function detectArchive(fileName: string): MultipartInfo | null;
/**
* Check if a filename looks like any archive attachment we should process.
*/
export declare function isArchiveAttachment(fileName: string): boolean;

77
worker/dist/archive/detect.js vendored Normal file
View File

@@ -0,0 +1,77 @@
const patterns = [
// pack.zip.001, pack.zip.002
{
regex: /^(.+\.zip)\.(\d{3,})$/i,
format: "ZIP",
pattern: "ZIP_NUMBERED",
getBaseName: (m) => m[1],
getPartNumber: (m) => parseInt(m[2], 10),
},
// pack.z01, pack.z02 (legacy split — final part is pack.zip)
{
regex: /^(.+)\.z(\d{2,})$/i,
format: "ZIP",
pattern: "ZIP_LEGACY",
getBaseName: (m) => m[1],
getPartNumber: (m) => parseInt(m[2], 10),
},
// pack.part1.rar, pack.part2.rar
{
regex: /^(.+)\.part(\d+)\.rar$/i,
format: "RAR",
pattern: "RAR_PART",
getBaseName: (m) => m[1],
getPartNumber: (m) => parseInt(m[2], 10),
},
// pack.r00, pack.r01 (legacy split — final part is pack.rar)
{
regex: /^(.+)\.r(\d{2,})$/i,
format: "RAR",
pattern: "RAR_LEGACY",
getBaseName: (m) => m[1],
getPartNumber: (m) => parseInt(m[2], 10),
},
];
/**
* Detect if a filename is an archive and extract multipart info.
*/
export function detectArchive(fileName) {
// Check multipart patterns first
for (const p of patterns) {
const match = fileName.match(p.regex);
if (match) {
return {
baseName: p.getBaseName(match),
partNumber: p.getPartNumber(match),
format: p.format,
pattern: p.pattern,
};
}
}
// Single .zip file — could be a standalone or the final part of a ZIP_LEGACY set
if (/\.zip$/i.test(fileName)) {
return {
baseName: fileName.replace(/\.zip$/i, ""),
partNumber: -1, // -1 signals "could be single or final legacy part"
format: "ZIP",
pattern: "SINGLE",
};
}
// Single .rar file — could be standalone or final part of RAR_LEGACY set
if (/\.rar$/i.test(fileName)) {
return {
baseName: fileName.replace(/\.rar$/i, ""),
partNumber: -1,
format: "RAR",
pattern: "SINGLE",
};
}
return null;
}
/**
* Check if a filename looks like any archive attachment we should process.
*/
export function isArchiveAttachment(fileName) {
return detectArchive(fileName) !== null;
}
//# sourceMappingURL=detect.js.map

1
worker/dist/archive/detect.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"detect.js","sourceRoot":"","sources":["../../src/archive/detect.ts"],"names":[],"mappings":"AASA,MAAM,QAAQ,GAMR;IACJ,6BAA6B;IAC7B;QACE,KAAK,EAAE,wBAAwB;QAC/B,MAAM,EAAE,KAAK;QACb,OAAO,EAAE,cAAc;QACvB,WAAW,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;KACzC;IACD,6DAA6D;IAC7D;QACE,KAAK,EAAE,oBAAoB;QAC3B,MAAM,EAAE,KAAK;QACb,OAAO,EAAE,YAAY;QACrB,WAAW,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;KACzC;IACD,iCAAiC;IACjC;QACE,KAAK,EAAE,yBAAyB;QAChC,MAAM,EAAE,KAAK;QACb,OAAO,EAAE,UAAU;QACnB,WAAW,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;KACzC;IACD,6DAA6D;IAC7D;QACE,KAAK,EAAE,oBAAoB;QAC3B,MAAM,EAAE,KAAK;QACb,OAAO,EAAE,YAAY;QACrB,WAAW,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;KACzC;CACF,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,QAAgB;IAC5C,iCAAiC;IACjC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QACtC,IAAI,KAAK,EAAE,CAAC;YACV,OAAO;gBACL,QAAQ,EAAE,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC;gBAC9B,UAAU,EAAE,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;gBAClC,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,OAAO,EAAE,CAAC,CAAC,OAAO;aACnB,CAAC;QACJ,CAAC;IACH,CAAC;IAED,iFAAiF;IACjF,IAAI,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC7B,OAAO;YACL,QAAQ,EAAE,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;YACzC,UAAU,EAAE,CAAC,CAAC,EAAE,oDAAoD;YACpE,MAAM,EAAE,KAAK;YACb,OAAO,EAAE,QAAQ;SAClB,CAAC;IACJ,CAAC;IAED,yEAAyE;IACzE,IAAI,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC7B,OAAO;YACL,QAAQ,EAAE,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;YACzC,UAAU,EAAE,CAAC,CAAC;YACd,MAAM,EAAE,KAAK;YACb,OAAO,EAAE,QAAQ;SAClB,CAAC;IACJ,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,QAAgB;IAClD,OAAO,aAAa,CAAC,QAAQ,CAAC,KAAK,IAAI,CAAC;AAC1C,CAAC"}

6
worker/dist/archive/hash.d.ts vendored Normal file
View File

@@ -0,0 +1,6 @@
/**
* Compute SHA-256 hash of one or more files by streaming them in order.
* Memory usage: O(1) — reads in 64KB chunks regardless of total size.
* For multipart archives, pass all parts sorted by part number.
*/
export declare function hashParts(filePaths: string[]): Promise<string>;

22
worker/dist/archive/hash.js vendored Normal file
View File

@@ -0,0 +1,22 @@
import { createReadStream } from "fs";
import { createHash } from "crypto";
import { pipeline } from "stream/promises";
import { PassThrough } from "stream";
/**
* Compute SHA-256 hash of one or more files by streaming them in order.
* Memory usage: O(1) — reads in 64KB chunks regardless of total size.
* For multipart archives, pass all parts sorted by part number.
*/
export async function hashParts(filePaths) {
const hash = createHash("sha256");
for (const filePath of filePaths) {
await pipeline(createReadStream(filePath), new PassThrough({
transform(chunk, _encoding, callback) {
hash.update(chunk);
callback();
},
}));
}
return hash.digest("hex");
}
//# sourceMappingURL=hash.js.map

1
worker/dist/archive/hash.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"hash.js","sourceRoot":"","sources":["../../src/archive/hash.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,IAAI,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,EAAE,WAAW,EAAE,MAAM,QAAQ,CAAC;AAErC;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,SAAmB;IACjD,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;IAClC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,QAAQ,CACZ,gBAAgB,CAAC,QAAQ,CAAC,EAC1B,IAAI,WAAW,CAAC;YACd,SAAS,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ;gBAClC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACnB,QAAQ,EAAE,CAAC;YACb,CAAC;SACF,CAAC,CACH,CAAC;IACJ,CAAC;IACD,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5B,CAAC"}

19
worker/dist/archive/multipart.d.ts vendored Normal file
View File

@@ -0,0 +1,19 @@
import { type ArchiveFormat } from "./detect.js";
export interface TelegramMessage {
id: bigint;
fileName: string;
fileId: string;
fileSize: bigint;
date: Date;
}
export interface ArchiveSet {
type: ArchiveFormat;
baseName: string;
parts: TelegramMessage[];
isMultipart: boolean;
}
/**
* Group messages into archive sets (single files + multipart groups).
* Messages should be pre-filtered to only include archive attachments.
*/
export declare function groupArchiveSets(messages: TelegramMessage[]): ArchiveSet[];

74
worker/dist/archive/multipart.js vendored Normal file
View File

@@ -0,0 +1,74 @@
import { detectArchive } from "./detect.js";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
const log = childLogger("multipart");
/**
* Group messages into archive sets (single files + multipart groups).
* Messages should be pre-filtered to only include archive attachments.
*/
export function groupArchiveSets(messages) {
// Detect and annotate each message
const annotated = [];
for (const msg of messages) {
const info = detectArchive(msg.fileName);
if (info) {
annotated.push({ msg, info });
}
}
// Group by baseName + format
const groups = new Map();
for (const item of annotated) {
const key = `${item.info.format}:${item.info.baseName.toLowerCase()}`;
const group = groups.get(key) ?? [];
group.push(item);
groups.set(key, group);
}
const results = [];
for (const [, group] of groups) {
const format = group[0].info.format;
const baseName = group[0].info.baseName;
// Separate explicit multipart entries from potential singles
const multipartEntries = group.filter((g) => g.info.pattern !== "SINGLE");
const singleEntries = group.filter((g) => g.info.pattern === "SINGLE");
if (multipartEntries.length > 0) {
// This is a multipart set
// Check if any single entry is the "final part" of a legacy split
const allEntries = [...multipartEntries, ...singleEntries];
// Check time span — skip if parts span too long (0 = no limit)
if (config.multipartTimeoutHours > 0) {
const dates = allEntries.map((e) => e.msg.date.getTime());
const span = Math.max(...dates) - Math.min(...dates);
const maxSpanMs = config.multipartTimeoutHours * 60 * 60 * 1000;
if (span > maxSpanMs) {
log.warn({ baseName, format, span: span / 3600000 }, "Multipart set spans too long, skipping");
continue;
}
}
// Sort by part number (singles get a very high number so they come last — they're the final part)
allEntries.sort((a, b) => {
const aNum = a.info.partNumber === -1 ? 999999 : a.info.partNumber;
const bNum = b.info.partNumber === -1 ? 999999 : b.info.partNumber;
return aNum - bNum;
});
results.push({
type: format,
baseName,
parts: allEntries.map((e) => e.msg),
isMultipart: true,
});
}
else {
// All entries are singles — each is its own archive set
for (const entry of singleEntries) {
results.push({
type: format,
baseName: entry.info.baseName,
parts: [entry.msg],
isMultipart: false,
});
}
}
}
return results;
}
//# sourceMappingURL=multipart.js.map

1
worker/dist/archive/multipart.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"multipart.js","sourceRoot":"","sources":["../../src/archive/multipart.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAA0C,MAAM,aAAa,CAAC;AACpF,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAC3C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhD,MAAM,GAAG,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC;AAiBrC;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,QAA2B;IAC1D,mCAAmC;IACnC,MAAM,SAAS,GAAoD,EAAE,CAAC;IACtE,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACzC,IAAI,IAAI,EAAE,CAAC;YACT,SAAS,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,6BAA6B;IAC7B,MAAM,MAAM,GAAG,IAAI,GAAG,EAA2D,CAAC;IAClF,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,EAAE,CAAC;QACtE,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACpC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjB,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IACzB,CAAC;IAED,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,CAAC,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;QAC/B,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;QACpC,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;QAExC,6DAA6D;QAC7D,MAAM,gBAAgB,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC;QAC1E,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC;QAEvE,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,0BAA0B;YAC1B,kEAAkE;YAClE,MAAM,UAAU,GAAG,CAAC,GAAG,gBAAgB,EAAE,GAAG,aAAa,CAAC,CAAC;YAE3D,+DAA+D;YAC/D,IAAI,MAAM,CAAC,qBAAqB,GAAG,CAAC,EAAE,CAAC;gBACrC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;gBAC1D,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;gBACrD,MAAM,SAAS,GAAG,MAAM,CAAC,qBAAqB,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;gBAEhE,IAAI,IAAI,GAAG,SAAS,EAAE,CAAC;oBACrB,GAAG,CAAC,IAAI,CACN,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,GAAG,OAAO,EAAE,EAC1C,wCAAwC,CACzC,CAAC;oBACF,SAAS;gBACX,CAAC;YACH,CAAC;YAED,kGAAkG;YAClG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBACvB,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC;gBACnE,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC;gBACnE,OAAO,IAAI,GAAG,IAAI,CAAC;YACrB,CAAC,CAAC,CAAC;YAEH,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,MAAM;gBACZ,QAAQ;gBACR,KAAK,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;gBACnC,WAAW,EAAE,IAAI;aAClB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,wDAAwD;YACxD,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;gBAClC,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,MAAM;oBACZ,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,QAAQ;oBAC7B,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC;oBAClB,WAAW,EAAE,KAAK;iBACnB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}

6
worker/dist/archive/rar-reader.d.ts vendored Normal file
View File

@@ -0,0 +1,6 @@
import type { FileEntry } from "./zip-reader.js";
/**
* Parse output of `unrar l -v <file>` to extract file metadata.
* unrar automatically discovers sibling parts when they're co-located.
*/
export declare function readRarContents(firstPartPath: string): Promise<FileEntry[]>;

77
worker/dist/archive/rar-reader.js vendored Normal file
View File

@@ -0,0 +1,77 @@
import { execFile } from "child_process";
import { promisify } from "util";
import path from "path";
import { childLogger } from "../util/logger.js";
const execFileAsync = promisify(execFile);
const log = childLogger("rar-reader");
/**
* Parse output of `unrar l -v <file>` to extract file metadata.
* unrar automatically discovers sibling parts when they're co-located.
*/
export async function readRarContents(firstPartPath) {
try {
const { stdout } = await execFileAsync("unrar", ["l", "-v", firstPartPath], {
timeout: 30000,
maxBuffer: 10 * 1024 * 1024, // 10MB for very large archives
});
return parseUnrarOutput(stdout);
}
catch (err) {
log.warn({ err, file: firstPartPath }, "Failed to read RAR contents");
return []; // Fallback: return empty on error
}
}
/**
* Parse the tabular output of `unrar l -v`.
*
* Example output format:
* Archive: test.rar
* Details: RAR 5
*
* Attributes Size Packed Ratio Date Time CRC-32 Name
* ----------- --------- --------- ----- -------- ----- -------- ----
* ...A.... 12345 10234 83% 2024-01-15 10:30 DEADBEEF folder/file.stl
* ----------- --------- --------- ----- -------- ----- -------- ----
*/
function parseUnrarOutput(output) {
const entries = [];
const lines = output.split("\n");
let inFileList = false;
let separatorCount = 0;
for (const line of lines) {
const trimmed = line.trim();
// Detect separator lines (------- pattern)
if (/^-{5,}/.test(trimmed)) {
separatorCount++;
if (separatorCount === 1) {
inFileList = true;
}
else if (separatorCount >= 2) {
inFileList = false;
}
continue;
}
if (!inFileList)
continue;
// Parse file entry line
// Format: Attributes Size Packed Ratio Date Time CRC Name
const match = trimmed.match(/^\S+\s+(\d+)\s+(\d+)\s+\d+%\s+\S+\s+\S+\s+([0-9A-Fa-f]+)\s+(.+)$/);
if (match) {
const [, uncompressedStr, compressedStr, crc32, filePath] = match;
// Skip directory entries (typically end with / or have size 0 with dir attributes)
if (filePath.endsWith("/") || filePath.endsWith("\\"))
continue;
const ext = path.extname(filePath).toLowerCase();
entries.push({
path: filePath,
fileName: path.basename(filePath),
extension: ext ? ext.slice(1) : null,
compressedSize: BigInt(compressedStr),
uncompressedSize: BigInt(uncompressedStr),
crc32: crc32.toLowerCase(),
});
}
}
return entries;
}
//# sourceMappingURL=rar-reader.js.map

1
worker/dist/archive/rar-reader.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"rar-reader.js","sourceRoot":"","sources":["../../src/archive/rar-reader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,MAAM,CAAC;AACjC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGhD,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAC1C,MAAM,GAAG,GAAG,WAAW,CAAC,YAAY,CAAC,CAAC;AAEtC;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,aAAqB;IAErB,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,IAAI,EAAE,aAAa,CAAC,EAAE;YAC1E,OAAO,EAAE,KAAK;YACd,SAAS,EAAE,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,+BAA+B;SAC7D,CAAC,CAAC;QAEH,OAAO,gBAAgB,CAAC,MAAM,CAAC,CAAC;IAClC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,aAAa,EAAE,EAAE,6BAA6B,CAAC,CAAC;QACtE,OAAO,EAAE,CAAC,CAAC,kCAAkC;IAC/C,CAAC;AACH,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAS,gBAAgB,CAAC,MAAc;IACtC,MAAM,OAAO,GAAgB,EAAE,CAAC;IAChC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAEjC,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,IAAI,cAAc,GAAG,CAAC,CAAC;IAEvB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAE5B,2CAA2C;QAC3C,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3B,cAAc,EAAE,CAAC;YACjB,IAAI,cAAc,KAAK,CAAC,EAAE,CAAC;gBACzB,UAAU,GAAG,IAAI,CAAC;YACpB,CAAC;iBAAM,IAAI,cAAc,IAAI,CAAC,EAAE,CAAC;gBAC/B,UAAU,GAAG,KAAK,CAAC;YACrB,CAAC;YACD,SAAS;QACX,CAAC;QAED,IAAI,CAAC,UAAU;YAAE,SAAS;QAE1B,wBAAwB;QACxB,0DAA0D;QAC1D,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CACzB,kEAAkE,CACnE,CAAC;QAEF,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,CAAC,EAAE,eAAe,EAAE,aAAa,EAAE,KAAK,EAAE,QAAQ,CAAC,GAAG,KAAK,CAAC;YAElE,mFAAmF;YACnF,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;gBAAE,SAAS;YAEhE,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;YACjD,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,QAAQ;gBACd,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBACjC,SAAS,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;gBACpC,cAAc,EAAE,MAAM,CAAC,aAAa,CAAC;gBACrC,gBAAgB,EAAE,MAAM,CAAC,eAAe,CAAC;gBACzC,KAAK,EAAE,KAAK,CAAC,WAAW,EAAE;aAC3B,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}

11
worker/dist/archive/split.d.ts vendored Normal file
View File

@@ -0,0 +1,11 @@
/**
* Split a file into ≤2GB parts using byte-level splitting.
* Returns paths to the split parts. If the file is already ≤2GB, returns the original path.
*/
export declare function byteLevelSplit(filePath: string): Promise<string[]>;
/**
* Concatenate multiple files into a single output file by streaming
* each input sequentially. Used for repacking multipart archives
* that have oversized parts (>2GB) before re-splitting.
*/
export declare function concatenateFiles(inputPaths: string[], outputPath: string): Promise<void>;

55
worker/dist/archive/split.js vendored Normal file
View File

@@ -0,0 +1,55 @@
import { createReadStream, createWriteStream } from "fs";
import { stat } from "fs/promises";
import path from "path";
import { pipeline } from "stream/promises";
import { childLogger } from "../util/logger.js";
const log = childLogger("split");
/** 2GB in bytes — Telegram's file size limit */
const MAX_PART_SIZE = 2n * 1024n * 1024n * 1024n;
/**
* Split a file into ≤2GB parts using byte-level splitting.
* Returns paths to the split parts. If the file is already ≤2GB, returns the original path.
*/
export async function byteLevelSplit(filePath) {
const stats = await stat(filePath);
const fileSize = BigInt(stats.size);
if (fileSize <= MAX_PART_SIZE) {
return [filePath];
}
const dir = path.dirname(filePath);
const baseName = path.basename(filePath);
const partSize = Number(MAX_PART_SIZE);
const totalParts = Math.ceil(Number(fileSize) / partSize);
const parts = [];
log.info({ filePath, fileSize: Number(fileSize), totalParts }, "Splitting file");
for (let i = 0; i < totalParts; i++) {
const partNum = String(i + 1).padStart(3, "0");
const partPath = path.join(dir, `${baseName}.${partNum}`);
const start = i * partSize;
const end = Math.min(start + partSize - 1, Number(fileSize) - 1);
await pipeline(createReadStream(filePath, { start, end }), createWriteStream(partPath));
parts.push(partPath);
}
log.info({ filePath, parts: parts.length }, "File split complete");
return parts;
}
/**
* Concatenate multiple files into a single output file by streaming
* each input sequentially. Used for repacking multipart archives
* that have oversized parts (>2GB) before re-splitting.
*/
export async function concatenateFiles(inputPaths, outputPath) {
const out = createWriteStream(outputPath);
for (let i = 0; i < inputPaths.length; i++) {
log.info({ part: i + 1, total: inputPaths.length, file: path.basename(inputPaths[i]) }, "Concatenating part");
await pipeline(createReadStream(inputPaths[i]), out, { end: false });
}
// Close the output stream
await new Promise((resolve, reject) => {
out.end(() => resolve());
out.on("error", reject);
});
const stats = await stat(outputPath);
log.info({ outputPath, totalBytes: stats.size, parts: inputPaths.length }, "Concatenation complete");
}
//# sourceMappingURL=split.js.map

1
worker/dist/archive/split.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"split.js","sourceRoot":"","sources":["../../src/archive/split.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,IAAI,CAAC;AACzD,OAAO,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AACnC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhD,MAAM,GAAG,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;AAEjC,gDAAgD;AAChD,MAAM,aAAa,GAAG,EAAE,GAAG,KAAK,GAAG,KAAK,GAAG,KAAK,CAAC;AAEjD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,QAAgB;IACnD,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAEpC,IAAI,QAAQ,IAAI,aAAa,EAAE,CAAC;QAC9B,OAAO,CAAC,QAAQ,CAAC,CAAC;IACpB,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACzC,MAAM,QAAQ,GAAG,MAAM,CAAC,aAAa,CAAC,CAAC;IACvC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,QAAQ,CAAC,CAAC;IAC1D,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,GAAG,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE,UAAU,EAAE,EAAE,gBAAgB,CAAC,CAAC;IAEjF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,IAAI,OAAO,EAAE,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,CAAC,GAAG,QAAQ,CAAC;QAC3B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,QAAQ,GAAG,CAAC,EAAE,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;QAEjE,MAAM,QAAQ,CACZ,gBAAgB,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,EAC1C,iBAAiB,CAAC,QAAQ,CAAC,CAC5B,CAAC;QAEF,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvB,CAAC;IAED,GAAG,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAE,EAAE,qBAAqB,CAAC,CAAC;IACnE,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,UAAoB,EACpB,UAAkB;IAElB,MAAM,GAAG,GAAG,iBAAiB,CAAC,UAAU,CAAC,CAAC;IAE1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,GAAG,CAAC,IAAI,CACN,EAAE,IAAI,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,UAAU,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,EAC7E,oBAAoB,CACrB,CAAC;QACF,MAAM,QAAQ,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,0BAA0B;IAC1B,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC1C,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,CAAC,CAAC;QACzB,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAC1B,CAAC,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,CAAC;IACrC,GAAG,CAAC,IAAI,CACN,EAAE,UAAU,EAAE,UAAU,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,UAAU,CAAC,MAAM,EAAE,EAChE,wBAAwB,CACzB,CAAC;AACJ,CAAC"}

15
worker/dist/archive/zip-reader.d.ts vendored Normal file
View File

@@ -0,0 +1,15 @@
export interface FileEntry {
path: string;
fileName: string;
extension: string | null;
compressedSize: bigint;
uncompressedSize: bigint;
crc32: string | null;
}
/**
* Read the central directory of a ZIP file without extracting any contents.
* For multipart ZIPs (.zip.001, .zip.002 etc.), uses a custom random-access
* reader that spans all parts seamlessly so yauzl can find the central
* directory at the end of the combined data.
*/
export declare function readZipCentralDirectory(filePaths: string[]): Promise<FileEntry[]>;

161
worker/dist/archive/zip-reader.js vendored Normal file
View File

@@ -0,0 +1,161 @@
import yauzl from "yauzl";
import { open as fsOpen, stat as fsStat } from "fs/promises";
import path from "path";
import { Readable } from "stream";
import { childLogger } from "../util/logger.js";
const log = childLogger("zip-reader");
/**
* Read the central directory of a ZIP file without extracting any contents.
* For multipart ZIPs (.zip.001, .zip.002 etc.), uses a custom random-access
* reader that spans all parts seamlessly so yauzl can find the central
* directory at the end of the combined data.
*/
export async function readZipCentralDirectory(filePaths) {
if (filePaths.length === 1) {
return readSingleZip(filePaths[0]);
}
// Multipart: use a spanning random-access reader
return readMultipartZip(filePaths);
}
/** Read a single (non-split) ZIP file. */
function readSingleZip(targetFile) {
return new Promise((resolve) => {
yauzl.open(targetFile, { lazyEntries: true, autoClose: true }, (err, zipFile) => {
if (err) {
log.warn({ err, file: targetFile }, "Failed to open ZIP for reading");
resolve([]);
return;
}
const entries = [];
zipFile.readEntry();
zipFile.on("entry", (entry) => {
if (!entry.fileName.endsWith("/")) {
const ext = path.extname(entry.fileName).toLowerCase();
entries.push({
path: entry.fileName,
fileName: path.basename(entry.fileName),
extension: ext ? ext.slice(1) : null,
compressedSize: BigInt(entry.compressedSize),
uncompressedSize: BigInt(entry.uncompressedSize),
crc32: entry.crc32 !== 0 ? entry.crc32.toString(16).padStart(8, "0") : null,
});
}
zipFile.readEntry();
});
zipFile.on("end", () => resolve(entries));
zipFile.on("error", (error) => {
log.warn({ error, file: targetFile }, "Error reading ZIP entries");
resolve(entries);
});
});
});
}
/**
* Read a multipart split ZIP using yauzl's RandomAccessReader API.
* This creates a virtual "file" that spans all parts so yauzl can
* seek freely across the entire archive to read the central directory.
*/
async function readMultipartZip(filePaths) {
// Get sizes of all parts
const partSizes = [];
for (const fp of filePaths) {
const s = await fsStat(fp);
partSizes.push(s.size);
}
const totalSize = partSizes.reduce((a, b) => a + b, 0);
log.debug({ parts: filePaths.length, totalSize }, "Reading multipart ZIP via spanning reader");
return new Promise((resolve) => {
const reader = createMultiPartReader(filePaths, partSizes);
yauzl.fromRandomAccessReader(reader, totalSize, { lazyEntries: true, autoClose: true }, (err, zipFile) => {
if (err) {
log.warn({ err }, "Failed to open multipart ZIP for reading");
reader.close(() => { });
resolve([]);
return;
}
const entries = [];
zipFile.readEntry();
zipFile.on("entry", (entry) => {
if (!entry.fileName.endsWith("/")) {
const ext = path.extname(entry.fileName).toLowerCase();
entries.push({
path: entry.fileName,
fileName: path.basename(entry.fileName),
extension: ext ? ext.slice(1) : null,
compressedSize: BigInt(entry.compressedSize),
uncompressedSize: BigInt(entry.uncompressedSize),
crc32: entry.crc32 !== 0 ? entry.crc32.toString(16).padStart(8, "0") : null,
});
}
zipFile.readEntry();
});
zipFile.on("end", () => {
log.info({ entries: entries.length }, "Multipart ZIP entries read");
resolve(entries);
});
zipFile.on("error", (error) => {
log.warn({ error }, "Error reading multipart ZIP entries");
resolve(entries);
});
});
});
}
/**
* Create a yauzl RandomAccessReader that reads across multiple split part files.
* Maps a global offset to the correct part file and local offset.
*
* Uses Object.create to properly inherit from yauzl.RandomAccessReader
* (whose constructor + prototype is defined at runtime, not as a TS class).
*/
function createMultiPartReader(filePaths, partSizes) {
// Build cumulative offset table
const partOffsets = [];
let offset = 0;
for (const size of partSizes) {
partOffsets.push(offset);
offset += size;
}
// Create an instance by calling the parent constructor
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const reader = new yauzl.RandomAccessReader();
// Override _readStreamForRange — yauzl calls this to read a range of bytes
// eslint-disable-next-line @typescript-eslint/no-explicit-any
reader._readStreamForRange = function (start, end) {
const readable = new Readable({ read() { } });
readRange(start, end, readable).catch((err) => {
readable.destroy(err);
});
return readable;
};
async function readRange(start, end, readable) {
let remaining = end - start;
let globalOffset = start;
while (remaining > 0) {
// Find which part this offset falls in
let partIdx = partOffsets.length - 1;
for (let i = 0; i < partOffsets.length; i++) {
if (i + 1 < partOffsets.length && globalOffset < partOffsets[i + 1]) {
partIdx = i;
break;
}
}
const localOffset = globalOffset - partOffsets[partIdx];
const partRemaining = partSizes[partIdx] - localOffset;
const toRead = Math.min(remaining, partRemaining);
const fh = await fsOpen(filePaths[partIdx], "r");
try {
const buf = Buffer.alloc(toRead);
const { bytesRead } = await fh.read(buf, 0, toRead, localOffset);
readable.push(buf.subarray(0, bytesRead));
remaining -= bytesRead;
globalOffset += bytesRead;
}
finally {
await fh.close();
}
}
readable.push(null); // Signal end of stream
}
return reader;
}
//# sourceMappingURL=zip-reader.js.map

1
worker/dist/archive/zip-reader.js.map vendored Normal file

File diff suppressed because one or more lines are too long