feat: add 7z archive content listing via p7zip

- Add p7zip-full to worker Docker image
- New read7zContents() parser using 7z l output
- 7z archives now get full file listings like ZIP/RAR
- Standalone DOCUMENT types still show as single entry
This commit is contained in:
admin
2026-03-21 21:13:58 +01:00
parent 9ac66e9d7d
commit a90f653314
3 changed files with 93 additions and 5 deletions

View File

@@ -0,0 +1,85 @@
import { execFile } from "child_process";
import { promisify } from "util";
import path from "path";
import { childLogger } from "../util/logger.js";
import type { FileEntry } from "./zip-reader.js";
const execFileAsync = promisify(execFile);
const log = childLogger("7z-reader");
/**
 * List the files stored inside a 7z archive.
 *
 * Runs `7z l <filePath>` and hands the captured stdout to `parse7zOutput`.
 * The listing table it prints looks like:
 *
 *    Date      Time    Attr         Size   Compressed  Name
 * ------------------- ----- ------------ ------------  ------------------------
 * 2024-01-15 10:30:00 ....A        12345        10234  folder/file.stl
 * ------------------- ----- ------------ ------------  ------------------------
 *
 * This is best-effort: any failure (the `7z` process erroring or timing out,
 * output exceeding the buffer, or the parser throwing) is logged at warn
 * level and reported to the caller as an empty list rather than an exception.
 *
 * @param filePath - Path of the archive handed to `7z l`.
 * @returns The parsed entries, or `[]` when the listing could not be produced.
 */
export async function read7zContents(
  filePath: string
): Promise<FileEntry[]> {
  const listArgs = ["l", filePath];
  const execOptions = {
    // Give up after 30 s (value is in milliseconds).
    timeout: 30000,
    // Allow up to 10 MiB of listing output.
    maxBuffer: 10 * 1024 * 1024,
  };

  try {
    const result = await execFileAsync("7z", listArgs, execOptions);
    const listing = result.stdout;
    // Parsing stays inside the try so a parser failure is also downgraded to [].
    return parse7zOutput(listing);
  } catch (failure) {
    log.warn({ err: failure, file: filePath }, "Failed to read 7z contents");
    return [];
  }
}
/** Half-open [start, end) character offsets of one table column. */
type SevenZipSpan = readonly [number, number];

/** Offsets of the columns read from a `7z l` table (the date column is not needed). */
type SevenZipColumns = {
  attr: SevenZipSpan;
  size: SevenZipSpan;
  compressed: SevenZipSpan;
  name: SevenZipSpan;
};

/** Raw text fields of one listing row, before conversion to a FileEntry. */
type SevenZipRow = {
  attr: string;
  size: string;
  compressed: string;
  name: string;
};

/**
 * Whitespace-based row pattern, used only when a row cannot be sliced by
 * column offsets. It needs a date and BOTH size columns to be present.
 */
const SEVENZ_FALLBACK_ROW =
  /^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\s+(\S+)\s+(\d+)\s+(\d+)\s+(.+)$/;

/** A size of zero, built with BigInt() so no bigint literal syntax is required. */
const SEVENZ_ZERO = BigInt(0);

/**
 * Derive column offsets from the dashed separator row.
 * Returns null unless the row has exactly the five expected dash runs
 * (date/time, attr, size, compressed, name).
 */
function locate7zColumns(separator: string): SevenZipColumns | null {
  const spans = Array.from(separator.matchAll(/-+/g), (m): SevenZipSpan => {
    const start = m.index ?? 0;
    return [start, start + m[0].length];
  });
  const [, attr, size, compressed, name] = spans;
  if (spans.length !== 5 || !attr || !size || !compressed || !name) return null;
  return { attr, size, compressed, name };
}

/**
 * Split one table row into its text fields, or return null if it is not an
 * entry row. Column slicing is preferred because the "Compressed" (and date)
 * cells may be blank; the whitespace regex is the fallback for rows that do
 * not line up with the separator (e.g. a number wider than its column).
 */
function split7zRow(
  line: string,
  columns: SevenZipColumns | null
): SevenZipRow | null {
  if (columns && line.length > columns.name[0]) {
    // The characters between columns must be blank, otherwise a value has
    // overflowed its column and the offsets cannot be trusted for this row.
    const gaps =
      line.slice(columns.attr[1], columns.size[0]) +
      line.slice(columns.size[1], columns.compressed[0]) +
      line.slice(columns.compressed[1], columns.name[0]);
    const size = line.slice(columns.size[0], columns.size[1]).trim();
    const compressed = line
      .slice(columns.compressed[0], columns.compressed[1])
      .trim();
    if (gaps.trim() === "" && /^\d+$/.test(size) && /^\d*$/.test(compressed)) {
      return {
        attr: line.slice(columns.attr[0], columns.attr[1]).trim(),
        size,
        compressed,
        name: line.slice(columns.name[0]),
      };
    }
  }

  const legacy = SEVENZ_FALLBACK_ROW.exec(line.trim());
  if (!legacy) return null;
  const [, attr, size, compressed, name] = legacy;
  if (
    attr === undefined ||
    size === undefined ||
    compressed === undefined ||
    name === undefined
  ) {
    return null;
  }
  return { attr, size, compressed, name };
}

/**
 * Convert the text printed by `7z l` into file entries.
 *
 * Only rows between the first and second dashed separator lines are read;
 * everything before (banner, archive properties) and after (totals) is
 * ignored. Fields are taken by the column offsets of the first separator, so
 * rows whose "Compressed" or date cell is blank are still recognised and a
 * file name that starts with digits is not mistaken for a size.
 *
 * Skipped rows: directories (attribute flag `D`, or a name ending in `/` or
 * `\`) and entries whose uncompressed and compressed sizes are both zero.
 *
 * @param output - Full stdout of `7z l <archive>`.
 * @returns One entry per listed file. `compressedSize` is 0 when the listing
 *   leaves that cell blank; `crc32` is always null.
 */
function parse7zOutput(output: string): FileEntry[] {
  const entries: FileEntry[] = [];
  let columns: SevenZipColumns | null = null;
  let separatorCount = 0;
  let inFileList = false;

  for (const rawLine of output.split("\n")) {
    // Drop the CR of CRLF output but keep leading spaces: offsets depend on them.
    const line = rawLine.replace(/\r$/, "");

    // Detect separator lines (------- pattern)
    if (/^-{5,}/.test(line.trim())) {
      separatorCount++;
      // The table body lies strictly between the first and second separator.
      inFileList = separatorCount === 1;
      if (inFileList) columns = locate7zColumns(line);
      continue;
    }
    if (!inFileList) continue;

    const row = split7zRow(line, columns);
    if (row === null) continue;

    // Skip directory entries
    if (row.attr.startsWith("D")) continue;
    if (row.name.endsWith("/") || row.name.endsWith("\\")) continue;

    const uncompressedSize = BigInt(row.size);
    const compressedSize =
      row.compressed === "" ? SEVENZ_ZERO : BigInt(row.compressed);

    // Skip entries with 0 size (typically directories without trailing slash)
    if (uncompressedSize === SEVENZ_ZERO && compressedSize === SEVENZ_ZERO) {
      continue;
    }

    const ext = path.extname(row.name).toLowerCase();
    entries.push({
      path: row.name,
      fileName: path.basename(row.name),
      extension: ext ? ext.slice(1) : null,
      compressedSize,
      uncompressedSize,
      crc32: null,
    });
  }

  return entries;
}

View File

@@ -40,6 +40,7 @@ import { extractCreatorFromFileName, extractCreatorFromChannelTitle } from "./ar
import { hashParts } from "./archive/hash.js";
import { readZipCentralDirectory } from "./archive/zip-reader.js";
import { readRarContents } from "./archive/rar-reader.js";
import { read7zContents } from "./archive/sevenz-reader.js";
import { byteLevelSplit, concatenateFiles } from "./archive/split.js";
import { uploadToChannel } from "./upload/channel.js";
import type { TelegramAccount, TelegramChannel } from "@prisma/client";
@@ -875,9 +876,11 @@ async function processOneArchiveSet(
entries = await readZipCentralDirectory(tempPaths);
} else if (archiveSet.type === "RAR") {
entries = await readRarContents(tempPaths[0]);
} else if (archiveSet.type === "DOCUMENT" || archiveSet.type === "7Z") {
// Standalone documents (PDF, STL, etc.) and 7z files — no extraction needed,
// just record the file itself as the single entry
} else if (archiveSet.type === "7Z") {
entries = await read7zContents(tempPaths[0]);
} else if (archiveSet.type === "DOCUMENT") {
// Standalone documents (PDF, STL, etc.) — no extraction,
// record the file itself as the single entry
const part = archiveSet.parts[0];
const ext = part.fileName.match(/\.([^.]+)$/)?.[1] ?? null;
entries = [{