From a90f653314e60b6a2d0605eaf6188f6c77c8a49f Mon Sep 17 00:00:00 2001
From: admin
Date: Sat, 21 Mar 2026 21:13:58 +0100
Subject: [PATCH] feat: add 7z archive content listing via p7zip

- Add p7zip-full to worker Docker image
- New read7zContents() parser using 7z l output
- 7z archives now get full file listings like ZIP/RAR
- Standalone DOCUMENT types still show as single entry
- Parser accepts blank Compressed/Date columns (solid archives) and
  detects directories by their "D" attribute
---
 worker/Dockerfile                   |   4 +-
 worker/src/archive/sevenz-reader.ts | 143 ++++++++++++++++++++++++++++
 worker/src/worker.ts                |   9 +-
 3 files changed, 151 insertions(+), 5 deletions(-)
 create mode 100644 worker/src/archive/sevenz-reader.ts

diff --git a/worker/Dockerfile b/worker/Dockerfile
index e8e2fda..5a53dc4 100644
--- a/worker/Dockerfile
+++ b/worker/Dockerfile
@@ -3,7 +3,7 @@ FROM node:20-bookworm-slim AS deps
 
 RUN sed -i 's/^Components: main$/Components: main non-free/' /etc/apt/sources.list.d/debian.sources && \
     apt-get update && apt-get install -y \
-    libssl-dev zlib1g-dev unrar \
+    libssl-dev zlib1g-dev unrar p7zip-full \
     && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app
@@ -26,7 +26,7 @@ FROM node:20-bookworm-slim AS runner
 
 RUN sed -i 's/^Components: main$/Components: main non-free/' /etc/apt/sources.list.d/debian.sources && \
     apt-get update && apt-get install -y \
-    libssl3 zlib1g unrar \
+    libssl3 zlib1g unrar p7zip-full \
     && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app
diff --git a/worker/src/archive/sevenz-reader.ts b/worker/src/archive/sevenz-reader.ts
new file mode 100644
index 0000000..76728d2
--- /dev/null
+++ b/worker/src/archive/sevenz-reader.ts
@@ -0,0 +1,143 @@
+import { execFile } from "child_process";
+import { promisify } from "util";
+import path from "path";
+import { childLogger } from "../util/logger.js";
+import type { FileEntry } from "./zip-reader.js";
+
+const execFileAsync = promisify(execFile);
+const log = childLogger("7z-reader");
+
+/**
+ * Row of the `7z l` table in 7-Zip's standard fixed-width layout:
+ * Date+Time (19 chars, may be blank), Attr, Size, Compressed (12 chars, may
+ * be blank), two spaces, Name.
+ *
+ * Groups: 1 = attributes, 2 = size, 3 = raw Compressed field, 4 = name.
+ */
+const FIXED_WIDTH_ROW =
+  /^(?:\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}| {19}) (\S+| {5}) +(\d+) ([ \d]{12}) {2}(.+)$/;
+
+/**
+ * Fallback for rows that are not padded to the standard column widths.
+ * Every column must be present. Same groups as FIXED_WIDTH_ROW.
+ */
+const LOOSE_ROW =
+  /^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\s+(\S+)\s+(\d+)\s+(\d+)\s+(.+)$/;
+
+/** Raw column text of one listing row. */
+interface ListingRow {
+  attrs: string;
+  size: string;
+  /** Empty when the Compressed column is blank. */
+  packed: string;
+  name: string;
+}
+
+/**
+ * List the files stored in a 7z archive by running `7z l <file>`.
+ *
+ * Example output:
+ *    Date      Time    Attr         Size   Compressed  Name
+ * ------------------- ----- ------------ ------------ ------------------------
+ * 2024-01-15 10:30:00 ....A        12345        10234  folder/file.stl
+ * 2024-01-15 10:30:00 ....A          678               folder/other.stl
+ * ------------------- ----- ------------ ------------ ------------------------
+ *
+ * @param filePath Path of the archive on disk.
+ * @returns One entry per file in the archive, or an empty array (after
+ *   logging a warning) when `7z` fails, exceeds the 30 s timeout or prints
+ *   more than 10 MiB.
+ */
+export async function read7zContents(
+  filePath: string
+): Promise<FileEntry[]> {
+  try {
+    // "--" ends switch parsing, so a path starting with "-" is read as a file.
+    const { stdout } = await execFileAsync("7z", ["l", "--", filePath], {
+      timeout: 30000,
+      maxBuffer: 10 * 1024 * 1024,
+    });
+
+    return parse7zOutput(stdout);
+  } catch (err) {
+    log.warn({ err, file: filePath }, "Failed to read 7z contents");
+    return [];
+  }
+}
+
+/**
+ * Split one table row into its columns.
+ *
+ * The fixed-width layout is tried first because it is the only way to tell a
+ * blank Compressed column from a file name that starts with digits.
+ *
+ * @returns The column text, or null when the line is not a file row.
+ */
+function parse7zRow(line: string): ListingRow | null {
+  const fixed = FIXED_WIDTH_ROW.exec(line.trimEnd());
+  if (fixed) {
+    const [, attrs = "", size = "", packedField = "", name = ""] = fixed;
+    const packed = packedField.trim();
+    // Digits separated by spaces mean the columns did not line up.
+    if (/^\d*$/.test(packed)) {
+      return { attrs: attrs.trim(), size, packed, name };
+    }
+  }
+
+  const loose = LOOSE_ROW.exec(line.trim());
+  if (!loose) return null;
+  const [, attrs = "", size = "", packed = "", name = ""] = loose;
+  return { attrs, size, packed, name };
+}
+
+/**
+ * Extract file entries from the text printed by `7z l`.
+ *
+ * Only rows between the first and second dashed separator lines are read.
+ * Directories are skipped. Solid archives leave the Compressed column blank
+ * for most files; those entries get a compressedSize of 0.
+ *
+ * @param output Raw stdout of `7z l`.
+ * @returns Entries in listing order. `crc32` is always null because the
+ *   plain listing carries no checksums.
+ */
+function parse7zOutput(output: string): FileEntry[] {
+  const entries: FileEntry[] = [];
+  let separatorCount = 0;
+
+  for (const rawLine of output.split(/\r?\n/)) {
+    // Separator lines (------- pattern) open and close the file table.
+    if (/^-{5,}/.test(rawLine.trim())) {
+      separatorCount++;
+      // The second separator ends the table; only the summary follows.
+      if (separatorCount >= 2) break;
+      continue;
+    }
+    if (separatorCount === 0) continue;
+
+    const row = parse7zRow(rawLine);
+    if (row === null) continue;
+    const { attrs, size, packed, name } = row;
+
+    // Skip directories: flagged by the "D" attribute or a trailing slash.
+    if (attrs.startsWith("D") || name.endsWith("/") || name.endsWith("\\")) {
+      continue;
+    }
+    // With no attributes shown, an all-zero row is assumed to be a directory.
+    if (attrs === "" && size === "0" && (packed === "" || packed === "0")) {
+      continue;
+    }
+
+    const ext = path.extname(name).toLowerCase();
+    entries.push({
+      path: name,
+      fileName: path.basename(name),
+      extension: ext ? ext.slice(1) : null,
+      compressedSize: BigInt(packed === "" ? "0" : packed),
+      uncompressedSize: BigInt(size),
+      crc32: null,
+    });
+  }
+
+  return entries;
+}
diff --git a/worker/src/worker.ts b/worker/src/worker.ts
index 5d1816c..bc6b0a7 100644
--- a/worker/src/worker.ts
+++ b/worker/src/worker.ts
@@ -40,6 +40,7 @@ import { extractCreatorFromFileName, extractCreatorFromChannelTitle } from "./ar
 import { hashParts } from "./archive/hash.js";
 import { readZipCentralDirectory } from "./archive/zip-reader.js";
 import { readRarContents } from "./archive/rar-reader.js";
+import { read7zContents } from "./archive/sevenz-reader.js";
 import { byteLevelSplit, concatenateFiles } from "./archive/split.js";
 import { uploadToChannel } from "./upload/channel.js";
 import type { TelegramAccount, TelegramChannel } from "@prisma/client";
@@ -875,9 +876,11 @@ async function processOneArchiveSet(
         entries = await readZipCentralDirectory(tempPaths);
       } else if (archiveSet.type === "RAR") {
         entries = await readRarContents(tempPaths[0]);
-      } else if (archiveSet.type === "DOCUMENT" || archiveSet.type === "7Z") {
-        // Standalone documents (PDF, STL, etc.) and 7z files — no extraction needed,
-        // just record the file itself as the single entry
+      } else if (archiveSet.type === "7Z") {
+        entries = await read7zContents(tempPaths[0]);
+      } else if (archiveSet.type === "DOCUMENT") {
+        // Standalone documents (PDF, STL, etc.) — no extraction,
+        // record the file itself as the single entry
         const part = archiveSet.parts[0];
         const ext = part.fileName.match(/\.([^.]+)$/)?.[1] ?? null;
         entries = [{