feat: add 7z archive content listing via p7zip

- Add p7zip-full to worker Docker image
- New read7zContents() parser using `7z l` output
- 7z archives now get full file listings like ZIP/RAR
- Standalone DOCUMENT types still show as single entry
2026-07-24 07:42:47 +00:00 · 2026-03-21 21:13:58 +01:00
parent 9ac66e9d7d
commit a90f653314
3 changed files with 93 additions and 5 deletions
--- a/worker/src/archive/sevenz-reader.ts
+++ b/worker/src/archive/sevenz-reader.ts
@@ -0,0 +1,85 @@
+import { execFile } from "child_process";
+import { promisify } from "util";
+import path from "path";
+import { childLogger } from "../util/logger.js";
+import type { FileEntry } from "./zip-reader.js";
+
+const execFileAsync = promisify(execFile);
+const log = childLogger("7z-reader");
+
+/**
+ * List the files in a 7z archive by running `7z l` (p7zip) and parsing its table:
+ *
+ *    Date      Time    Attr         Size   Compressed  Name
+ *   ------------------- ----- ------------ ------------  ------------------------
+ *   2024-01-15 10:30:00 ....A        12345        10234  folder/file.stl
+ *   ------------------- ----- ------------ ------------  ------------------------
+ *
+ * @param filePath Path of the archive to list.
+ * @returns Parsed entries, or `[]` (after a logged warning) when `7z` fails or times out.
+ */
+export async function read7zContents(filePath: string): Promise<FileEntry[]> {
+  try {
+    // "--" ends switch parsing so a path starting with "-" is not taken as an option.
+    const { stdout } = await execFileAsync("7z", ["l", "--", filePath], {
+      timeout: 30000,
+      maxBuffer: 10 * 1024 * 1024,
+    });
+    return parse7zOutput(stdout);
+  } catch (err) {
+    log.warn({ err, file: filePath }, "Failed to read 7z contents");
+    return [];
+  }
+}
+
+/**
+ * Turn the table printed by `7z l` into file entries. Fields are sliced at the
+ * column offsets given by the first dashed separator rather than matched by
+ * regex, because 7z leaves Date/Time or Compressed blank on some rows (in a
+ * solid archive only one file per block reports a packed size).
+ */
+function parse7zOutput(output: string): FileEntry[] {
+  const entries: FileEntry[] = [];
+  let bounds: number[] | null = null; // [start, end) offsets of each dashed column
+
+  for (const rawLine of output.split("\n")) {
+    const line = rawLine.trimEnd(); // also drops the "\r" of CRLF output
+
+    if (/^\s*-{5,}/.test(line)) {
+      if (bounds !== null) break; // closing separator: only the summary row follows
+      bounds = [];
+      for (const run of line.matchAll(/-+/g)) {
+        const start = run.index ?? 0;
+        bounds.push(start, start + run[0].length);
+      }
+      continue;
+    }
+
+    // Need all five columns: date/time, attr, size, compressed, name.
+    if (bounds === null || bounds.length < 10) continue;
+
+    const attr = line.slice(bounds[2], bounds[3]).trim();
+    const sizeStr = line.slice(bounds[4], bounds[5]).trim();
+    const packedStr = line.slice(bounds[6], bounds[7]).trim();
+    const entryPath = line.slice(bounds[8]);
+    if (!/^\d+$/.test(sizeStr) || !/^\d*$/.test(packedStr) || entryPath === "") continue;
+
+    // Directories carry a "D" attribute; empty files (size 0) are real entries.
+    if (attr.startsWith("D") || /[\\/]$/.test(entryPath)) continue;
+    // With no attribute info, treat a row whose sizes are both 0 as a directory.
+    if (attr === "" && sizeStr === "0" && packedStr === "0") continue;
+
+    const ext = path.extname(entryPath).toLowerCase();
+    entries.push({
+      path: entryPath,
+      fileName: path.basename(entryPath),
+      extension: ext ? ext.slice(1) : null,
+      // A blank Compressed cell is recorded as 0: no per-file packed size is reported.
+      compressedSize: BigInt(packedStr || "0"),
+      uncompressedSize: BigInt(sizeStr),
+      crc32: null,
+    });
+  }
+
+  return entries;
+}
--- a/worker/src/worker.ts
+++ b/worker/src/worker.ts
@@ -40,6 +40,7 @@ import { extractCreatorFromFileName, extractCreatorFromChannelTitle } from "./ar
 import { hashParts } from "./archive/hash.js";
 import { readZipCentralDirectory } from "./archive/zip-reader.js";
 import { readRarContents } from "./archive/rar-reader.js";
+import { read7zContents } from "./archive/sevenz-reader.js";
 import { byteLevelSplit, concatenateFiles } from "./archive/split.js";
 import { uploadToChannel } from "./upload/channel.js";
 import type { TelegramAccount, TelegramChannel } from "@prisma/client";
@@ -875,9 +876,11 @@ async function processOneArchiveSet(
        entries = await readZipCentralDirectory(tempPaths);
      } else if (archiveSet.type === "RAR") {
        entries = await readRarContents(tempPaths[0]);
-      } else if (archiveSet.type === "DOCUMENT" || archiveSet.type === "7Z") {
-        // Standalone documents (PDF, STL, etc.) and 7z files — no extraction needed,
-        // just record the file itself as the single entry
+      } else if (archiveSet.type === "7Z") {
+        entries = await read7zContents(tempPaths[0]);
+      } else if (archiveSet.type === "DOCUMENT") {
+        // Standalone documents (PDF, STL, etc.) — no extraction,
+        // record the file itself as the single entry
        const part = archiveSet.parts[0];
        const ext = part.fileName.match(/\.([^.]+)$/)?.[1] ?? null;
        entries = [{