From a90f653314e60b6a2d0605eaf6188f6c77c8a49f Mon Sep 17 00:00:00 2001
From: admin <admin@samagsteribbe.nl>
Date: Sat, 21 Mar 2026 21:13:58 +0100
Subject: [PATCH] feat: add 7z archive content listing via p7zip

- Add p7zip-full to worker Docker image
- New read7zContents() parser using 7z l output
- 7z archives now get full file listings like ZIP/RAR
- Standalone DOCUMENT types still show as single entry
---
 worker/Dockerfile                   |  4 +-
 worker/src/archive/sevenz-reader.ts | 85 +++++++++++++++++++++++++++++
 worker/src/worker.ts                |  9 ++-
 3 files changed, 93 insertions(+), 5 deletions(-)
 create mode 100644 worker/src/archive/sevenz-reader.ts
diff --git a/worker/Dockerfile b/worker/Dockerfile
index e8e2fda..5a53dc4 100644
--- a/worker/Dockerfile
+++ b/worker/Dockerfile
@@ -3,7 +3,7 @@ FROM node:20-bookworm-slim AS deps
 
 RUN sed -i 's/^Components: main$/Components: main non-free/' /etc/apt/sources.list.d/debian.sources && \
     apt-get update && apt-get install -y \
-    libssl-dev zlib1g-dev unrar \
+    libssl-dev zlib1g-dev unrar p7zip-full \
     && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app
@@ -26,7 +26,7 @@ FROM node:20-bookworm-slim AS runner
 
 RUN sed -i 's/^Components: main$/Components: main non-free/' /etc/apt/sources.list.d/debian.sources && \
     apt-get update && apt-get install -y \
-    libssl3 zlib1g unrar \
+    libssl3 zlib1g unrar p7zip-full \
     && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app
diff --git a/worker/src/archive/sevenz-reader.ts b/worker/src/archive/sevenz-reader.ts
new file mode 100644
index 0000000..76728d2
--- /dev/null
+++ b/worker/src/archive/sevenz-reader.ts
@@ -0,0 +1,85 @@
+import { execFile } from "child_process";
+import { promisify } from "util";
+import path from "path";
+import { childLogger } from "../util/logger.js";
+import type { FileEntry } from "./zip-reader.js";
+
+const execFileAsync = promisify(execFile);
+const log = childLogger("7z-reader");
+
+/**
+ * List the files stored in a 7z archive by running `7z l` on it.
+ *
+ * Best-effort: if `7z` cannot be run, exits non-zero, exceeds the timeout or
+ * overflows the output buffer, the failure is logged and `[]` is returned.
+ *
+ * @param filePath Path of the archive on disk.
+ * @returns The file entries that `parse7zOutput` extracts from the listing.
+ */
+export async function read7zContents(
+  filePath: string
+): Promise<FileEntry[]> {
+  try {
+    // "--" ends switch parsing so a path starting with "-" is not read as an option.
+    const { stdout } = await execFileAsync("7z", ["l", "--", filePath], {
+      timeout: 30000,
+      maxBuffer: 10 * 1024 * 1024,
+    });
+    return parse7zOutput(stdout);
+  } catch (err) {
+    log.warn({ err, file: filePath }, "Failed to read 7z contents");
+    return [];
+  }
+}
+
+/**
+ * Parse the table printed by `7z l` into file entries (directories omitted).
+ *
+ * Fields are cut at the column offsets of the dashed separator row instead of
+ * being matched by whitespace: solid archives leave "Compressed" blank on most
+ * rows and undated entries leave "Date Time" blank, which a whitespace-based
+ * pattern drops or misreads. A blank compressed size is reported as 0.
+ * Directories are recognised by the `D` attribute, so empty files are kept.
+ */
+function parse7zOutput(output: string): FileEntry[] {
+  const entries: FileEntry[] = [];
+  // [start, end) offsets of the dashed runs: date/time, attr, size, compressed, name.
+  let cols: number[][] | null = null;
+
+  for (const raw of output.split("\n")) {
+    const line = raw.replace(/\r$/, "");
+
+    if (/^\s*-{5,}/.test(line)) {
+      // The second separator closes the table; the totals row after it is not a file.
+      if (cols) break;
+      cols = Array.from(line.matchAll(/-+/g), (m) => [m.index ?? 0, (m.index ?? 0) + m[0].length]);
+      // Unrecognised layout: give up rather than guess at field positions.
+      if (cols.length < 5) break;
+      continue;
+    }
+    if (!cols) continue;
+
+    const [, attrCol, sizeCol, packedCol, nameCol] = cols;
+    const attr = line.slice(attrCol[0], attrCol[1]).trim();
+    const size = line.slice(sizeCol[0], sizeCol[1]).trim();
+    const packed = line.slice(packedCol[0], packedCol[1]).trim();
+    const filePath = line.slice(nameCol[0]);
+
+    // Skip anything that is not a data row (blank or wrapped text) or has no name.
+    if (!/^\d+$/.test(size) || filePath === "") continue;
+    if (attr.startsWith("D")) continue;
+    if (filePath.endsWith("/") || filePath.endsWith("\\")) continue;
+
+    const ext = path.extname(filePath).toLowerCase();
+    entries.push({
+      path: filePath,
+      fileName: path.basename(filePath),
+      extension: ext ? ext.slice(1) : null,
+      compressedSize: /^\d+$/.test(packed) ? BigInt(packed) : BigInt(0),
+      uncompressedSize: BigInt(size),
+      crc32: null,
+    });
+  }
+
+  return entries;
+}
diff --git a/worker/src/worker.ts b/worker/src/worker.ts
index 5d1816c..bc6b0a7 100644
--- a/worker/src/worker.ts
+++ b/worker/src/worker.ts
@@ -40,6 +40,7 @@ import { extractCreatorFromFileName, extractCreatorFromChannelTitle } from "./ar
 import { hashParts } from "./archive/hash.js";
 import { readZipCentralDirectory } from "./archive/zip-reader.js";
 import { readRarContents } from "./archive/rar-reader.js";
+import { read7zContents } from "./archive/sevenz-reader.js";
 import { byteLevelSplit, concatenateFiles } from "./archive/split.js";
 import { uploadToChannel } from "./upload/channel.js";
 import type { TelegramAccount, TelegramChannel } from "@prisma/client";
@@ -875,9 +876,11 @@ async function processOneArchiveSet(
         entries = await readZipCentralDirectory(tempPaths);
       } else if (archiveSet.type === "RAR") {
         entries = await readRarContents(tempPaths[0]);
-      } else if (archiveSet.type === "DOCUMENT" || archiveSet.type === "7Z") {
-        // Standalone documents (PDF, STL, etc.) and 7z files — no extraction needed,
-        // just record the file itself as the single entry
+      } else if (archiveSet.type === "7Z") {
+        entries = await read7zContents(tempPaths[0]);
+      } else if (archiveSet.type === "DOCUMENT") {
+        // Standalone documents (PDF, STL, etc.) — no extraction,
+        // record the file itself as the single entry
         const part = archiveSet.parts[0];
         const ext = part.fileName.match(/\.([^.]+)$/)?.[1] ?? null;
         entries = [{