From 53a76a8136367dc2dd9d61daf6ef0160f2440ab6 Mon Sep 17 00:00:00 2001 From: admin Date: Sat, 21 Mar 2026 20:25:00 +0100 Subject: [PATCH] feat: add support for 7z, PDF, STL, and other document types - Add SEVEN_Z and DOCUMENT to the ArchiveType enum (the worker's "7Z" format is stored as SEVEN_Z) - Detect .7z files as single-file archives, and .pdf, .stl, .obj, .3mf, .step, .stp, .blend, .gcode, .svg, .dxf, .ai, .eps, .psd files as fetchable documents - Handle DOCUMENT and 7Z formats in worker pipeline (skip extraction, record file as single entry) - Add Prisma migration for new enum values --- .../migration.sql | 3 ++ prisma/schema.prisma | 2 ++ worker/src/archive/detect.ts | 28 +++++++++++++++++-- worker/src/worker.ts | 17 +++++++++-- 4 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 prisma/migrations/20260321180000_add_archive_types/migration.sql diff --git a/prisma/migrations/20260321180000_add_archive_types/migration.sql b/prisma/migrations/20260321180000_add_archive_types/migration.sql new file mode 100644 index 0000000..25e1973 --- /dev/null +++ b/prisma/migrations/20260321180000_add_archive_types/migration.sql @@ -0,0 +1,3 @@ +-- AlterEnum +ALTER TYPE "ArchiveType" ADD VALUE 'SEVEN_Z'; +ALTER TYPE "ArchiveType" ADD VALUE 'DOCUMENT'; diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 33fd24f..125ee88 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -377,6 +377,8 @@ enum ChannelRole { enum ArchiveType { ZIP RAR + SEVEN_Z + DOCUMENT } enum IngestionStatus { diff --git a/worker/src/archive/detect.ts b/worker/src/archive/detect.ts index 44eeb16..f4a0c7a 100644 --- a/worker/src/archive/detect.ts +++ b/worker/src/archive/detect.ts @@ -1,4 +1,4 @@ -export type ArchiveFormat = "ZIP" | "RAR"; +export type ArchiveFormat = "ZIP" | "RAR" | "7Z" | "DOCUMENT"; export interface MultipartInfo { baseName: string; @@ -48,6 +48,9 @@ const patterns: { }, ]; +/** Extensions of standalone (non-archive) files that are fetched and recorded as single documents */ +const DOCUMENT_EXTENSIONS = 
/\.(pdf|stl|obj|3mf|step|stp|blend|gcode|svg|dxf|ai|eps|psd)$/i; + /** * Detect if a filename is an archive and extract multipart info. */ @@ -85,11 +88,32 @@ export function detectArchive(fileName: string): MultipartInfo | null { }; } + // Single .7z file + if (/\.7z$/i.test(fileName)) { + return { + baseName: fileName.replace(/\.7z$/i, ""), + partNumber: -1, + format: "7Z", + pattern: "SINGLE", + }; + } + + // Standalone documents (PDFs, STLs, 3D files, etc.) + if (DOCUMENT_EXTENSIONS.test(fileName)) { + const ext = fileName.match(DOCUMENT_EXTENSIONS)![0]; + return { + baseName: fileName.replace(DOCUMENT_EXTENSIONS, ""), + partNumber: -1, + format: "DOCUMENT", + pattern: "SINGLE", + }; + } + return null; } /** - * Check if a filename looks like any archive attachment we should process. + * Check if a filename looks like any attachment we should process. */ export function isArchiveAttachment(fileName: string): boolean { return detectArchive(fileName) !== null; diff --git a/worker/src/worker.ts b/worker/src/worker.ts index 27601be..a822f40 100644 --- a/worker/src/worker.ts +++ b/worker/src/worker.ts @@ -873,8 +873,21 @@ async function processOneArchiveSet( try { if (archiveSet.type === "ZIP") { entries = await readZipCentralDirectory(tempPaths); - } else { + } else if (archiveSet.type === "RAR") { entries = await readRarContents(tempPaths[0]); + } else if (archiveSet.type === "DOCUMENT" || archiveSet.type === "7Z") { + // Standalone documents (PDF, STL, etc.) and 7z files — no extraction needed, + // just record the file itself as the single entry + const part = archiveSet.parts[0]; + const ext = part.fileName.match(/\.([^.]+)$/)?.[1] ?? 
null; + entries = [{ + path: part.fileName, + fileName: part.fileName, + extension: ext, + compressedSize: part.fileSize, + uncompressedSize: part.fileSize, + crc32: null, + }]; } } catch (err) { accountLog.warn({ err, baseName: archiveSet.baseName }, "Failed to read archive metadata, ingesting without file list"); @@ -975,7 +988,7 @@ async function processOneArchiveSet( contentHash, fileName: archiveName, fileSize: totalSize, - archiveType: archiveSet.type, + archiveType: archiveSet.type === "7Z" ? "SEVEN_Z" : archiveSet.type, sourceChannelId: channel.id, sourceMessageId: archiveSet.parts[0].id, sourceTopicId,