feat: add support for 7z, PDF, STL, and other document types

- Add 7Z and DOCUMENT to ArchiveType enum
- Detect .7z, .pdf, .stl, .obj, .3mf, .step, .stp, .blend, .gcode, .svg,
  .dxf, .ai, .eps, .psd files as fetchable documents
- Handle DOCUMENT and 7Z formats in worker pipeline (skip extraction,
  record file as single entry)
- Add Prisma migration for new enum values
This commit is contained in:
admin
2026-03-21 20:25:00 +01:00
parent ba3d3a6040
commit 53a76a8136
4 changed files with 46 additions and 4 deletions

View File

@@ -1,4 +1,4 @@
export type ArchiveFormat = "ZIP" | "RAR";
export type ArchiveFormat = "ZIP" | "RAR" | "7Z" | "DOCUMENT";
export interface MultipartInfo {
baseName: string;
@@ -48,6 +48,9 @@ const patterns: {
},
];
/** Extensions we recognize as fetchable standalone documents (non-archive files such as PDFs, 3D models, and vector graphics) */
const DOCUMENT_EXTENSIONS = /\.(pdf|stl|obj|3mf|step|stp|blend|gcode|svg|dxf|ai|eps|psd)$/i;
/**
* Detect if a filename is an archive or a standalone document and extract multipart info.
*/
@@ -85,11 +88,32 @@ export function detectArchive(fileName: string): MultipartInfo | null {
};
}
// Single .7z file
if (/\.7z$/i.test(fileName)) {
return {
baseName: fileName.replace(/\.7z$/i, ""),
partNumber: -1,
format: "7Z",
pattern: "SINGLE",
};
}
// Standalone documents (PDFs, STLs, 3D files, etc.).
// These are reported as a single-part entry: partNumber -1 and pattern "SINGLE",
// the same values the single-.7z branch uses.
if (DOCUMENT_EXTENSIONS.test(fileName)) {
  return {
    // Strip the matched extension so baseName is the file name without its suffix.
    baseName: fileName.replace(DOCUMENT_EXTENSIONS, ""),
    partNumber: -1,
    format: "DOCUMENT",
    pattern: "SINGLE",
  };
}
return null;
}
/**
* Check if a filename looks like any archive attachment we should process.
* Check if a filename looks like any attachment we should process.
*/
export function isArchiveAttachment(fileName: string): boolean {
return detectArchive(fileName) !== null;

View File

@@ -873,8 +873,21 @@ async function processOneArchiveSet(
try {
if (archiveSet.type === "ZIP") {
entries = await readZipCentralDirectory(tempPaths);
} else {
} else if (archiveSet.type === "RAR") {
entries = await readRarContents(tempPaths[0]);
} else if (archiveSet.type === "DOCUMENT" || archiveSet.type === "7Z") {
// Standalone documents (PDF, STL, etc.) and 7z files — no extraction needed,
// just record the file itself as the single entry
const part = archiveSet.parts[0];
const ext = part.fileName.match(/\.([^.]+)$/)?.[1] ?? null;
entries = [{
path: part.fileName,
fileName: part.fileName,
extension: ext,
compressedSize: part.fileSize,
uncompressedSize: part.fileSize,
crc32: null,
}];
}
} catch (err) {
accountLog.warn({ err, baseName: archiveSet.baseName }, "Failed to read archive metadata, ingesting without file list");
@@ -975,7 +988,7 @@ async function processOneArchiveSet(
contentHash,
fileName: archiveName,
fileSize: totalSize,
archiveType: archiveSet.type,
archiveType: archiveSet.type === "7Z" ? "SEVEN_Z" : archiveSet.type,
sourceChannelId: channel.id,
sourceMessageId: archiveSet.parts[0].id,
sourceTopicId,