feat(worker): auto-tag packages with the slicer(s) their files target

The catalog indexes 86k+ Lychee Slicer (.lys/.lyt), 23k+ ChituBox (.chitubox/.ctb/.cbddlp), 1k+ Anycubic (.photon/.pwmo/.pwmx), and Bambu (.3mf) slicer-specific files. Until now they were just generic extensions in PackageFile.

After this commit:
- Newly-ingested packages get tags derived from their file list ("lychee", "chitubox", "anycubic", "bambu", "fdm", "mango").
- The `backfill_filelists` listener also applies tags to re-indexed packages.
- A new pure-DB listener, `backfill_slicer_tags`, walks existing Packages that already have file lists and applies tags retroactively — no downloads, no TDLib, takes seconds for thousands of rows.

Trigger the one-shot retroactive backfill with:

    SELECT pg_notify('backfill_slicer_tags', '{"limit":5000}');

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-07-25 08:13:04 +00:00 · 2026-05-24 08:53:18 +02:00
parent 7d39a13310
commit c4d9be83bd
4 changed files with 164 additions and 4 deletions
--- a/worker/src/archive/slicer-tags.ts
+++ b/worker/src/archive/slicer-tags.ts
@@ -0,0 +1,58 @@
+import type { FileEntry } from "./zip-reader.js";
+
+/**
+ * Lookup table from file extension to slicer tag.
+ *
+ * Keys are lower-case extensions without the leading dot; values are the tag
+ * shared by every extension of the same slicer family, i.e. "this archive
+ * contains files I can open in <slicer>". The table is declared family by
+ * family and flattened into one extension → tag record.
+ */
+const SLICER_EXTENSION_MAP: Record<string, string> = Object.fromEntries(
+  (
+    [
+      // Lychee Slicer
+      ["lychee", ["lys", "lyt", "lyc"]],
+
+      // ChituBox / Anycubic / Phrozen / Elegoo (resin printers)
+      ["chitubox", ["chitubox", "ctb", "cbddlp"]],
+
+      // Anycubic Photon family
+      ["anycubic", ["photon", "pwmo", "pwmx", "pwmb", "pwma", "pws", "pwsq", "phz"]],
+
+      // Bambu / Prusa
+      ["bambu", ["3mf", "bgcode"]],
+
+      // FDM gcode (generic)
+      ["fdm", ["gcode"]],
+
+      // Mango / generic resin formats sometimes seen in releases
+      ["mango", ["mfp", "mfpv", "osla"]],
+    ] as ReadonlyArray<readonly [string, readonly string[]]>
+  ).flatMap(([tag, extensions]) =>
+    extensions.map((ext): [string, string] => [ext, tag]),
+  ),
+);
+
+/**
+ * Derive a deduplicated, alphabetically sorted list of slicer tags from an
+ * archive's file listing.
+ *
+ * Extensions are compared lower-cased, and a single leading dot is ignored so
+ * both "lys" and ".lys" resolve to the same tag. Entries without an extension
+ * are skipped.
+ *
+ * @param entries File entries of one archive; only `extension` is read.
+ * @returns The matching tags, or an empty array if no recognised
+ *   slicer-specific files are present (e.g., the archive is just STLs without
+ *   pre-supports).
+ */
+export function extractSlicerTags(entries: FileEntry[]): string[] {
+  const tags = new Set<string>();
+  for (const entry of entries) {
+    if (!entry.extension) continue;
+    const ext = entry.extension.toLowerCase().replace(/^\./, "");
+    // Own-property lookup only: a bare `SLICER_EXTENSION_MAP[ext]` would also
+    // resolve inherited keys such as "constructor" or "__proto__", turning a
+    // file named e.g. "model.constructor" into a non-string "tag".
+    const tag = Object.prototype.hasOwnProperty.call(SLICER_EXTENSION_MAP, ext)
+      ? SLICER_EXTENSION_MAP[ext]
+      : undefined;
+    if (tag) tags.add(tag);
+  }
+  return [...tags].sort();
+}
--- a/worker/src/backfill.ts
+++ b/worker/src/backfill.ts
@@ -10,6 +10,7 @@ import { getActiveAccounts } from "./db/queries.js";
 import { readZipCentralDirectory } from "./archive/zip-reader.js";
 import { readRarContents } from "./archive/rar-reader.js";
 import { read7zContents } from "./archive/sevenz-reader.js";
+import { extractSlicerTags } from "./archive/slicer-tags.js";
 import type { FileEntry } from "./archive/zip-reader.js";

 const log = childLogger("backfill");
@@ -228,6 +229,10 @@ async function processOnePackage(
      return;
    }

+    // Also derive slicer tags from the file list so the backfilled packages
+    // gain the same search/filter context as newly-ingested ones.
+    const slicerTags = extractSlicerTags(entries);
+
    // Write everything in a single transaction so a partial backfill never
    // leaves the Package half-indexed.
    await db.$transaction(async (tx) => {
@@ -235,7 +240,7 @@ async function processOnePackage(
      // have backfilled this package between our read and write.
      const current = await tx.package.findUnique({
        where: { id: pkg.id },
-        select: { fileCount: true },
+        select: { fileCount: true, tags: true },
      });
      if (current && current.fileCount > 0) {
        log.debug({ ...ctx, existingFileCount: current.fileCount }, "Already backfilled by another worker — skipping");
@@ -254,9 +259,15 @@ async function processOnePackage(
          crc32: e.crc32,
        })),
      });
+
+      // Merge slicer tags with whatever's already on the Package (preserve
+      // channel category, manual tags, etc.).
+      const existingTags = current?.tags ?? [];
+      const mergedTags = [...new Set([...existingTags, ...slicerTags])];
+
      await tx.package.update({
        where: { id: pkg.id },
-        data: { fileCount: entries.length },
+        data: { fileCount: entries.length, tags: mergedTags },
      });
    });

@@ -265,3 +276,68 @@ async function processOnePackage(
    await rm(tempDir, { recursive: true, force: true }).catch(() => {});
  }
 }
+
+/**
+ * Cheap pure-DB backfill: walk Packages that already have PackageFile rows
+ * but no slicer tags, recompute the tags from their extensions, and merge
+ * with the existing tag list. No downloads, no TDLib.
+ *
+ * Candidates are read oldest-first in bounded pages. `limit` caps the number
+ * of packages that get UPDATED in one run; candidates whose files yield no
+ * slicer tag are stepped over, so repeated runs keep making progress instead
+ * of re-examining the same oldest untaggable packages.
+ *
+ * Note: a package that already carries any known slicer tag is not a
+ * candidate, even if its files would yield additional ones.
+ *
+ * Trigger:
+ *   SELECT pg_notify('backfill_slicer_tags', '{"limit":1000}');
+ *
+ * @param payloadJson JSON payload of the notification. An optional positive
+ *   integer `limit` overrides the default of 1000; anything else (malformed
+ *   JSON, missing / fractional / non-positive limit) falls back to it.
+ */
+export async function processSlicerTagBackfill(payloadJson: string): Promise<void> {
+  let limit = 1000;
+  try {
+    const parsed = JSON.parse(payloadJson) as { limit?: unknown } | null;
+    const requested = parsed?.limit;
+    // The value ends up as a row count, so only whole positive numbers qualify.
+    if (typeof requested === "number" && Number.isInteger(requested) && requested > 0) {
+      limit = requested;
+    }
+  } catch {
+    // Malformed JSON — use the default limit.
+  }
+
+  // KNOWN_TAGS = the slicer tags we know how to derive. A Package missing
+  // all of these is a candidate for recompute. extractSlicerTags is safe
+  // to run on every package (returns [] for archives with no slicer files),
+  // but filtering up-front avoids re-reading packages that are already tagged.
+  const KNOWN_TAGS = ["lychee", "chitubox", "anycubic", "bambu", "fdm", "mango"];
+
+  // Bound how many packages (with their file rows) are held in memory at once.
+  const pageSize = Math.min(limit, 500);
+
+  log.info({ limit }, "Slicer tag backfill: starting");
+
+  let examined = 0;
+  let updated = 0;
+  // Number of already-examined packages that still match the candidate filter.
+  // Packages that received a known tag drop out of the filtered set, so this
+  // is exactly the offset of the first package we have not looked at yet.
+  let offset = 0;
+
+  while (updated < limit) {
+    const page = await db.package.findMany({
+      where: {
+        fileCount: { gt: 0 },
+        NOT: { tags: { hasSome: KNOWN_TAGS } },
+      },
+      select: {
+        id: true,
+        tags: true,
+        files: { select: { extension: true } },
+      },
+      // `id` breaks createdAt ties so offset paging sees a stable order.
+      orderBy: [{ createdAt: "asc" }, { id: "asc" }],
+      skip: offset,
+      take: pageSize,
+    });
+    if (page.length === 0) break;
+
+    for (const pkg of page) {
+      if (updated >= limit) break;
+      examined++;
+
+      // extractSlicerTags only reads `extension`; the remaining FileEntry
+      // fields are placeholders.
+      const fileEntries = pkg.files.map((f) => ({
+        path: "",
+        fileName: "",
+        extension: f.extension,
+        compressedSize: 0n,
+        uncompressedSize: 0n,
+        crc32: null as string | null,
+      }));
+
+      // Compare by membership, not by array length: existing duplicate tags
+      // must not hide the fact that a slicer tag is still missing.
+      const missing = extractSlicerTags(fileEntries).filter((tag) => !pkg.tags.includes(tag));
+
+      let finalTags = pkg.tags;
+      if (missing.length > 0) {
+        finalTags = [...new Set([...pkg.tags, ...missing])];
+        await db.package.update({ where: { id: pkg.id }, data: { tags: finalTags } });
+        updated++;
+      }
+
+      // Still a candidate on the next query → it must be skipped there.
+      if (!finalTags.some((tag) => KNOWN_TAGS.includes(tag))) offset++;
+    }
+
+    // A short page means the candidate set is exhausted.
+    if (page.length < pageSize) break;
+  }
+
+  if (examined === 0) {
+    log.info("Slicer tag backfill: no candidates");
+    return;
+  }
+
+  log.info({ candidates: examined, updated }, "Slicer tag backfill: done");
+}
--- a/worker/src/fetch-listener.ts
+++ b/worker/src/fetch-listener.ts
@@ -6,7 +6,7 @@ import { processFetchRequest } from "./worker.js";
 import { processExtractRequest } from "./extract-listener.js";
 import { rebuildPackageDatabase } from "./rebuild.js";
 import { processManualUpload } from "./manual-upload.js";
-import { processBackfillRequest } from "./backfill.js";
+import { processBackfillRequest, processSlicerTagBackfill } from "./backfill.js";
 import { generateInviteLink, createSupergroup, searchPublicChat } from "./tdlib/chats.js";
 import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
 import { triggerImmediateCycle } from "./scheduler.js";
@@ -60,6 +60,7 @@ async function connectListener(): Promise<void> {
    await pgClient.query("LISTEN rebuild_packages");
    await pgClient.query("LISTEN manual_upload");
    await pgClient.query("LISTEN backfill_filelists");
+    await pgClient.query("LISTEN backfill_slicer_tags");

    pgClient.on("notification", (msg) => {
      if (msg.channel === "channel_fetch" && msg.payload) {
@@ -80,6 +81,8 @@ async function connectListener(): Promise<void> {
        handleManualUpload(msg.payload);
      } else if (msg.channel === "backfill_filelists") {
        handleBackfillFilelists(msg.payload ?? "{}");
+      } else if (msg.channel === "backfill_slicer_tags") {
+        handleBackfillSlicerTags(msg.payload ?? "{}");
      }
    });

@@ -105,7 +108,7 @@ async function connectListener(): Promise<void> {
      }
    });

-    log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages, manual_upload, backfill_filelists)");
+    log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages, manual_upload, backfill_filelists, backfill_slicer_tags)");
  } catch (err) {
    log.error({ err }, "Failed to start fetch listener — retrying");
    scheduleReconnect();
@@ -546,3 +549,16 @@ function handleBackfillFilelists(payload: string): void {
    .then(() => processBackfillRequest(payload))
    .catch((err) => log.error({ err, payload }, "Backfill request failed"));
 }
+
+// ── Slicer tag backfill handler ──
+//
+// Trigger:
+//   SELECT pg_notify('backfill_slicer_tags', '{"limit":1000}');
+//
+// Appends a slicer tag backfill run to the shared fetchQueue promise chain,
+// so it starts only after the work queued before it has settled. A failure
+// is logged together with the payload and does not reject the chain.
+function handleBackfillSlicerTags(payload: string): void {
+  const runBackfill = (): Promise<void> => processSlicerTagBackfill(payload);
+  const reportFailure = (err: unknown) =>
+    log.error({ err, payload }, "Slicer tag backfill failed");
+
+  fetchQueue = fetchQueue.then(runBackfill).catch(reportFailure);
+}
--- a/worker/src/worker.ts
+++ b/worker/src/worker.ts
@@ -47,6 +47,7 @@ import { pickPreviewFile, extractPreviewImage } from "./preview/extract.js";
 import { groupArchiveSets } from "./archive/multipart.js";
 import type { ArchiveSet } from "./archive/multipart.js";
 import { extractCreatorFromFileName, extractCreatorFromChannelTitle } from "./archive/creator.js";
+import { extractSlicerTags } from "./archive/slicer-tags.js";
 import { hashParts } from "./archive/hash.js";
 import { readZipCentralDirectory } from "./archive/zip-reader.js";
 import { readRarContents } from "./archive/rar-reader.js";
@@ -1730,6 +1731,15 @@ async function processOneArchiveSet(
        tags.push(channel.category);
      }

+      // Derive slicer tags from the file listing so users can filter the
+      // catalog by "what software opens these files". Tags include "lychee",
+      // "chitubox", "anycubic", "bambu", "fdm" etc. — only added if matching
+      // slicer-specific files are present in the archive.
+      const slicerTags = extractSlicerTags(entries);
+      for (const tag of slicerTags) {
+        if (!tags.includes(tag)) tags.push(tag);
+      }
+
      stub = await createPackageStub({
        contentHash,
        fileName: archiveName,