feat(worker): auto-tag packages with the slicer(s) their files target

The index holds 86k+ Lychee Slicer (.lys/.lyt), 23k+ ChituBox (.chitubox/.ctb/.cbddlp), 1k+ Anycubic (.photon/.pwmo/.pwmx), and Bambu (.3mf) slicer-specific files. Until now they were just generic extensions in PackageFile.

After this commit:
- Newly-ingested packages get tags derived from their file list ("lychee", "chitubox", "anycubic", "bambu", "fdm", "mango").
- The `backfill_filelists` listener also applies tags to re-indexed packages.
- A new pure-DB listener `backfill_slicer_tags` walks existing Packages with file lists and applies tags retroactively — no downloads, no TDLib, takes seconds for thousands of rows.

Trigger the one-shot retroactive backfill with:

    SELECT pg_notify('backfill_slicer_tags', '{"limit":5000}');

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-13 04:31:16 +00:00 · 2026-05-24 08:53:18 +02:00
parent 7d39a13310
commit c4d9be83bd
4 changed files with 164 additions and 4 deletions
--- a/worker/src/archive/slicer-tags.ts
+++ b/worker/src/archive/slicer-tags.ts
@@ -0,0 +1,58 @@
 import type { FileEntry } from "./zip-reader.js";
 /**
 * Mapping from file extensions to slicer tags. Each tag groups a family of
 * extensions that mean the same thing for end users — "this archive contains
 * files I can open in <slicer>".
 *
 * Keys are lower-case extensions without the leading dot. Always look keys up
 * through an own-property check: this is a plain object, so a bare index with
 * an arbitrary extension ("constructor", "toString", "__proto__", ...) would
 * otherwise resolve to an inherited Object.prototype member.
 */
 const SLICER_EXTENSION_MAP: Readonly<Record<string, string>> = {
  // Lychee Slicer
  lys: "lychee",
  lyt: "lychee",
  lyc: "lychee",
  // ChituBox / Anycubic / Phrozen / Elegoo (resin printers)
  chitubox: "chitubox",
  ctb: "chitubox",
  cbddlp: "chitubox",
  // Anycubic Photon family
  photon: "anycubic",
  pwmo: "anycubic",
  pwmx: "anycubic",
  pwmb: "anycubic",
  pwma: "anycubic",
  pws: "anycubic",
  pwsq: "anycubic",
  // NOTE(review): the ChituBox section comment above names Phrozen, yet "phz"
  // is tagged "anycubic" — confirm this grouping is intended.
  phz: "anycubic",
  // Bambu / Prusa
  // NOTE(review): both extensions are tagged "bambu" although the section
  // comment also names Prusa — confirm a single shared tag is intended.
  "3mf": "bambu",
  bgcode: "bambu",
  // FDM gcode (generic)
  gcode: "fdm",
  // Mango / generic resin formats sometimes seen in releases
  mfp: "mango",
  mfpv: "mango",
  osla: "mango",
 };
 /**
 * Derive a deduplicated, alphabetically sorted list of slicer tags from an
 * archive's file listing.
 *
 * Matching is case-insensitive and tolerates surrounding whitespace and a
 * single leading dot, so "LYS", "lys" and ".lys" all map to "lychee". Entries
 * with a missing/empty extension are ignored.
 *
 * @param entries File entries of one archive; only `extension` is read.
 * @returns Sorted unique tags, or an empty array if no recognised
 *   slicer-specific files are present (e.g., the archive is just STLs
 *   without pre-supports).
 */
 export function extractSlicerTags(entries: FileEntry[]): string[] {
  const tags = new Set<string>();
  for (const entry of entries) {
    if (!entry.extension) continue;
    const ext = entry.extension.trim().toLowerCase().replace(/^\./, "");
    // Own-property check: never let prototype members ("constructor",
    // "__proto__", ...) leak into the tag list as non-string values.
    if (!Object.prototype.hasOwnProperty.call(SLICER_EXTENSION_MAP, ext)) continue;
    const tag = SLICER_EXTENSION_MAP[ext];
    if (tag) tags.add(tag);
  }
  return [...tags].sort();
 }
--- a/worker/src/backfill.ts
+++ b/worker/src/backfill.ts
@@ -10,6 +10,7 @@ import { getActiveAccounts } from "./db/queries.js";
 import { readZipCentralDirectory } from "./archive/zip-reader.js";
 import { readRarContents } from "./archive/rar-reader.js";
 import { read7zContents } from "./archive/sevenz-reader.js";
 import { extractSlicerTags } from "./archive/slicer-tags.js";
 import type { FileEntry } from "./archive/zip-reader.js";
 const log = childLogger("backfill");
@@ -228,6 +229,10 @@ async function processOnePackage(
      return;
    }
    // Also derive slicer tags from the file list so the backfilled packages
    // gain the same search/filter context as newly-ingested ones.
    const slicerTags = extractSlicerTags(entries);
    // Write everything in a single transaction so a partial backfill never
    // leaves the Package half-indexed.
    await db.$transaction(async (tx) => {
@@ -235,7 +240,7 @@ async function processOnePackage(
      // have backfilled this package between our read and write.
      const current = await tx.package.findUnique({
        where: { id: pkg.id },
-        select: { fileCount: true },
+        select: { fileCount: true, tags: true },
      });
      if (current && current.fileCount > 0) {
        log.debug({ ...ctx, existingFileCount: current.fileCount }, "Already backfilled by another worker — skipping");
@@ -254,9 +259,15 @@ async function processOnePackage(
          crc32: e.crc32,
        })),
      });
      // Merge slicer tags with whatever's already on the Package (preserve
      // channel category, manual tags, etc.).
      const existingTags = current?.tags ?? [];
      const mergedTags = [...new Set([...existingTags, ...slicerTags])];
      await tx.package.update({
        where: { id: pkg.id },
-        data: { fileCount: entries.length },
+        data: { fileCount: entries.length, tags: mergedTags },
      });
    });
@@ -265,3 +276,68 @@ async function processOnePackage(
    await rm(tempDir, { recursive: true, force: true }).catch(() => {});
  }
 }
 /**
 * Cheap pure-DB backfill: walk Packages that already have PackageFile rows
 * but no slicer tags, recompute the tags from their extensions, and merge
 * with the existing tag list. No downloads, no TDLib.
 *
 * Payload (JSON, all fields optional):
 *   - `limit`  — max number of candidate Packages examined in this run
 *                (default 1000; fractional values are floored).
 *   - `offset` — number of leading candidates to skip (default 0).
 *
 * Packages whose files contain no slicer-specific extension never gain a tag,
 * so they remain candidates on every run and sort first (oldest first). To
 * make progress past them, re-trigger with the `nextOffset` value logged at
 * the end of the previous run.
 *
 * Trigger:
 *   SELECT pg_notify('backfill_slicer_tags', '{"limit":1000}');
 *   SELECT pg_notify('backfill_slicer_tags', '{"limit":1000,"offset":1000}');
 */
 /** Parse the NOTIFY payload, falling back to defaults for anything invalid. */
 function parseSlicerTagBackfillPayload(payloadJson: string): { limit: number; offset: number } {
  let limit = 1000;
  let offset = 0;
  try {
    const parsed: unknown = JSON.parse(payloadJson);
    if (typeof parsed === "object" && parsed !== null) {
      const { limit: rawLimit, offset: rawOffset } = parsed as { limit?: unknown; offset?: unknown };
      // Only finite values >= 1 are accepted, floored to an integer, so that
      // `take` / `skip` never receive a fraction, NaN or Infinity.
      if (typeof rawLimit === "number" && Number.isFinite(rawLimit) && rawLimit >= 1) {
        limit = Math.floor(rawLimit);
      }
      if (typeof rawOffset === "number" && Number.isFinite(rawOffset) && rawOffset >= 1) {
        offset = Math.floor(rawOffset);
      }
    }
  } catch {
    // Malformed JSON — use defaults
  }
  return { limit, offset };
 }
 /**
 * Run one slicer-tag backfill pass as described above.
 *
 * @param payloadJson Raw NOTIFY payload; invalid JSON or invalid fields fall
 *   back to the defaults instead of failing.
 * @returns Resolves when all examined candidates have been processed. A
 *   failing DB call rejects the promise and aborts the remaining candidates.
 */
 export async function processSlicerTagBackfill(payloadJson: string): Promise<void> {
  const { limit, offset } = parseSlicerTagBackfillPayload(payloadJson);
  // KNOWN_TAGS = the slicer tags we know how to derive. A Package missing
  // all of these is a candidate for recompute. extractSlicerTags is safe
  // to run on every package (returns [] for archives with no slicer files),
  // but filtering up-front avoids walking the entire DB.
  const KNOWN_TAGS = ["lychee", "chitubox", "anycubic", "bambu", "fdm", "mango"];
  const candidates = await db.package.findMany({
    where: {
      fileCount: { gt: 0 },
      NOT: { tags: { hasSome: KNOWN_TAGS } },
    },
    select: {
      id: true,
      tags: true,
      files: { select: { extension: true } },
    },
    // `id` breaks createdAt ties so that `offset` paging is deterministic.
    orderBy: [{ createdAt: "asc" }, { id: "asc" }],
    skip: offset,
    take: limit,
  });
  if (candidates.length === 0) {
    log.info("Slicer tag backfill: no candidates");
    return;
  }
  log.info({ count: candidates.length }, "Slicer tag backfill: starting");
  let updated = 0;
  for (const pkg of candidates) {
    // extractSlicerTags only reads `extension`; the other FileEntry fields
    // are placeholders to satisfy the type.
    const fileEntries = pkg.files.map((f) => ({
      path: "",
      fileName: "",
      extension: f.extension,
      compressedSize: 0n,
      uncompressedSize: 0n,
      crc32: null as string | null,
    }));
    const slicerTags = extractSlicerTags(fileEntries);
    // Decide by membership, not by comparing array lengths: a length check
    // gives the wrong answer when the stored tag list contains duplicates.
    const existing = new Set(pkg.tags);
    const missing = slicerTags.filter((tag) => !existing.has(tag));
    if (missing.length === 0) continue;
    await db.package.update({
      where: { id: pkg.id },
      data: { tags: [...existing, ...missing] },
    });
    updated++;
  }
  // Updated packages drop out of the candidate set; the ones left untouched
  // stay in front of it. Skipping them next time resumes where this run ended
  // (assuming no concurrent changes to the candidate set).
  const nextOffset = offset + candidates.length - updated;
  log.info({ candidates: candidates.length, updated, nextOffset }, "Slicer tag backfill: done");
 }
--- a/worker/src/fetch-listener.ts
+++ b/worker/src/fetch-listener.ts
@@ -6,7 +6,7 @@ import { processFetchRequest } from "./worker.js";
 import { processExtractRequest } from "./extract-listener.js";
 import { rebuildPackageDatabase } from "./rebuild.js";
 import { processManualUpload } from "./manual-upload.js";
-import { processBackfillRequest } from "./backfill.js";
+import { processBackfillRequest, processSlicerTagBackfill } from "./backfill.js";
 import { generateInviteLink, createSupergroup, searchPublicChat } from "./tdlib/chats.js";
 import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
 import { triggerImmediateCycle } from "./scheduler.js";
@@ -60,6 +60,7 @@ async function connectListener(): Promise<void> {
    await pgClient.query("LISTEN rebuild_packages");
    await pgClient.query("LISTEN manual_upload");
    await pgClient.query("LISTEN backfill_filelists");
    await pgClient.query("LISTEN backfill_slicer_tags");
    pgClient.on("notification", (msg) => {
      if (msg.channel === "channel_fetch" && msg.payload) {
@@ -80,6 +81,8 @@ async function connectListener(): Promise<void> {
        handleManualUpload(msg.payload);
      } else if (msg.channel === "backfill_filelists") {
        handleBackfillFilelists(msg.payload ?? "{}");
      } else if (msg.channel === "backfill_slicer_tags") {
        handleBackfillSlicerTags(msg.payload ?? "{}");
      }
    });
@@ -105,7 +108,7 @@ async function connectListener(): Promise<void> {
      }
    });
-    log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages, manual_upload, backfill_filelists)");
+    log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages, manual_upload, backfill_filelists, backfill_slicer_tags)");
  } catch (err) {
    log.error({ err }, "Failed to start fetch listener — retrying");
    scheduleReconnect();
@@ -546,3 +549,16 @@ function handleBackfillFilelists(payload: string): void {
    .then(() => processBackfillRequest(payload))
    .catch((err) => log.error({ err, payload }, "Backfill request failed"));
 }
 // ── Slicer tag backfill handler ──
 //
 // Invoked for notifications on the `backfill_slicer_tags` channel, e.g.:
 //   SELECT pg_notify('backfill_slicer_tags', '{"limit":1000}');
 //
 // The work is appended to the shared `fetchQueue` promise chain, so it starts
 // only after everything queued before it has settled. A failure is logged
 // and swallowed here so the chain stays usable for later requests.
 function handleBackfillSlicerTags(payload: string): void {
  const runBackfill = (): Promise<void> => processSlicerTagBackfill(payload);
  const reportFailure = (err: unknown) =>
    log.error({ err, payload }, "Slicer tag backfill failed");
  fetchQueue = fetchQueue.then(runBackfill).catch(reportFailure);
 }
--- a/worker/src/worker.ts
+++ b/worker/src/worker.ts
@@ -47,6 +47,7 @@ import { pickPreviewFile, extractPreviewImage } from "./preview/extract.js";
 import { groupArchiveSets } from "./archive/multipart.js";
 import type { ArchiveSet } from "./archive/multipart.js";
 import { extractCreatorFromFileName, extractCreatorFromChannelTitle } from "./archive/creator.js";
 import { extractSlicerTags } from "./archive/slicer-tags.js";
 import { hashParts } from "./archive/hash.js";
 import { readZipCentralDirectory } from "./archive/zip-reader.js";
 import { readRarContents } from "./archive/rar-reader.js";
@@ -1730,6 +1731,15 @@ async function processOneArchiveSet(
        tags.push(channel.category);
      }
      // Derive slicer tags from the file listing so users can filter the
      // catalog by "what software opens these files". Tags include "lychee",
      // "chitubox", "anycubic", "bambu", "fdm" etc. — only added if matching
      // slicer-specific files are present in the archive.
      const slicerTags = extractSlicerTags(entries);
      for (const tag of slicerTags) {
        if (!tags.includes(tag)) tags.push(tag);
      }
      stub = await createPackageStub({
        contentHash,
        fileName: archiveName,