feat(worker): auto-tag packages with the slicer(s) their files target

The catalog already indexes 86k+ Lychee Slicer (.lys/.lyt), 23k+ ChituBox
(.chitubox/.ctb/.cbddlp), 1k+ Anycubic (.photon/.pwmo/.pwmx), and Bambu
(.3mf) slicer-specific files, but until now they were just generic
extensions in PackageFile.

After this commit:
  - Newly-ingested packages get tags derived from their file list
    ("lychee", "chitubox", "anycubic", "bambu", "fdm", "mango")
  - The `backfill_filelists` listener also applies tags to re-indexed
    packages
  - A new pure-DB listener `backfill_slicer_tags` walks existing
    Packages with file lists and applies tags retroactively — no
    downloads, no TDLib, takes seconds for thousands of rows.

Trigger the one-shot retroactive backfill with:
  SELECT pg_notify('backfill_slicer_tags', '{"limit":5000}');

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-24 08:53:18 +02:00
parent 7d39a13310
commit c4d9be83bd
4 changed files with 164 additions and 4 deletions

View File

@@ -0,0 +1,58 @@
import type { FileEntry } from "./zip-reader.js";
/**
 * Lookup table from file extension to slicer tag. Each tag groups a family
 * of extensions that mean the same thing for end users — "this archive
 * contains files I can open in <slicer>".
 *
 * Keys are lowercase and carry no leading dot. A Map is used instead of a
 * plain object so that an extension which happens to share a name with an
 * Object.prototype member ("constructor", "toString", "__proto__", …) can
 * never resolve to an inherited, non-string value.
 */
const SLICER_EXTENSION_MAP: ReadonlyMap<string, string> = new Map<string, string>([
  // Lychee Slicer
  ["lys", "lychee"],
  ["lyt", "lychee"],
  ["lyc", "lychee"],
  // ChituBox / Anycubic / Phrozen / Elegoo (resin printers)
  ["chitubox", "chitubox"],
  ["ctb", "chitubox"],
  ["cbddlp", "chitubox"],
  // Anycubic Photon family
  ["photon", "anycubic"],
  ["pwmo", "anycubic"],
  ["pwmx", "anycubic"],
  ["pwmb", "anycubic"],
  ["pwma", "anycubic"],
  ["pws", "anycubic"],
  ["pwsq", "anycubic"],
  ["phz", "anycubic"],
  // Bambu / Prusa
  ["3mf", "bambu"],
  ["bgcode", "bambu"],
  // FDM gcode (generic)
  ["gcode", "fdm"],
  // Mango / generic resin formats sometimes seen in releases
  ["mfp", "mango"],
  ["mfpv", "mango"],
  ["osla", "mango"],
]);

/**
 * Derive a deduplicated, alphabetically sorted list of slicer tags from an
 * archive's file listing.
 *
 * Extensions are matched case-insensitively; a single leading dot, if
 * present, is ignored so both "lys" and ".lys" match. Entries with an empty
 * or missing extension are skipped.
 *
 * @param entries - File entries of one archive; only `extension` is read.
 * @returns The matching tags, or an empty array if no recognised
 *   slicer-specific files are present (e.g., the archive is just STLs
 *   without pre-supports).
 */
export function extractSlicerTags(entries: FileEntry[]): string[] {
  const tags = new Set<string>();
  for (const entry of entries) {
    if (!entry.extension) continue;
    const lowered = entry.extension.toLowerCase();
    const ext = lowered.startsWith(".") ? lowered.slice(1) : lowered;
    const tag = SLICER_EXTENSION_MAP.get(ext);
    if (tag !== undefined) tags.add(tag);
  }
  return [...tags].sort();
}

View File

@@ -10,6 +10,7 @@ import { getActiveAccounts } from "./db/queries.js";
import { readZipCentralDirectory } from "./archive/zip-reader.js";
import { readRarContents } from "./archive/rar-reader.js";
import { read7zContents } from "./archive/sevenz-reader.js";
import { extractSlicerTags } from "./archive/slicer-tags.js";
import type { FileEntry } from "./archive/zip-reader.js";
const log = childLogger("backfill");
@@ -228,6 +229,10 @@ async function processOnePackage(
return;
}
// Also derive slicer tags from the file list so the backfilled packages
// gain the same search/filter context as newly-ingested ones.
const slicerTags = extractSlicerTags(entries);
// Write everything in a single transaction so a partial backfill never
// leaves the Package half-indexed.
await db.$transaction(async (tx) => {
@@ -235,7 +240,7 @@ async function processOnePackage(
// have backfilled this package between our read and write.
const current = await tx.package.findUnique({
where: { id: pkg.id },
select: { fileCount: true },
select: { fileCount: true, tags: true },
});
if (current && current.fileCount > 0) {
log.debug({ ...ctx, existingFileCount: current.fileCount }, "Already backfilled by another worker — skipping");
@@ -254,9 +259,15 @@ async function processOnePackage(
crc32: e.crc32,
})),
});
// Merge slicer tags with whatever's already on the Package (preserve
// channel category, manual tags, etc.).
const existingTags = current?.tags ?? [];
const mergedTags = [...new Set([...existingTags, ...slicerTags])];
await tx.package.update({
where: { id: pkg.id },
data: { fileCount: entries.length },
data: { fileCount: entries.length, tags: mergedTags },
});
});
@@ -265,3 +276,68 @@ async function processOnePackage(
await rm(tempDir, { recursive: true, force: true }).catch(() => {});
}
}
/**
 * Cheap pure-DB backfill: walk Packages that already have PackageFile rows
 * but none of the known slicer tags, recompute the tags from their file
 * extensions, and merge them into the existing tag list. No downloads, no
 * TDLib.
 *
 * Trigger:
 *   SELECT pg_notify('backfill_slicer_tags', '{"limit":1000}');
 *
 * @param payloadJson - Notification payload. An optional numeric `limit`
 *   (positive integer) caps how many candidate packages are examined in this
 *   run; anything else falls back to the default of 1000.
 *
 * NOTE(review): candidates are selected oldest-first and a package whose
 * files yield no slicer tags stays a candidate forever. Once more than
 * `limit` such packages exist, repeated runs appear to re-examine the same
 * rows and never reach newer ones — consider a cursor or a DB-side extension
 * filter. Likewise, a package that already carries one known tag is excluded
 * by the filter even if its files would justify additional ones.
 */
export async function processSlicerTagBackfill(payloadJson: string): Promise<void> {
  const DEFAULT_LIMIT = 1000;
  let limit = DEFAULT_LIMIT;
  try {
    const parsed: unknown = JSON.parse(payloadJson);
    const requested =
      typeof parsed === "object" && parsed !== null
        ? (parsed as { limit?: unknown }).limit
        : undefined;
    // `take` needs a positive integer; a fractional or non-finite value
    // (e.g. 1.5, or 1e400 which parses to Infinity) would otherwise make the
    // query itself fail, so reject it here and keep the default.
    if (typeof requested === "number" && Number.isSafeInteger(requested) && requested > 0) {
      limit = requested;
    } else if (requested !== undefined) {
      log.info({ requested, limit }, "Slicer tag backfill: ignoring invalid limit, using default");
    }
  } catch {
    // Payload is not valid JSON — keep the default, but leave a trace.
    log.info({ payload: payloadJson, limit }, "Slicer tag backfill: unparseable payload, using default limit");
  }

  // KNOWN_TAGS = the slicer tags we know how to derive. A Package missing
  // all of these is a candidate for recompute. extractSlicerTags is safe
  // to run on every package (returns [] for archives with no slicer files),
  // but filtering up-front avoids walking the entire DB.
  const KNOWN_TAGS = ["lychee", "chitubox", "anycubic", "bambu", "fdm", "mango"];

  const candidates = await db.package.findMany({
    where: {
      fileCount: { gt: 0 },
      NOT: { tags: { hasSome: KNOWN_TAGS } },
    },
    select: {
      id: true,
      tags: true,
      files: { select: { extension: true } },
    },
    orderBy: { createdAt: "asc" },
    take: limit,
  });

  if (candidates.length === 0) {
    log.info("Slicer tag backfill: no candidates");
    return;
  }

  log.info({ count: candidates.length }, "Slicer tag backfill: starting");

  let updated = 0;
  for (const pkg of candidates) {
    // extractSlicerTags only reads `extension`; the remaining fields are
    // placeholders so the objects have the FileEntry shape.
    const fileEntries = pkg.files.map((f) => ({
      path: "",
      fileName: "",
      extension: f.extension,
      compressedSize: 0n,
      uncompressedSize: 0n,
      crc32: null as string | null,
    }));
    const slicerTags = extractSlicerTags(fileEntries);

    // Decide by membership, not by comparing array lengths: if the stored
    // tag list contains duplicates, the deduplicated merge can have the same
    // length as the original even though new tags were added.
    const missing = slicerTags.filter((tag: string) => !pkg.tags.includes(tag));
    if (missing.length === 0) continue;

    const merged = [...new Set([...pkg.tags, ...slicerTags])];
    await db.package.update({ where: { id: pkg.id }, data: { tags: merged } });
    updated++;
  }

  log.info({ candidates: candidates.length, updated }, "Slicer tag backfill: done");
}

View File

@@ -6,7 +6,7 @@ import { processFetchRequest } from "./worker.js";
import { processExtractRequest } from "./extract-listener.js";
import { rebuildPackageDatabase } from "./rebuild.js";
import { processManualUpload } from "./manual-upload.js";
import { processBackfillRequest } from "./backfill.js";
import { processBackfillRequest, processSlicerTagBackfill } from "./backfill.js";
import { generateInviteLink, createSupergroup, searchPublicChat } from "./tdlib/chats.js";
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
import { triggerImmediateCycle } from "./scheduler.js";
@@ -60,6 +60,7 @@ async function connectListener(): Promise<void> {
await pgClient.query("LISTEN rebuild_packages");
await pgClient.query("LISTEN manual_upload");
await pgClient.query("LISTEN backfill_filelists");
await pgClient.query("LISTEN backfill_slicer_tags");
pgClient.on("notification", (msg) => {
if (msg.channel === "channel_fetch" && msg.payload) {
@@ -80,6 +81,8 @@ async function connectListener(): Promise<void> {
handleManualUpload(msg.payload);
} else if (msg.channel === "backfill_filelists") {
handleBackfillFilelists(msg.payload ?? "{}");
} else if (msg.channel === "backfill_slicer_tags") {
handleBackfillSlicerTags(msg.payload ?? "{}");
}
});
@@ -105,7 +108,7 @@ async function connectListener(): Promise<void> {
}
});
log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages, manual_upload, backfill_filelists)");
log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages, manual_upload, backfill_filelists, backfill_slicer_tags)");
} catch (err) {
log.error({ err }, "Failed to start fetch listener — retrying");
scheduleReconnect();
@@ -546,3 +549,16 @@ function handleBackfillFilelists(payload: string): void {
.then(() => processBackfillRequest(payload))
.catch((err) => log.error({ err, payload }, "Backfill request failed"));
}
// ── Slicer tag backfill handler ──
//
// Trigger:
//   SELECT pg_notify('backfill_slicer_tags', '{"limit":1000}');
//
// Appends a slicer-tag backfill run to the shared fetchQueue chain, so it
// starts only after previously queued work has settled. A failure is logged
// and absorbed here, which keeps the chain usable for whatever is queued
// next.
function handleBackfillSlicerTags(payload: string): void {
  const runBackfill = (): Promise<void> => processSlicerTagBackfill(payload);
  const reportFailure = (err: unknown) =>
    log.error({ err, payload }, "Slicer tag backfill failed");

  fetchQueue = fetchQueue.then(runBackfill).catch(reportFailure);
}

View File

@@ -47,6 +47,7 @@ import { pickPreviewFile, extractPreviewImage } from "./preview/extract.js";
import { groupArchiveSets } from "./archive/multipart.js";
import type { ArchiveSet } from "./archive/multipart.js";
import { extractCreatorFromFileName, extractCreatorFromChannelTitle } from "./archive/creator.js";
import { extractSlicerTags } from "./archive/slicer-tags.js";
import { hashParts } from "./archive/hash.js";
import { readZipCentralDirectory } from "./archive/zip-reader.js";
import { readRarContents } from "./archive/rar-reader.js";
@@ -1730,6 +1731,15 @@ async function processOneArchiveSet(
tags.push(channel.category);
}
// Derive slicer tags from the file listing so users can filter the
// catalog by "what software opens these files". Tags include "lychee",
// "chitubox", "anycubic", "bambu", "fdm" etc. — only added if matching
// slicer-specific files are present in the archive.
const slicerTags = extractSlicerTags(entries);
for (const tag of slicerTags) {
if (!tags.includes(tag)) tags.push(tag);
}
stub = await createPackageStub({
contentHash,
fileName: archiveName,