mirror of
https://github.com/xCyanGrizzly/DragonsStash.git
synced 2026-06-09 18:51:16 +00:00
feat(worker): auto-tag packages with the slicer(s) their files target
DragonsStash indexes 86k+ Lychee Slicer (.lys/.lyt), 23k+ ChituBox (.chitubox/.ctb/
.cbddlp), 1k+ Anycubic (.photon/.pwmo/.pwmx), and Bambu (.3mf)
slicer-specific files. Until now these were stored only as generic extensions in
PackageFile.
After this commit:
- Newly-ingested packages get tags derived from their file list
("lychee", "chitubox", "anycubic", "bambu", "fdm", "mango")
- The `backfill_filelists` listener also applies tags to re-indexed
packages
- A new pure-DB listener `backfill_slicer_tags` walks existing
Packages with file lists and applies tags retroactively — no
downloads, no TDLib, takes seconds for thousands of rows.
Trigger the one-shot retroactive backfill with:
SELECT pg_notify('backfill_slicer_tags', '{"limit":5000}');
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
58
worker/src/archive/slicer-tags.ts
Normal file
58
worker/src/archive/slicer-tags.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
import type { FileEntry } from "./zip-reader.js";
|
||||
|
||||
/**
 * Lookup table from file extension to slicer tag. Each tag groups a family of
 * extensions that mean the same thing for end users — "this archive contains
 * files I can open in <slicer>".
 *
 * Keys are lower-case and carry no leading dot; callers normalise before
 * looking up.
 *
 * A Map is used instead of a plain object literal so that an extension which
 * happens to coincide with an inherited Object.prototype member (for example
 * "constructor" or "__proto__") can never resolve to a non-string value.
 */
const SLICER_EXTENSION_MAP: ReadonlyMap<string, string> = new Map([
  // Lychee Slicer
  ["lys", "lychee"],
  ["lyt", "lychee"],
  ["lyc", "lychee"],

  // ChituBox / Anycubic / Phrozen / Elegoo (resin printers)
  ["chitubox", "chitubox"],
  ["ctb", "chitubox"],
  ["cbddlp", "chitubox"],

  // Anycubic Photon family
  ["photon", "anycubic"],
  ["pwmo", "anycubic"],
  ["pwmx", "anycubic"],
  ["pwmb", "anycubic"],
  ["pwma", "anycubic"],
  ["pws", "anycubic"],
  ["pwsq", "anycubic"],
  ["phz", "anycubic"],

  // Bambu / Prusa
  ["3mf", "bambu"],
  ["bgcode", "bambu"],

  // FDM gcode (generic)
  ["gcode", "fdm"],

  // Mango / generic resin formats sometimes seen in releases
  ["mfp", "mango"],
  ["mfpv", "mango"],
  ["osla", "mango"],
]);

/**
 * Derive a deduplicated, alphabetically sorted list of slicer tags from an
 * archive's file listing.
 *
 * Matching is case-insensitive, and a single leading dot on the extension is
 * ignored, so "LYS", "lys" and ".lys" all map to "lychee". Entries with a
 * missing or empty extension are skipped.
 *
 * @param entries File entries of one archive; only `extension` is read.
 * @returns The matching tags, or an empty array if no recognised
 *   slicer-specific files are present (e.g., the archive is just STLs
 *   without pre-supports).
 */
export function extractSlicerTags(entries: FileEntry[]): string[] {
  const tags = new Set<string>();
  for (const entry of entries) {
    if (!entry.extension) continue;
    // Normalise to the table's key format: lower-case, no leading dot.
    const ext = entry.extension.toLowerCase().replace(/^\./, "");
    const tag = SLICER_EXTENSION_MAP.get(ext);
    if (tag !== undefined) tags.add(tag);
  }
  return [...tags].sort();
}
|
||||
@@ -10,6 +10,7 @@ import { getActiveAccounts } from "./db/queries.js";
|
||||
import { readZipCentralDirectory } from "./archive/zip-reader.js";
|
||||
import { readRarContents } from "./archive/rar-reader.js";
|
||||
import { read7zContents } from "./archive/sevenz-reader.js";
|
||||
import { extractSlicerTags } from "./archive/slicer-tags.js";
|
||||
import type { FileEntry } from "./archive/zip-reader.js";
|
||||
|
||||
const log = childLogger("backfill");
|
||||
@@ -228,6 +229,10 @@ async function processOnePackage(
|
||||
return;
|
||||
}
|
||||
|
||||
// Also derive slicer tags from the file list so the backfilled packages
|
||||
// gain the same search/filter context as newly-ingested ones.
|
||||
const slicerTags = extractSlicerTags(entries);
|
||||
|
||||
// Write everything in a single transaction so a partial backfill never
|
||||
// leaves the Package half-indexed.
|
||||
await db.$transaction(async (tx) => {
|
||||
@@ -235,7 +240,7 @@ async function processOnePackage(
|
||||
// have backfilled this package between our read and write.
|
||||
const current = await tx.package.findUnique({
|
||||
where: { id: pkg.id },
|
||||
select: { fileCount: true },
|
||||
select: { fileCount: true, tags: true },
|
||||
});
|
||||
if (current && current.fileCount > 0) {
|
||||
log.debug({ ...ctx, existingFileCount: current.fileCount }, "Already backfilled by another worker — skipping");
|
||||
@@ -254,9 +259,15 @@ async function processOnePackage(
|
||||
crc32: e.crc32,
|
||||
})),
|
||||
});
|
||||
|
||||
// Merge slicer tags with whatever's already on the Package (preserve
|
||||
// channel category, manual tags, etc.).
|
||||
const existingTags = current?.tags ?? [];
|
||||
const mergedTags = [...new Set([...existingTags, ...slicerTags])];
|
||||
|
||||
await tx.package.update({
|
||||
where: { id: pkg.id },
|
||||
data: { fileCount: entries.length },
|
||||
data: { fileCount: entries.length, tags: mergedTags },
|
||||
});
|
||||
});
|
||||
|
||||
@@ -265,3 +276,68 @@ async function processOnePackage(
|
||||
await rm(tempDir, { recursive: true, force: true }).catch(() => {});
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Cheap pure-DB backfill: walk Packages that already have PackageFile rows
 * but no slicer tags, recompute the tags from their extensions, and merge
 * with the existing tag list. No downloads, no TDLib.
 *
 * Trigger:
 *   SELECT pg_notify('backfill_slicer_tags', '{"limit":1000}');
 *
 * @param payloadJson JSON text of the notification payload. An optional
 *   numeric `limit` caps how many candidate packages are examined in this
 *   run; fractional values are rounded down. Malformed JSON, or a missing,
 *   non-numeric, non-finite or < 1 limit, falls back to 1000.
 * @returns Resolves once every candidate has been examined. Errors from the
 *   database calls are not caught here and reject the returned promise.
 */
export async function processSlicerTagBackfill(payloadJson: string): Promise<void> {
  const DEFAULT_LIMIT = 1000;
  let limit = DEFAULT_LIMIT;
  try {
    const parsed: unknown = JSON.parse(payloadJson);
    if (typeof parsed === "object" && parsed !== null) {
      const requested = (parsed as { limit?: unknown }).limit;
      // `take` needs a whole number: reject Infinity (e.g. from "1e400") and
      // round fractions down instead of handing them to the query builder.
      if (typeof requested === "number" && Number.isFinite(requested) && requested >= 1) {
        limit = Math.floor(requested);
      }
    }
  } catch {
    // Malformed JSON — use the default limit.
  }

  // KNOWN_TAGS = the slicer tags we know how to derive. A Package missing
  // all of these is a candidate for recompute. extractSlicerTags is safe
  // to run on every package (returns [] for archives with no slicer files),
  // but filtering up-front avoids walking the entire DB.
  // NOTE(review): this list has to be kept in step with the tags that
  // extractSlicerTags can return.
  const KNOWN_TAGS = ["lychee", "chitubox", "anycubic", "bambu", "fdm", "mango"];

  // NOTE(review): packages without any slicer-specific file never gain a
  // known tag, so they stay candidates on every run. With the oldest-first
  // ordering and `take: limit`, repeated runs re-examine the same leading
  // rows; if at least `limit` such packages sort first, later packages are
  // never reached. Consider filtering on file extension in the query or
  // paginating with a cursor.
  // NOTE(review): presumably `tags` is never NULL in the database — rows
  // with a NULL array may not be matched by the NOT filter; confirm.
  const candidates = await db.package.findMany({
    where: {
      fileCount: { gt: 0 },
      NOT: { tags: { hasSome: KNOWN_TAGS } },
    },
    select: {
      id: true,
      tags: true,
      files: { select: { extension: true } },
    },
    orderBy: { createdAt: "asc" },
    take: limit,
  });

  if (candidates.length === 0) {
    log.info("Slicer tag backfill: no candidates");
    return;
  }

  log.info({ count: candidates.length }, "Slicer tag backfill: starting");

  let updated = 0;
  for (const pkg of candidates) {
    // extractSlicerTags only reads `extension`; the remaining fields are
    // placeholders that satisfy the entry shape.
    const fileEntries = pkg.files.map((f) => ({
      path: "",
      fileName: "",
      extension: f.extension,
      compressedSize: 0n,
      uncompressedSize: 0n,
      crc32: null as string | null,
    }));
    const slicerTags = extractSlicerTags(fileEntries);
    if (slicerTags.length === 0) continue;

    // Decide by membership, not by comparing array lengths: if the stored
    // tag list contains duplicates, de-duplicating it can cancel out the
    // growth from the new tags and a length comparison would wrongly skip
    // the update.
    const existing = new Set<string>(pkg.tags);
    const missing = slicerTags.filter((tag: string) => !existing.has(tag));
    if (missing.length === 0) continue;

    const merged = [...existing, ...missing];
    await db.package.update({ where: { id: pkg.id }, data: { tags: merged } });
    updated++;
  }

  log.info({ candidates: candidates.length, updated }, "Slicer tag backfill: done");
}
|
||||
|
||||
@@ -6,7 +6,7 @@ import { processFetchRequest } from "./worker.js";
|
||||
import { processExtractRequest } from "./extract-listener.js";
|
||||
import { rebuildPackageDatabase } from "./rebuild.js";
|
||||
import { processManualUpload } from "./manual-upload.js";
|
||||
import { processBackfillRequest } from "./backfill.js";
|
||||
import { processBackfillRequest, processSlicerTagBackfill } from "./backfill.js";
|
||||
import { generateInviteLink, createSupergroup, searchPublicChat } from "./tdlib/chats.js";
|
||||
import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js";
|
||||
import { triggerImmediateCycle } from "./scheduler.js";
|
||||
@@ -60,6 +60,7 @@ async function connectListener(): Promise<void> {
|
||||
await pgClient.query("LISTEN rebuild_packages");
|
||||
await pgClient.query("LISTEN manual_upload");
|
||||
await pgClient.query("LISTEN backfill_filelists");
|
||||
await pgClient.query("LISTEN backfill_slicer_tags");
|
||||
|
||||
pgClient.on("notification", (msg) => {
|
||||
if (msg.channel === "channel_fetch" && msg.payload) {
|
||||
@@ -80,6 +81,8 @@ async function connectListener(): Promise<void> {
|
||||
handleManualUpload(msg.payload);
|
||||
} else if (msg.channel === "backfill_filelists") {
|
||||
handleBackfillFilelists(msg.payload ?? "{}");
|
||||
} else if (msg.channel === "backfill_slicer_tags") {
|
||||
handleBackfillSlicerTags(msg.payload ?? "{}");
|
||||
}
|
||||
});
|
||||
|
||||
@@ -105,7 +108,7 @@ async function connectListener(): Promise<void> {
|
||||
}
|
||||
});
|
||||
|
||||
log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages, manual_upload, backfill_filelists)");
|
||||
log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages, manual_upload, backfill_filelists, backfill_slicer_tags)");
|
||||
} catch (err) {
|
||||
log.error({ err }, "Failed to start fetch listener — retrying");
|
||||
scheduleReconnect();
|
||||
@@ -546,3 +549,16 @@ function handleBackfillFilelists(payload: string): void {
|
||||
.then(() => processBackfillRequest(payload))
|
||||
.catch((err) => log.error({ err, payload }, "Backfill request failed"));
|
||||
}
|
||||
|
||||
// ── Slicer tag backfill handler ──
//
// Trigger:
//   SELECT pg_notify('backfill_slicer_tags', '{"limit":1000}');
//
// Appends a slicer-tag backfill run to the shared fetchQueue promise chain,
// so it starts only after the work queued before it has settled. A failure
// of the run is logged together with the payload and does not reject the
// chain, so later queued work still runs.
function handleBackfillSlicerTags(payload: string): void {
  const runBackfill = () => processSlicerTagBackfill(payload);
  const reportFailure = (err: unknown) =>
    log.error({ err, payload }, "Slicer tag backfill failed");

  fetchQueue = fetchQueue.then(runBackfill).catch(reportFailure);
}
|
||||
|
||||
@@ -47,6 +47,7 @@ import { pickPreviewFile, extractPreviewImage } from "./preview/extract.js";
|
||||
import { groupArchiveSets } from "./archive/multipart.js";
|
||||
import type { ArchiveSet } from "./archive/multipart.js";
|
||||
import { extractCreatorFromFileName, extractCreatorFromChannelTitle } from "./archive/creator.js";
|
||||
import { extractSlicerTags } from "./archive/slicer-tags.js";
|
||||
import { hashParts } from "./archive/hash.js";
|
||||
import { readZipCentralDirectory } from "./archive/zip-reader.js";
|
||||
import { readRarContents } from "./archive/rar-reader.js";
|
||||
@@ -1730,6 +1731,15 @@ async function processOneArchiveSet(
|
||||
tags.push(channel.category);
|
||||
}
|
||||
|
||||
// Derive slicer tags from the file listing so users can filter the
|
||||
// catalog by "what software opens these files". Tags include "lychee",
|
||||
// "chitubox", "anycubic", "bambu", "fdm" etc. — only added if matching
|
||||
// slicer-specific files are present in the archive.
|
||||
const slicerTags = extractSlicerTags(entries);
|
||||
for (const tag of slicerTags) {
|
||||
if (!tags.includes(tag)) tags.push(tag);
|
||||
}
|
||||
|
||||
stub = await createPackageStub({
|
||||
contentHash,
|
||||
fileName: archiveName,
|
||||
|
||||
Reference in New Issue
Block a user