mirror of
https://github.com/xCyanGrizzly/DragonsStash.git
synced 2026-05-11 06:11:15 +00:00
feat: add album grouping post-processing to worker pipeline
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
79
worker/src/grouping.ts
Normal file
79
worker/src/grouping.ts
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
import type { Client } from "tdl";
|
||||||
|
import type { TelegramPhoto } from "./preview/match.js";
|
||||||
|
import { downloadPhotoThumbnail } from "./tdlib/download.js";
|
||||||
|
import { createOrFindPackageGroup, linkPackagesToGroup } from "./db/queries.js";
|
||||||
|
import { childLogger } from "./util/logger.js";
|
||||||
|
import { db } from "./db/client.js";
|
||||||
|
|
||||||
|
const log = childLogger("grouping");
|
||||||
|
|
||||||
|
export interface IndexedPackageRef {
|
||||||
|
packageId: string;
|
||||||
|
sourceMessageId: bigint;
|
||||||
|
mediaAlbumId?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* After a scan cycle's packages are individually indexed, detect album groups
|
||||||
|
* and create PackageGroup records linking the members.
|
||||||
|
*/
|
||||||
|
export async function processAlbumGroups(
|
||||||
|
client: Client,
|
||||||
|
sourceChannelId: string,
|
||||||
|
indexedPackages: IndexedPackageRef[],
|
||||||
|
photos: TelegramPhoto[]
|
||||||
|
): Promise<void> {
|
||||||
|
// Group indexed packages by mediaAlbumId
|
||||||
|
const albumMap = new Map<string, IndexedPackageRef[]>();
|
||||||
|
for (const pkg of indexedPackages) {
|
||||||
|
if (!pkg.mediaAlbumId || pkg.mediaAlbumId === "0") continue;
|
||||||
|
const group = albumMap.get(pkg.mediaAlbumId) ?? [];
|
||||||
|
group.push(pkg);
|
||||||
|
albumMap.set(pkg.mediaAlbumId, group);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (albumMap.size === 0) return;
|
||||||
|
|
||||||
|
log.info({ albumCount: albumMap.size }, "Detected album groups to process");
|
||||||
|
|
||||||
|
for (const [albumId, members] of albumMap) {
|
||||||
|
if (members.length < 2) continue;
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Find the first package's fileName for the group name fallback
|
||||||
|
const firstPkg = await db.package.findFirst({
|
||||||
|
where: { id: { in: members.map((m) => m.packageId) } },
|
||||||
|
orderBy: { sourceMessageId: "asc" },
|
||||||
|
select: { id: true, fileName: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
// Try to find a caption from the album's photo message
|
||||||
|
const albumPhoto = photos.find((p) => p.mediaAlbumId === albumId);
|
||||||
|
const groupName = albumPhoto?.caption || firstPkg?.fileName || "Unnamed Group";
|
||||||
|
|
||||||
|
// Download preview from album photo if available
|
||||||
|
let previewData: Buffer | null = null;
|
||||||
|
if (albumPhoto) {
|
||||||
|
previewData = await downloadPhotoThumbnail(client, albumPhoto.fileId);
|
||||||
|
}
|
||||||
|
|
||||||
|
const groupId = await createOrFindPackageGroup({
|
||||||
|
mediaAlbumId: albumId,
|
||||||
|
sourceChannelId,
|
||||||
|
name: groupName,
|
||||||
|
previewData,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Idempotent link — safe to re-run if some packages were indexed in prior scans
|
||||||
|
const packageIds = members.map((m) => m.packageId);
|
||||||
|
await linkPackagesToGroup(packageIds, groupId);
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
{ albumId, groupId, groupName, memberCount: packageIds.length },
|
||||||
|
"Linked packages to album group"
|
||||||
|
);
|
||||||
|
} catch (err) {
|
||||||
|
log.warn({ albumId, err }, "Failed to create album group — packages still indexed individually");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -47,6 +47,7 @@ import { readRarContents } from "./archive/rar-reader.js";
|
|||||||
import { read7zContents } from "./archive/sevenz-reader.js";
|
import { read7zContents } from "./archive/sevenz-reader.js";
|
||||||
import { byteLevelSplit, concatenateFiles } from "./archive/split.js";
|
import { byteLevelSplit, concatenateFiles } from "./archive/split.js";
|
||||||
import { uploadToChannel } from "./upload/channel.js";
|
import { uploadToChannel } from "./upload/channel.js";
|
||||||
|
import { processAlbumGroups, type IndexedPackageRef } from "./grouping.js";
|
||||||
import type { TelegramAccount, TelegramChannel } from "@prisma/client";
|
import type { TelegramAccount, TelegramChannel } from "@prisma/client";
|
||||||
import type { Client } from "tdl";
|
import type { Client } from "tdl";
|
||||||
|
|
||||||
@@ -722,10 +723,11 @@ async function processArchiveSets(
|
|||||||
|
|
||||||
// Track the highest message ID that was successfully processed
|
// Track the highest message ID that was successfully processed
|
||||||
let maxProcessedId: bigint | null = null;
|
let maxProcessedId: bigint | null = null;
|
||||||
|
const indexedPackageRefs: IndexedPackageRef[] = [];
|
||||||
|
|
||||||
for (let setIdx = 0; setIdx < archiveSets.length; setIdx++) {
|
for (let setIdx = 0; setIdx < archiveSets.length; setIdx++) {
|
||||||
try {
|
try {
|
||||||
await processOneArchiveSet(
|
const packageId = await processOneArchiveSet(
|
||||||
ctx,
|
ctx,
|
||||||
archiveSets[setIdx],
|
archiveSets[setIdx],
|
||||||
setIdx,
|
setIdx,
|
||||||
@@ -734,6 +736,15 @@ async function processArchiveSets(
|
|||||||
ingestionRunId
|
ingestionRunId
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if (packageId) {
|
||||||
|
const firstPart = archiveSets[setIdx].parts[0];
|
||||||
|
indexedPackageRefs.push({
|
||||||
|
packageId,
|
||||||
|
sourceMessageId: firstPart.id,
|
||||||
|
mediaAlbumId: firstPart.mediaAlbumId,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Set completed (ingested or confirmed duplicate) — advance watermark
|
// Set completed (ingested or confirmed duplicate) — advance watermark
|
||||||
const setMaxId = archiveSets[setIdx].parts.reduce(
|
const setMaxId = archiveSets[setIdx].parts.reduce(
|
||||||
(max, p) => (p.id > max ? p.id : max),
|
(max, p) => (p.id > max ? p.id : max),
|
||||||
@@ -771,6 +782,16 @@ async function processArchiveSets(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Post-processing: group packages by Telegram album ID
|
||||||
|
if (indexedPackageRefs.length > 0) {
|
||||||
|
await processAlbumGroups(
|
||||||
|
ctx.client,
|
||||||
|
channel.id,
|
||||||
|
indexedPackageRefs,
|
||||||
|
scanResult.photos
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
return maxProcessedId;
|
return maxProcessedId;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -784,7 +805,7 @@ async function processOneArchiveSet(
|
|||||||
totalSets: number,
|
totalSets: number,
|
||||||
previewMatches: Map<string, { id: bigint; fileId: string }>,
|
previewMatches: Map<string, { id: bigint; fileId: string }>,
|
||||||
ingestionRunId: string
|
ingestionRunId: string
|
||||||
): Promise<void> {
|
): Promise<string | null> {
|
||||||
const {
|
const {
|
||||||
client, runId, channelTitle, channel,
|
client, runId, channelTitle, channel,
|
||||||
destChannelTelegramId, destChannelId,
|
destChannelTelegramId, destChannelId,
|
||||||
@@ -814,7 +835,7 @@ async function processOneArchiveSet(
|
|||||||
totalFiles: totalSets,
|
totalFiles: totalSets,
|
||||||
zipsDuplicate: counters.zipsDuplicate,
|
zipsDuplicate: counters.zipsDuplicate,
|
||||||
});
|
});
|
||||||
return;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Size guard: skip archives that exceed WORKER_MAX_ZIP_SIZE_MB ──
|
// ── Size guard: skip archives that exceed WORKER_MAX_ZIP_SIZE_MB ──
|
||||||
@@ -848,7 +869,7 @@ async function processOneArchiveSet(
|
|||||||
partCount: archiveSet.parts.length,
|
partCount: archiveSet.parts.length,
|
||||||
accountId: ctx.accountId,
|
accountId: ctx.accountId,
|
||||||
});
|
});
|
||||||
return;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const tempPaths: string[] = [];
|
const tempPaths: string[] = [];
|
||||||
@@ -954,7 +975,7 @@ async function processOneArchiveSet(
|
|||||||
totalFiles: totalSets,
|
totalFiles: totalSets,
|
||||||
zipsDuplicate: counters.zipsDuplicate,
|
zipsDuplicate: counters.zipsDuplicate,
|
||||||
});
|
});
|
||||||
return;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Reading metadata ──
|
// ── Reading metadata ──
|
||||||
@@ -1127,7 +1148,7 @@ async function processOneArchiveSet(
|
|||||||
tags.push(channel.category);
|
tags.push(channel.category);
|
||||||
}
|
}
|
||||||
|
|
||||||
await createPackageWithFiles({
|
const pkg = await createPackageWithFiles({
|
||||||
contentHash,
|
contentHash,
|
||||||
fileName: archiveName,
|
fileName: archiveName,
|
||||||
fileSize: totalSize,
|
fileSize: totalSize,
|
||||||
@@ -1166,6 +1187,8 @@ async function processOneArchiveSet(
|
|||||||
{ fileName: archiveName, contentHash, fileCount: entries.length, creator },
|
{ fileName: archiveName, contentHash, fileCount: entries.length, creator },
|
||||||
"Archive ingested"
|
"Archive ingested"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
return pkg.id;
|
||||||
} finally {
|
} finally {
|
||||||
// ALWAYS delete temp files and the set directory
|
// ALWAYS delete temp files and the set directory
|
||||||
await deleteFiles([...tempPaths, ...splitPaths]);
|
await deleteFiles([...tempPaths, ...splitPaths]);
|
||||||
|
|||||||
Reference in New Issue
Block a user