feat: grouping phase 1 — schema, ungrouped tab, time-window grouping, hash verification

Schema:
- Add GroupingSource enum (ALBUM, MANUAL, AUTO_TIME, AUTO_PATTERN, etc.)
- Add groupingSource field to PackageGroup with backfill
- Add SystemNotification model for persistent alerts
- Add NotificationType and NotificationSeverity enums

Ungrouped staging tab:
- Add listUngroupedPackages/countUngroupedPackages queries
- Add "Ungrouped" tab to STL page showing packages without a group

Time-window auto-grouping:
- After album grouping, cluster ungrouped packages within configurable
  time window (default 5 min, AUTO_GROUP_TIME_WINDOW_MINUTES env var)
- Groups named from common filename prefix
- Groups created with groupingSource=AUTO_TIME

Hash verification after split:
- Re-hash split parts and compare to original contentHash
- Log error and create SystemNotification on mismatch
- Prevents silently corrupted split uploads

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-30 13:00:27 +02:00
parent 194c87a256
commit 9e78cc5d19
10 changed files with 415 additions and 5 deletions

View File

@@ -47,7 +47,8 @@ import { readRarContents } from "./archive/rar-reader.js";
import { read7zContents } from "./archive/sevenz-reader.js";
import { byteLevelSplit, concatenateFiles } from "./archive/split.js";
import { uploadToChannel } from "./upload/channel.js";
import { processAlbumGroups, type IndexedPackageRef } from "./grouping.js";
import { processAlbumGroups, processTimeWindowGroups, type IndexedPackageRef } from "./grouping.js";
import { db } from "./db/client.js";
import type { TelegramAccount, TelegramChannel } from "@prisma/client";
import type { Client } from "tdl";
@@ -790,6 +791,9 @@ async function processArchiveSets(
indexedPackageRefs,
scanResult.photos
);
// Time-window grouping for remaining ungrouped packages
await processTimeWindowGroups(channel.id, indexedPackageRefs);
}
return maxProcessedId;
@@ -1053,6 +1057,43 @@ async function processOneArchiveSet(
uploadPaths = splitPaths;
}
// ── Hash verification after split ──
// If we split/repacked, verify the split parts hash matches the original
if (splitPaths.length > 0) {
const splitHash = await hashParts(splitPaths);
if (splitHash !== contentHash) {
accountLog.error(
{ fileName: archiveName, originalHash: contentHash, splitHash, parts: splitPaths.length },
"Hash mismatch after split — file may be corrupted"
);
// Record notification for visibility
try {
await db.systemNotification.create({
data: {
type: "HASH_MISMATCH",
severity: "ERROR",
title: `Hash mismatch after splitting ${archiveName}`,
message: `Expected ${contentHash.slice(0, 16)}… but got ${splitHash.slice(0, 16)}… after splitting into ${splitPaths.length} parts`,
context: {
fileName: archiveName,
originalHash: contentHash,
splitHash,
partCount: splitPaths.length,
sourceChannelId: channel.id,
},
},
});
} catch {
// Best-effort notification
}
throw new Error(`Hash mismatch after split for ${archiveName}: expected ${contentHash}, got ${splitHash}`);
}
accountLog.debug(
{ fileName: archiveName, hash: contentHash.slice(0, 16), parts: splitPaths.length },
"Split hash verified — matches original"
);
}
// ── Uploading ──
// Check if a prior run already uploaded this file (orphaned upload scenario:
// file reached Telegram but DB write failed or worker crashed before indexing)