feat: group merge, ZIP/reply/caption grouping, integrity audit

Group merge UI:
- Add mergeGroups query and mergeGroupsAction server action
- Add "Start Merge" / "Merge Here" buttons to group row actions
- Two-step UX: click Start on source, click Merge Here on target

ZIP path prefix grouping (Signal 7):
- Compare PackageFile.path root folders across ungrouped packages
- Auto-group if 2+ packages share the same dominant root folder

Reply chain grouping (Signal 6):
- Capture reply_to_message_id during channel scanning
- Group archives that reply to the same root message
- Add replyToMessageId field to Package schema

Caption fuzzy match grouping (Signal 8):
- Capture source caption during channel scanning
- Normalize captions (strip extensions, extract significant words)
- Group packages with matching normalized caption keys
- Add sourceCaption field to Package schema

Periodic integrity audit:
- Check multipart packages for completeness (parts vs destMessageIds)
- Detect orphaned indexes (destChannelId set but no destMessageId)
- Runs after each ingestion cycle, deduplicates notifications

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-30 14:19:36 +02:00
parent 2c46ab0843
commit 7f9a03d4ee
13 changed files with 488 additions and 3 deletions

View File

@@ -47,7 +47,7 @@ import { readRarContents } from "./archive/rar-reader.js";
import { read7zContents } from "./archive/sevenz-reader.js";
import { byteLevelSplit, concatenateFiles } from "./archive/split.js";
import { uploadToChannel } from "./upload/channel.js";
import { processAlbumGroups, processTimeWindowGroups, processPatternGroups, processCreatorGroups, type IndexedPackageRef } from "./grouping.js";
import { processAlbumGroups, processTimeWindowGroups, processPatternGroups, processCreatorGroups, processZipPathGroups, processReplyChainGroups, processCaptionGroups, type IndexedPackageRef } from "./grouping.js";
import { db } from "./db/client.js";
import type { TelegramAccount, TelegramChannel } from "@prisma/client";
import type { Client } from "tdl";
@@ -816,6 +816,15 @@ async function processArchiveSets(
// Creator-based grouping (3+ files from same creator)
await processCreatorGroups(channel.id, indexedPackageRefs);
// ZIP path prefix grouping (shared root folder inside archives)
await processZipPathGroups(channel.id, indexedPackageRefs);
// Reply chain grouping (messages replying to same root)
await processReplyChainGroups(channel.id, indexedPackageRefs);
// Caption fuzzy match grouping
await processCaptionGroups(channel.id, indexedPackageRefs);
}
return maxProcessedId;
@@ -1235,6 +1244,8 @@ async function processOneArchiveSet(
tags,
previewData,
previewMsgId,
sourceCaption: archiveSet.parts[0].caption ?? null,
replyToMessageId: archiveSet.parts[0].replyToMessageId ?? null,
files: entries,
});