mirror of
https://github.com/xCyanGrizzly/DragonsStash.git
synced 2026-05-11 06:11:15 +00:00
feat: complete remaining features — training, FTS, bot groups, repair, re-tag
All checks were successful
continuous-integration/drone/push Build is passing
Manual override training (GroupingRule): - Learn patterns from manual group creation (common filename prefix or creator) - Apply learned rules as first auto-grouping pass (highest confidence after albums) - GroupingRule model stores pattern, channel, signal type, confidence Hash verification after upload: - Re-hash upload files on disk before indexing to catch disk corruption - Creates HASH_MISMATCH notification on discrepancy Grouping conflict detection: - After all grouping passes, check if grouped packages match rules from different groups - Creates GROUPING_CONFLICT notification for manual review Per-channel grouping flags: - Add autoGroupEnabled boolean to TelegramChannel (default true) - Auto-grouping passes (all except album) gated behind this flag - Album grouping always runs as it reflects Telegram's native behavior Full-text search (tsvector): - Add searchVector tsvector column with GIN index and auto-update trigger - Backfill 1870 existing packages - FTS with ts_rank for ranked results, ILIKE fallback for short/failed queries - Applied to both web app and bot search Bot group awareness: - /group <query> — view group info or search groups by name - /sendgroup <id> — send all packages in a group to linked Telegram account Bulk repair: - repairPackageAction clears dest info and resets watermark for re-processing - Repair button in notification bell for MISSING_PART and HASH_MISMATCH alerts - /api/notifications/repair endpoint Retroactive category re-tagging: - When channel category changes, auto-update tags on all existing packages - Removes old category tag, adds new one Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -617,7 +617,7 @@ export async function createAutoGroup(input: {
|
||||
sourceChannelId: string;
|
||||
name: string;
|
||||
packageIds: string[];
|
||||
groupingSource: "AUTO_TIME" | "AUTO_PATTERN" | "AUTO_ZIP" | "AUTO_CAPTION" | "AUTO_REPLY";
|
||||
groupingSource: "ALBUM" | "MANUAL" | "AUTO_TIME" | "AUTO_PATTERN" | "AUTO_ZIP" | "AUTO_CAPTION" | "AUTO_REPLY";
|
||||
}): Promise<string> {
|
||||
const group = await db.packageGroup.create({
|
||||
data: {
|
||||
|
||||
@@ -79,6 +79,69 @@ export async function processAlbumGroups(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply learned GroupingRules from manual overrides.
|
||||
* For each rule, find ungrouped packages whose fileName contains the pattern.
|
||||
*/
|
||||
export async function processRuleBasedGroups(
|
||||
sourceChannelId: string,
|
||||
indexedPackages: IndexedPackageRef[]
|
||||
): Promise<void> {
|
||||
const rules = await db.groupingRule.findMany({
|
||||
where: { sourceChannelId },
|
||||
orderBy: { confidence: "desc" },
|
||||
});
|
||||
|
||||
if (rules.length === 0) return;
|
||||
|
||||
const ungrouped = await db.package.findMany({
|
||||
where: {
|
||||
id: { in: indexedPackages.map((p) => p.packageId) },
|
||||
packageGroupId: null,
|
||||
},
|
||||
select: { id: true, fileName: true, creator: true },
|
||||
});
|
||||
|
||||
if (ungrouped.length < 2) return;
|
||||
|
||||
for (const rule of rules) {
|
||||
const matches = ungrouped.filter((pkg) => {
|
||||
const lower = rule.pattern.toLowerCase();
|
||||
return pkg.fileName.toLowerCase().includes(lower) ||
|
||||
(pkg.creator && pkg.creator.toLowerCase().includes(lower));
|
||||
});
|
||||
|
||||
if (matches.length < 2) continue;
|
||||
|
||||
// Check if any are already grouped (by a previous rule in this loop)
|
||||
const stillUngrouped = await db.package.findMany({
|
||||
where: {
|
||||
id: { in: matches.map((m) => m.id) },
|
||||
packageGroupId: null,
|
||||
},
|
||||
select: { id: true },
|
||||
});
|
||||
|
||||
if (stillUngrouped.length < 2) continue;
|
||||
|
||||
try {
|
||||
const groupId = await createAutoGroup({
|
||||
sourceChannelId,
|
||||
name: rule.pattern,
|
||||
packageIds: stillUngrouped.map((m) => m.id),
|
||||
groupingSource: "MANUAL",
|
||||
});
|
||||
|
||||
log.info(
|
||||
{ groupId, ruleId: rule.id, pattern: rule.pattern, memberCount: stillUngrouped.length },
|
||||
"Applied learned grouping rule"
|
||||
);
|
||||
} catch (err) {
|
||||
log.warn({ err, ruleId: rule.id }, "Failed to apply grouping rule");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* After album grouping, cluster remaining ungrouped packages from the same channel
|
||||
* that were posted within a configurable time window.
|
||||
@@ -525,6 +588,64 @@ function extractRootFolder(paths: string[]): string | null {
|
||||
return maxSegment;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect packages that could have been grouped differently.
|
||||
* Checks if any grouped package's filename matches a GroupingRule
|
||||
* that would place it in a different group.
|
||||
*/
|
||||
export async function detectGroupingConflicts(
|
||||
sourceChannelId: string,
|
||||
indexedPackages: IndexedPackageRef[]
|
||||
): Promise<void> {
|
||||
const rules = await db.groupingRule.findMany({
|
||||
where: { sourceChannelId },
|
||||
});
|
||||
if (rules.length === 0) return;
|
||||
|
||||
const grouped = await db.package.findMany({
|
||||
where: {
|
||||
id: { in: indexedPackages.map((p) => p.packageId) },
|
||||
packageGroupId: { not: null },
|
||||
},
|
||||
select: {
|
||||
id: true,
|
||||
fileName: true,
|
||||
packageGroupId: true,
|
||||
packageGroup: { select: { name: true, groupingSource: true } },
|
||||
},
|
||||
});
|
||||
|
||||
for (const pkg of grouped) {
|
||||
for (const rule of rules) {
|
||||
if (pkg.fileName.toLowerCase().includes(rule.pattern.toLowerCase())) {
|
||||
// Check if the rule's source group is different from current group
|
||||
if (rule.createdByGroupId && rule.createdByGroupId !== pkg.packageGroupId) {
|
||||
try {
|
||||
await db.systemNotification.create({
|
||||
data: {
|
||||
type: "GROUPING_CONFLICT",
|
||||
severity: "INFO",
|
||||
title: `Potential grouping conflict: ${pkg.fileName}`,
|
||||
message: `Grouped by ${pkg.packageGroup?.groupingSource ?? "unknown"} into "${pkg.packageGroup?.name}", but also matches rule "${rule.pattern}" from a different manual group`,
|
||||
context: {
|
||||
packageId: pkg.id,
|
||||
fileName: pkg.fileName,
|
||||
currentGroupId: pkg.packageGroupId,
|
||||
matchedRuleId: rule.id,
|
||||
matchedPattern: rule.pattern,
|
||||
},
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
// Best-effort
|
||||
}
|
||||
break; // One notification per package
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the longest common prefix among a list of filenames,
|
||||
* trimming trailing separators and partial words.
|
||||
|
||||
@@ -47,7 +47,7 @@ import { readRarContents } from "./archive/rar-reader.js";
|
||||
import { read7zContents } from "./archive/sevenz-reader.js";
|
||||
import { byteLevelSplit, concatenateFiles } from "./archive/split.js";
|
||||
import { uploadToChannel } from "./upload/channel.js";
|
||||
import { processAlbumGroups, processTimeWindowGroups, processPatternGroups, processCreatorGroups, processZipPathGroups, processReplyChainGroups, processCaptionGroups, type IndexedPackageRef } from "./grouping.js";
|
||||
import { processAlbumGroups, processRuleBasedGroups, processTimeWindowGroups, processPatternGroups, processCreatorGroups, processZipPathGroups, processReplyChainGroups, processCaptionGroups, detectGroupingConflicts, type IndexedPackageRef } from "./grouping.js";
|
||||
import { db } from "./db/client.js";
|
||||
import type { TelegramAccount, TelegramChannel } from "@prisma/client";
|
||||
import type { Client } from "tdl";
|
||||
@@ -808,23 +808,37 @@ async function processArchiveSets(
|
||||
scanResult.photos
|
||||
);
|
||||
|
||||
// Time-window grouping for remaining ungrouped packages
|
||||
await processTimeWindowGroups(channel.id, indexedPackageRefs);
|
||||
// Auto-grouping passes (gated by per-channel flag)
|
||||
const channelRecord = await db.telegramChannel.findUnique({
|
||||
where: { id: channel.id },
|
||||
select: { autoGroupEnabled: true },
|
||||
});
|
||||
|
||||
// Pattern-based grouping (date patterns, project slugs)
|
||||
await processPatternGroups(channel.id, indexedPackageRefs);
|
||||
if (channelRecord?.autoGroupEnabled !== false) {
|
||||
// Learned rule-based grouping (from manual overrides)
|
||||
await processRuleBasedGroups(channel.id, indexedPackageRefs);
|
||||
|
||||
// Creator-based grouping (3+ files from same creator)
|
||||
await processCreatorGroups(channel.id, indexedPackageRefs);
|
||||
// Time-window grouping for remaining ungrouped packages
|
||||
await processTimeWindowGroups(channel.id, indexedPackageRefs);
|
||||
|
||||
// ZIP path prefix grouping (shared root folder inside archives)
|
||||
await processZipPathGroups(channel.id, indexedPackageRefs);
|
||||
// Pattern-based grouping (date patterns, project slugs)
|
||||
await processPatternGroups(channel.id, indexedPackageRefs);
|
||||
|
||||
// Reply chain grouping (messages replying to same root)
|
||||
await processReplyChainGroups(channel.id, indexedPackageRefs);
|
||||
// Creator-based grouping (3+ files from same creator)
|
||||
await processCreatorGroups(channel.id, indexedPackageRefs);
|
||||
|
||||
// Caption fuzzy match grouping
|
||||
await processCaptionGroups(channel.id, indexedPackageRefs);
|
||||
// ZIP path prefix grouping (shared root folder inside archives)
|
||||
await processZipPathGroups(channel.id, indexedPackageRefs);
|
||||
|
||||
// Reply chain grouping (messages replying to same root)
|
||||
await processReplyChainGroups(channel.id, indexedPackageRefs);
|
||||
|
||||
// Caption fuzzy match grouping
|
||||
await processCaptionGroups(channel.id, indexedPackageRefs);
|
||||
}
|
||||
|
||||
// Check for potential grouping conflicts
|
||||
await detectGroupingConflicts(channel.id, indexedPackageRefs);
|
||||
}
|
||||
|
||||
return maxProcessedId;
|
||||
@@ -1162,6 +1176,34 @@ async function processOneArchiveSet(
|
||||
);
|
||||
}
|
||||
|
||||
// ── Post-upload integrity check ──
|
||||
// Verify the files on disk still match before we index
|
||||
if (uploadPaths.length > 0 && !existingUpload) {
|
||||
try {
|
||||
const postUploadHash = await hashParts(uploadPaths);
|
||||
if (splitPaths.length > 0) {
|
||||
// Split files — hash should match the split hash (already verified above)
|
||||
// No additional check needed since we verified split hash = original hash
|
||||
} else if (postUploadHash !== contentHash) {
|
||||
accountLog.error(
|
||||
{ fileName: archiveName, originalHash: contentHash, postUploadHash },
|
||||
"Hash changed between hashing and upload — possible disk corruption"
|
||||
);
|
||||
await db.systemNotification.create({
|
||||
data: {
|
||||
type: "HASH_MISMATCH",
|
||||
severity: "ERROR",
|
||||
title: `Post-upload hash mismatch: ${archiveName}`,
|
||||
message: `Hash changed between download and upload. Original: ${contentHash.slice(0, 16)}…, post-upload: ${postUploadHash.slice(0, 16)}…`,
|
||||
context: { fileName: archiveName, originalHash: contentHash, postUploadHash, sourceChannelId: channel.id },
|
||||
},
|
||||
});
|
||||
}
|
||||
} catch {
|
||||
// Best-effort — don't fail the ingestion
|
||||
}
|
||||
}
|
||||
|
||||
// ── Preview thumbnail ──
|
||||
let previewData: Buffer | null = null;
|
||||
let previewMsgId: bigint | null = null;
|
||||
|
||||
Reference in New Issue
Block a user