diff --git a/bot/src/commands.ts b/bot/src/commands.ts index 1841b75..5937bc7 100644 --- a/bot/src/commands.ts +++ b/bot/src/commands.ts @@ -10,7 +10,10 @@ import { getSubscriptions, addSubscription, removeSubscription, + getGroupById, + searchGroups, } from "./db/queries.js"; +import { db } from "./db/client.js"; import { sendTextMessage, sendPhotoMessage } from "./tdlib/client.js"; const log = childLogger("commands"); @@ -78,6 +81,12 @@ export async function handleMessage(msg: IncomingMessage): Promise { case "/status": await handleStatus(chatId, userId); break; + case "/group": + await handleGroup(chatId, args); + break; + case "/sendgroup": + await handleSendGroup(chatId, userId, args); + break; default: await sendTextMessage( chatId, @@ -117,6 +126,8 @@ async function handleStart( `/search <query> — Search packages`, `/latest [n] — Show latest packages`, `/package <id> — Package details`, + `/group <id or name> — View group info and package list`, + `/sendgroup <id> — Send all packages in a group to yourself`, `/link <code> — Link your Telegram to your web account`, `/subscribe <keyword> — Get notified for new packages`, `/subscriptions — View your subscriptions`, @@ -136,6 +147,8 @@ async function handleHelp(chatId: bigint): Promise { `/search <query> — Search by filename or creator`, `/latest [n] — Show n most recent packages (default: 5)`, `/package <id> — View package details and file list`, + `/group <id or name> — View group info and package list`, + `/sendgroup <id> — Send all packages in a group to yourself`, ``, `🔗 Account Linking`, `/link <code> — Link Telegram to your web account`, @@ -432,6 +445,168 @@ async function handleStatus(chatId: bigint, userId: bigint): Promise { } } +async function handleGroup(chatId: bigint, query: string): Promise { + if (!query) { + await sendTextMessage( + chatId, + "Usage: /group <id or name>\n\nProvide a group ID (starts with 'c') or a name to search.", + "textParseModeHTML" + ); + return; + } + + const trimmed = query.trim(); + + // If it looks like a cuid (starts with 'c', ~25 chars), look up by ID directly + if (/^c[a-z0-9]{20,}$/i.test(trimmed)) { + const group = await getGroupById(trimmed); + if (!group) { + await sendTextMessage(chatId, "Group not found.", "textParseModeHTML"); + return; + } + + const packageLines = group.packages.slice(0, 20).map((pkg, i) => { + const size = formatSize(pkg.fileSize); + return ` ${i + 1}. ${escapeHtml(pkg.fileName)} (${size}, ${pkg.fileCount} files) — ${pkg.id}`; + }); + const more = group.packages.length > 20 + ? `\n ... and ${group.packages.length - 20} more` + : ""; + + const response = [ + `📦 Group: ${escapeHtml(group.name)}`, + ``, + `Packages: ${group.packages.length}`, + `ID: ${group.id}`, + ``, + `Contents:`, + ...packageLines, + more, + ``, + `Use /sendgroup ${group.id} to receive all packages.`, + ] + .filter((l) => l !== "") + .join("\n"); + + await sendTextMessage(chatId, response, "textParseModeHTML"); + return; + } + + // Otherwise search by name + const groups = await searchGroups(trimmed, 5); + + if (groups.length === 0) { + await sendTextMessage( + chatId, + `No groups found matching "${escapeHtml(trimmed)}".`, + "textParseModeHTML" + ); + return; + } + + const lines = groups.map( + (g, i) => + `${i + 1}. 
${escapeHtml(g.name)} — ${g._count.packages} package(s)\n ID: ${g.id}` + ); + + const response = [ + `🔍 Groups matching "${escapeHtml(trimmed)}":`, + ``, + ...lines, + ``, + `Use /group <id> for full details.`, + ].join("\n"); + + await sendTextMessage(chatId, response, "textParseModeHTML"); +} + +async function handleSendGroup( + chatId: bigint, + userId: bigint, + args: string +): Promise { + if (!args) { + await sendTextMessage( + chatId, + "Usage: /sendgroup <group-id>", + "textParseModeHTML" + ); + return; + } + + const groupId = args.trim(); + const group = await getGroupById(groupId); + + if (!group) { + await sendTextMessage(chatId, "Group not found.", "textParseModeHTML"); + return; + } + + // Require account linking + const link = await findLinkByTelegramUserId(userId); + if (!link) { + await sendTextMessage( + chatId, + "You must link your account before receiving packages.\nUse /link <code> to connect.", + "textParseModeHTML" + ); + return; + } + + // Only send packages that have been uploaded to the destination channel + const sendable = group.packages.filter( + (pkg) => pkg.destChannelId && pkg.destMessageId + ); + + if (sendable.length === 0) { + await sendTextMessage( + chatId, + `No packages in group "${escapeHtml(group.name)}" are ready to send yet.`, + "textParseModeHTML" + ); + return; + } + + // Create a BotSendRequest for each sendable package + const requests = await Promise.all( + sendable.map((pkg) => + db.botSendRequest.create({ + data: { + packageId: pkg.id, + telegramLinkId: link.id, + requestedByUserId: link.userId, + status: "PENDING", + }, + }) + ) + ); + + // Fire pg_notify for each request so the send listener picks them up + for (const req of requests) { + await db.$queryRawUnsafe( + `SELECT pg_notify('bot_send', $1)`, + req.id + ).catch(() => { + // Best-effort — the bot also processes PENDING requests on its send queue + }); + } + + await sendTextMessage( + chatId, + [ + `✅ Queued ${requests.length} package(s) from "${escapeHtml(group.name)}"`, + ``, + `You'll receive each archive shortly. 
Use /package <id> to check individual packages.`, + ].join("\n"), + "textParseModeHTML" + ); + + log.info( + { groupId, packageCount: requests.length, userId: userId.toString() }, + "Group send queued" + ); +} + function escapeHtml(text: string): string { return text .replace(/&/g, "&") diff --git a/bot/src/db/queries.ts b/bot/src/db/queries.ts index 888e23e..e6f8bdf 100644 --- a/bot/src/db/queries.ts +++ b/bot/src/db/queries.ts @@ -53,7 +53,52 @@ export async function createTelegramLink( // ── Package search ── export async function searchPackages(query: string, limit = 10) { - const packages = await db.package.findMany({ + // Try full-text search first + if (query.length >= 3) { + const tsQuery = query + .trim() + .split(/\s+/) + .filter((w) => w.length >= 2) + .map((w) => w.replace(/[^a-zA-Z0-9]/g, "")) + .filter(Boolean) + .join(" & "); + + if (tsQuery) { + try { + const ftsResults = await db.$queryRawUnsafe<{ id: string }[]>( + `SELECT id FROM packages + WHERE "searchVector" @@ to_tsquery('english', $1) + ORDER BY ts_rank("searchVector", to_tsquery('english', $1)) DESC + LIMIT $2`, + tsQuery, + limit + ); + + if (ftsResults.length > 0) { + return db.package.findMany({ + where: { id: { in: ftsResults.map((r) => r.id) } }, + orderBy: { indexedAt: "desc" }, + select: { + id: true, + fileName: true, + fileSize: true, + archiveType: true, + fileCount: true, + creator: true, + indexedAt: true, + destChannelId: true, + destMessageId: true, + }, + }); + } + } catch { + // FTS failed — fall back to ILIKE + } + } + } + + // Fallback: ILIKE search + return db.package.findMany({ where: { OR: [ { fileName: { contains: query, mode: "insensitive" } }, @@ -74,7 +119,44 @@ export async function searchPackages(query: string, limit = 10) { destMessageId: true, }, }); - return packages; +} + +// ── Group queries ── + +export async function getGroupById(groupId: string) { + return db.packageGroup.findUnique({ + where: { id: groupId }, + include: { + packages: { + orderBy: { indexedAt: "desc" }, + select: { + id: true, + fileName: true, + fileSize: true, + archiveType: true, + fileCount: true, + creator: true, + destChannelId: true, + destMessageId: true, + }, + }, + }, + }); +} + +export async function searchGroups(query: string, limit = 5) { + return db.packageGroup.findMany({ + where: { + name: { contains: query, mode: "insensitive" }, + }, + orderBy: { createdAt: "desc" }, + take: limit, + select: { + id: true, + name: true, + _count: { select: { packages: true } }, + }, + }); } export async function getLatestPackages(limit = 5) { @@ -122,6 +204,9 @@ export async function getPendingSendRequest(requestId: string) { archiveType: true, destChannelId: true, destMessageId: true, + destMessageIds: true, + isMultipart: true, + partCount: true, previewData: true, sourceChannel: { select: { title: true, telegramId: true } }, }, diff --git a/bot/src/send-listener.ts b/bot/src/send-listener.ts index 4102b06..58c1573 100644 --- a/bot/src/send-listener.ts +++ b/bot/src/send-listener.ts @@ -7,7 +7,7 @@ import { findMatchingSubscriptions, getGlobalDestinationChannel, } from "./db/queries.js"; -import { copyMessageToUser, sendTextMessage, sendPhotoMessage } from "./tdlib/client.js"; +import { copyMessageToUser, copyMultipleMessagesToUser, sendTextMessage, sendPhotoMessage } from "./tdlib/client.js"; import { sleep } from "./util/flood-wait.js"; const log = childLogger("send-listener"); @@ -154,11 +154,25 @@ async function processSendRequest(requestId: string): Promise { } // Forward the actual archive file(s) from 
destination channel - await copyMessageToUser( - destChannel.telegramId, - pkg.destMessageId, - targetUserId - ); + const messageIds = pkg.destMessageIds as bigint[] | undefined; + if (messageIds && messageIds.length > 1) { + log.info( + { requestId, parts: messageIds.length }, + "Sending multi-part archive" + ); + await copyMultipleMessagesToUser( + destChannel.telegramId, + messageIds, + targetUserId + ); + } else { + // Single part or legacy (no destMessageIds populated) + await copyMessageToUser( + destChannel.telegramId, + pkg.destMessageId, + targetUserId + ); + } await updateSendRequest(requestId, "SENT"); log.info({ requestId }, "Send request completed successfully"); diff --git a/bot/src/tdlib/client.ts b/bot/src/tdlib/client.ts index a9c45a1..683fcec 100644 --- a/bot/src/tdlib/client.ts +++ b/bot/src/tdlib/client.ts @@ -121,6 +121,25 @@ export async function copyMessageToUser( }, fileName); } +/** + * Send multiple document messages from a channel to a user's DM. + * Used for multi-part archives where each part is a separate Telegram message. + * Sends parts sequentially with a small delay to avoid rate limits. + */ +export async function copyMultipleMessagesToUser( + fromChatId: bigint, + messageIds: bigint[], + toUserId: bigint +): Promise { + for (let i = 0; i < messageIds.length; i++) { + await copyMessageToUser(fromChatId, messageIds[i], toUserId); + // Small delay between parts to avoid rate limits + if (i < messageIds.length - 1) { + await new Promise((resolve) => setTimeout(resolve, 1000)); + } + } +} + /** * Send a message and wait for Telegram to confirm delivery. * Returns when updateMessageSendSucceeded fires for the temp message. diff --git a/docker-compose.yml b/docker-compose.yml index 285d36a..fe5ddc7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,6 +28,8 @@ services: timeout: 5s retries: 3 start_period: 60s + volumes: + - manual_uploads:/data/uploads restart: unless-stopped deploy: resources: @@ -54,6 +56,7 @@ services: volumes: - tdlib_state:/data/tdlib - tmp_zips:/tmp/zips + - manual_uploads:/data/uploads depends_on: db: condition: service_healthy @@ -121,6 +124,7 @@ volumes: tdlib_state: tdlib_bot_state: tmp_zips: + manual_uploads: networks: frontend: diff --git a/docs/superpowers/plans/2026-03-26-multipart-send-kickstarter-linking.md b/docs/superpowers/plans/2026-03-26-multipart-send-kickstarter-linking.md new file mode 100644 index 0000000..14e08bf --- /dev/null +++ b/docs/superpowers/plans/2026-03-26-multipart-send-kickstarter-linking.md @@ -0,0 +1,964 @@ +# Multi-Part Send Fix & Kickstarter Package Linking + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Fix multi-part package forwarding so all archive parts reach the user, and add UI to link STL packages to kickstarters with "send all" capability. + +**Architecture:** Two independent subsystems. (A) Store all destination message IDs when the worker uploads multi-part archives, then have the bot forward every part. (B) Add a package-linker dialog in the kickstarter UI using the existing `linkPackages` action, plus a "send all" action that queues every linked package. + +**Tech Stack:** Prisma (schema + migration), TypeScript worker/bot services, Next.js App Router (server actions + React client components), shadcn/ui, TanStack Table. 
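+
+For orientation, subsystem A changes what a multi-part package record carries. A sketch of the resulting record shape (field names match the schema changes introduced below; the values are illustrative, not real data):
+
+```typescript
+// A 3-part archive, once re-indexed by the worker (Tasks 1-3):
+const pkg = {
+  destMessageId: 111n,                // first part, kept for backward compatibility
+  destMessageIds: [111n, 222n, 333n], // NEW: every uploaded part, in order
+  isMultipart: true,
+  partCount: 3,
+};
+// The bot (Task 4) forwards every entry of destMessageIds when it holds more
+// than one ID, and falls back to destMessageId for legacy rows.
+```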
+ +--- + +## File Map + +### Subsystem A — Multi-Part Send Fix + +| Action | File | Responsibility | +|--------|------|----------------| +| Modify | `prisma/schema.prisma` | Add `destMessageIds BigInt[]` to Package | +| Create | `prisma/migrations/_add_dest_message_ids/migration.sql` | Migration SQL | +| Modify | `worker/src/upload/channel.ts` | Return all message IDs from `uploadToChannel` | +| Modify | `worker/src/db/queries.ts` | Add `destMessageIds` to `CreatePackageInput` and `createPackageWithFiles` | +| Modify | `worker/src/worker.ts` | Pass all message IDs when creating package | +| Modify | `bot/src/db/queries.ts` | Include `destMessageIds` in `getPendingSendRequest` | +| Modify | `bot/src/send-listener.ts` | Forward all parts, not just the first | + +### Subsystem B — Kickstarter Package Linking UI + +| Action | File | Responsibility | +|--------|------|----------------| +| Create | `src/app/(app)/kickstarters/_components/package-linker-dialog.tsx` | Dialog with package search + selection for linking | +| Modify | `src/app/(app)/kickstarters/_components/kickstarter-columns.tsx` | Add "Link Packages" and "Send All" actions to row menu | +| Modify | `src/app/(app)/kickstarters/_components/kickstarter-table.tsx` | Wire up new dialogs + state | +| Modify | `src/app/(app)/kickstarters/actions.ts` | Add `sendAllKickstarterPackages` action | +| Modify | `src/data/kickstarter.queries.ts` | Add query to search packages for linking | + +--- + +## Task 1: Add `destMessageIds` to Prisma Schema + Migration + +**Files:** +- Modify: `prisma/schema.prisma:470-471` +- Create: migration SQL + +- [ ] **Step 1: Add field to schema** + +In `prisma/schema.prisma`, add `destMessageIds` after `destMessageId`: + +```prisma + destMessageId BigInt? + destMessageIds BigInt[] @default([]) +``` + +- [ ] **Step 2: Create migration SQL manually** + +Create the migration directory and SQL file. 
The migration adds the column with a default and backfills existing rows by copying `destMessageId` into the array where it's non-null: + +```sql +-- AlterTable +ALTER TABLE "packages" ADD COLUMN "destMessageIds" BIGINT[] DEFAULT ARRAY[]::BIGINT[]; + +-- Backfill: copy existing destMessageId into the array +UPDATE "packages" +SET "destMessageIds" = ARRAY["destMessageId"] +WHERE "destMessageId" IS NOT NULL; +``` + +- [ ] **Step 3: Apply migration to database** + +```bash +docker exec dragonsstash-db psql -U dragons -d dragonsstash -f - < migration.sql +``` + +- [ ] **Step 4: Regenerate Prisma client** + +Use the app container (which has node/prisma) to regenerate: + +```bash +docker exec dragonsstash npx prisma generate +``` + +Or, if running locally with node: `npx prisma generate` + +- [ ] **Step 5: Commit** + +```bash +git add prisma/schema.prisma prisma/migrations/ +git commit -m "feat: add destMessageIds field to Package for multi-part forwarding" +``` + +--- + +## Task 2: Worker — Return All Message IDs from Upload + +**Files:** +- Modify: `worker/src/upload/channel.ts:10-12,25-74` + +- [ ] **Step 1: Update UploadResult interface** + +In `worker/src/upload/channel.ts`, change the interface to include all IDs: + +```typescript +export interface UploadResult { + messageId: bigint; + messageIds: bigint[]; +} +``` + +- [ ] **Step 2: Collect all message IDs in uploadToChannel** + +Replace the upload loop to track all message IDs: + +```typescript +export async function uploadToChannel( + client: Client, + chatId: bigint, + filePaths: string[], + caption?: string +): Promise { + const allMessageIds: bigint[] = []; + + for (let i = 0; i < filePaths.length; i++) { + const filePath = filePaths[i]; + const fileCaption = i === 0 && caption ? caption : undefined; + + const fileName = path.basename(filePath); + let fileSizeMB = 0; + try { + const s = await stat(filePath); + fileSizeMB = Math.round(s.size / (1024 * 1024)); + } catch { + // Non-critical + } + + log.info( + { chatId: Number(chatId), fileName, sizeMB: fileSizeMB, part: i + 1, total: filePaths.length }, + "Uploading file to channel" + ); + + const serverMsgId = await sendWithRetry(client, chatId, filePath, fileCaption, fileName, fileSizeMB); + allMessageIds.push(serverMsgId); + + // Rate limit delay between uploads + if (i < filePaths.length - 1) { + await sleep(config.apiDelayMs); + } + } + + if (allMessageIds.length === 0) { + throw new Error("Upload failed: no messages sent"); + } + + log.info( + { chatId: Number(chatId), messageId: Number(allMessageIds[0]), files: filePaths.length }, + "All uploads confirmed by Telegram" + ); + + return { messageId: allMessageIds[0], messageIds: allMessageIds }; +} +``` + +- [ ] **Step 3: Commit** + +```bash +git add worker/src/upload/channel.ts +git commit -m "feat: return all message IDs from uploadToChannel for multi-part" +``` + +--- + +## Task 3: Worker — Store All Message IDs in Database + +**Files:** +- Modify: `worker/src/db/queries.ts:104-155` +- Modify: `worker/src/worker.ts:1056-1086` + +- [ ] **Step 1: Add destMessageIds to CreatePackageInput** + +In `worker/src/db/queries.ts`, add the field to the interface: + +```typescript +export interface CreatePackageInput { + // ... existing fields ... + destMessageId?: bigint; + destMessageIds?: bigint[]; + // ... rest ... +} +``` + +- [ ] **Step 2: Store destMessageIds in createPackageWithFiles** + +In the `db.package.create` call inside `createPackageWithFiles`, add: + +```typescript +destMessageIds: input.destMessageIds ?? 
(input.destMessageId ? [input.destMessageId] : []), +``` + +- [ ] **Step 3: Pass messageIds from worker pipeline** + +In `worker/src/worker.ts`, the upload section (around line 1068-1085) currently does: + +```typescript +destResult = await uploadToChannel(client, destChannelTelegramId, uploadPaths); +``` + +After this, when calling `createPackageWithFiles`, add `destMessageIds`: + +```typescript +const pkg = await createPackageWithFiles({ + // ... existing fields ... + destMessageId: destResult.messageId, + destMessageIds: destResult.messageIds, + // ... rest ... +}); +``` + +- [ ] **Step 4: Commit** + +```bash +git add worker/src/db/queries.ts worker/src/worker.ts +git commit -m "feat: store all multi-part message IDs in package record" +``` + +--- + +## Task 4: Bot — Forward All Parts + +**Files:** +- Modify: `bot/src/db/queries.ts:110-132` +- Modify: `bot/src/send-listener.ts:105-169` +- Modify: `bot/src/tdlib/client.ts:66-122` + +- [ ] **Step 1: Include destMessageIds in bot query** + +In `bot/src/db/queries.ts`, add `destMessageIds` to the `getPendingSendRequest` select: + +```typescript +package: { + select: { + id: true, + fileName: true, + fileSize: true, + fileCount: true, + creator: true, + tags: true, + archiveType: true, + destChannelId: true, + destMessageId: true, + destMessageIds: true, // <-- ADD THIS + isMultipart: true, // <-- ADD THIS (for logging) + partCount: true, // <-- ADD THIS (for logging) + previewData: true, + sourceChannel: { select: { title: true, telegramId: true } }, + }, +}, +``` + +- [ ] **Step 2: Add copyMultipleMessagesToUser helper** + +In `bot/src/tdlib/client.ts`, add a new export after `copyMessageToUser`: + +```typescript +/** + * Send multiple document messages from a channel to a user's DM. + * Used for multi-part archives where each part is a separate Telegram message. + * Sends parts sequentially with a small delay to avoid rate limits. 
+ */ +export async function copyMultipleMessagesToUser( + fromChatId: bigint, + messageIds: bigint[], + toUserId: bigint +): Promise { + for (let i = 0; i < messageIds.length; i++) { + await copyMessageToUser(fromChatId, messageIds[i], toUserId); + // Small delay between parts to avoid rate limits + if (i < messageIds.length - 1) { + await new Promise((resolve) => setTimeout(resolve, 1000)); + } + } +} +``` + +- [ ] **Step 3: Update processSendRequest to forward all parts** + +In `bot/src/send-listener.ts`, update the import to include the new function: + +```typescript +import { copyMessageToUser, copyMultipleMessagesToUser, sendTextMessage, sendPhotoMessage } from "./tdlib/client.js"; +``` + +Then replace the single `copyMessageToUser` call (around line 157) with logic that forwards all parts: + +```typescript + // Forward the actual archive file(s) from destination channel + const messageIds = pkg.destMessageIds as bigint[] | undefined; + if (messageIds && messageIds.length > 1) { + log.info( + { requestId, parts: messageIds.length }, + "Sending multi-part archive" + ); + await copyMultipleMessagesToUser( + destChannel.telegramId, + messageIds, + targetUserId + ); + } else { + // Single part or legacy (no destMessageIds populated) + await copyMessageToUser( + destChannel.telegramId, + pkg.destMessageId, + targetUserId + ); + } +``` + +- [ ] **Step 4: Commit** + +```bash +git add bot/src/db/queries.ts bot/src/send-listener.ts bot/src/tdlib/client.ts +git commit -m "feat: forward all parts of multi-part archives via bot" +``` + +--- + +## Task 5: Rebuild & Deploy Worker + Bot + +- [ ] **Step 1: Rebuild worker image** + +```bash +docker compose -f docker-compose.dev.yml build worker +docker tag dragonsstash-worker:latest git.samagsteribbe.nl/admin/dragonsstash-worker:latest +docker compose -p dragonsstash -f /opt/stacks/DragonsStash/docker-compose.yml up -d worker +``` + +- [ ] **Step 2: Rebuild bot image** + +```bash +docker compose -f docker-compose.dev.yml build bot +docker tag dragonsstash-bot:latest git.samagsteribbe.nl/admin/dragonsstash-bot:latest +docker compose -p dragonsstash -f /opt/stacks/DragonsStash/docker-compose.yml up -d bot +``` + +- [ ] **Step 3: Verify bot startup** + +```bash +docker logs dragonsstash-bot --tail=20 +``` + +Expected: Bot starts cleanly, "Send listener started" message. 
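+
+As an optional post-deploy sanity check (a sketch only: it reuses the bot's Prisma client from `bot/src/db/client.js`, and the standalone script itself is hypothetical), confirm that freshly ingested multi-part packages now carry every destination message ID:
+
+```typescript
+import { db } from "./db/client.js";
+
+// Compare stored destination IDs against the recorded part count.
+const pkgs = await db.package.findMany({
+  where: { isMultipart: true },
+  orderBy: { indexedAt: "desc" },
+  take: 10,
+  select: { fileName: true, partCount: true, destMessageIds: true },
+});
+for (const p of pkgs) {
+  const stored = p.destMessageIds.length;
+  const ok = p.partCount == null || stored === p.partCount;
+  console.log(`${ok ? "OK" : "CHECK"} ${p.fileName}: ${stored}/${p.partCount ?? "?"} IDs`);
+}
+```
+
+Rows indexed before the Task 1 backfill will report a single ID; anything ingested after Tasks 2-3 should match its part count.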
+ +--- + +## Task 6: Kickstarter — Package Search Query + +**Files:** +- Modify: `src/data/kickstarter.queries.ts` + +- [ ] **Step 1: Add searchPackagesForLinking query** + +Append to `src/data/kickstarter.queries.ts`: + +```typescript +export async function searchPackagesForLinking(query: string, limit = 20) { + if (!query || query.length < 2) return []; + + return prisma.package.findMany({ + where: { + OR: [ + { fileName: { contains: query, mode: "insensitive" } }, + { creator: { contains: query, mode: "insensitive" } }, + ], + }, + orderBy: { indexedAt: "desc" }, + take: limit, + select: { + id: true, + fileName: true, + fileSize: true, + archiveType: true, + creator: true, + fileCount: true, + }, + }); +} + +export async function getLinkedPackageIds(kickstarterId: string): Promise { + const links = await prisma.kickstarterPackage.findMany({ + where: { kickstarterId }, + select: { packageId: true }, + }); + return links.map((l) => l.packageId); +} +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/data/kickstarter.queries.ts +git commit -m "feat: add package search query for kickstarter linking" +``` + +--- + +## Task 7: Kickstarter — Package Linker Dialog Component + +**Files:** +- Create: `src/app/(app)/kickstarters/_components/package-linker-dialog.tsx` + +- [ ] **Step 1: Create the package linker dialog** + +This component provides a search input to find packages and checkboxes to select/deselect them. It calls the existing `linkPackages` action on save. + +```tsx +"use client"; + +import { useState, useTransition, useCallback, useEffect } from "react"; +import { Search, Package, X, Loader2 } from "lucide-react"; +import { toast } from "sonner"; +import { linkPackages } from "../actions"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Badge } from "@/components/ui/badge"; +import { Checkbox } from "@/components/ui/checkbox"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, +} from "@/components/ui/dialog"; +import { ScrollArea } from "@/components/ui/scroll-area"; + +interface PackageResult { + id: string; + fileName: string; + fileSize: bigint; + archiveType: string; + creator: string | null; + fileCount: number; +} + +interface PackageLinkerDialogProps { + open: boolean; + onOpenChange: (open: boolean) => void; + kickstarterId: string; + kickstarterName: string; + initialPackageIds: string[]; +} + +function formatSize(bytes: bigint | number): string { + const b = Number(bytes); + if (b >= 1024 * 1024 * 1024) return `${(b / (1024 * 1024 * 1024)).toFixed(1)} GB`; + if (b >= 1024 * 1024) return `${(b / (1024 * 1024)).toFixed(0)} MB`; + return `${(b / 1024).toFixed(0)} KB`; +} + +export function PackageLinkerDialog({ + open, + onOpenChange, + kickstarterId, + kickstarterName, + initialPackageIds, +}: PackageLinkerDialogProps) { + const [isPending, startTransition] = useTransition(); + const [searchQuery, setSearchQuery] = useState(""); + const [searchResults, setSearchResults] = useState([]); + const [isSearching, setIsSearching] = useState(false); + const [selectedIds, setSelectedIds] = useState>(new Set(initialPackageIds)); + + // Reset state when dialog opens + useEffect(() => { + if (open) { + setSelectedIds(new Set(initialPackageIds)); + setSearchQuery(""); + setSearchResults([]); + } + }, [open, initialPackageIds]); + + const doSearch = useCallback(async (query: string) => { + if (query.length < 2) { + setSearchResults([]); + return; + } + 
setIsSearching(true);
+    try {
+      const res = await fetch(`/api/packages/search?q=${encodeURIComponent(query)}&limit=20`);
+      if (res.ok) {
+        const data = await res.json();
+        setSearchResults(data.packages ?? []);
+      }
+    } catch {
+      // Ignore search errors
+    } finally {
+      setIsSearching(false);
+    }
+  }, []);
+
+  // Debounced search
+  useEffect(() => {
+    const timer = setTimeout(() => doSearch(searchQuery), 300);
+    return () => clearTimeout(timer);
+  }, [searchQuery, doSearch]);
+
+  function togglePackage(id: string) {
+    setSelectedIds((prev) => {
+      const next = new Set(prev);
+      if (next.has(id)) next.delete(id);
+      else next.add(id);
+      return next;
+    });
+  }
+
+  function handleSave() {
+    startTransition(async () => {
+      const result = await linkPackages(kickstarterId, Array.from(selectedIds));
+      if (result.success) {
+        toast.success(`Linked ${selectedIds.size} package(s) to "${kickstarterName}"`);
+        onOpenChange(false);
+      } else {
+        toast.error(result.error);
+      }
+    });
+  }
+
+  return (
+    <Dialog open={open} onOpenChange={onOpenChange}>
+      <DialogContent className="sm:max-w-xl">
+        <DialogHeader>
+          <DialogTitle>Link Packages</DialogTitle>
+          <DialogDescription>
+            Search and select STL packages to link to “{kickstarterName}”.
+          </DialogDescription>
+        </DialogHeader>
+
+        {/* Selected count */}
+        {selectedIds.size > 0 && (
+          <div>
+            <Badge variant="secondary">
+              {selectedIds.size} package(s) selected
+            </Badge>
+          </div>
+        )}
+
+        {/* Search input */}
+        <div className="relative">
+          <Search className="absolute left-3 top-1/2 h-4 w-4 -translate-y-1/2 text-muted-foreground" />
+          <Input
+            placeholder="Search by filename or creator"
+            value={searchQuery}
+            onChange={(e) => setSearchQuery(e.target.value)}
+            className="pl-9"
+            autoFocus
+          />
+          {isSearching && (
+            <Loader2 className="absolute right-3 top-1/2 h-4 w-4 -translate-y-1/2 animate-spin text-muted-foreground" />
+          )}
+        </div>
+
+        {/* Results */}
+        <ScrollArea className="h-72 rounded-md border">
+          <div className="space-y-1 p-2">
+            {searchResults.length === 0 && searchQuery.length >= 2 && !isSearching && (
+              <div className="py-8 text-center text-sm text-muted-foreground">
+                No packages found
+              </div>
+            )}
+            {searchQuery.length < 2 && (
+              <div className="py-8 text-center text-sm text-muted-foreground">
+                Type at least 2 characters to search
+              </div>
+            )}
+            {searchResults.map((pkg) => (
+              <label
+                key={pkg.id}
+                className="flex cursor-pointer items-center gap-3 rounded-md p-2 hover:bg-muted"
+              >
+                <Checkbox
+                  checked={selectedIds.has(pkg.id)}
+                  onCheckedChange={() => togglePackage(pkg.id)}
+                />
+                <Package className="h-4 w-4 shrink-0 text-muted-foreground" />
+                <span className="min-w-0 flex-1">
+                  <span className="block truncate text-sm font-medium">{pkg.fileName}</span>
+                  <span className="block text-xs text-muted-foreground">
+                    {formatSize(pkg.fileSize)} · {pkg.fileCount} files
+                    {pkg.creator ? ` · ${pkg.creator}` : ""}
+                  </span>
+                </span>
+              </label>
+            ))}
+          </div>
+        </ScrollArea>
+
+        <DialogFooter>
+          <Button
+            variant="outline"
+            onClick={() => onOpenChange(false)}
+            disabled={isPending}
+          >
+            Cancel
+          </Button>
+          <Button onClick={handleSave} disabled={isPending}>
+            {isPending && <Loader2 className="mr-2 h-4 w-4 animate-spin" />}
+            Link {selectedIds.size} Package(s)
+          </Button>
+        </DialogFooter>
+      </DialogContent>
+    </Dialog>
+ ); +} +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/app/(app)/kickstarters/_components/package-linker-dialog.tsx +git commit -m "feat: add package linker dialog for kickstarters" +``` + +--- + +## Task 8: Package Search API Route + +**Files:** +- Create: `src/app/api/packages/search/route.ts` + +- [ ] **Step 1: Create the API route** + +The package linker dialog needs a client-side fetch for debounced search. Create a lightweight API route: + +```typescript +import { NextResponse } from "next/server"; +import { auth } from "@/lib/auth"; +import { searchPackagesForLinking } from "@/data/kickstarter.queries"; + +export const dynamic = "force-dynamic"; + +export async function GET(request: Request) { + const session = await auth(); + if (!session?.user?.id) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + const { searchParams } = new URL(request.url); + const query = searchParams.get("q") ?? ""; + const limit = Math.min(Number(searchParams.get("limit") ?? "20"), 50); + + const packages = await searchPackagesForLinking(query, limit); + + // Serialize BigInt for JSON + const serialized = packages.map((p) => ({ + ...p, + fileSize: p.fileSize.toString(), + })); + + return NextResponse.json({ packages: serialized }); +} +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/app/api/packages/search/route.ts +git commit -m "feat: add package search API route for kickstarter linking" +``` + +--- + +## Task 9: Kickstarter — Send All Packages Action + +**Files:** +- Modify: `src/app/(app)/kickstarters/actions.ts` + +- [ ] **Step 1: Add sendAllKickstarterPackages action** + +Append to `src/app/(app)/kickstarters/actions.ts`: + +```typescript +export async function sendAllKickstarterPackages( + kickstarterId: string +): Promise> { + const session = await auth(); + if (!session?.user?.id) return { success: false, error: "Unauthorized" }; + + try { + const telegramLink = await prisma.telegramLink.findUnique({ + where: { userId: session.user.id }, + }); + + if (!telegramLink) { + return { success: false, error: "No linked Telegram account. Link one in Settings." 
};
+    }
+
+    const kickstarter = await prisma.kickstarter.findFirst({
+      where: { id: kickstarterId, userId: session.user.id },
+      select: {
+        packages: {
+          select: {
+            package: {
+              select: { id: true, destChannelId: true, destMessageId: true, fileName: true },
+            },
+          },
+        },
+      },
+    });
+
+    if (!kickstarter) {
+      return { success: false, error: "Kickstarter not found" };
+    }
+
+    const sendablePackages = kickstarter.packages
+      .map((lnk) => lnk.package)
+      .filter((p) => p.destChannelId && p.destMessageId);
+
+    if (sendablePackages.length === 0) {
+      return { success: false, error: "No linked packages are available for sending" };
+    }
+
+    let queued = 0;
+    for (const pkg of sendablePackages) {
+      const existing = await prisma.botSendRequest.findFirst({
+        where: {
+          packageId: pkg.id,
+          telegramLinkId: telegramLink.id,
+          status: { in: ["PENDING", "SENDING"] },
+        },
+      });
+
+      if (!existing) {
+        const sendRequest = await prisma.botSendRequest.create({
+          data: {
+            packageId: pkg.id,
+            telegramLinkId: telegramLink.id,
+            requestedByUserId: session.user.id,
+            status: "PENDING",
+          },
+        });
+
+        try {
+          await prisma.$queryRawUnsafe(
+            `SELECT pg_notify('bot_send', $1)`,
+            sendRequest.id
+          );
+        } catch {
+          // Best-effort
+        }
+
+        queued++;
+      }
+    }
+
+    revalidatePath(REVALIDATE_PATH);
+    return { success: true, data: { queued } };
+  } catch {
+    return { success: false, error: "Failed to send packages" };
+  }
+}
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+git add src/app/(app)/kickstarters/actions.ts
+git commit -m "feat: add sendAllKickstarterPackages action"
+```
+
+---
+
+## Task 10: Kickstarter Table — Wire Up Link & Send Actions
+
+**Files:**
+- Modify: `src/app/(app)/kickstarters/_components/kickstarter-columns.tsx`
+- Modify: `src/app/(app)/kickstarters/_components/kickstarter-table.tsx`
+
+- [ ] **Step 1: Add actions to column menu**
+
+In `kickstarter-columns.tsx`, add `Link2` and `Send` imports from lucide-react, add `onLinkPackages` and `onSendAll` to props, and add menu items:
+
+```typescript
+import { MoreHorizontal, Pencil, Trash2, ExternalLink, Link2, Send } from "lucide-react";
+
+// Update interface:
+interface KickstarterColumnsProps {
+  onEdit: (kickstarter: KickstarterRow) => void;
+  onDelete: (id: string) => void;
+  onLinkPackages: (kickstarter: KickstarterRow) => void;
+  onSendAll: (kickstarter: KickstarterRow) => void;
+}
+```
+
+In the actions column dropdown, add between Edit and the separator:
+
+```tsx
+<DropdownMenuItem onClick={() => onLinkPackages(row.original)}>
+  <Link2 className="mr-2 h-4 w-4" />
+  Link Packages
+</DropdownMenuItem>
+{row.original._count.packages > 0 && (
+  <DropdownMenuItem onClick={() => onSendAll(row.original)}>
+    <Send className="mr-2 h-4 w-4" />
+    Send All ({row.original._count.packages})
+  </DropdownMenuItem>
+)}
+```
+
+Update the function signature to destructure the new props:
+
+```typescript
+export function getKickstarterColumns({
+  onEdit,
+  onDelete,
+  onLinkPackages,
+  onSendAll,
+}: KickstarterColumnsProps): ColumnDef<KickstarterRow>[] {
+```
+
+- [ ] **Step 2: Wire up state in kickstarter-table.tsx**
+
+Add imports and state for the new dialogs:
+
+```typescript
+import { PackageLinkerDialog } from "./package-linker-dialog";
+import { sendAllKickstarterPackages } from "../actions";
+
+// Inside KickstarterTable:
+const [linkTarget, setLinkTarget] = useState<KickstarterRow | null>(null);
+const [sendAllTarget, setSendAllTarget] = useState<KickstarterRow | null>(null);
+```
+
+Update the columns call:
+
+```typescript
+const columns = getKickstarterColumns({
+  onEdit: (kickstarter) => {
+    setEditKickstarter(kickstarter);
+    setModalOpen(true);
+  },
+  onDelete: (id) => setDeleteId(id),
+  onLinkPackages: (kickstarter) => setLinkTarget(kickstarter),
+  onSendAll: (kickstarter) => {
+    startTransition(async () => {
+      const result = await sendAllKickstarterPackages(kickstarter.id);
+      if (result.success) {
+        toast.success(`Queued ${result.data!.queued} package(s) for delivery`);
+      } else {
+        toast.error(result.error);
+      }
+    });
+  },
+});
+```
+
+Add the `PackageLinkerDialog` just before the closing tag of the component's return:
+
+```tsx
+{linkTarget && (
+  <PackageLinkerDialog
+    open={!!linkTarget}
+    onOpenChange={(open) => !open && setLinkTarget(null)}
+    kickstarterId={linkTarget.id}
+    kickstarterName={linkTarget.name}
+    initialPackageIds={[]}
+  />
+)}
+```
+
+Note: `initialPackageIds` is `[]` because the table doesn't fetch linked packages. The dialog will start empty but preserve selections during the session. For a better UX, we fetch the linked IDs when the dialog opens — see step 3.
+
+- [ ] **Step 3: Fetch initial linked packages when dialog opens**
+
+To populate the dialog with already-linked packages, add an API route or use a server action. The simplest approach: modify the `PackageLinkerDialog` to fetch linked IDs on mount.
+
+In `package-linker-dialog.tsx`, add to the `useEffect` that runs when `open` changes:
+
+```typescript
+useEffect(() => {
+  if (open) {
+    setSearchQuery("");
+    setSearchResults([]);
+    // Fetch currently linked packages
+    fetch(`/api/packages/linked?kickstarterId=${kickstarterId}`)
+      .then((res) => res.json())
+      .then((data) => {
+        if (data.packageIds) {
+          setSelectedIds(new Set(data.packageIds));
+        }
+      })
+      .catch(() => {});
+  }
+}, [open, kickstarterId]);
+```
+
+Create the API route at `src/app/api/packages/linked/route.ts`:
+
+```typescript
+import { NextResponse } from "next/server";
+import { auth } from "@/lib/auth";
+import { getLinkedPackageIds } from "@/data/kickstarter.queries";
+
+export const dynamic = "force-dynamic";
+
+export async function GET(request: Request) {
+  const session = await auth();
+  if (!session?.user?.id) {
+    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
+  }
+
+  const { searchParams } = new URL(request.url);
+  const kickstarterId = searchParams.get("kickstarterId");
+  if (!kickstarterId) {
+    return NextResponse.json({ error: "kickstarterId required" }, { status: 400 });
+  }
+
+  const packageIds = await getLinkedPackageIds(kickstarterId);
+  return NextResponse.json({ packageIds });
+}
+```
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add src/app/(app)/kickstarters/_components/ src/app/api/packages/
+git commit -m "feat: wire up package linking and send-all in kickstarter table"
+```
+
+---
+
+## Task 11: Rebuild & Deploy App
+
+- [ ] **Step 1: Rebuild app image**
+
+```bash
+docker compose build app  # or equivalent for the production compose
+docker tag dragonsstash:latest git.samagsteribbe.nl/admin/dragonsstash:latest
+docker compose -p dragonsstash -f /opt/stacks/DragonsStash/docker-compose.yml up -d app
+```
+
+- [ ] **Step 2: Verify app startup**
+
+```bash
+docker logs dragonsstash --tail=20
+```
+
+Expected: App starts cleanly, health check passes.
+
+- [ ] **Step 3: Manual test**
+
+1. Go to Kickstarters tab
+2. Open a kickstarter's row menu → "Link Packages"
+3. Search for a package, select it, save
+4. Verify the package count column updates
+5. 
Use "Send All" to queue all linked packages for Telegram delivery diff --git a/docs/superpowers/plans/2026-03-30-grouping-audit-report.md b/docs/superpowers/plans/2026-03-30-grouping-audit-report.md new file mode 100644 index 0000000..a15f99a --- /dev/null +++ b/docs/superpowers/plans/2026-03-30-grouping-audit-report.md @@ -0,0 +1,472 @@ +# Dragonstash Grouping System Audit & Enhancement Report + +## Appendix: Real-World Failure Cases (2026-03-29/30) + +These skipped packages reveal two concrete issues: + +### Issue A: `WORKER_MAX_ZIP_SIZE_MB` was 4 GB — blocking all large multipart archives + +| File | Parts | Total Size | Status | +|------|-------|-----------|--------| +| DM-Stash - Guide to Tharador - Complete STL | 19 | 70.5 GB | SIZE_LIMIT | +| DM-Stash - 2023-05 - Greywinds All-in | 16 | 58.9 GB | SIZE_LIMIT | +| Axolote Gaming - Castle of the Vampire Lord | 10 | 18 GB | SIZE_LIMIT | +| Dungeon Blocks - THE ULTIMATE DUNGEON | 5 | 7.6 GB | SIZE_LIMIT | +| Dungeon Blocks - The Toxic sewer | 4 | 6.2 GB | SIZE_LIMIT | +| Soulmist | 4 | 6.3 GB | SIZE_LIMIT | +| Medieval Town PT1 | 3 | 5.7 GB | SIZE_LIMIT | +| Knight Models - Game Of Thrones | 3 | 5.5 GB | SIZE_LIMIT | +| Dungeon Blocks - The Lost Cave | 3 | 4.9 GB | SIZE_LIMIT | +| El Miniaturista 2025-05 Fulgrim Part II and III | 5 | 4.7 GB | SIZE_LIMIT | + +**Root cause:** Production env had `WORKER_MAX_ZIP_SIZE_MB=4096`. The default in code is 204800 (200 GB), but docker-compose.yml defaulted to 4096. + +**Fix applied:** Raised to 204800 in `/opt/stacks/DragonsStash/.env`. Worker restarted. These archives will be retried on the next ingestion cycle. The worker downloads parts individually (each under 2-4 GB), concatenates, re-splits at 1950 MiB for upload. Peak temp disk usage for the 70.5 GB archive: ~211 GB (353 GB available). + +**Code fix:** `MAX_PART_SIZE` is now configurable via `MAX_PART_SIZE_MB` env var (was hardcoded at 1950). Set to 3900 for Telegram Premium accounts to avoid unnecessary splitting. + +### Issue B: Download failure at 98% (DE1-Supported.7z) + +| File | Size | Error | +|------|------|-------| +| DE1-Supported.7z | 1.9 GB | Download stopped unexpectedly at 2043674624/2078338541 bytes (98%) | + +**Root cause:** Download stalled near completion with no retry mechanism. + +**Fix applied:** Earlier in this session, download retry logic was added (max 3 retries with `cancelDownloadFile` before each retry). This file will be retried automatically on next ingestion cycle. 
+ +--- + +## Deliverable 1: Audit Report — Current State + +### 1.1 Grouping Signal Stack (Current) + +The system currently uses exactly **one automatic grouping signal**: + +| Priority | Signal | Status | Location | +|----------|--------|--------|----------| +| 1 | `mediaAlbumId` | Implemented | `worker/src/grouping.ts:26-33` | +| 2 | Manual override | Implemented | `src/lib/telegram/queries.ts:606-639` | + +**How it works:** +- `processAlbumGroups()` in `worker/src/grouping.ts` groups indexed packages by `mediaAlbumId` (filtering out "0" and null) +- For albums with 2+ members: creates `PackageGroup`, links packages, assigns name from album photo caption or first filename +- Manual grouping via UI: select 2+ packages, enter name, creates group in `createManualGroup()` + +**What does NOT exist:** +- No `message_thread_id` (forum topic) scoping +- No project/month pattern extraction from filenames +- No creator/sender grouping +- No time-window + sender clustering +- No reply chain analysis +- No ZIP internal path prefix matching +- No caption fuzzy matching +- No staging queue for ungrouped files + +### 1.2 Multipart Archive Detection (`worker/src/archive/multipart.ts`) + +This is a **separate system** from display grouping. `groupArchiveSets()` groups Telegram messages into `ArchiveSet[]` based on filename patterns: + +- `.zip.001`, `.zip.002` → ZIP_NUMBERED +- `.z01`, `.z02`, `.zip` → ZIP_LEGACY +- `.part1.rar`, `.part2.rar` → RAR_PART +- `.r00`, `.r01`, `.rar` → RAR_LEGACY + +These are grouped by `format:baseName.toLowerCase()` key. This is about **reassembling split archives**, not UI grouping. An `ArchiveSet` becomes a single `Package` in the database. + +### 1.3 TDLib Ingestion Handler + +**Pipeline in `worker/src/worker.ts:801-1197`:** +``` +processOneArchiveSet(): + 1. Early skip check (source message ID) + 2. Size guard (maxZipSizeMB) + 3. Download all parts + 4. Compute SHA-256 hash + 5. Check hash dedup + 6. Read archive metadata + 7. Split/repack if needed + 8. Upload to destination + 9. Download preview + 10. Extract fallback preview + 11. Resolve creator + 12. Index in database + 13. Cleanup temp files +``` + +**Post-indexing:** `processAlbumGroups()` is called once per channel/topic scan to create album-based groups. + +**Gaps:** +- Messages are never "dropped" silently — failures go to `SkippedPackage` table with reason +- Watermark only advances past successfully processed sets (failed sets block advancement) +- No messages are missed within a channel, but there's no audit to verify completeness after the fact + +### 1.4 Hash Verification + +**What IS verified:** +| Check | Where | When | +|-------|-------|------| +| Download file size | `download.ts:verifyAndMove()` | After each file download | +| SHA-256 content hash | `worker.ts:952` | After download, used for dedup | +| Telegram upload confirmation | `channel.ts:updateMessageSendSucceeded` | Waits for server ACK | + +**What is NOT verified:** +| Gap | Impact | +|-----|--------| +| No hash after upload | Can't detect Telegram-side corruption | +| No hash after split | Split files could be silently corrupted | +| CRC-32 extracted but never checked | ZIP/RAR per-file integrity not validated | +| No end-to-end hash | Split files have different hash than original | +| No periodic audit job | Stale/missing data never detected | + +### 1.5 File Size Limit + +| Setting | Value | Configurable? 
| Location | +|---------|-------|---------------|----------| +| `MAX_PART_SIZE` | 1950 MiB | **Hardcoded** | `worker/src/archive/split.ts:14` | +| `MAX_UPLOAD_SIZE` | 1950 MiB | **Hardcoded** | `worker/src/worker.ts:1023` | +| `maxZipSizeMB` | 200 GB | `WORKER_MAX_ZIP_SIZE_MB` env var | `worker/src/util/config.ts:6` | + +The 1950 MiB limit is deliberately below 2 GiB to avoid TDLib's `FILE_PARTS_INVALID` error. There is **no Premium awareness** — all accounts are treated as non-Premium. + +### 1.6 Search Implementation + +- **No fuzzy search** — uses Prisma's `contains` with `mode: "insensitive"` (translates to PostgreSQL `ILIKE`) +- **No full-text search infrastructure** — no `tsvector`, no GiST/GIN indexes +- **Indexes:** B-tree on `fileName`, `creator`, `archiveType`, `indexedAt`, plus `PackageFile.fileName` and `extension` +- Search works for substring matching but won't match typos or similar names + +### 1.7 Notification Infrastructure + +- **pg_notify channels:** `bot_send`, `new_package` (bot), plus 7 worker channels +- **Bot subscriptions:** pattern-match (case-insensitive substring) on `fileName` and `creator` +- **UI notifications:** Sonner toast (ephemeral only) +- **No persistent notification store** — no database model for notifications +- **No notification UI panel** in the web app +- **No alerts for:** grouping conflicts, hash mismatches, missing parts, upload failures (beyond SkippedPackage table) + +--- + +## Deliverable 2: Revised Grouping Signal Stack + +### Recommended Implementation Plan + +I recommend an **incremental approach** — implement signals in phases, starting with highest-value/lowest-risk. + +### Phase 1: Foundation (Required Before Other Signals) + +#### Signal 9: Manual Override Persistence +**Status:** Partially implemented. Manual groups exist but don't influence future auto-grouping. + +**Implementation:** +- Add `groupingSource` field to `PackageGroup`: `"ALBUM" | "MANUAL" | "AUTO_PATTERN" | "AUTO_TIME" | "AUTO_REPLY" | "AUTO_ZIP" | "AUTO_CAPTION"` +- Manual groups already persist. What's missing is the **training feedback** where a manual grouping teaches the system to auto-group similar future files. +- This requires a `GroupingRule` model (see schema diff below) that stores learned patterns from manual overrides. + +#### Ungrouped Staging Queue +**Implementation:** +- After ingestion, packages without a `packageGroupId` are naturally "ungrouped" +- Add a filter/tab to the STL page: "Ungrouped" showing packages where `packageGroupId IS NULL` +- No schema change needed — just a query filter + +### Phase 2: High-Value Automatic Signals + +#### Signal 1: `mediaAlbumId` (Already Implemented) +No changes needed. This is working correctly. + +#### Signal 2: `message_thread_id` Forum Topic Scoping +**Status:** Already used for scan scoping (worker scans by topic), but not used as a grouping signal. + +**Implementation:** +- `sourceTopicId` is already stored on `Package` (schema line 469) +- Use it as a **scoping constraint** for all other signals: time-window, caption matching, etc. 
only apply within the same topic +- No additional schema changes needed + +#### Signal 5: Time Window + Sender Grouping +**Implementation:** +- After album grouping, find ungrouped packages from the same source channel + topic +- Within a configurable window (default 5 min), cluster by proximity +- Since we don't have `sender_id` from the source channel (TDLib `searchChatMessages` doesn't return it for channels), this becomes **time-window within topic/channel** +- New config: `AUTO_GROUP_TIME_WINDOW_MINUTES` (default: 5) + +#### Signal 3: Project/Month Pattern Extraction +**Implementation:** +- Extract date patterns from filenames/captions: `YYYY-MM`, `YYYY_MM`, `MonthName Year` +- Extract project slugs: common prefix before separator (e.g., "ProjectName - File1.zip" and "ProjectName - File2.zip") +- Group packages with matching patterns from the same channel +- This should run as a **post-processing pass** after time-window grouping, merging small time-window groups that share a pattern + +#### Signal 4: Creator Grouping +**Implementation:** +- The `creator` field is already extracted from filenames and stored per-package +- Within a channel, if multiple ungrouped packages have the same `creator` and were indexed within the same ingestion run, auto-group them +- Lower priority than time-window (might create overly broad groups) + +### Phase 3: Advanced Signals + +#### Signal 6: Reply Chain +**Implementation:** +- TDLib messages have `reply_to_message_id` but this isn't currently captured during scanning +- Would need to modify `getChannelMessages()` in `download.ts` to extract `reply_to_message_id` +- Then: if message B replies to message A, and both are archives, group them +- **Moderate complexity**, deferred to Phase 3 + +#### Signal 7: ZIP Internal Path Prefix +**Implementation:** +- Already have `PackageFile.path` stored for each file inside an archive +- After indexing, find the common root folder across all files +- If two packages share the same root prefix and same channel, suggest grouping +- This is a **post-hoc analysis** that could run as a background job + +#### Signal 8: Caption Fuzzy Match +**Implementation:** +- Currently captions from source messages are NOT stored (only photo captions for preview matching) +- Would need to capture `msg.content?.caption?.text` during scanning and store on Package +- Then: fuzzy-match captions from nearby messages in same channel +- **Requires schema change + scan modification**, deferred to Phase 3 + +--- + +## Deliverable 3: Schema Diff + +All changes are **additive** — no columns dropped, no types changed. + +```prisma +// ── PackageGroup additions ── +model PackageGroup { + // ... existing fields ... + groupingSource GroupingSource @default(MANUAL) // NEW: how this group was created +} + +// NEW enum +enum GroupingSource { + ALBUM // From Telegram mediaAlbumId + MANUAL // User-created via UI + AUTO_PATTERN // Filename/date pattern matching + AUTO_TIME // Time-window clustering + AUTO_REPLY // Reply chain + AUTO_ZIP // ZIP path prefix + AUTO_CAPTION // Caption fuzzy match +} + +// ── Package additions ── +model Package { + // ... existing fields ... + sourceCaption String? 
// NEW: caption text from source Telegram message +} + +// ── New model: GroupingRule (training from manual overrides) ── +model GroupingRule { + id String @id @default(cuid()) + sourceChannelId String + pattern String // Regex or glob pattern learned from manual grouping + signalType GroupingSource // Which signal this rule applies to + confidence Float @default(1.0) + createdAt DateTime @default(now()) + createdByGroupId String? // The manual group that spawned this rule + + sourceChannel TelegramChannel @relation(fields: [sourceChannelId], references: [id], onDelete: Cascade) + + @@index([sourceChannelId]) + @@map("grouping_rules") +} + +// ── New model: SystemNotification ── +model SystemNotification { + id String @id @default(cuid()) + type NotificationType + severity NotificationSeverity @default(INFO) + title String + message String + context Json? // Structured data: packageId, groupId, sourceMessageId, etc. + isRead Boolean @default(false) + createdAt DateTime @default(now()) + + @@index([isRead, createdAt]) + @@index([type]) + @@map("system_notifications") +} + +enum NotificationType { + HASH_MISMATCH + MISSING_PART + UPLOAD_FAILED + DOWNLOAD_FAILED + GROUPING_CONFLICT + INTEGRITY_AUDIT +} + +enum NotificationSeverity { + INFO + WARNING + ERROR +} + +// ── Config additions (worker/src/util/config.ts) ── +// maxPartSizeMB: parseInt(process.env.MAX_PART_SIZE_MB ?? "1950", 10) +// autoGroupTimeWindowMinutes: parseInt(process.env.AUTO_GROUP_TIME_WINDOW_MINUTES ?? "5", 10) +// telegramPremium: process.env.TELEGRAM_PREMIUM === "true" +``` + +**Migration notes:** +- All new fields are optional/have defaults — zero-risk to existing data +- `GroupingSource` enum added with `@default(MANUAL)` — existing groups unaffected +- `GroupingRule` and `SystemNotification` are new tables — no impact on existing +- Backfill: set `groupingSource = ALBUM` for groups where `mediaAlbumId IS NOT NULL` + +--- + +## Deliverable 4: Notification Contract + +### Event Shape + +```typescript +interface SystemNotificationEvent { + type: NotificationType; + severity: "INFO" | "WARNING" | "ERROR"; + title: string; + message: string; + context: { + packageId?: string; + groupId?: string; + sourceChannelId?: string; + sourceMessageId?: bigint; + fileName?: string; + partNumber?: number; + totalParts?: number; + expectedHash?: string; + actualHash?: string; + reason?: string; + }; +} +``` + +### Where Notifications Fire + +| Event | Where | Trigger | +|-------|-------|---------| +| `HASH_MISMATCH` | `worker/src/worker.ts` after split | SHA-256 of concatenated split parts != original hash | +| `MISSING_PART` | Periodic audit job (new) | Group has `partCount > 1` but fewer than `partCount` dest messages exist | +| `UPLOAD_FAILED` | `worker/src/worker.ts` catch block | Upload fails after all retries exhausted | +| `DOWNLOAD_FAILED` | `worker/src/worker.ts` catch block | Download fails after all retries | +| `GROUPING_CONFLICT` | Auto-grouping pass (new) | Two signals suggest different groups for the same package | +| `INTEGRITY_AUDIT` | Periodic job (new) | Scheduled check finds inconsistencies | + +### Delivery + +1. **Database:** Always persisted to `SystemNotification` table +2. **pg_notify:** `SELECT pg_notify('system_notification', jsonPayload)` for real-time +3. **Web UI:** Notification bell/panel that polls or listens for new notifications +4. 
**Telegram (optional):** Forward critical notifications to admin via bot + +--- + +## Deliverable 5: Feature Flag Plan + +### Runtime Configuration (Environment Variables) + +| Flag | Type | Default | Purpose | +|------|------|---------|---------| +| `TELEGRAM_PREMIUM` | boolean | `false` | Enable 4GB upload limit | +| `MAX_PART_SIZE_MB` | number | `1950` | Split threshold in MiB (overrides hardcoded value) | +| `AUTO_GROUP_ENABLED` | boolean | `false` | Enable automatic grouping beyond album | +| `AUTO_GROUP_TIME_WINDOW_MINUTES` | number | `5` | Time-window clustering threshold | +| `AUTO_GROUP_PATTERN_ENABLED` | boolean | `false` | Enable filename/date pattern grouping | +| `INTEGRITY_AUDIT_ENABLED` | boolean | `false` | Enable periodic integrity audit | +| `INTEGRITY_AUDIT_INTERVAL_HOURS` | number | `24` | How often to run the audit | + +### Premium Mode Behavior + +When `TELEGRAM_PREMIUM=true`: +1. `MAX_PART_SIZE_MB` defaults to `3900` (safely under 4 GiB) instead of `1950` +2. Files under 4 GB: uploaded as-is (no splitting) +3. Files over 4 GB: split using existing `byteLevelSplit()` at the new threshold +4. Existing split/rejoin logic is **kept as fallback** — never removed +5. `isMultipart` and `partCount` continue to track actual upload state + +### Implementation in `split.ts`: + +```typescript +// Replace hardcoded constant with config-driven: +const MAX_PART_SIZE = BigInt(config.maxPartSizeMB) * 1024n * 1024n; +``` + +And in `config.ts`: +```typescript +maxPartSizeMB: parseInt( + process.env.MAX_PART_SIZE_MB ?? + (process.env.TELEGRAM_PREMIUM === "true" ? "3900" : "1950"), + 10 +), +``` + +### Rollout Strategy + +1. **All flags default to off** — zero behavior change on deploy +2. Enable `TELEGRAM_PREMIUM` first (simple, well-understood) +3. Enable `AUTO_GROUP_ENABLED` on a **per-channel basis** (see test plan) before globally +4. Enable `INTEGRITY_AUDIT_ENABLED` after manual validation +5. Pattern-based grouping enabled last (highest complexity) + +--- + +## Deliverable 6: Test Plan + +### Phase 0: Pre-Implementation Validation + +Before touching any code, verify the current system baseline: + +1. **Pick one test channel** with known content (a mix of albums, single files, and multipart archives) +2. Run an ingestion cycle and record: number of packages, groups, skipped +3. Verify all album-based groups are correct +4. Note any ungrouped files that "should" be grouped +5. This becomes the **regression baseline** + +### Phase 1: Premium Mode Testing + +1. Set `TELEGRAM_PREMIUM=true` and `MAX_PART_SIZE_MB=3900` +2. Manually upload a 3 GB test file to a source channel +3. Trigger ingestion — verify it uploads as a single message (not split) +4. Manually upload a 5 GB test file +5. Trigger ingestion — verify it splits at ~3.9 GB threshold +6. Verify `isMultipart`, `partCount`, `destMessageIds` are correct +7. Send the package via bot — verify all parts arrive + +### Phase 2: Time-Window Grouping Testing + +1. Enable `AUTO_GROUP_ENABLED=true` on the test channel only +2. Post 3 files to the channel within 2 minutes (no album) +3. Trigger ingestion — verify they auto-group +4. Post 2 files 10 minutes apart +5. Trigger ingestion — verify they stay ungrouped +6. Manually group them — verify `GroupingRule` is created +7. 
Post similar files — verify auto-grouping kicks in + +### Phase 3: Manual QA via API + +Add a **test endpoint** (dev-only) that accepts a fake message payload and runs it through the grouping pipeline without hitting Telegram: + +``` +POST /api/dev/test-grouping +Body: { messages: [...], channelId: "..." } +Response: { suggestedGroups: [...] } +``` + +This allows testing grouping logic against crafted scenarios without waiting for real Telegram messages. + +### Phase 4: Integrity Audit Testing + +1. Enable `INTEGRITY_AUDIT_ENABLED=true` +2. Manually corrupt a record (set wrong `contentHash` in DB) +3. Run audit — verify `HASH_MISMATCH` notification is created +4. Delete one `destMessageId` from a multipart package's `destMessageIds` +5. Run audit — verify `MISSING_PART` notification is created +6. Check notification UI shows both + +### Regression Checks After Each Phase + +- Re-run ingestion on test channel — same number of packages/groups as baseline +- Search for known filenames — still returns correct results +- Send a package via bot — still delivers correctly +- Album groups unchanged +- Manual groups unchanged diff --git a/docs/superpowers/plans/2026-03-30-grouping-phase1-implementation.md b/docs/superpowers/plans/2026-03-30-grouping-phase1-implementation.md new file mode 100644 index 0000000..e0bf89c --- /dev/null +++ b/docs/superpowers/plans/2026-03-30-grouping-phase1-implementation.md @@ -0,0 +1,67 @@ +# Grouping Phase 1: Foundation + Time-Window Grouping + +> **For agentic workers:** Use superpowers:subagent-driven-development to implement this plan. + +**Goal:** Add grouping infrastructure (schema, enums, notifications model), an ungrouped staging queue in the UI, and time-window auto-grouping as the first automatic signal beyond album grouping. + +**Architecture:** Schema changes lay the foundation. Ungrouped tab is a query filter. Time-window grouping runs as a post-processing pass after album grouping in the worker pipeline. + +**Tech Stack:** Prisma schema + migration, worker TypeScript, Next.js App Router. + +--- + +## Task 1: Schema Migration + +**Files:** +- Modify: `prisma/schema.prisma` +- Create: migration SQL + +Add: +1. `GroupingSource` enum: `ALBUM`, `MANUAL`, `AUTO_TIME`, `AUTO_PATTERN`, `AUTO_REPLY`, `AUTO_ZIP`, `AUTO_CAPTION` +2. `groupingSource GroupingSource @default(MANUAL)` on `PackageGroup` +3. `SystemNotification` model with `type`, `severity`, `title`, `message`, `context` (Json), `isRead` +4. `NotificationType` enum: `HASH_MISMATCH`, `MISSING_PART`, `UPLOAD_FAILED`, `DOWNLOAD_FAILED`, `GROUPING_CONFLICT`, `INTEGRITY_AUDIT` +5. `NotificationSeverity` enum: `INFO`, `WARNING`, `ERROR` + +Backfill: `UPDATE package_groups SET "groupingSource" = 'ALBUM' WHERE "mediaAlbumId" IS NOT NULL` + +--- + +## Task 2: Ungrouped Staging Tab in STL Page + +**Files:** +- Modify: `src/lib/telegram/queries.ts` — add `listUngroupedPackages()` query +- Modify: `src/app/(app)/stls/page.tsx` — add tab parameter support +- Modify: `src/app/(app)/stls/_components/stl-table.tsx` — add "Ungrouped" tab + +Add a tab next to the existing "Skipped" tab that shows packages where `packageGroupId IS NULL`. Uses the existing `PackageListItem` type and table rendering. This gives users a clear view of files that need manual grouping. 
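+
+A minimal sketch of the new query (assuming the `prisma` client and ordering conventions already used in `src/lib/telegram/queries.ts`; the select should reuse the existing `PackageListItem` shape):
+
+```typescript
+export async function listUngroupedPackages(limit = 50) {
+  // Packages without a group are exactly the staging queue.
+  return prisma.package.findMany({
+    where: { packageGroupId: null },
+    orderBy: { indexedAt: "desc" },
+    take: limit,
+  });
+}
+```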
+
+---
+
+## Task 4: Hash Verification After Split
+
+**Files:**
+- Modify: `worker/src/worker.ts` — add a hash re-check after concat + split
+- Reuse: `worker/src/archive/hash.ts` — no changes needed; reuse `hashParts`
+
+After `concatenateFiles()` + `byteLevelSplit()`, re-hash the split parts and compare to the original `contentHash`. On mismatch, log an error and create a `SystemNotification` (once that table exists). This closes the integrity gap identified in the audit.
+
+---
+
+## Task 5: Build & Deploy
+
+Rebuild the worker and app images. Deploy. Verify:
+- Worker logs show `maxPartSizeMB` and the new `autoGroupTimeWindowMinutes` in config
+- The Ungrouped tab is visible on the STL page
+- Previously-skipped large archives begin processing
diff --git a/prisma/migrations/20260326120000_add_dest_message_ids/migration.sql b/prisma/migrations/20260326120000_add_dest_message_ids/migration.sql
new file mode 100644
index 0000000..e8197bb
--- /dev/null
+++ b/prisma/migrations/20260326120000_add_dest_message_ids/migration.sql
@@ -0,0 +1,7 @@
+-- AlterTable
+ALTER TABLE "packages" ADD COLUMN "destMessageIds" BIGINT[] DEFAULT ARRAY[]::BIGINT[];
+
+-- Backfill: copy the existing destMessageId into the array
+UPDATE "packages"
+SET "destMessageIds" = ARRAY["destMessageId"]
+WHERE "destMessageId" IS NOT NULL;
diff --git a/prisma/migrations/20260330120000_grouping_and_notifications/migration.sql b/prisma/migrations/20260330120000_grouping_and_notifications/migration.sql
new file mode 100644
index 0000000..4e868b8
--- /dev/null
+++ b/prisma/migrations/20260330120000_grouping_and_notifications/migration.sql
@@ -0,0 +1,32 @@
+-- CreateEnum GroupingSource
+CREATE TYPE "GroupingSource" AS ENUM ('ALBUM', 'MANUAL', 'AUTO_TIME', 'AUTO_PATTERN', 'AUTO_REPLY', 'AUTO_ZIP', 'AUTO_CAPTION');
+
+-- CreateEnum NotificationType
+CREATE TYPE "NotificationType" AS ENUM ('HASH_MISMATCH', 'MISSING_PART', 'UPLOAD_FAILED', 'DOWNLOAD_FAILED', 'GROUPING_CONFLICT', 'INTEGRITY_AUDIT');
+
+-- CreateEnum NotificationSeverity
+CREATE TYPE "NotificationSeverity" AS ENUM ('INFO', 'WARNING', 'ERROR');
+
+-- AlterTable: add groupingSource to package_groups
+ALTER TABLE "package_groups" ADD COLUMN "groupingSource" "GroupingSource" NOT NULL DEFAULT 'MANUAL';
+
+-- Backfill: mark album-based groups
+UPDATE "package_groups" SET "groupingSource" = 'ALBUM' WHERE "mediaAlbumId" IS NOT NULL;
+
+-- CreateTable: system_notifications
+CREATE TABLE "system_notifications" (
+    "id" TEXT NOT NULL,
+    "type" "NotificationType" NOT NULL,
+    "severity" "NotificationSeverity" NOT NULL DEFAULT 'INFO',
+    "title" TEXT NOT NULL,
+    "message" TEXT NOT NULL,
+    "context" JSONB,
+    "isRead" BOOLEAN NOT NULL DEFAULT false,
+    "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+
+    CONSTRAINT "system_notifications_pkey" PRIMARY KEY ("id")
+);
+
+-- CreateIndex
+CREATE INDEX
"system_notifications_isRead_createdAt_idx" ON "system_notifications"("isRead", "createdAt"); +CREATE INDEX "system_notifications_type_idx" ON "system_notifications"("type"); diff --git a/prisma/migrations/20260330130000_add_caption_and_reply_to/migration.sql b/prisma/migrations/20260330130000_add_caption_and_reply_to/migration.sql new file mode 100644 index 0000000..63bcbd4 --- /dev/null +++ b/prisma/migrations/20260330130000_add_caption_and_reply_to/migration.sql @@ -0,0 +1,3 @@ +-- AlterTable: add sourceCaption and replyToMessageId to packages +ALTER TABLE "packages" ADD COLUMN "sourceCaption" TEXT; +ALTER TABLE "packages" ADD COLUMN "replyToMessageId" BIGINT; diff --git a/prisma/migrations/20260330140000_grouping_rules_and_flags/migration.sql b/prisma/migrations/20260330140000_grouping_rules_and_flags/migration.sql new file mode 100644 index 0000000..976b97e --- /dev/null +++ b/prisma/migrations/20260330140000_grouping_rules_and_flags/migration.sql @@ -0,0 +1,47 @@ +-- AlterTable: add autoGroupEnabled to telegram_channels +ALTER TABLE "telegram_channels" ADD COLUMN "autoGroupEnabled" BOOLEAN NOT NULL DEFAULT true; + +-- CreateTable: grouping_rules +CREATE TABLE "grouping_rules" ( + "id" TEXT NOT NULL, + "sourceChannelId" TEXT NOT NULL, + "pattern" TEXT NOT NULL, + "signalType" "GroupingSource" NOT NULL, + "confidence" DOUBLE PRECISION NOT NULL DEFAULT 1.0, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "createdByGroupId" TEXT, + + CONSTRAINT "grouping_rules_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE INDEX "grouping_rules_sourceChannelId_idx" ON "grouping_rules"("sourceChannelId"); + +-- AddForeignKey +ALTER TABLE "grouping_rules" ADD CONSTRAINT "grouping_rules_sourceChannelId_fkey" FOREIGN KEY ("sourceChannelId") REFERENCES "telegram_channels"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- Full-text search: add tsvector column and GIN index +ALTER TABLE "packages" ADD COLUMN IF NOT EXISTS "searchVector" tsvector; + +UPDATE "packages" SET "searchVector" = to_tsvector('english', + coalesce("fileName", '') || ' ' || coalesce("creator", '') || ' ' || coalesce("sourceCaption", '') +) WHERE "searchVector" IS NULL; + +CREATE INDEX IF NOT EXISTS "packages_search_vector_idx" ON "packages" USING GIN ("searchVector"); + +-- Trigger to auto-update searchVector on insert/update +CREATE OR REPLACE FUNCTION packages_search_vector_update() RETURNS trigger AS $$ +BEGIN + NEW."searchVector" := to_tsvector('english', + coalesce(NEW."fileName", '') || ' ' || coalesce(NEW."creator", '') || ' ' || coalesce(NEW."sourceCaption", '') + ); + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS packages_search_vector_trigger ON "packages"; +CREATE TRIGGER packages_search_vector_trigger + BEFORE INSERT OR UPDATE OF "fileName", "creator", "sourceCaption" + ON "packages" + FOR EACH ROW + EXECUTE FUNCTION packages_search_vector_update(); diff --git a/prisma/migrations/20260330150000_manual_uploads/migration.sql b/prisma/migrations/20260330150000_manual_uploads/migration.sql new file mode 100644 index 0000000..593befa --- /dev/null +++ b/prisma/migrations/20260330150000_manual_uploads/migration.sql @@ -0,0 +1,30 @@ +-- CreateEnum +CREATE TYPE "ManualUploadStatus" AS ENUM ('PENDING', 'PROCESSING', 'COMPLETED', 'FAILED'); + +-- CreateTable +CREATE TABLE "manual_uploads" ( + "id" TEXT NOT NULL, + "status" "ManualUploadStatus" NOT NULL DEFAULT 'PENDING', + "groupName" TEXT, + "userId" TEXT NOT NULL, + "errorMessage" TEXT, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT 
CURRENT_TIMESTAMP, + "completedAt" TIMESTAMP(3), + CONSTRAINT "manual_uploads_pkey" PRIMARY KEY ("id") +); + +CREATE TABLE "manual_upload_files" ( + "id" TEXT NOT NULL, + "uploadId" TEXT NOT NULL, + "fileName" TEXT NOT NULL, + "filePath" TEXT NOT NULL, + "fileSize" BIGINT NOT NULL, + "packageId" TEXT, + CONSTRAINT "manual_upload_files_pkey" PRIMARY KEY ("id") +); + +CREATE INDEX "manual_uploads_status_idx" ON "manual_uploads"("status"); +CREATE INDEX "manual_upload_files_uploadId_idx" ON "manual_upload_files"("uploadId"); + +ALTER TABLE "manual_uploads" ADD CONSTRAINT "manual_uploads_userId_fkey" FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE RESTRICT ON UPDATE CASCADE; +ALTER TABLE "manual_upload_files" ADD CONSTRAINT "manual_upload_files_uploadId_fkey" FOREIGN KEY ("uploadId") REFERENCES "manual_uploads"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/prisma/schema.prisma b/prisma/schema.prisma index fb6a8b2..95f5580 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -39,9 +39,10 @@ model User { settings UserSettings? telegramLink TelegramLink? kickstarters Kickstarter[] - inviteCodes InviteCode[] @relation("InviteCreator") - usedInvite InviteCode? @relation("InviteUser", fields: [usedInviteId], references: [id], onDelete: SetNull) - usedInviteId String? + inviteCodes InviteCode[] @relation("InviteCreator") + usedInvite InviteCode? @relation("InviteUser", fields: [usedInviteId], references: [id], onDelete: SetNull) + usedInviteId String? + manualUploads ManualUpload[] } model Account { @@ -429,10 +430,13 @@ model TelegramChannel { createdAt DateTime @default(now()) updatedAt DateTime @updatedAt + autoGroupEnabled Boolean @default(true) + accountMaps AccountChannelMap[] packages Package[] skippedPackages SkippedPackage[] packageGroups PackageGroup[] + groupingRules GroupingRule[] @@index([type, isActive]) @@index([category]) @@ -469,10 +473,13 @@ model Package { sourceTopicId BigInt? destChannelId String? destMessageId BigInt? + destMessageIds BigInt[] @default([]) isMultipart Boolean @default(false) partCount Int @default(1) fileCount Int @default(0) tags String[] @default([]) + sourceCaption String? // Caption text from source Telegram message + replyToMessageId BigInt? // reply_to_message_id from source message (for reply chain grouping) previewData Bytes? // JPEG thumbnail from nearby Telegram photo (stored as raw bytes) previewMsgId BigInt? // Telegram message ID of the matched photo packageGroupId String? @@ -521,6 +528,7 @@ model PackageGroup { name String mediaAlbumId String? sourceChannelId String + groupingSource GroupingSource @default(MANUAL) previewData Bytes? createdAt DateTime @default(now()) updatedAt DateTime @updatedAt @@ -801,3 +809,97 @@ model KickstarterPackage { @@id([kickstarterId, packageId]) @@map("kickstarter_packages") } + +// ── Grouping & Notifications ── + +enum GroupingSource { + ALBUM + MANUAL + AUTO_TIME + AUTO_PATTERN + AUTO_REPLY + AUTO_ZIP + AUTO_CAPTION +} + +enum NotificationType { + HASH_MISMATCH + MISSING_PART + UPLOAD_FAILED + DOWNLOAD_FAILED + GROUPING_CONFLICT + INTEGRITY_AUDIT +} + +enum NotificationSeverity { + INFO + WARNING + ERROR +} + +model SystemNotification { + id String @id @default(cuid()) + type NotificationType + severity NotificationSeverity @default(INFO) + title String + message String + context Json? 
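+  // Structured details (e.g. { packageId, fileName }) consumed by the integrity audit and the repair endpoint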
+ isRead Boolean @default(false) + createdAt DateTime @default(now()) + + @@index([isRead, createdAt]) + @@index([type]) + @@map("system_notifications") +} + +model GroupingRule { + id String @id @default(cuid()) + sourceChannelId String + pattern String // Regex or keyword pattern learned from manual grouping + signalType GroupingSource // Which grouping signal this rule applies to + confidence Float @default(1.0) + createdAt DateTime @default(now()) + createdByGroupId String? // The manual group that spawned this rule + + sourceChannel TelegramChannel @relation(fields: [sourceChannelId], references: [id], onDelete: Cascade) + + @@index([sourceChannelId]) + @@map("grouping_rules") +} + +enum ManualUploadStatus { + PENDING + PROCESSING + COMPLETED + FAILED +} + +model ManualUpload { + id String @id @default(cuid()) + status ManualUploadStatus @default(PENDING) + groupName String? // Group name if multiple files + userId String + errorMessage String? + createdAt DateTime @default(now()) + completedAt DateTime? + + files ManualUploadFile[] + user User @relation(fields: [userId], references: [id]) + + @@index([status]) + @@map("manual_uploads") +} + +model ManualUploadFile { + id String @id @default(cuid()) + uploadId String + fileName String + filePath String // Path on shared volume + fileSize BigInt + packageId String? // Set after processing + + upload ManualUpload @relation(fields: [uploadId], references: [id], onDelete: Cascade) + + @@index([uploadId]) + @@map("manual_upload_files") +} diff --git a/src/app/(app)/kickstarters/_components/kickstarter-columns.tsx b/src/app/(app)/kickstarters/_components/kickstarter-columns.tsx index 6c59dae..eb673f8 100644 --- a/src/app/(app)/kickstarters/_components/kickstarter-columns.tsx +++ b/src/app/(app)/kickstarters/_components/kickstarter-columns.tsx @@ -1,7 +1,7 @@ "use client"; import { type ColumnDef } from "@tanstack/react-table"; -import { MoreHorizontal, Pencil, Trash2, ExternalLink } from "lucide-react"; +import { MoreHorizontal, Pencil, Trash2, ExternalLink, Link2, Send } from "lucide-react"; import { DataTableColumnHeader } from "@/components/shared/data-table-column-header"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; @@ -32,6 +32,8 @@ export interface KickstarterRow { interface KickstarterColumnsProps { onEdit: (kickstarter: KickstarterRow) => void; onDelete: (id: string) => void; + onLinkPackages: (kickstarter: KickstarterRow) => void; + onSendAll: (kickstarter: KickstarterRow) => void; } const deliveryConfig: Record = { @@ -63,6 +65,8 @@ const paymentConfig: Record = { export function getKickstarterColumns({ onEdit, onDelete, + onLinkPackages, + onSendAll, }: KickstarterColumnsProps): ColumnDef[] { return [ { @@ -170,6 +174,16 @@ export function getKickstarterColumns({ Edit + onLinkPackages(row.original)}> + + Link Packages + + {row.original._count.packages > 0 && ( + onSendAll(row.original)}> + + Send All ({row.original._count.packages}) + + )} onDelete(row.original.id)} diff --git a/src/app/(app)/kickstarters/_components/kickstarter-table.tsx b/src/app/(app)/kickstarters/_components/kickstarter-table.tsx index 7d0ceaf..efad3bc 100644 --- a/src/app/(app)/kickstarters/_components/kickstarter-table.tsx +++ b/src/app/(app)/kickstarters/_components/kickstarter-table.tsx @@ -7,7 +7,8 @@ import { toast } from "sonner"; import { useDataTable } from "@/hooks/use-data-table"; import { getKickstarterColumns, type KickstarterRow } from "./kickstarter-columns"; import { KickstarterModal } from 
"./kickstarter-modal"; -import { deleteKickstarter } from "../actions"; +import { PackageLinkerDialog } from "./package-linker-dialog"; +import { deleteKickstarter, sendAllKickstarterPackages } from "../actions"; import { DataTable } from "@/components/shared/data-table"; import { DataTablePagination } from "@/components/shared/data-table-pagination"; import { DataTableViewOptions } from "@/components/shared/data-table-view-options"; @@ -50,6 +51,7 @@ export function KickstarterTable({ const [modalOpen, setModalOpen] = useState(false); const [editKickstarter, setEditKickstarter] = useState(); const [deleteId, setDeleteId] = useState(null); + const [linkTarget, setLinkTarget] = useState(null); const [searchValue, setSearchValue] = useState(searchParams.get("search") ?? ""); @@ -88,6 +90,17 @@ export function KickstarterTable({ setModalOpen(true); }, onDelete: (id) => setDeleteId(id), + onLinkPackages: (kickstarter) => setLinkTarget(kickstarter), + onSendAll: (kickstarter) => { + startTransition(async () => { + const result = await sendAllKickstarterPackages(kickstarter.id); + if (result.success) { + toast.success(`Queued ${result.data!.queued} package(s) for delivery`); + } else { + toast.error(result.error); + } + }); + }, }); const { table } = useDataTable({ data, columns, pageCount }); @@ -188,6 +201,15 @@ export function KickstarterTable({ onConfirm={handleDelete} isLoading={isPending} /> + + {linkTarget && ( + !open && setLinkTarget(null)} + kickstarterId={linkTarget.id} + kickstarterName={linkTarget.name} + /> + )} ); } diff --git a/src/app/(app)/kickstarters/_components/package-linker-dialog.tsx b/src/app/(app)/kickstarters/_components/package-linker-dialog.tsx new file mode 100644 index 0000000..7704789 --- /dev/null +++ b/src/app/(app)/kickstarters/_components/package-linker-dialog.tsx @@ -0,0 +1,211 @@ +"use client"; + +import { useState, useTransition, useCallback, useEffect } from "react"; +import { Search, Package, X, Loader2 } from "lucide-react"; +import { toast } from "sonner"; +import { linkPackages } from "../actions"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Badge } from "@/components/ui/badge"; +import { Checkbox } from "@/components/ui/checkbox"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, +} from "@/components/ui/dialog"; +import { ScrollArea } from "@/components/ui/scroll-area"; + +interface PackageResult { + id: string; + fileName: string; + fileSize: string; + archiveType: string; + creator: string | null; + fileCount: number; +} + +interface PackageLinkerDialogProps { + open: boolean; + onOpenChange: (open: boolean) => void; + kickstarterId: string; + kickstarterName: string; +} + +function formatSize(bytes: string | number): string { + const b = Number(bytes); + if (b >= 1024 * 1024 * 1024) return `${(b / (1024 * 1024 * 1024)).toFixed(1)} GB`; + if (b >= 1024 * 1024) return `${(b / (1024 * 1024)).toFixed(0)} MB`; + return `${(b / 1024).toFixed(0)} KB`; +} + +export function PackageLinkerDialog({ + open, + onOpenChange, + kickstarterId, + kickstarterName, +}: PackageLinkerDialogProps) { + const [isPending, startTransition] = useTransition(); + const [searchQuery, setSearchQuery] = useState(""); + const [searchResults, setSearchResults] = useState([]); + const [isSearching, setIsSearching] = useState(false); + const [selectedIds, setSelectedIds] = useState>(new Set()); + + // Fetch currently linked packages when dialog opens + useEffect(() 
=> { + if (open) { + setSearchQuery(""); + setSearchResults([]); + fetch(`/api/packages/linked?kickstarterId=${kickstarterId}`) + .then((res) => res.json()) + .then((data) => { + if (data.packageIds) { + setSelectedIds(new Set(data.packageIds)); + } + }) + .catch(() => {}); + } + }, [open, kickstarterId]); + + const doSearch = useCallback(async (query: string) => { + if (query.length < 2) { + setSearchResults([]); + return; + } + setIsSearching(true); + try { + const res = await fetch(`/api/packages/search?q=${encodeURIComponent(query)}&limit=20`); + if (res.ok) { + const data = await res.json(); + setSearchResults(data.packages ?? []); + } + } catch { + // Ignore search errors + } finally { + setIsSearching(false); + } + }, []); + + // Debounced search + useEffect(() => { + const timer = setTimeout(() => doSearch(searchQuery), 300); + return () => clearTimeout(timer); + }, [searchQuery, doSearch]); + + function togglePackage(id: string) { + setSelectedIds((prev) => { + const next = new Set(prev); + if (next.has(id)) next.delete(id); + else next.add(id); + return next; + }); + } + + function handleSave() { + startTransition(async () => { + const result = await linkPackages(kickstarterId, Array.from(selectedIds)); + if (result.success) { + toast.success(`Linked ${selectedIds.size} package(s) to "${kickstarterName}"`); + onOpenChange(false); + } else { + toast.error(result.error); + } + }); + } + + return ( + + + + Link Packages + + Search and select STL packages to link to “{kickstarterName}”. + + + +
+        {selectedIds.size > 0 && (
+          <Badge variant="secondary" className="w-fit">
+            {selectedIds.size} package(s) selected
+          </Badge>
+        )}
+
+        <div className="relative">
+          <Search className="absolute left-3 top-1/2 h-4 w-4 -translate-y-1/2 text-muted-foreground" />
+          <Input
+            value={searchQuery}
+            onChange={(e) => setSearchQuery(e.target.value)}
+            className="pl-9"
+            autoFocus
+          />
+          {isSearching && (
+            <Loader2 className="absolute right-3 top-1/2 h-4 w-4 -translate-y-1/2 animate-spin text-muted-foreground" />
+          )}
+        </div>
+
+        <ScrollArea className="h-64 rounded-md border">
+          {searchResults.length === 0 && searchQuery.length >= 2 && !isSearching && (
+            <div className="py-8 text-center text-sm text-muted-foreground">
+              No packages found
+            </div>
+          )}
+          {searchQuery.length < 2 && (
+            <div className="py-8 text-center text-sm text-muted-foreground">
+              Type at least 2 characters to search
+            </div>
+          )}
+          {searchResults.map((pkg) => (
+            <button
+              key={pkg.id}
+              type="button"
+              onClick={() => togglePackage(pkg.id)}
+              className="flex w-full items-center gap-3 p-2 text-left hover:bg-muted"
+            >
+              <Checkbox checked={selectedIds.has(pkg.id)} />
+              <Package className="h-4 w-4 shrink-0 text-muted-foreground" />
+              <div className="min-w-0 flex-1">
+                <div className="truncate text-sm">{pkg.fileName}</div>
+                <div className="text-xs text-muted-foreground">
+                  {formatSize(pkg.fileSize)} · {pkg.fileCount} file(s)
+                  {pkg.creator ? ` · ${pkg.creator}` : ""}
+                </div>
+              </div>
+            </button>
+          ))}
+        </ScrollArea>
+
+        <DialogFooter>
+          <Button variant="outline" onClick={() => onOpenChange(false)}>
+            Cancel
+          </Button>
+          <Button onClick={handleSave} disabled={isPending}>
+            {isPending && <Loader2 className="mr-2 h-4 w-4 animate-spin" />}
+            Save
+          </Button>
+        </DialogFooter>
+      </DialogContent>
+    </Dialog>
+ ); +} diff --git a/src/app/(app)/kickstarters/actions.ts b/src/app/(app)/kickstarters/actions.ts index 1554341..8762ea6 100644 --- a/src/app/(app)/kickstarters/actions.ts +++ b/src/app/(app)/kickstarters/actions.ts @@ -146,3 +146,83 @@ export async function linkPackages( return { success: false, error: "Failed to link packages" }; } } + +export async function sendAllKickstarterPackages( + kickstarterId: string +): Promise> { + const session = await auth(); + if (!session?.user?.id) return { success: false, error: "Unauthorized" }; + + try { + const telegramLink = await prisma.telegramLink.findUnique({ + where: { userId: session.user.id }, + }); + + if (!telegramLink) { + return { success: false, error: "No linked Telegram account. Link one in Settings." }; + } + + const kickstarter = await prisma.kickstarter.findFirst({ + where: { id: kickstarterId, userId: session.user.id }, + select: { + packages: { + select: { + package: { + select: { id: true, destChannelId: true, destMessageId: true, fileName: true }, + }, + }, + }, + }, + }); + + if (!kickstarter) { + return { success: false, error: "Kickstarter not found" }; + } + + const sendablePackages = kickstarter.packages + .map((lnk) => lnk.package) + .filter((p) => p.destChannelId && p.destMessageId); + + if (sendablePackages.length === 0) { + return { success: false, error: "No linked packages are available for sending" }; + } + + let queued = 0; + for (const pkg of sendablePackages) { + const existing = await prisma.botSendRequest.findFirst({ + where: { + packageId: pkg.id, + telegramLinkId: telegramLink.id, + status: { in: ["PENDING", "SENDING"] }, + }, + }); + + if (!existing) { + const sendRequest = await prisma.botSendRequest.create({ + data: { + packageId: pkg.id, + telegramLinkId: telegramLink.id, + requestedByUserId: session.user.id, + status: "PENDING", + }, + }); + + try { + await prisma.$queryRawUnsafe( + `SELECT pg_notify('bot_send', $1)`, + sendRequest.id + ); + } catch { + // Best-effort + } + + queued++; + } + } + + revalidatePath(REVALIDATE_PATH); + return { success: true, data: { queued } }; + } catch { + return { success: false, error: "Failed to send packages" }; + } +} diff --git a/src/app/(app)/stls/_components/package-columns.tsx b/src/app/(app)/stls/_components/package-columns.tsx index f51c45d..c0385fa 100644 --- a/src/app/(app)/stls/_components/package-columns.tsx +++ b/src/app/(app)/stls/_components/package-columns.tsx @@ -1,7 +1,7 @@ "use client"; import { type ColumnDef } from "@tanstack/react-table"; -import { FileArchive, Eye, ChevronRight, Layers, Ungroup, Send, ImagePlus } from "lucide-react"; +import { FileArchive, Eye, ChevronRight, Layers, Ungroup, Send, ImagePlus, GitMerge } from "lucide-react"; import { DataTableColumnHeader } from "@/components/shared/data-table-column-header"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; @@ -69,6 +69,9 @@ interface PackageColumnsProps { onGroupPreviewUpload: (groupId: string) => void; selectedPackages: Set; onToggleSelect: (packageId: string) => void; + mergeSourceId: string | null; + onStartMerge: (groupId: string) => void; + onCompleteMerge: (targetGroupId: string) => void; } export function formatBytes(bytesStr: string): string { @@ -148,6 +151,9 @@ export function getPackageColumns({ onGroupPreviewUpload, selectedPackages, onToggleSelect, + mergeSourceId, + onStartMerge, + onCompleteMerge, }: PackageColumnsProps): ColumnDef[] { return [ { @@ -392,6 +398,8 @@ export function getPackageColumns({ cell: ({ row }) => 
{ const data = row.original; if (isGroupRow(data)) { + const isMergeSource = mergeSourceId === data.id; + const canMergeHere = mergeSourceId !== null && mergeSourceId !== data.id; return (
+ + {canMergeHere && ( + + )} {selectedPackages.size >= 2 && ( +
+ ))} + + )} + + {files.length > 1 && ( +
+ + setGroupName(e.target.value)} + placeholder="Auto-generated from filenames" + className="mt-1" + /> +
+ )} + + )} + + {(status === "uploading" || status === "processing") && ( +
+ +
+

+ {status === "uploading" ? "Uploading files..." : "Processing & uploading to Telegram..."} +

+

+ {status === "uploading" + ? "Sending files to server" + : "Hashing, extracting metadata, uploading to destination channel"} +

+
+
+ )} + + {status === "done" && ( +
+ +
+

Upload complete!

+

Files have been indexed and uploaded to Telegram.

+
+
+ )} + + {status === "error" && ( +
+ +
+

Upload failed

+

{error}

+
+
+ )} + + + {status === "idle" && ( + <> + + + + )} + {(status === "done" || status === "error") && ( + + )} + + + + ); +} diff --git a/src/app/(app)/stls/actions.ts b/src/app/(app)/stls/actions.ts index ade3b0e..d58a074 100644 --- a/src/app/(app)/stls/actions.ts +++ b/src/app/(app)/stls/actions.ts @@ -10,6 +10,7 @@ import { createManualGroup, removePackageFromGroup, dissolveGroup, + mergeGroups, } from "@/lib/telegram/queries"; const ALLOWED_IMAGE_TYPES = [ @@ -185,6 +186,62 @@ export async function setPreviewFromExtract( } } +export async function repairPackageAction( + packageId: string +): Promise { + const session = await auth(); + if (!session?.user?.id) return { success: false, error: "Unauthorized" }; + + try { + const pkg = await prisma.package.findUnique({ + where: { id: packageId }, + select: { + id: true, + fileName: true, + sourceChannelId: true, + sourceMessageId: true, + destChannelId: true, + destMessageId: true, + }, + }); + + if (!pkg) return { success: false, error: "Package not found" }; + + // Clear the destination info so the worker re-processes it + await prisma.package.update({ + where: { id: packageId }, + data: { + destMessageId: null, + destMessageIds: [], + destChannelId: null, + }, + }); + + // Reset the channel watermark to before this message so worker picks it up + await prisma.accountChannelMap.updateMany({ + where: { + channelId: pkg.sourceChannelId, + lastProcessedMessageId: { gte: pkg.sourceMessageId }, + }, + data: { lastProcessedMessageId: pkg.sourceMessageId - BigInt(1) }, + }); + + // Mark related notifications as read + await prisma.systemNotification.updateMany({ + where: { + context: { path: ["packageId"], equals: packageId }, + isRead: false, + }, + data: { isRead: true }, + }); + + revalidatePath("/stls"); + return { success: true, data: undefined }; + } catch { + return { success: false, error: "Failed to schedule repair" }; + } +} + export async function retrySkippedPackageAction( id: string ): Promise { @@ -435,6 +492,26 @@ export async function updateGroupPreviewAction( } } +export async function mergeGroupsAction( + targetGroupId: string, + sourceGroupId: string +): Promise { + const session = await auth(); + if (!session?.user?.id) return { success: false, error: "Unauthorized" }; + + if (targetGroupId === sourceGroupId) { + return { success: false, error: "Cannot merge a group with itself" }; + } + + try { + await mergeGroups(targetGroupId, sourceGroupId); + revalidatePath("/stls"); + return { success: true, data: undefined }; + } catch { + return { success: false, error: "Failed to merge groups" }; + } +} + export async function sendAllInGroupAction( groupId: string ): Promise { diff --git a/src/app/(app)/stls/page.tsx b/src/app/(app)/stls/page.tsx index 87cb02f..69962f4 100644 --- a/src/app/(app)/stls/page.tsx +++ b/src/app/(app)/stls/page.tsx @@ -1,6 +1,6 @@ import { auth } from "@/lib/auth"; import { redirect } from "next/navigation"; -import { listDisplayItems, searchPackages, getIngestionStatus, getAllPackageTags, listSkippedPackages, countSkippedPackages } from "@/lib/telegram/queries"; +import { listDisplayItems, searchPackages, getIngestionStatus, getAllPackageTags, listSkippedPackages, countSkippedPackages, listUngroupedPackages, countUngroupedPackages } from "@/lib/telegram/queries"; import { StlTable } from "./_components/stl-table"; import type { DisplayItem, PackageListItem } from "@/lib/telegram/types"; @@ -24,7 +24,7 @@ export default async function StlFilesPage({ searchParams }: Props) { const tab = (params.tab as string) 
?? "packages"; // Fetch packages, ingestion status, tags, and skipped count in parallel - const [result, ingestionStatus, availableTags, skippedCount] = await Promise.all([ + const [result, ingestionStatus, availableTags, skippedCount, ungroupedCount] = await Promise.all([ search ? searchPackages({ query: search, @@ -43,6 +43,7 @@ export default async function StlFilesPage({ searchParams }: Props) { getIngestionStatus(), getAllPackageTags(), countSkippedPackages(), + countUngroupedPackages(), ]); // For search results, wrap as DisplayItem[]; for non-search, already DisplayItem[] @@ -55,6 +56,11 @@ export default async function StlFilesPage({ searchParams }: Props) { ? await listSkippedPackages({ page, limit: perPage }) : null; + // Fetch ungrouped packages only if on that tab + const ungroupedResult = tab === "ungrouped" + ? await listUngroupedPackages({ page, limit: perPage }) + : null; + return ( ); } diff --git a/src/app/(app)/telegram/actions.ts b/src/app/(app)/telegram/actions.ts index 6d100e5..2096dca 100644 --- a/src/app/(app)/telegram/actions.ts +++ b/src/app/(app)/telegram/actions.ts @@ -291,10 +291,25 @@ export async function setChannelCategory( if (!admin.success) return admin; try { + const existing = await prisma.telegramChannel.findUnique({ + where: { id }, + select: { category: true }, + }); + if (!existing) return { success: false, error: "Channel not found" }; + + const oldCategory = existing.category; + const newCategory = category?.trim() || null; + await prisma.telegramChannel.update({ where: { id }, - data: { category: category?.trim() || null }, + data: { category: newCategory }, }); + + // Retroactively re-tag packages from this channel when category changes + if (oldCategory !== newCategory && newCategory) { + await retagChannelPackages(id, oldCategory, newCategory); + } + revalidatePath("/telegram"); return { success: true, data: undefined }; } catch { @@ -302,6 +317,50 @@ export async function setChannelCategory( } } +export async function retagChannelPackages( + channelId: string, + oldCategory: string | null, + newCategory: string +): Promise> { + const session = await auth(); + if (!session?.user?.id) return { success: false, error: "Unauthorized" }; + + try { + // Find packages from this channel that have the old category tag (or no category tag) + const packages = await prisma.package.findMany({ + where: { sourceChannelId: channelId }, + select: { id: true, tags: true }, + }); + + let updated = 0; + for (const pkg of packages) { + const tags = [...pkg.tags]; + // Remove old category tag if present + if (oldCategory) { + const idx = tags.indexOf(oldCategory); + if (idx !== -1) tags.splice(idx, 1); + } + // Add new category tag if not already present + if (!tags.includes(newCategory)) { + tags.push(newCategory); + } + // Only update if tags actually changed + if (JSON.stringify(tags) !== JSON.stringify(pkg.tags)) { + await prisma.package.update({ + where: { id: pkg.id }, + data: { tags }, + }); + updated++; + } + } + + revalidatePath("/stls"); + return { success: true, data: { updated } }; + } catch { + return { success: false, error: "Failed to re-tag packages" }; + } +} + export async function setChannelType( id: string, type: "SOURCE" | "DESTINATION" diff --git a/src/app/api/notifications/read/route.ts b/src/app/api/notifications/read/route.ts new file mode 100644 index 0000000..a45459c --- /dev/null +++ b/src/app/api/notifications/read/route.ts @@ -0,0 +1,33 @@ +import { NextResponse } from "next/server"; +import { auth } from "@/lib/auth"; +import { + 
markNotificationRead, + markAllNotificationsRead, + dismissNotification, + clearAllNotifications, +} from "@/data/notification.queries"; + +export const dynamic = "force-dynamic"; + +export async function POST(request: Request) { + const session = await auth(); + if (!session?.user?.id) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + const body = await request.json().catch(() => ({})); + const id = body.id as string | undefined; + const action = (body.action as string) ?? "read"; + + if (action === "dismiss" && id) { + await dismissNotification(id); + } else if (action === "clear") { + await clearAllNotifications(); + } else if (id) { + await markNotificationRead(id); + } else { + await markAllNotificationsRead(); + } + + return NextResponse.json({ success: true }); +} diff --git a/src/app/api/notifications/repair/route.ts b/src/app/api/notifications/repair/route.ts new file mode 100644 index 0000000..49e378e --- /dev/null +++ b/src/app/api/notifications/repair/route.ts @@ -0,0 +1,43 @@ +import { NextResponse } from "next/server"; +import { auth } from "@/lib/auth"; +import { prisma } from "@/lib/prisma"; + +export const dynamic = "force-dynamic"; + +export async function POST(request: Request) { + const session = await auth(); + if (!session?.user?.id) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + const body = await request.json().catch(() => ({})); + const notificationId = body.notificationId as string; + if (!notificationId) { + return NextResponse.json({ error: "notificationId required" }, { status: 400 }); + } + + const notification = await prisma.systemNotification.findUnique({ + where: { id: notificationId }, + }); + + if (!notification) { + return NextResponse.json({ error: "Notification not found" }, { status: 404 }); + } + + const context = notification.context as Record | null; + const packageId = context?.packageId as string | undefined; + + if (!packageId) { + return NextResponse.json({ error: "Notification has no associated package" }, { status: 400 }); + } + + // Import and call the repair action + const { repairPackageAction } = await import("@/app/(app)/stls/actions"); + const result = await repairPackageAction(packageId); + + if (!result.success) { + return NextResponse.json({ error: result.error }, { status: 500 }); + } + + return NextResponse.json({ success: true }); +} diff --git a/src/app/api/notifications/route.ts b/src/app/api/notifications/route.ts new file mode 100644 index 0000000..970bc74 --- /dev/null +++ b/src/app/api/notifications/route.ts @@ -0,0 +1,27 @@ +import { NextResponse } from "next/server"; +import { auth } from "@/lib/auth"; +import { + getRecentNotifications, + getUnreadNotificationCount, +} from "@/data/notification.queries"; + +export const dynamic = "force-dynamic"; + +export async function GET() { + const session = await auth(); + if (!session?.user?.id) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + const [notifications, unreadCount] = await Promise.all([ + getRecentNotifications(30), + getUnreadNotificationCount(), + ]); + + const serialized = notifications.map((n) => ({ + ...n, + createdAt: n.createdAt.toISOString(), + })); + + return NextResponse.json({ notifications: serialized, unreadCount }); +} diff --git a/src/app/api/packages/linked/route.ts b/src/app/api/packages/linked/route.ts new file mode 100644 index 0000000..12eeb40 --- /dev/null +++ b/src/app/api/packages/linked/route.ts @@ -0,0 +1,21 @@ +import { NextResponse } from 
"next/server"; +import { auth } from "@/lib/auth"; +import { getLinkedPackageIds } from "@/data/kickstarter.queries"; + +export const dynamic = "force-dynamic"; + +export async function GET(request: Request) { + const session = await auth(); + if (!session?.user?.id) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + const { searchParams } = new URL(request.url); + const kickstarterId = searchParams.get("kickstarterId"); + if (!kickstarterId) { + return NextResponse.json({ error: "kickstarterId required" }, { status: 400 }); + } + + const packageIds = await getLinkedPackageIds(kickstarterId); + return NextResponse.json({ packageIds }); +} diff --git a/src/app/api/packages/search/route.ts b/src/app/api/packages/search/route.ts new file mode 100644 index 0000000..2cad6af --- /dev/null +++ b/src/app/api/packages/search/route.ts @@ -0,0 +1,26 @@ +import { NextResponse } from "next/server"; +import { auth } from "@/lib/auth"; +import { searchPackagesForLinking } from "@/data/kickstarter.queries"; + +export const dynamic = "force-dynamic"; + +export async function GET(request: Request) { + const session = await auth(); + if (!session?.user?.id) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + const { searchParams } = new URL(request.url); + const query = searchParams.get("q") ?? ""; + const limit = Math.min(Number(searchParams.get("limit") ?? "20"), 50); + + const packages = await searchPackagesForLinking(query, limit); + + // Serialize BigInt for JSON + const serialized = packages.map((p) => ({ + ...p, + fileSize: p.fileSize.toString(), + })); + + return NextResponse.json({ packages: serialized }); +} diff --git a/src/app/api/uploads/[id]/route.ts b/src/app/api/uploads/[id]/route.ts new file mode 100644 index 0000000..47e712a --- /dev/null +++ b/src/app/api/uploads/[id]/route.ts @@ -0,0 +1,43 @@ +import { NextResponse } from "next/server"; +import { auth } from "@/lib/auth"; +import { prisma } from "@/lib/prisma"; + +export const dynamic = "force-dynamic"; + +export async function GET( + _request: Request, + { params }: { params: Promise<{ id: string }> } +) { + const session = await auth(); + if (!session?.user?.id) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + const { id } = await params; + + const upload = await prisma.manualUpload.findUnique({ + where: { id }, + include: { + files: { + select: { id: true, fileName: true, fileSize: true, packageId: true }, + }, + }, + }); + + if (!upload || upload.userId !== session.user.id) { + return NextResponse.json({ error: "Not found" }, { status: 404 }); + } + + return NextResponse.json({ + id: upload.id, + status: upload.status, + groupName: upload.groupName, + errorMessage: upload.errorMessage, + files: upload.files.map((f) => ({ + ...f, + fileSize: f.fileSize.toString(), + })), + createdAt: upload.createdAt.toISOString(), + completedAt: upload.completedAt?.toISOString() ?? null, + }); +} diff --git a/src/app/api/uploads/route.ts b/src/app/api/uploads/route.ts new file mode 100644 index 0000000..225abee --- /dev/null +++ b/src/app/api/uploads/route.ts @@ -0,0 +1,83 @@ +import { NextResponse } from "next/server"; +import { auth } from "@/lib/auth"; +import { prisma } from "@/lib/prisma"; +import { writeFile, mkdir } from "fs/promises"; +import path from "path"; + +export const dynamic = "force-dynamic"; + +const UPLOAD_DIR = process.env.UPLOAD_DIR ?? 
"/data/uploads"; +const MAX_FILE_SIZE = 4 * 1024 * 1024 * 1024; // 4GB per file + +export async function POST(request: Request) { + const session = await auth(); + if (!session?.user?.id) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + try { + const formData = await request.formData(); + const files = formData.getAll("files") as File[]; + const groupName = formData.get("groupName") as string | null; + + if (!files.length) { + return NextResponse.json({ error: "No files provided" }, { status: 400 }); + } + + // Create the upload record + const upload = await prisma.manualUpload.create({ + data: { + userId: session.user.id, + groupName: groupName || (files.length > 1 ? files[0].name.replace(/\.[^.]+$/, "") : null), + status: "PENDING", + }, + }); + + // Save files to shared volume + const uploadDir = path.join(UPLOAD_DIR, upload.id); + await mkdir(uploadDir, { recursive: true }); + + for (const file of files) { + if (file.size > MAX_FILE_SIZE) { + return NextResponse.json( + { error: `File "${file.name}" exceeds 4GB limit` }, + { status: 400 } + ); + } + + const filePath = path.join(uploadDir, file.name); + const buffer = Buffer.from(await file.arrayBuffer()); + await writeFile(filePath, buffer); + + await prisma.manualUploadFile.create({ + data: { + uploadId: upload.id, + fileName: file.name, + filePath, + fileSize: BigInt(file.size), + }, + }); + } + + // Notify worker + try { + await prisma.$queryRawUnsafe( + `SELECT pg_notify('manual_upload', $1)`, + upload.id + ); + } catch { + // Best-effort + } + + return NextResponse.json({ + uploadId: upload.id, + fileCount: files.length, + status: "PENDING", + }); + } catch (err) { + return NextResponse.json( + { error: err instanceof Error ? err.message : "Upload failed" }, + { status: 500 } + ); + } +} diff --git a/src/components/layout/header.tsx b/src/components/layout/header.tsx index 873abd2..15f99bc 100644 --- a/src/components/layout/header.tsx +++ b/src/components/layout/header.tsx @@ -6,6 +6,7 @@ import { Button } from "@/components/ui/button"; import { Sheet, SheetContent, SheetTrigger } from "@/components/ui/sheet"; import { UserMenu } from "./user-menu"; import { MobileSidebar } from "./mobile-sidebar"; +import { NotificationBell } from "./notification-bell"; const routeTitles: Record = { "/dashboard": "Dashboard", @@ -38,7 +39,8 @@ export function Header() {

          <h1 className="text-lg font-semibold">{title}</h1>
-         <UserMenu />
+         <NotificationBell />
+         <UserMenu />
diff --git a/src/components/layout/notification-bell.tsx b/src/components/layout/notification-bell.tsx new file mode 100644 index 0000000..bf99fc1 --- /dev/null +++ b/src/components/layout/notification-bell.tsx @@ -0,0 +1,268 @@ +"use client"; + +import { useState, useEffect, useCallback } from "react"; +import { Bell, AlertTriangle, AlertCircle, Info, CheckCircle2, X, Trash2 } from "lucide-react"; +import { Button } from "@/components/ui/button"; +import { Badge } from "@/components/ui/badge"; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from "@/components/ui/popover"; +import { ScrollArea } from "@/components/ui/scroll-area"; +import { toast } from "sonner"; + +interface Notification { + id: string; + type: string; + severity: "INFO" | "WARNING" | "ERROR"; + title: string; + message: string; + isRead: boolean; + createdAt: string; +} + +const severityIcon = { + INFO: Info, + WARNING: AlertTriangle, + ERROR: AlertCircle, +}; + +const severityColor = { + INFO: "text-blue-400", + WARNING: "text-orange-400", + ERROR: "text-red-400", +}; + +export function NotificationBell() { + const [notifications, setNotifications] = useState([]); + const [unreadCount, setUnreadCount] = useState(0); + const [open, setOpen] = useState(false); + + const fetchNotifications = useCallback(async () => { + try { + const res = await fetch("/api/notifications"); + if (res.ok) { + const data = await res.json(); + setNotifications(data.notifications ?? []); + setUnreadCount(data.unreadCount ?? 0); + } + } catch { + // Ignore fetch errors + } + }, []); + + // Poll every 30 seconds + on mount + useEffect(() => { + fetchNotifications(); + const interval = setInterval(fetchNotifications, 30_000); + return () => clearInterval(interval); + }, [fetchNotifications]); + + // Refresh when popover opens + useEffect(() => { + if (open) fetchNotifications(); + }, [open, fetchNotifications]); + + async function handleMarkAllRead() { + try { + await fetch("/api/notifications/read", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({}), + }); + setNotifications((prev) => prev.map((n) => ({ ...n, isRead: true }))); + setUnreadCount(0); + } catch { + // Ignore + } + } + + async function handleMarkRead(id: string) { + try { + await fetch("/api/notifications/read", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ id }), + }); + setNotifications((prev) => + prev.map((n) => (n.id === id ? 
{ ...n, isRead: true } : n)) + ); + setUnreadCount((c) => Math.max(0, c - 1)); + } catch { + // Ignore + } + } + + async function handleDismiss(id: string) { + try { + await fetch("/api/notifications/read", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ id, action: "dismiss" }), + }); + setNotifications((prev) => prev.filter((n) => n.id !== id)); + setUnreadCount((c) => Math.max(0, c - 1)); + } catch { + // Ignore + } + } + + async function handleClearAll() { + try { + await fetch("/api/notifications/read", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ action: "clear" }), + }); + setNotifications([]); + setUnreadCount(0); + } catch { + // Ignore + } + } + + async function handleRepair(notificationId: string) { + try { + const res = await fetch("/api/notifications/repair", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ notificationId }), + }); + if (res.ok) { + toast.success("Repair scheduled — package will be re-processed on next cycle"); + fetchNotifications(); + } + } catch { + // Ignore + } + } + + function formatTime(iso: string): string { + const d = new Date(iso); + const now = new Date(); + const diffMs = now.getTime() - d.getTime(); + const diffMin = Math.floor(diffMs / 60_000); + if (diffMin < 1) return "just now"; + if (diffMin < 60) return `${diffMin}m ago`; + const diffHr = Math.floor(diffMin / 60); + if (diffHr < 24) return `${diffHr}h ago`; + const diffDay = Math.floor(diffHr / 24); + return `${diffDay}d ago`; + } + + return ( + + + + + +
+      <PopoverContent align="end" className="w-96 p-0">
+        <div className="flex items-center justify-between border-b p-3">
+          <div className="text-sm font-semibold">Notifications</div>
+          <div className="flex items-center gap-1">
+            {unreadCount > 0 && (
+              <Button variant="ghost" size="sm" onClick={handleMarkAllRead}>
+                Mark all read
+              </Button>
+            )}
+            {notifications.length > 0 && (
+              <Button variant="ghost" size="icon" onClick={handleClearAll}>
+                <Trash2 className="h-4 w-4" />
+              </Button>
+            )}
+          </div>
+        </div>
+        <ScrollArea className="max-h-96">
+          {notifications.length === 0 ? (
+            <div className="flex flex-col items-center gap-2 py-10 text-muted-foreground">
+              <CheckCircle2 className="h-6 w-6" />
+              <div className="text-sm">All clear!</div>
+            </div>
+          ) : (
+            <div className="divide-y">
+              {notifications.map((n) => {
+                const Icon = severityIcon[n.severity] ?? Info;
+                const color = severityColor[n.severity] ?? "text-muted-foreground";
+                return (
+                  <div
+                    key={n.id}
+                    role="button"
+                    tabIndex={0}
+                    className={`flex gap-3 p-3 ${n.isRead ? "opacity-70" : ""}`}
+                    onClick={() => !n.isRead && handleMarkRead(n.id)}
+                    onKeyDown={(e) => {
+                      if (e.key === "Enter" || e.key === " ") {
+                        if (!n.isRead) handleMarkRead(n.id);
+                      }
+                    }}
+                  >
+                    <Icon className={`mt-0.5 h-4 w-4 shrink-0 ${color}`} />
+                    <div className="min-w-0 flex-1">
+                      <div className="flex items-start justify-between gap-2">
+                        <div className="text-sm font-medium">
+                          {n.title}
+                        </div>
+                        {!n.isRead && (
+                          <span className="mt-1 h-2 w-2 shrink-0 rounded-full bg-blue-400" />
+                        )}
+                        <Button
+                          variant="ghost"
+                          size="icon"
+                          className="h-5 w-5"
+                          onClick={(e) => {
+                            e.stopPropagation();
+                            handleDismiss(n.id);
+                          }}
+                        >
+                          <X className="h-3 w-3" />
+                        </Button>
+                      </div>
+                      <div className="text-xs text-muted-foreground">
+                        {n.message}
+                      </div>
+                      <div className="mt-1 flex items-center justify-between">
+                        <div className="text-xs text-muted-foreground">
+                          {formatTime(n.createdAt)}
+                        </div>
+                        {(n.type === "MISSING_PART" || n.type === "HASH_MISMATCH") && (
+                          <Button
+                            variant="outline"
+                            size="sm"
+                            onClick={(e) => {
+                              e.stopPropagation();
+                              handleRepair(n.id);
+                            }}
+                          >
+                            Repair
+                          </Button>
+                        )}
+                      </div>
+                    </div>
+                  </div>
+                );
+              })}
+            </div>
+          )}
+        </ScrollArea>
+      </PopoverContent>
+    </Popover>
+ ); +} diff --git a/src/data/kickstarter.queries.ts b/src/data/kickstarter.queries.ts index 5d954ce..c2db6f2 100644 --- a/src/data/kickstarter.queries.ts +++ b/src/data/kickstarter.queries.ts @@ -95,3 +95,34 @@ export async function getKickstarterHosts() { include: { _count: { select: { kickstarters: true } } }, }); } + +export async function searchPackagesForLinking(query: string, limit = 20) { + if (!query || query.length < 2) return []; + + return prisma.package.findMany({ + where: { + OR: [ + { fileName: { contains: query, mode: "insensitive" } }, + { creator: { contains: query, mode: "insensitive" } }, + ], + }, + orderBy: { indexedAt: "desc" }, + take: limit, + select: { + id: true, + fileName: true, + fileSize: true, + archiveType: true, + creator: true, + fileCount: true, + }, + }); +} + +export async function getLinkedPackageIds(kickstarterId: string): Promise { + const links = await prisma.kickstarterPackage.findMany({ + where: { kickstarterId }, + select: { packageId: true }, + }); + return links.map((l) => l.packageId); +} diff --git a/src/data/notification.queries.ts b/src/data/notification.queries.ts new file mode 100644 index 0000000..5d67fb1 --- /dev/null +++ b/src/data/notification.queries.ts @@ -0,0 +1,45 @@ +import { prisma } from "@/lib/prisma"; + +export async function getUnreadNotificationCount(): Promise { + return prisma.systemNotification.count({ + where: { isRead: false }, + }); +} + +export async function getRecentNotifications(limit = 20) { + return prisma.systemNotification.findMany({ + orderBy: { createdAt: "desc" }, + take: limit, + select: { + id: true, + type: true, + severity: true, + title: true, + message: true, + isRead: true, + createdAt: true, + }, + }); +} + +export async function markNotificationRead(id: string) { + return prisma.systemNotification.update({ + where: { id }, + data: { isRead: true }, + }); +} + +export async function markAllNotificationsRead() { + return prisma.systemNotification.updateMany({ + where: { isRead: false }, + data: { isRead: true }, + }); +} + +export async function dismissNotification(id: string) { + return prisma.systemNotification.delete({ where: { id } }); +} + +export async function clearAllNotifications() { + return prisma.systemNotification.deleteMany({}); +} diff --git a/src/lib/telegram/queries.ts b/src/lib/telegram/queries.ts index 80745c9..6ece96a 100644 --- a/src/lib/telegram/queries.ts +++ b/src/lib/telegram/queries.ts @@ -340,6 +340,30 @@ export async function listPackageFiles(options: { }; } +async function fullTextSearchPackageIds(query: string, limit: number): Promise { + // Convert user query to tsquery — handle multi-word by joining with & + const tsQuery = query + .trim() + .split(/\s+/) + .filter((w) => w.length >= 2) + .map((w) => w.replace(/[^a-zA-Z0-9]/g, "")) + .filter(Boolean) + .join(" & "); + + if (!tsQuery) return []; + + const results = await prisma.$queryRawUnsafe<{ id: string }[]>( + `SELECT id FROM packages + WHERE "searchVector" @@ to_tsquery('english', $1) + ORDER BY ts_rank("searchVector", to_tsquery('english', $1)) DESC + LIMIT $2`, + tsQuery, + limit + ); + + return results.map((r) => r.id); +} + export async function searchPackages(options: { query: string; page: number; @@ -366,14 +390,26 @@ export async function searchPackages(options: { ); const fileMatchedIds = fileMatches.map((f) => f.packageId); + // Try full-text search first (better ranking, handles word stemming) + let ftsPackageNameIds: string[] = []; + if (options.searchIn === "both" && q.length >= 3) { + try { + 
ftsPackageNameIds = await fullTextSearchPackageIds(q, 200); + } catch { + // FTS failed — fall back to ILIKE below + } + } + const packageNameIds = options.searchIn === "both" - ? ( - await prisma.package.findMany({ - where: { fileName: { contains: q, mode: "insensitive" } }, - select: { id: true }, - }) - ).map((p) => p.id) + ? ftsPackageNameIds.length > 0 + ? ftsPackageNameIds + : ( + await prisma.package.findMany({ + where: { fileName: { contains: q, mode: "insensitive" } }, + select: { id: true }, + }) + ).map((p) => p.id) : []; // Also match by group name @@ -571,6 +607,72 @@ export async function countSkippedPackages(): Promise { return prisma.skippedPackage.count(); } +export async function listUngroupedPackages(options: { + page: number; + limit: number; +}) { + const { page, limit } = options; + const skip = (page - 1) * limit; + + const where = { packageGroupId: null, destMessageId: { not: null } }; + + const [items, total] = await Promise.all([ + prisma.package.findMany({ + where, + orderBy: { indexedAt: "desc" }, + skip, + take: limit, + select: { + id: true, + fileName: true, + fileSize: true, + archiveType: true, + creator: true, + fileCount: true, + isMultipart: true, + partCount: true, + tags: true, + indexedAt: true, + previewData: true, + sourceChannel: { select: { id: true, title: true } }, + }, + }), + prisma.package.count({ where }), + ]); + + return { + items: items.map((p) => ({ + id: p.id, + fileName: p.fileName, + fileSize: p.fileSize.toString(), + contentHash: "", + archiveType: p.archiveType, + creator: p.creator, + fileCount: p.fileCount, + isMultipart: p.isMultipart, + partCount: p.partCount, + tags: p.tags, + indexedAt: p.indexedAt.toISOString(), + hasPreview: !!p.previewData, + sourceChannel: p.sourceChannel, + matchedFileCount: 0, + matchedByContent: false, + })), + pagination: { + total, + totalPages: Math.ceil(total / limit), + page, + limit, + }, + }; +} + +export async function countUngroupedPackages(): Promise { + return prisma.package.count({ + where: { packageGroupId: null, destMessageId: { not: null } }, + }); +} + export async function getPackageGroup(groupId: string) { return prisma.packageGroup.findUnique({ where: { id: groupId }, @@ -630,6 +732,53 @@ export async function createManualGroup(name: string, packageIds: string[]) { data: { packageGroupId: group.id }, }); + // Learn a grouping rule from the manual override + try { + const linkedPkgs = await prisma.package.findMany({ + where: { id: { in: packageIds } }, + select: { fileName: true, creator: true }, + }); + + // Extract the common filename pattern + const fileNames = linkedPkgs.map((p) => p.fileName); + let pattern = ""; + if (fileNames.length > 1) { + // Find longest common prefix + let prefix = fileNames[0]; + for (let i = 1; i < fileNames.length; i++) { + while (!fileNames[i].startsWith(prefix)) { + prefix = prefix.slice(0, -1); + if (!prefix) break; + } + } + const trimmed = prefix.replace(/[\s\-_.(]+$/, ""); + if (trimmed.length >= 4) { + pattern = trimmed; + } + } + + // Fall back to shared creator + if (!pattern) { + const creators = [...new Set(linkedPkgs.map((p) => p.creator).filter(Boolean))]; + if (creators.length === 1 && creators[0]) { + pattern = creators[0]; + } + } + + if (pattern) { + await prisma.groupingRule.create({ + data: { + sourceChannelId: firstPkg.sourceChannelId, + pattern, + signalType: "MANUAL", + createdByGroupId: group.id, + }, + }); + } + } catch { + // Best-effort — don't fail the group creation if rule learning fails + } + // Clean up empty groups left 
behind await prisma.packageGroup.deleteMany({ where: { packages: { none: {} }, id: { not: group.id } }, @@ -670,3 +819,13 @@ export async function dissolveGroup(groupId: string) { }); await prisma.packageGroup.delete({ where: { id: groupId } }); } + +export async function mergeGroups(targetGroupId: string, sourceGroupId: string) { + // Move all packages from source group to target group + await prisma.package.updateMany({ + where: { packageGroupId: sourceGroupId }, + data: { packageGroupId: targetGroupId }, + }); + // Delete the now-empty source group + await prisma.packageGroup.delete({ where: { id: sourceGroupId } }); +} diff --git a/worker/src/archive/multipart.ts b/worker/src/archive/multipart.ts index 80ecaec..d37d72d 100644 --- a/worker/src/archive/multipart.ts +++ b/worker/src/archive/multipart.ts @@ -11,6 +11,8 @@ export interface TelegramMessage { fileSize: bigint; date: Date; mediaAlbumId?: string; + replyToMessageId?: bigint; // NEW + caption?: string; // NEW } export interface ArchiveSet { diff --git a/worker/src/archive/split.ts b/worker/src/archive/split.ts index f427da9..c065bfe 100644 --- a/worker/src/archive/split.ts +++ b/worker/src/archive/split.ts @@ -3,15 +3,19 @@ import { stat } from "fs/promises"; import path from "path"; import { pipeline } from "stream/promises"; import { childLogger } from "../util/logger.js"; +import { config } from "../util/config.js"; const log = childLogger("split"); /** - * 1950 MiB — safely under Telegram's 2GB upload limit. - * At exactly 2GiB, TDLib's internal 512KB chunking can exceed Telegram's + * Maximum part size for Telegram upload. Configurable via MAX_PART_SIZE_MB env var. + * Default: 1950 MiB (safely under 2GB non-Premium limit). + * Premium: set to 3900 MiB (safely under 4GB Premium limit). + * + * At exactly 2/4 GiB, TDLib's internal 512KB chunking can exceed Telegram's * 4000-part threshold, causing FILE_PARTS_INVALID errors. */ -const MAX_PART_SIZE = 1950n * 1024n * 1024n; +const MAX_PART_SIZE = BigInt(config.maxPartSizeMB) * 1024n * 1024n; /** * Split a file into ≤2GB parts using byte-level splitting. diff --git a/worker/src/audit.ts b/worker/src/audit.ts new file mode 100644 index 0000000..5511277 --- /dev/null +++ b/worker/src/audit.ts @@ -0,0 +1,119 @@ +import { db } from "./db/client.js"; +import { childLogger } from "./util/logger.js"; + +const log = childLogger("audit"); + +/** + * Periodic integrity audit: checks all packages for consistency. + * Creates SystemNotification records for any issues found. + * + * Checks performed: + * 1. Multipart completeness: destMessageIds.length should match partCount + * 2. Missing destination: packages with destChannelId but no destMessageId + */ +export async function runIntegrityAudit(): Promise<{ checked: number; issues: number }> { + log.info("Starting integrity audit"); + + let checked = 0; + let issues = 0; + + // Check 1: Multipart packages with wrong number of destination message IDs + const multipartPackages = await db.package.findMany({ + where: { + isMultipart: true, + partCount: { gt: 1 }, + destMessageId: { not: null }, + }, + select: { + id: true, + fileName: true, + partCount: true, + destMessageIds: true, + sourceChannelId: true, + sourceChannel: { select: { title: true } }, + }, + }); + + checked += multipartPackages.length; + + for (const pkg of multipartPackages) { + const actualParts = pkg.destMessageIds.length; + // Only flag when we have >1 stored IDs but count doesn't match. 
+ // Packages with exactly 1 ID are legacy (backfilled from single destMessageId) — not actionable. + if (actualParts > 1 && actualParts !== pkg.partCount) { + issues++; + + // Check if we already have a notification for this + const existing = await db.systemNotification.findFirst({ + where: { + type: "MISSING_PART", + context: { path: ["packageId"], equals: pkg.id }, + }, + select: { id: true }, + }); + + if (!existing) { + await db.systemNotification.create({ + data: { + type: "MISSING_PART", + severity: "WARNING", + title: `Incomplete multipart: ${pkg.fileName}`, + message: `Expected ${pkg.partCount} parts but only ${actualParts} destination message IDs stored`, + context: { + packageId: pkg.id, + fileName: pkg.fileName, + expectedParts: pkg.partCount, + actualParts, + sourceChannelId: pkg.sourceChannelId, + channelTitle: pkg.sourceChannel.title, + }, + }, + }); + + log.warn( + { packageId: pkg.id, fileName: pkg.fileName, expected: pkg.partCount, actual: actualParts }, + "Multipart package has mismatched part count" + ); + } + } + } + + // Check 2: Packages with dest channel but no dest message (orphaned index) + const orphanedCount = await db.package.count({ + where: { + destChannelId: { not: null }, + destMessageId: null, + }, + }); + + if (orphanedCount > 0) { + issues++; + + const existing = await db.systemNotification.findFirst({ + where: { + type: "INTEGRITY_AUDIT", + context: { path: ["check"], equals: "orphaned_index" }, + createdAt: { gte: new Date(Date.now() - 24 * 60 * 60 * 1000) }, + }, + select: { id: true }, + }); + + if (!existing) { + await db.systemNotification.create({ + data: { + type: "INTEGRITY_AUDIT", + severity: "INFO", + title: `${orphanedCount} packages with missing destination message`, + message: `Found ${orphanedCount} packages that have a destination channel set but no destination message ID. These may be from interrupted uploads.`, + context: { + check: "orphaned_index", + count: orphanedCount, + }, + }, + }); + } + } + + log.info({ checked, issues }, "Integrity audit complete"); + return { checked, issues }; +} diff --git a/worker/src/db/queries.ts b/worker/src/db/queries.ts index 5d34411..b463b5a 100644 --- a/worker/src/db/queries.ts +++ b/worker/src/db/queries.ts @@ -70,7 +70,7 @@ export async function packageExistsByHash(contentHash: string) { export async function getUploadedPackageByHash(contentHash: string) { return db.package.findFirst({ where: { contentHash, destMessageId: { not: null }, destChannelId: { not: null } }, - select: { destChannelId: true, destMessageId: true }, + select: { destChannelId: true, destMessageId: true, destMessageIds: true }, }); } @@ -111,6 +111,7 @@ export interface CreatePackageInput { sourceTopicId?: bigint | null; destChannelId?: string; destMessageId?: bigint; + destMessageIds?: bigint[]; isMultipart: boolean; partCount: number; ingestionRunId: string; @@ -118,6 +119,8 @@ export interface CreatePackageInput { tags?: string[]; previewData?: Buffer | null; previewMsgId?: bigint | null; + sourceCaption?: string | null; + replyToMessageId?: bigint | null; files: { path: string; fileName: string; @@ -140,6 +143,7 @@ export async function createPackageWithFiles(input: CreatePackageInput) { sourceTopicId: input.sourceTopicId ?? undefined, destChannelId: input.destChannelId, destMessageId: input.destMessageId, + destMessageIds: input.destMessageIds ?? (input.destMessageId ? 
[input.destMessageId] : []), isMultipart: input.isMultipart, partCount: input.partCount, fileCount: input.files.length, @@ -148,6 +152,8 @@ export async function createPackageWithFiles(input: CreatePackageInput) { tags: input.tags && input.tags.length > 0 ? input.tags : undefined, previewData: input.previewData ? new Uint8Array(input.previewData) : undefined, previewMsgId: input.previewMsgId ?? undefined, + sourceCaption: input.sourceCaption ?? undefined, + replyToMessageId: input.replyToMessageId ?? undefined, files: { create: input.files, }, @@ -585,3 +591,46 @@ export async function linkPackagesToGroup( data: { packageGroupId: groupId }, }); } + +export async function createTimeWindowGroup(input: { + sourceChannelId: string; + name: string; + packageIds: string[]; +}): Promise { + const group = await db.packageGroup.create({ + data: { + sourceChannelId: input.sourceChannelId, + name: input.name, + groupingSource: "AUTO_TIME", + }, + }); + + await db.package.updateMany({ + where: { id: { in: input.packageIds } }, + data: { packageGroupId: group.id }, + }); + + return group.id; +} + +export async function createAutoGroup(input: { + sourceChannelId: string; + name: string; + packageIds: string[]; + groupingSource: "ALBUM" | "MANUAL" | "AUTO_TIME" | "AUTO_PATTERN" | "AUTO_ZIP" | "AUTO_CAPTION" | "AUTO_REPLY"; +}): Promise { + const group = await db.packageGroup.create({ + data: { + sourceChannelId: input.sourceChannelId, + name: input.name, + groupingSource: input.groupingSource, + }, + }); + + await db.package.updateMany({ + where: { id: { in: input.packageIds } }, + data: { packageGroupId: group.id }, + }); + + return group.id; +} diff --git a/worker/src/fetch-listener.ts b/worker/src/fetch-listener.ts index 59340ce..8fb3388 100644 --- a/worker/src/fetch-listener.ts +++ b/worker/src/fetch-listener.ts @@ -5,6 +5,7 @@ import { withTdlibMutex } from "./util/mutex.js"; import { processFetchRequest } from "./worker.js"; import { processExtractRequest } from "./extract-listener.js"; import { rebuildPackageDatabase } from "./rebuild.js"; +import { processManualUpload } from "./manual-upload.js"; import { generateInviteLink, createSupergroup, searchPublicChat } from "./tdlib/chats.js"; import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js"; import { triggerImmediateCycle } from "./scheduler.js"; @@ -55,6 +56,7 @@ async function connectListener(): Promise { await pgClient.query("LISTEN join_channel"); await pgClient.query("LISTEN archive_extract"); await pgClient.query("LISTEN rebuild_packages"); + await pgClient.query("LISTEN manual_upload"); pgClient.on("notification", (msg) => { if (msg.channel === "channel_fetch" && msg.payload) { @@ -71,6 +73,8 @@ async function connectListener(): Promise { handleArchiveExtract(msg.payload); } else if (msg.channel === "rebuild_packages" && msg.payload) { handleRebuildPackages(msg.payload); + } else if (msg.channel === "manual_upload" && msg.payload) { + handleManualUpload(msg.payload); } }); @@ -96,7 +100,7 @@ async function connectListener(): Promise { } }); - log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages)"); + log.info("Fetch listener started (channel_fetch, generate_invite, create_destination, ingestion_trigger, join_channel, archive_extract, rebuild_packages, manual_upload)"); } catch (err) { log.error({ err }, "Failed to start fetch listener — retrying"); scheduleReconnect(); @@ -511,3 +515,11 @@ function 
handleRebuildPackages(requestId: string): void { } }); } + +// ── Manual upload handler ── + +function handleManualUpload(uploadId: string): void { + fetchQueue = fetchQueue + .then(() => processManualUpload(uploadId)) + .catch((err) => log.error({ err, uploadId }, "Manual upload processing failed")); +} diff --git a/worker/src/grouping.ts b/worker/src/grouping.ts index 35c91b0..66b370d 100644 --- a/worker/src/grouping.ts +++ b/worker/src/grouping.ts @@ -1,7 +1,8 @@ import type { Client } from "tdl"; import type { TelegramPhoto } from "./preview/match.js"; import { downloadPhotoThumbnail } from "./tdlib/download.js"; -import { createOrFindPackageGroup, linkPackagesToGroup } from "./db/queries.js"; +import { createOrFindPackageGroup, linkPackagesToGroup, createTimeWindowGroup, createAutoGroup } from "./db/queries.js"; +import { config } from "./util/config.js"; import { childLogger } from "./util/logger.js"; import { db } from "./db/client.js"; @@ -77,3 +78,591 @@ export async function processAlbumGroups( } } } + +/** + * Apply learned GroupingRules from manual overrides. + * For each rule, find ungrouped packages whose fileName contains the pattern. + */ +export async function processRuleBasedGroups( + sourceChannelId: string, + indexedPackages: IndexedPackageRef[] +): Promise { + const rules = await db.groupingRule.findMany({ + where: { sourceChannelId }, + orderBy: { confidence: "desc" }, + }); + + if (rules.length === 0) return; + + const ungrouped = await db.package.findMany({ + where: { + id: { in: indexedPackages.map((p) => p.packageId) }, + packageGroupId: null, + }, + select: { id: true, fileName: true, creator: true }, + }); + + if (ungrouped.length < 2) return; + + for (const rule of rules) { + const matches = ungrouped.filter((pkg) => { + const lower = rule.pattern.toLowerCase(); + return pkg.fileName.toLowerCase().includes(lower) || + (pkg.creator && pkg.creator.toLowerCase().includes(lower)); + }); + + if (matches.length < 2) continue; + + // Check if any are already grouped (by a previous rule in this loop) + const stillUngrouped = await db.package.findMany({ + where: { + id: { in: matches.map((m) => m.id) }, + packageGroupId: null, + }, + select: { id: true }, + }); + + if (stillUngrouped.length < 2) continue; + + try { + const groupId = await createAutoGroup({ + sourceChannelId, + name: rule.pattern, + packageIds: stillUngrouped.map((m) => m.id), + groupingSource: "MANUAL", + }); + + log.info( + { groupId, ruleId: rule.id, pattern: rule.pattern, memberCount: stillUngrouped.length }, + "Applied learned grouping rule" + ); + } catch (err) { + log.warn({ err, ruleId: rule.id }, "Failed to apply grouping rule"); + } + } +} + +/** + * After album grouping, cluster remaining ungrouped packages from the same channel + * that were posted within a configurable time window. + * Only groups packages that were just indexed in this scan cycle (the `indexedPackages` list). 
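+ *
+ * Note: proximity is measured on indexedAt (when this worker indexed the
+ * package), not on the original post date, so a backlog indexed in one
+ * scan burst tends to fall into a single cluster.
+ *
+ * Illustrative walk-through with the default 5-minute window: packages
+ * indexed at 12:00, 12:03, and 12:20 split into clusters [12:00, 12:03]
+ * and [12:20]; only the first has 2+ members and becomes a group.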
+ */ +export async function processTimeWindowGroups( + sourceChannelId: string, + indexedPackages: IndexedPackageRef[] +): Promise { + if (config.autoGroupTimeWindowMinutes <= 0) return; + + // Find which of the just-indexed packages are still ungrouped + const ungrouped = await db.package.findMany({ + where: { + id: { in: indexedPackages.map((p) => p.packageId) }, + packageGroupId: null, + }, + orderBy: { sourceMessageId: "asc" }, + select: { + id: true, + fileName: true, + sourceMessageId: true, + indexedAt: true, + }, + }); + + if (ungrouped.length < 2) return; + + const windowMs = config.autoGroupTimeWindowMinutes * 60 * 1000; + + // Cluster by time proximity: walk through sorted list, start new cluster when gap > window + const clusters: typeof ungrouped[] = []; + let current: typeof ungrouped = [ungrouped[0]]; + + for (let i = 1; i < ungrouped.length; i++) { + const prev = current[current.length - 1]; + const gap = Math.abs(ungrouped[i].indexedAt.getTime() - prev.indexedAt.getTime()); + + if (gap <= windowMs) { + current.push(ungrouped[i]); + } else { + clusters.push(current); + current = [ungrouped[i]]; + } + } + clusters.push(current); + + // Create groups for clusters with 2+ packages + for (const cluster of clusters) { + if (cluster.length < 2) continue; + + // Derive group name from common filename prefix + const name = findCommonPrefix(cluster.map((p) => p.fileName)) || cluster[0].fileName; + + try { + const groupId = await createTimeWindowGroup({ + sourceChannelId, + name, + packageIds: cluster.map((p) => p.id), + }); + + log.info( + { groupId, name, memberCount: cluster.length }, + "Created time-window group" + ); + } catch (err) { + log.warn({ err, clusterSize: cluster.length }, "Failed to create time-window group"); + } + } +} + +/** + * Group ungrouped packages that share a date pattern (YYYY-MM, YYYY_MM, etc.) + * or project slug extracted from their filenames. + */ +export async function processPatternGroups( + sourceChannelId: string, + indexedPackages: IndexedPackageRef[] +): Promise { + const ungrouped = await db.package.findMany({ + where: { + id: { in: indexedPackages.map((p) => p.packageId) }, + packageGroupId: null, + }, + select: { id: true, fileName: true }, + }); + + if (ungrouped.length < 2) return; + + // Group by extracted pattern + const patternMap = new Map(); + for (const pkg of ungrouped) { + const pattern = extractPattern(pkg.fileName); + if (!pattern) continue; + const group = patternMap.get(pattern) ?? []; + group.push(pkg); + patternMap.set(pattern, group); + } + + for (const [pattern, members] of patternMap) { + if (members.length < 2) continue; + + try { + const groupId = await createAutoGroup({ + sourceChannelId, + name: pattern, + packageIds: members.map((m) => m.id), + groupingSource: "AUTO_PATTERN", + }); + + log.info( + { groupId, pattern, memberCount: members.length }, + "Created pattern-based group" + ); + } catch (err) { + log.warn({ err, pattern }, "Failed to create pattern group"); + } + } +} + +/** + * Extract a grouping pattern from a filename. + * Matches: YYYY-MM, YYYY_MM, "Month Year", or a project prefix before common separators. + * Returns null if no usable pattern found. 
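+ *
+ * @example
+ * // Illustrative inputs and results:
+ * extractPattern("Minis_2025_03.zip");         // "2025-03"
+ * extractPattern("March 2025 Collection.rar"); // "2025-03"
+ * extractPattern("ProjectX - Part 1.zip");     // "ProjectX"
+ * extractPattern("misc.zip");                  // null (no usable pattern)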
+ */ +function extractPattern(fileName: string): string | null { + // Strip extension for matching + const name = fileName.replace(/\.(zip|rar|7z|pdf|stl)(\.\d+)?$/i, ""); + + // Match YYYY-MM or YYYY_MM patterns + const dateMatch = name.match(/(\d{4})[\-_](\d{2})/); + if (dateMatch) { + return `${dateMatch[1]}-${dateMatch[2]}`; + } + + // Match "Month Year" patterns (e.g., "January 2025", "Jan 2025") + const months = "(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)"; + const monthYearMatch = name.match(new RegExp(`(${months})\\s*(\\d{4})`, "i")); + if (monthYearMatch) { + const monthStr = monthYearMatch[1].toLowerCase().slice(0, 3); + const monthNum = ["jan","feb","mar","apr","may","jun","jul","aug","sep","oct","nov","dec"].indexOf(monthStr) + 1; + if (monthNum > 0) { + return `${monthYearMatch[2]}-${String(monthNum).padStart(2, "0")}`; + } + } + + // Match project prefix: text before " - ", " – ", or "(". Must be at least 5 chars. + const prefixMatch = name.match(/^(.{5,}?)(?:\s*[\-–]\s|\s*\()/); + if (prefixMatch) { + return prefixMatch[1].trim(); + } + + return null; +} + +/** + * Group ungrouped packages that share the same creator within a channel. + * Only groups if there are 3+ packages from the same creator (to avoid + * over-grouping when a creator only has a couple files). + */ +export async function processCreatorGroups( + sourceChannelId: string, + indexedPackages: IndexedPackageRef[] +): Promise { + const ungrouped = await db.package.findMany({ + where: { + id: { in: indexedPackages.map((p) => p.packageId) }, + packageGroupId: null, + creator: { not: null }, + }, + select: { id: true, fileName: true, creator: true }, + }); + + if (ungrouped.length < 3) return; + + // Group by creator + const creatorMap = new Map(); + for (const pkg of ungrouped) { + if (!pkg.creator) continue; + const key = pkg.creator.toLowerCase(); + const group = creatorMap.get(key) ?? []; + group.push(pkg); + creatorMap.set(key, group); + } + + for (const [, members] of creatorMap) { + if (members.length < 3) continue; + + const creatorName = members[0].creator!; + const name = findCommonPrefix(members.map((m) => m.fileName)) || creatorName; + + try { + const groupId = await createAutoGroup({ + sourceChannelId, + name, + packageIds: members.map((m) => m.id), + groupingSource: "AUTO_PATTERN", + }); + + log.info( + { groupId, creator: creatorName, memberCount: members.length }, + "Created creator-based group" + ); + } catch (err) { + log.warn({ err, creator: creatorName }, "Failed to create creator group"); + } + } +} + +/** + * Group ungrouped packages that share the same root folder inside their archives. + * E.g., if two packages both contain files under "ProjectX/", they're likely related. + * Only considers packages with 3+ files (to avoid false positives from flat archives). 
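+ *
+ * The shared root is the first path segment that covers at least half of a
+ * package's entries (see extractRootFolder below); noise entries such as
+ * "__MACOSX" are skipped.
+ *
+ * @example
+ * // Illustrative: "ProjectX" covers 2 of 3 entries, so it is the root:
+ * extractRootFolder(["ProjectX/head.stl", "ProjectX/arms.stl", "readme.txt"]);
+ * // "ProjectX"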
+ */ +export async function processZipPathGroups( + sourceChannelId: string, + indexedPackages: IndexedPackageRef[] +): Promise { + // Find ungrouped packages that have indexed files + const ungrouped = await db.package.findMany({ + where: { + id: { in: indexedPackages.map((p) => p.packageId) }, + packageGroupId: null, + fileCount: { gte: 3 }, + }, + select: { + id: true, + fileName: true, + files: { + select: { path: true }, + take: 50, + }, + }, + }); + + if (ungrouped.length < 2) return; + + // Extract the dominant root folder for each package + const packageRoots = new Map(); + + for (const pkg of ungrouped) { + const root = extractRootFolder(pkg.files.map((f) => f.path)); + if (!root) continue; + + const key = root.toLowerCase(); + const group = packageRoots.get(key) ?? []; + group.push({ id: pkg.id, fileName: pkg.fileName }); + packageRoots.set(key, group); + } + + // Create groups for roots shared by 2+ packages + for (const [root, members] of packageRoots) { + if (members.length < 2) continue; + + try { + const groupId = await createAutoGroup({ + sourceChannelId, + name: root, + packageIds: members.map((m) => m.id), + groupingSource: "AUTO_ZIP", + }); + + log.info( + { groupId, rootFolder: root, memberCount: members.length }, + "Created ZIP path prefix group" + ); + } catch (err) { + log.warn({ err, rootFolder: root }, "Failed to create ZIP path group"); + } + } +} + +/** + * Group ungrouped packages that reply to the same root message. + * If message B and C both reply to message A, they're grouped together. + */ +export async function processReplyChainGroups( + sourceChannelId: string, + indexedPackages: IndexedPackageRef[] +): Promise { + const ungrouped = await db.package.findMany({ + where: { + id: { in: indexedPackages.map((p) => p.packageId) }, + packageGroupId: null, + replyToMessageId: { not: null }, + }, + select: { + id: true, + fileName: true, + replyToMessageId: true, + }, + }); + + if (ungrouped.length < 2) return; + + // Group by replyToMessageId + const replyMap = new Map(); + for (const pkg of ungrouped) { + if (!pkg.replyToMessageId) continue; + const key = pkg.replyToMessageId.toString(); + const group = replyMap.get(key) ?? []; + group.push(pkg); + replyMap.set(key, group); + } + + for (const [replyId, members] of replyMap) { + if (members.length < 2) continue; + + const name = findCommonPrefix(members.map((m) => m.fileName)) || members[0].fileName; + + try { + const groupId = await createAutoGroup({ + sourceChannelId, + name, + packageIds: members.map((m) => m.id), + groupingSource: "AUTO_REPLY" as const, + }); + + log.info( + { groupId, replyToMessageId: replyId, memberCount: members.length }, + "Created reply-chain group" + ); + } catch (err) { + log.warn({ err, replyToMessageId: replyId }, "Failed to create reply-chain group"); + } + } +} + +/** + * Group ungrouped packages with similar captions from the same channel. + * Uses normalized caption comparison — two captions match if they share + * the same significant words (ignoring common words and file extensions). 
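+ *
+ * @example
+ * // Illustrative: both captions normalize to the key "dragon knight may"
+ * // (see normalizeCaptionKey below), so their packages are grouped:
+ * normalizeCaptionKey("Dragon Knight May.zip");       // "dragon knight may"
+ * normalizeCaptionKey("dragon knight - MAY (files)"); // "dragon knight may"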
+ */
+export async function processCaptionGroups(
+  sourceChannelId: string,
+  indexedPackages: IndexedPackageRef[]
+): Promise<void> {
+  const ungrouped = await db.package.findMany({
+    where: {
+      id: { in: indexedPackages.map((p) => p.packageId) },
+      packageGroupId: null,
+      sourceCaption: { not: null },
+    },
+    select: {
+      id: true,
+      fileName: true,
+      sourceCaption: true,
+    },
+  });
+
+  if (ungrouped.length < 2) return;
+
+  // Group by normalized caption key
+  const captionMap = new Map<string, typeof ungrouped>();
+  for (const pkg of ungrouped) {
+    if (!pkg.sourceCaption) continue;
+    const key = normalizeCaptionKey(pkg.sourceCaption);
+    if (!key) continue;
+    const group = captionMap.get(key) ?? [];
+    group.push(pkg);
+    captionMap.set(key, group);
+  }
+
+  for (const [, members] of captionMap) {
+    if (members.length < 2) continue;
+
+    const name = members[0].sourceCaption!.slice(0, 80);
+
+    try {
+      const groupId = await createAutoGroup({
+        sourceChannelId,
+        name,
+        packageIds: members.map((m) => m.id),
+        groupingSource: "AUTO_CAPTION" as const,
+      });
+
+      log.info(
+        { groupId, memberCount: members.length },
+        "Created caption-match group"
+      );
+    } catch (err) {
+      log.warn({ err }, "Failed to create caption group");
+    }
+  }
+}
+
+/**
+ * Normalize a caption for grouping: lowercase, strip archive extensions and
+ * punctuation, drop short words (under 3 chars) and common stop words, then
+ * sort the remaining words and join them.
+ * Two captions with the same key are considered a match.
+ */
+function normalizeCaptionKey(caption: string): string | null {
+  const stripped = caption
+    .toLowerCase()
+    .replace(/\.(zip|rar|7z|stl|pdf|obj|gcode)(\.\d+)?/gi, "")
+    .replace(/[^a-z0-9\s]/g, " ");
+
+  const words = stripped
+    .split(/\s+/)
+    .filter((w) => w.length >= 3)
+    .filter((w) => !["the", "and", "for", "with", "from", "part", "file", "files"].includes(w));
+
+  if (words.length < 2) return null;
+
+  return words.sort().join(" ");
+}
+
+/**
+ * Extract the dominant root folder from a list of archive file paths.
+ * Returns the first path segment that appears in at least half of the files.
+ * Returns null for flat archives or archives with no common root.
+ */
+function extractRootFolder(paths: string[]): string | null {
+  if (paths.length === 0) return null;
+
+  // Count first path segments
+  const segmentCounts = new Map<string, number>();
+  for (const p of paths) {
+    // Normalize separators and get first segment
+    const normalized = p.replace(/\\/g, "/");
+    const firstSlash = normalized.indexOf("/");
+    if (firstSlash <= 0) continue; // Skip root-level files
+    const segment = normalized.slice(0, firstSlash);
+    // Skip common noise entries
+    if (segment === "__MACOSX" || segment === ".DS_Store" || segment === "Thumbs.db") continue;
+    segmentCounts.set(segment, (segmentCounts.get(segment) ?? 0) + 1);
+  }
+
+  if (segmentCounts.size === 0) return null;
+
+  // Find the most common segment
+  let maxSegment = "";
+  let maxCount = 0;
+  for (const [seg, count] of segmentCounts) {
+    if (count > maxCount) {
+      maxSegment = seg;
+      maxCount = count;
+    }
+  }
+
+  // Must appear in at least half of the files and be at least 3 chars
+  if (maxCount < paths.length * 0.5 || maxSegment.length < 3) return null;
+
+  return maxSegment;
+}
+
+/**
+ * Detect packages that could have been grouped differently.
+ * Checks if any grouped package's filename matches a GroupingRule
+ * that would place it in a different group.
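+ *
+ * This check is advisory: it records at most one INFO-level
+ * SystemNotification per conflicting package per pass and never moves
+ * packages between groups on its own.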
+ */ +export async function detectGroupingConflicts( + sourceChannelId: string, + indexedPackages: IndexedPackageRef[] +): Promise { + const rules = await db.groupingRule.findMany({ + where: { sourceChannelId }, + }); + if (rules.length === 0) return; + + const grouped = await db.package.findMany({ + where: { + id: { in: indexedPackages.map((p) => p.packageId) }, + packageGroupId: { not: null }, + }, + select: { + id: true, + fileName: true, + packageGroupId: true, + packageGroup: { select: { name: true, groupingSource: true } }, + }, + }); + + for (const pkg of grouped) { + for (const rule of rules) { + if (pkg.fileName.toLowerCase().includes(rule.pattern.toLowerCase())) { + // Check if the rule's source group is different from current group + if (rule.createdByGroupId && rule.createdByGroupId !== pkg.packageGroupId) { + try { + await db.systemNotification.create({ + data: { + type: "GROUPING_CONFLICT", + severity: "INFO", + title: `Potential grouping conflict: ${pkg.fileName}`, + message: `Grouped by ${pkg.packageGroup?.groupingSource ?? "unknown"} into "${pkg.packageGroup?.name}", but also matches rule "${rule.pattern}" from a different manual group`, + context: { + packageId: pkg.id, + fileName: pkg.fileName, + currentGroupId: pkg.packageGroupId, + matchedRuleId: rule.id, + matchedPattern: rule.pattern, + }, + }, + }); + } catch { + // Best-effort + } + break; // One notification per package + } + } + } + } +} + +/** + * Find the longest common prefix among a list of filenames, + * trimming trailing separators and partial words. + */ +function findCommonPrefix(names: string[]): string { + if (names.length === 0) return ""; + if (names.length === 1) return names[0]; + + let prefix = names[0]; + for (let i = 1; i < names.length; i++) { + while (!names[i].startsWith(prefix)) { + prefix = prefix.slice(0, -1); + if (prefix.length === 0) return ""; + } + } + + // Trim trailing separators and partial words + const trimmed = prefix.replace(/[\s\-_.(]+$/, ""); + return trimmed.length >= 3 ? 
trimmed : ""; +} diff --git a/worker/src/manual-upload.ts b/worker/src/manual-upload.ts new file mode 100644 index 0000000..d66f441 --- /dev/null +++ b/worker/src/manual-upload.ts @@ -0,0 +1,211 @@ +import path from "path"; +import { rm } from "fs/promises"; +import { db } from "./db/client.js"; +import { childLogger } from "./util/logger.js"; +import { config } from "./util/config.js"; +import { hashParts } from "./archive/hash.js"; +import { byteLevelSplit } from "./archive/split.js"; +import { uploadToChannel } from "./upload/channel.js"; +import { createTdlibClient, closeTdlibClient } from "./tdlib/client.js"; +import { readZipCentralDirectory } from "./archive/zip-reader.js"; +import { readRarContents } from "./archive/rar-reader.js"; +import { read7zContents } from "./archive/sevenz-reader.js"; +import { getActiveAccounts } from "./db/queries.js"; + +const log = childLogger("manual-upload"); + +export async function processManualUpload(uploadId: string): Promise { + log.info({ uploadId }, "Processing manual upload"); + + const upload = await db.manualUpload.findUnique({ + where: { id: uploadId }, + include: { files: true }, + }); + + if (!upload || upload.status !== "PENDING") { + log.warn({ uploadId }, "Manual upload not found or not pending"); + return; + } + + await db.manualUpload.update({ + where: { id: uploadId }, + data: { status: "PROCESSING" }, + }); + + try { + // Get destination channel + const destSetting = await db.globalSetting.findUnique({ + where: { key: "destination_channel_id" }, + }); + if (!destSetting) throw new Error("No destination channel configured"); + + const destChannel = await db.telegramChannel.findFirst({ + where: { id: destSetting.value, type: "DESTINATION", isActive: true }, + }); + if (!destChannel) throw new Error("Destination channel not found or inactive"); + + // Get a TDLib client (use first active account) + const accounts = await getActiveAccounts(); + const account = accounts[0]; + if (!account) throw new Error("No authenticated Telegram account available"); + + const client = await createTdlibClient({ id: account.id, phone: account.phone }); + + try { + const packageIds: string[] = []; + + for (const file of upload.files) { + try { + const filePath = file.filePath; + const fileName = file.fileName; + const fileSize = file.fileSize; + + log.info({ fileName, fileSize: Number(fileSize) }, "Processing file"); + + // Determine archive type + let archiveType: "ZIP" | "RAR" | "SEVEN_Z" | "DOCUMENT" = "DOCUMENT"; + const ext = fileName.toLowerCase(); + if (ext.endsWith(".zip")) archiveType = "ZIP"; + else if (ext.endsWith(".rar")) archiveType = "RAR"; + else if (ext.endsWith(".7z")) archiveType = "SEVEN_Z"; + + // Hash the file + const contentHash = await hashParts([filePath]); + + // Check for duplicates + const existing = await db.package.findFirst({ + where: { contentHash, destMessageId: { not: null } }, + select: { id: true }, + }); + + if (existing) { + log.info({ fileName, contentHash }, "Duplicate file, skipping upload"); + await db.manualUploadFile.update({ + where: { id: file.id }, + data: { packageId: existing.id }, + }); + packageIds.push(existing.id); + continue; + } + + // Read archive metadata + let entries: { + path: string; + fileName: string; + extension: string | null; + compressedSize: bigint; + uncompressedSize: bigint; + crc32: string | null; + }[] = []; + try { + if (archiveType === "ZIP") entries = await readZipCentralDirectory([filePath]); + else if (archiveType === "RAR") entries = await readRarContents(filePath); + else if 
(archiveType === "SEVEN_Z") entries = await read7zContents(filePath); + } catch { + log.debug({ fileName }, "Could not read archive metadata"); + } + + // Split if needed + const MAX_UPLOAD_SIZE = BigInt(config.maxPartSizeMB) * 1024n * 1024n; + let uploadPaths = [filePath]; + if (fileSize > MAX_UPLOAD_SIZE) { + uploadPaths = await byteLevelSplit(filePath); + } + + // Upload to Telegram + const destResult = await uploadToChannel( + client, + destChannel.telegramId, + uploadPaths + ); + + // Create package record + const pkg = await db.package.create({ + data: { + contentHash, + fileName, + fileSize, + archiveType, + sourceChannelId: destChannel.id, + sourceMessageId: destResult.messageId, + destChannelId: destChannel.id, + destMessageId: destResult.messageId, + destMessageIds: destResult.messageIds, + isMultipart: uploadPaths.length > 1, + partCount: uploadPaths.length, + fileCount: entries.length, + files: entries.length > 0 ? { create: entries } : undefined, + }, + }); + + await db.manualUploadFile.update({ + where: { id: file.id }, + data: { packageId: pkg.id }, + }); + + packageIds.push(pkg.id); + log.info({ fileName, packageId: pkg.id }, "File processed and uploaded"); + + // Clean up split files (but not the original) + if (uploadPaths.length > 1) { + for (const splitPath of uploadPaths) { + if (splitPath !== filePath) { + await rm(splitPath, { force: true }).catch(() => {}); + } + } + } + } catch (fileErr) { + log.error({ err: fileErr, fileName: file.fileName }, "Failed to process file"); + } + } + + // Group packages if multiple files + if (packageIds.length >= 2) { + const groupName = + upload.groupName ?? upload.files[0].fileName.replace(/\.[^.]+$/, ""); + const group = await db.packageGroup.create({ + data: { + name: groupName, + sourceChannelId: destChannel.id, + groupingSource: "MANUAL", + }, + }); + await db.package.updateMany({ + where: { id: { in: packageIds } }, + data: { packageGroupId: group.id }, + }); + log.info( + { groupId: group.id, groupName, packageCount: packageIds.length }, + "Created group for uploaded files" + ); + } + + await db.manualUpload.update({ + where: { id: uploadId }, + data: { status: "COMPLETED", completedAt: new Date() }, + }); + + log.info( + { uploadId, fileCount: upload.files.length, packageCount: packageIds.length }, + "Manual upload completed" + ); + } finally { + await closeTdlibClient(client); + } + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + log.error({ err, uploadId }, "Manual upload failed"); + await db.manualUpload.update({ + where: { id: uploadId }, + data: { status: "FAILED", errorMessage: message }, + }); + } + + // Clean up uploaded files + try { + const uploadDir = path.join("/data/uploads", uploadId); + await rm(uploadDir, { recursive: true, force: true }); + } catch { + // Best-effort cleanup + } +} diff --git a/worker/src/scheduler.ts b/worker/src/scheduler.ts index 776684e..e4923a0 100644 --- a/worker/src/scheduler.ts +++ b/worker/src/scheduler.ts @@ -3,6 +3,7 @@ import { childLogger } from "./util/logger.js"; import { withTdlibMutex } from "./util/mutex.js"; import { getActiveAccounts, getPendingAccounts } from "./db/queries.js"; import { runWorkerForAccount, authenticateAccount } from "./worker.js"; +import { runIntegrityAudit } from "./audit.js"; const log = childLogger("scheduler"); @@ -87,6 +88,16 @@ async function runCycle(): Promise { { elapsed: Math.round((Date.now() - cycleStart) / 1000) }, "Ingestion cycle complete" ); + + // Run integrity audit after all accounts are processed + try { + const auditResult = await runIntegrityAudit(); + if (auditResult.issues > 0) { + log.info({ ...auditResult }, "Integrity audit found issues"); + } + } catch (auditErr) { + log.warn({ err: auditErr }, "Integrity audit failed"); + } } catch (err) { log.error({ err }, "Ingestion cycle failed"); } finally { diff --git a/worker/src/tdlib/download.ts b/worker/src/tdlib/download.ts index b267df5..2e70389 100644 --- a/worker/src/tdlib/download.ts +++ b/worker/src/tdlib/download.ts @@ -2,13 +2,16 @@ import type { Client } from "tdl"; import { readFile, rename, copyFile, unlink, stat } from "fs/promises"; import { config } from "../util/config.js"; import { childLogger } from "../util/logger.js"; -import { withFloodWait } from "../util/retry.js"; +import { withFloodWait, extractFloodWaitSeconds } from "../util/retry.js"; import { isArchiveAttachment } from "../archive/detect.js"; import type { TelegramMessage } from "../archive/multipart.js"; import type { TelegramPhoto } from "../preview/match.js"; const log = childLogger("download"); +/** Maximum retry attempts for stalled/failed downloads */ +const MAX_DOWNLOAD_RETRIES = 3; + /** Maximum number of pages to scan per channel/topic to prevent infinite loops */ export const MAX_SCAN_PAGES = 5000; @@ -36,6 +39,7 @@ interface TdMessage { id: number; date: number; media_album_id?: string; + reply_to_message_id?: number; content: { _: string; document?: { @@ -213,6 +217,8 @@ export async function getChannelMessages( fileSize: BigInt(doc.document.size), date: new Date(msg.date * 1000), mediaAlbumId: msg.media_album_id && msg.media_album_id !== "0" ? msg.media_album_id : undefined, + replyToMessageId: msg.reply_to_message_id ? 
BigInt(msg.reply_to_message_id) : undefined, + caption: msg.content?.caption?.text || undefined, }); continue; } @@ -353,6 +359,75 @@ export async function downloadFile( isComplete: false, }); + for (let attempt = 0; attempt <= MAX_DOWNLOAD_RETRIES; attempt++) { + try { + return await downloadFileAttempt(client, numericId, fileId, destPath, totalBytes, fileName, onProgress); + } catch (err) { + const isLastAttempt = attempt >= MAX_DOWNLOAD_RETRIES; + + // Rate limit from Telegram + const waitSeconds = extractFloodWaitSeconds(err); + if (waitSeconds !== null && !isLastAttempt) { + const jitter = 1000 + Math.random() * 4000; + const waitMs = waitSeconds * 1000 + jitter; + log.warn( + { fileName, attempt: attempt + 1, maxRetries: MAX_DOWNLOAD_RETRIES, waitSeconds }, + `Download rate-limited — sleeping ${waitSeconds}s before retry` + ); + await cancelDownload(client, numericId); + await sleep(waitMs); + continue; + } + + // Stall, timeout, or unexpected stop — cancel and retry + const errMsg = err instanceof Error ? err.message : ""; + if ( + (errMsg.includes("stalled") || errMsg.includes("timed out") || errMsg.includes("stopped unexpectedly")) && + !isLastAttempt + ) { + log.warn( + { fileName, attempt: attempt + 1, maxRetries: MAX_DOWNLOAD_RETRIES }, + "Download failed — cancelling and retrying" + ); + await cancelDownload(client, numericId); + await sleep(5_000); + continue; + } + + throw err; + } + } + throw new Error(`Download failed after ${MAX_DOWNLOAD_RETRIES} retries for ${fileName}`); +} + +/** + * Cancel an active TDLib download so it can be retried cleanly. + */ +async function cancelDownload(client: Client, fileId: number): Promise { + try { + await client.invoke({ + _: "cancelDownloadFile", + file_id: fileId, + only_if_pending: false, + }); + log.debug({ fileId }, "Cancelled TDLib download for retry"); + } catch { + // Best-effort + } +} + +/** + * Single download attempt with progress tracking, stall detection, and verification. 
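+ *
+ * Invoked in a loop by downloadFile above: FLOOD_WAIT errors sleep for the
+ * server-specified wait plus 1-5s of jitter, stalls and timeouts cancel the
+ * TDLib transfer and retry after a fixed 5s cooldown, and the final attempt
+ * rethrows instead of retrying.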
+ */ +async function downloadFileAttempt( + client: Client, + numericId: number, + fileId: string, + destPath: string, + totalBytes: number, + fileName: string, + onProgress?: ProgressCallback +): Promise { return new Promise((resolve, reject) => { let lastLoggedPercent = 0; let settled = false; diff --git a/worker/src/upload/channel.ts b/worker/src/upload/channel.ts index fd6212f..fd8c7fd 100644 --- a/worker/src/upload/channel.ts +++ b/worker/src/upload/channel.ts @@ -3,12 +3,13 @@ import { stat } from "fs/promises"; import type { Client } from "tdl"; import { config } from "../util/config.js"; import { childLogger } from "../util/logger.js"; -import { withFloodWait } from "../util/retry.js"; +import { withFloodWait, extractFloodWaitSeconds } from "../util/retry.js"; const log = childLogger("upload"); export interface UploadResult { messageId: bigint; + messageIds: bigint[]; } /** @@ -28,7 +29,7 @@ export async function uploadToChannel( filePaths: string[], caption?: string ): Promise { - let firstMessageId: bigint | null = null; + const allMessageIds: bigint[] = []; for (let i = 0; i < filePaths.length; i++) { const filePath = filePaths[i]; @@ -49,11 +50,9 @@ export async function uploadToChannel( "Uploading file to channel" ); - const serverMsgId = await sendAndWaitForUpload(client, chatId, filePath, fileCaption, fileName, fileSizeMB); + const serverMsgId = await sendWithRetry(client, chatId, filePath, fileCaption, fileName, fileSizeMB); - if (i === 0) { - firstMessageId = serverMsgId; - } + allMessageIds.push(serverMsgId); // Rate limit delay between uploads if (i < filePaths.length - 1) { @@ -61,16 +60,68 @@ export async function uploadToChannel( } } - if (firstMessageId === null) { + if (allMessageIds.length === 0) { throw new Error("Upload failed: no messages sent"); } log.info( - { chatId: Number(chatId), messageId: Number(firstMessageId), files: filePaths.length }, + { chatId: Number(chatId), messageId: Number(allMessageIds[0]), files: filePaths.length }, "All uploads confirmed by Telegram" ); - return { messageId: firstMessageId }; + return { messageId: allMessageIds[0], messageIds: allMessageIds }; +} + +/** + * Retry wrapper for sendAndWaitForUpload. + * Handles: + * - Rate limits (429 / FLOOD_WAIT) from updateMessageSendFailed — waits and retries + * - Stall / timeout — retries with a cooldown + */ +const MAX_UPLOAD_RETRIES = 3; + +async function sendWithRetry( + client: Client, + chatId: bigint, + filePath: string, + caption: string | undefined, + fileName: string, + fileSizeMB: number +): Promise { + for (let attempt = 0; attempt <= MAX_UPLOAD_RETRIES; attempt++) { + try { + return await sendAndWaitForUpload(client, chatId, filePath, caption, fileName, fileSizeMB); + } catch (err) { + const isLastAttempt = attempt >= MAX_UPLOAD_RETRIES; + + // Rate limit from Telegram (429 / FLOOD_WAIT / "retry after N") + const waitSeconds = extractFloodWaitSeconds(err); + if (waitSeconds !== null && !isLastAttempt) { + const jitter = 1000 + Math.random() * 4000; + const waitMs = waitSeconds * 1000 + jitter; + log.warn( + { fileName, attempt: attempt + 1, maxRetries: MAX_UPLOAD_RETRIES, waitSeconds }, + `Upload rate-limited — sleeping ${waitSeconds}s before retry` + ); + await sleep(waitMs); + continue; + } + + // Stall or timeout — retry with a cooldown + const errMsg = err instanceof Error ? 
err.message : ""; + if ((errMsg.includes("stalled") || errMsg.includes("timed out")) && !isLastAttempt) { + log.warn( + { fileName, attempt: attempt + 1, maxRetries: MAX_UPLOAD_RETRIES }, + "Upload stalled/timed out — retrying" + ); + await sleep(10_000); + continue; + } + + throw err; + } + } + throw new Error(`Upload failed after ${MAX_UPLOAD_RETRIES} retries for ${fileName}`); } /** @@ -94,6 +145,7 @@ async function sendAndWaitForUpload( let lastLoggedPercent = 0; let tempMsgId: number | null = null; let uploadStarted = false; + let lastProgressBytes = 0; let lastProgressTime = Date.now(); // Timeout: 20 minutes per GB, minimum 15 minutes @@ -137,9 +189,14 @@ async function sendAndWaitForUpload( const file = update.file; if (file?.remote?.is_uploading_active && file.expected_size > 0) { uploadStarted = true; - lastProgressTime = Date.now(); const uploaded = file.remote.uploaded_size ?? 0; + + // Only reset stall timer when bytes actually advance + if (uploaded > lastProgressBytes) { + lastProgressBytes = uploaded; + lastProgressTime = Date.now(); + } const total = file.expected_size; const percent = Math.round((uploaded / total) * 100); if (percent >= lastLoggedPercent + 20) { @@ -178,7 +235,9 @@ async function sendAndWaitForUpload( settled = true; cleanup(); const errorMsg = update.error?.message ?? "Unknown upload error"; - reject(new Error(`Upload failed for ${fileName}: ${errorMsg}`)); + const error = new Error(`Upload failed for ${fileName}: ${errorMsg}`); + (error as Error & { code?: number }).code = update.error?.code; + reject(error); } } } diff --git a/worker/src/util/config.ts b/worker/src/util/config.ts index 665022e..fb7ebb9 100644 --- a/worker/src/util/config.ts +++ b/worker/src/util/config.ts @@ -7,6 +7,11 @@ export const config = { logLevel: (process.env.LOG_LEVEL ?? "info") as "debug" | "info" | "warn" | "error", telegramApiId: parseInt(process.env.TELEGRAM_API_ID ?? "0", 10), telegramApiHash: process.env.TELEGRAM_API_HASH ?? "", + /** Maximum file part size for Telegram upload (in MiB). Default 1950 (under 2GB non-Premium limit). + * Set to 3900 for Premium accounts (under 4GB limit). */ + maxPartSizeMB: parseInt(process.env.MAX_PART_SIZE_MB ?? "1950", 10), + /** Time window for auto-grouping ungrouped packages from the same channel (minutes). 0 = disabled. */ + autoGroupTimeWindowMinutes: parseInt(process.env.AUTO_GROUP_TIME_WINDOW_MINUTES ?? "5", 10), /** Maximum jitter added to scheduler interval (in minutes) */ jitterMinutes: 5, /** Maximum time span for multipart archive parts (in hours). 0 = no limit. 
*/ diff --git a/worker/src/worker.ts b/worker/src/worker.ts index d86f068..f7365b3 100644 --- a/worker/src/worker.ts +++ b/worker/src/worker.ts @@ -47,7 +47,8 @@ import { readRarContents } from "./archive/rar-reader.js"; import { read7zContents } from "./archive/sevenz-reader.js"; import { byteLevelSplit, concatenateFiles } from "./archive/split.js"; import { uploadToChannel } from "./upload/channel.js"; -import { processAlbumGroups, type IndexedPackageRef } from "./grouping.js"; +import { processAlbumGroups, processRuleBasedGroups, processTimeWindowGroups, processPatternGroups, processCreatorGroups, processZipPathGroups, processReplyChainGroups, processCaptionGroups, detectGroupingConflicts, type IndexedPackageRef } from "./grouping.js"; +import { db } from "./db/client.js"; import type { TelegramAccount, TelegramChannel } from "@prisma/client"; import type { Client } from "tdl"; @@ -776,6 +777,22 @@ async function processArchiveSets( partCount: archiveSet.parts.length, accountId: ctx.accountId, }); + // Also create a persistent notification + await db.systemNotification.create({ + data: { + type: inferSkipReason(errMsg) === "UPLOAD_FAILED" ? "UPLOAD_FAILED" : "DOWNLOAD_FAILED", + severity: "WARNING", + title: `Failed to process ${archiveSet.parts[0].fileName}`, + message: errMsg, + context: { + fileName: archiveSet.parts[0].fileName, + sourceChannelId: ctx.channel.id, + sourceMessageId: Number(archiveSet.parts[0].id), + channelTitle: ctx.channelTitle, + reason: inferSkipReason(errMsg), + }, + }, + }); } catch { // Best-effort — don't fail the run if skip recording fails } @@ -790,6 +807,38 @@ async function processArchiveSets( indexedPackageRefs, scanResult.photos ); + + // Auto-grouping passes (gated by per-channel flag) + const channelRecord = await db.telegramChannel.findUnique({ + where: { id: channel.id }, + select: { autoGroupEnabled: true }, + }); + + if (channelRecord?.autoGroupEnabled !== false) { + // Learned rule-based grouping (from manual overrides) + await processRuleBasedGroups(channel.id, indexedPackageRefs); + + // Time-window grouping for remaining ungrouped packages + await processTimeWindowGroups(channel.id, indexedPackageRefs); + + // Pattern-based grouping (date patterns, project slugs) + await processPatternGroups(channel.id, indexedPackageRefs); + + // Creator-based grouping (3+ files from same creator) + await processCreatorGroups(channel.id, indexedPackageRefs); + + // ZIP path prefix grouping (shared root folder inside archives) + await processZipPathGroups(channel.id, indexedPackageRefs); + + // Reply chain grouping (messages replying to same root) + await processReplyChainGroups(channel.id, indexedPackageRefs); + + // Caption fuzzy match grouping + await processCaptionGroups(channel.id, indexedPackageRefs); + } + + // Check for potential grouping conflicts + await detectGroupingConflicts(channel.id, indexedPackageRefs); } return maxProcessedId; @@ -1020,7 +1069,7 @@ async function processOneArchiveSet( (sum, p) => sum + p.fileSize, 0n ); - const MAX_UPLOAD_SIZE = 1950n * 1024n * 1024n; // Match split.ts MAX_PART_SIZE + const MAX_UPLOAD_SIZE = BigInt(config.maxPartSizeMB) * 1024n * 1024n; const hasOversizedPart = archiveSet.parts.some((p) => p.fileSize > MAX_UPLOAD_SIZE); if (hasOversizedPart) { @@ -1053,18 +1102,60 @@ async function processOneArchiveSet( uploadPaths = splitPaths; } + // ── Hash verification after split ── + // If we split/repacked, verify the split parts hash matches the original + if (splitPaths.length > 0) { + const splitHash = await 
hashParts(splitPaths); + if (splitHash !== contentHash) { + accountLog.error( + { fileName: archiveName, originalHash: contentHash, splitHash, parts: splitPaths.length }, + "Hash mismatch after split — file may be corrupted" + ); + // Record notification for visibility + try { + await db.systemNotification.create({ + data: { + type: "HASH_MISMATCH", + severity: "ERROR", + title: `Hash mismatch after splitting ${archiveName}`, + message: `Expected ${contentHash.slice(0, 16)}… but got ${splitHash.slice(0, 16)}… after splitting into ${splitPaths.length} parts`, + context: { + fileName: archiveName, + originalHash: contentHash, + splitHash, + partCount: splitPaths.length, + sourceChannelId: channel.id, + }, + }, + }); + } catch { + // Best-effort notification + } + throw new Error(`Hash mismatch after split for ${archiveName}: expected ${contentHash}, got ${splitHash}`); + } + accountLog.debug( + { fileName: archiveName, hash: contentHash.slice(0, 16), parts: splitPaths.length }, + "Split hash verified — matches original" + ); + } + // ── Uploading ── // Check if a prior run already uploaded this file (orphaned upload scenario: // file reached Telegram but DB write failed or worker crashed before indexing) const existingUpload = await getUploadedPackageByHash(contentHash); - let destResult: { messageId: bigint }; + let destResult: { messageId: bigint; messageIds: bigint[] }; if (existingUpload && existingUpload.destMessageId) { accountLog.info( { fileName: archiveName, destMessageId: Number(existingUpload.destMessageId) }, "Reusing existing upload (file already on destination channel)" ); - destResult = { messageId: existingUpload.destMessageId }; + destResult = { + messageId: existingUpload.destMessageId, + messageIds: existingUpload.destMessageIds?.length + ? (existingUpload.destMessageIds as bigint[]) + : [existingUpload.destMessageId], + }; } else { const uploadLabel = uploadPaths.length > 1 ? ` (${uploadPaths.length} parts)` @@ -1085,6 +1176,34 @@ async function processOneArchiveSet( ); } + // ── Post-upload integrity check ── + // Verify the files on disk still match before we index + if (uploadPaths.length > 0 && !existingUpload) { + try { + const postUploadHash = await hashParts(uploadPaths); + if (splitPaths.length > 0) { + // Split files — hash should match the split hash (already verified above) + // No additional check needed since we verified split hash = original hash + } else if (postUploadHash !== contentHash) { + accountLog.error( + { fileName: archiveName, originalHash: contentHash, postUploadHash }, + "Hash changed between hashing and upload — possible disk corruption" + ); + await db.systemNotification.create({ + data: { + type: "HASH_MISMATCH", + severity: "ERROR", + title: `Post-upload hash mismatch: ${archiveName}`, + message: `Hash changed between download and upload. 
Original: ${contentHash.slice(0, 16)}…, post-upload: ${postUploadHash.slice(0, 16)}…`, + context: { fileName: archiveName, originalHash: contentHash, postUploadHash, sourceChannelId: channel.id }, + }, + }); + } + } catch { + // Best-effort — don't fail the ingestion + } + } + // ── Preview thumbnail ── let previewData: Buffer | null = null; let previewMsgId: bigint | null = null; @@ -1158,6 +1277,7 @@ async function processOneArchiveSet( sourceTopicId, destChannelId, destMessageId: destResult.messageId, + destMessageIds: destResult.messageIds, isMultipart: archiveSet.parts.length > 1 || uploadPaths.length > 1, partCount: uploadPaths.length, @@ -1166,6 +1286,8 @@ async function processOneArchiveSet( tags, previewData, previewMsgId, + sourceCaption: archiveSet.parts[0].caption ?? null, + replyToMessageId: archiveSet.parts[0].replyToMessageId ?? null, files: entries, });
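The audit above treats a destMessageIds array with exactly one entry as a legacy row backfilled from the old single destMessageId column. The diff itself does not include that backfill; a one-off script along the following lines would populate the array for pre-existing rows, assuming the Prisma schema declares destMessageIds as a BigInt scalar list that defaults to empty (the helper name is illustrative, not part of this change):

import { db } from "./db/client.js";

// Hypothetical one-off backfill: copy the legacy single destMessageId into
// the new destMessageIds array for rows indexed before this change.
async function backfillDestMessageIds(): Promise<number> {
  const legacy = await db.package.findMany({
    where: { destMessageId: { not: null }, destMessageIds: { isEmpty: true } },
    select: { id: true, destMessageId: true },
  });
  for (const pkg of legacy) {
    await db.package.update({
      where: { id: pkg.id },
      data: { destMessageIds: [pkg.destMessageId!] },
    });
  }
  return legacy.length;
}

Multipart packages indexed before this change only ever stored the first part's message ID, which is why the audit skips single-entry arrays instead of flagging them against partCount.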