feat: add Telegram integration with forum topic support and creator tracking

Adds full Telegram ZIP ingestion pipeline: TDLib worker service scans source
channels for archive files, deduplicates by content hash, extracts metadata,
uploads to archive channel, and indexes in Postgres. Forum supergroups are
scanned per-topic with topic names used as creator. Filename-based creator
extraction (e.g. "Mammoth Factory - 2026-01.zip") serves as fallback.

Includes admin UI for managing accounts/channels, simplified account setup
(API credentials via env vars), auth code/password submission dialog,
package browser with creator column, and live ingestion activity tracking.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
xCyanGrizzly
2026-02-24 16:02:06 +01:00
parent beb9cfb312
commit b427193d17
70 changed files with 8627 additions and 2 deletions

View File

@@ -0,0 +1,183 @@
-- CreateEnum
-- Value set for "telegram_accounts"."authState" (that column defaults to 'PENDING').
CREATE TYPE "AuthState" AS ENUM (
    'PENDING',
    'AWAITING_CODE',
    'AWAITING_PASSWORD',
    'AUTHENTICATED',
    'EXPIRED'
);

-- CreateEnum
-- Value set for "telegram_channels"."type".
CREATE TYPE "ChannelType" AS ENUM (
    'SOURCE',
    'DESTINATION'
);

-- CreateEnum
-- Value set for "account_channel_map"."role" (that column defaults to 'READER').
CREATE TYPE "ChannelRole" AS ENUM (
    'READER',
    'WRITER'
);

-- CreateEnum
-- Value set for "packages"."archiveType".
CREATE TYPE "ArchiveType" AS ENUM (
    'ZIP',
    'RAR'
);

-- CreateEnum
-- Value set for "ingestion_runs"."status" (that column defaults to 'RUNNING').
CREATE TYPE "IngestionStatus" AS ENUM (
    'RUNNING',
    'COMPLETED',
    'FAILED',
    'CANCELLED'
);
-- CreateTable
-- Telegram account records, keyed by an application-supplied text "id".
-- "updatedAt" is NOT NULL and has no database default, so every INSERT must
-- provide a value for it.
-- NOTE(review): "authCode" is a plain nullable TEXT column; it presumably holds
-- a login code -- confirm it is cleared once authentication completes.
CREATE TABLE "telegram_accounts" (
    "id"          TEXT         NOT NULL,
    "phone"       TEXT         NOT NULL,
    "displayName" TEXT,
    "apiId"       INTEGER      NOT NULL,
    "apiHash"     TEXT         NOT NULL,
    "sessionPath" TEXT         NOT NULL,
    "isActive"    BOOLEAN      NOT NULL DEFAULT true,
    "authState"   "AuthState"  NOT NULL DEFAULT 'PENDING',
    "authCode"    TEXT,
    "lastSeenAt"  TIMESTAMP(3),
    "createdAt"   TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt"   TIMESTAMP(3) NOT NULL,

    CONSTRAINT "telegram_accounts_pkey" PRIMARY KEY ("id")
);
-- CreateTable
-- Telegram channel records. "telegramId" is a BIGINT identifier distinct from
-- the text primary key "id"; "type" takes a "ChannelType" value.
-- "updatedAt" has no database default and must be supplied by the writer.
CREATE TABLE "telegram_channels" (
    "id"         TEXT          NOT NULL,
    "telegramId" BIGINT        NOT NULL,
    "title"      TEXT          NOT NULL,
    "type"       "ChannelType" NOT NULL,
    "isActive"   BOOLEAN       NOT NULL DEFAULT true,
    "createdAt"  TIMESTAMP(3)  NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt"  TIMESTAMP(3)  NOT NULL,

    CONSTRAINT "telegram_channels_pkey" PRIMARY KEY ("id")
);
-- CreateTable
-- Join table linking an account ("accountId") to a channel ("channelId") with
-- a "ChannelRole". "lastProcessedMessageId" is nullable, i.e. it may be unset.
CREATE TABLE "account_channel_map" (
    "id"                     TEXT          NOT NULL,
    "accountId"              TEXT          NOT NULL,
    "channelId"              TEXT          NOT NULL,
    "role"                   "ChannelRole" NOT NULL DEFAULT 'READER',
    "lastProcessedMessageId" BIGINT,
    "createdAt"              TIMESTAMP(3)  NOT NULL DEFAULT CURRENT_TIMESTAMP,

    CONSTRAINT "account_channel_map_pkey" PRIMARY KEY ("id")
);
-- CreateTable
-- Archive package records. "sourceChannelId"/"sourceMessageId" are mandatory,
-- while the destination pair and "ingestionRunId" are nullable.
-- NOTE(review): "destChannelId" gets an index elsewhere in this migration but
-- no foreign key is declared for it -- confirm that is intentional.
CREATE TABLE "packages" (
    "id"              TEXT          NOT NULL,
    "contentHash"     TEXT          NOT NULL,
    "fileName"        TEXT          NOT NULL,
    "fileSize"        BIGINT        NOT NULL,
    "archiveType"     "ArchiveType" NOT NULL,
    "sourceChannelId" TEXT          NOT NULL,
    "sourceMessageId" BIGINT        NOT NULL,
    "destChannelId"   TEXT,
    "destMessageId"   BIGINT,
    "isMultipart"     BOOLEAN       NOT NULL DEFAULT false,
    "partCount"       INTEGER       NOT NULL DEFAULT 1,
    "fileCount"       INTEGER       NOT NULL DEFAULT 0,
    "indexedAt"       TIMESTAMP(3)  NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "createdAt"       TIMESTAMP(3)  NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "ingestionRunId"  TEXT,

    CONSTRAINT "packages_pkey" PRIMARY KEY ("id")
);
-- CreateTable
-- Per-file entries belonging to a package ("packageId"). Both size columns
-- default to 0; "extension" and "crc32" are optional.
CREATE TABLE "package_files" (
    "id"               TEXT   NOT NULL,
    "packageId"        TEXT   NOT NULL,
    "path"             TEXT   NOT NULL,
    "fileName"         TEXT   NOT NULL,
    "extension"        TEXT,
    "compressedSize"   BIGINT NOT NULL DEFAULT 0,
    "uncompressedSize" BIGINT NOT NULL DEFAULT 0,
    "crc32"            TEXT,

    CONSTRAINT "package_files_pkey" PRIMARY KEY ("id")
);
-- CreateTable
-- One row per ingestion run of an account ("accountId"). A new row starts in
-- status 'RUNNING' with all counters at 0; "finishedAt" and "errorMessage"
-- stay NULL until set.
CREATE TABLE "ingestion_runs" (
    "id"              TEXT              NOT NULL,
    "accountId"       TEXT              NOT NULL,
    "status"          "IngestionStatus" NOT NULL DEFAULT 'RUNNING',
    "startedAt"       TIMESTAMP(3)      NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "finishedAt"      TIMESTAMP(3),
    "messagesScanned" INTEGER           NOT NULL DEFAULT 0,
    "zipsFound"       INTEGER           NOT NULL DEFAULT 0,
    "zipsDuplicate"   INTEGER           NOT NULL DEFAULT 0,
    "zipsIngested"    INTEGER           NOT NULL DEFAULT 0,
    "errorMessage"    TEXT,

    CONSTRAINT "ingestion_runs_pkey" PRIMARY KEY ("id")
);
-- ---- telegram_accounts ----

-- CreateIndex
-- At most one account row per phone value.
CREATE UNIQUE INDEX "telegram_accounts_phone_key"
    ON "telegram_accounts" ("phone");

-- CreateIndex
CREATE INDEX "telegram_accounts_isActive_idx"
    ON "telegram_accounts" ("isActive");

-- ---- telegram_channels ----

-- CreateIndex
-- At most one channel row per "telegramId".
CREATE UNIQUE INDEX "telegram_channels_telegramId_key"
    ON "telegram_channels" ("telegramId");

-- CreateIndex
CREATE INDEX "telegram_channels_type_isActive_idx"
    ON "telegram_channels" ("type", "isActive");

-- ---- account_channel_map ----

-- CreateIndex
-- NOTE(review): ("accountId") is the leading column of the unique
-- ("accountId", "channelId") index created below, so this index may be
-- redundant -- confirm with EXPLAIN before dropping it.
CREATE INDEX "account_channel_map_accountId_idx"
    ON "account_channel_map" ("accountId");

-- CreateIndex
CREATE INDEX "account_channel_map_channelId_idx"
    ON "account_channel_map" ("channelId");

-- CreateIndex
-- An account can be mapped to a given channel only once.
CREATE UNIQUE INDEX "account_channel_map_accountId_channelId_key"
    ON "account_channel_map" ("accountId", "channelId");

-- ---- packages ----

-- CreateIndex
-- At most one package row per "contentHash".
CREATE UNIQUE INDEX "packages_contentHash_key"
    ON "packages" ("contentHash");

-- CreateIndex
CREATE INDEX "packages_sourceChannelId_idx"
    ON "packages" ("sourceChannelId");

-- CreateIndex
CREATE INDEX "packages_destChannelId_idx"
    ON "packages" ("destChannelId");

-- CreateIndex
CREATE INDEX "packages_fileName_idx"
    ON "packages" ("fileName");

-- CreateIndex
CREATE INDEX "packages_indexedAt_idx"
    ON "packages" ("indexedAt");

-- CreateIndex
CREATE INDEX "packages_archiveType_idx"
    ON "packages" ("archiveType");

-- ---- package_files ----

-- CreateIndex
CREATE INDEX "package_files_packageId_idx"
    ON "package_files" ("packageId");

-- CreateIndex
CREATE INDEX "package_files_extension_idx"
    ON "package_files" ("extension");

-- CreateIndex
CREATE INDEX "package_files_fileName_idx"
    ON "package_files" ("fileName");

-- ---- ingestion_runs ----

-- CreateIndex
CREATE INDEX "ingestion_runs_accountId_idx"
    ON "ingestion_runs" ("accountId");

-- CreateIndex
CREATE INDEX "ingestion_runs_status_idx"
    ON "ingestion_runs" ("status");

-- CreateIndex
CREATE INDEX "ingestion_runs_startedAt_idx"
    ON "ingestion_runs" ("startedAt");
-- AddForeignKey
-- Deleting an account also deletes its channel mappings.
ALTER TABLE "account_channel_map"
    ADD CONSTRAINT "account_channel_map_accountId_fkey"
    FOREIGN KEY ("accountId")
    REFERENCES "telegram_accounts" ("id")
    ON DELETE CASCADE
    ON UPDATE CASCADE;

-- AddForeignKey
-- Deleting a channel also deletes its account mappings.
ALTER TABLE "account_channel_map"
    ADD CONSTRAINT "account_channel_map_channelId_fkey"
    FOREIGN KEY ("channelId")
    REFERENCES "telegram_channels" ("id")
    ON DELETE CASCADE
    ON UPDATE CASCADE;

-- AddForeignKey
-- A channel that is the source of any package cannot be deleted (RESTRICT).
ALTER TABLE "packages"
    ADD CONSTRAINT "packages_sourceChannelId_fkey"
    FOREIGN KEY ("sourceChannelId")
    REFERENCES "telegram_channels" ("id")
    ON DELETE RESTRICT
    ON UPDATE CASCADE;

-- AddForeignKey
-- Deleting an ingestion run keeps its packages and nulls their "ingestionRunId".
ALTER TABLE "packages"
    ADD CONSTRAINT "packages_ingestionRunId_fkey"
    FOREIGN KEY ("ingestionRunId")
    REFERENCES "ingestion_runs" ("id")
    ON DELETE SET NULL
    ON UPDATE CASCADE;

-- AddForeignKey
-- Deleting a package also deletes its file entries.
ALTER TABLE "package_files"
    ADD CONSTRAINT "package_files_packageId_fkey"
    FOREIGN KEY ("packageId")
    REFERENCES "packages" ("id")
    ON DELETE CASCADE
    ON UPDATE CASCADE;

-- AddForeignKey
-- NOTE(review): RESTRICT means an account with any ingestion run rows cannot
-- be deleted -- confirm this matches how account deletion is expected to work.
ALTER TABLE "ingestion_runs"
    ADD CONSTRAINT "ingestion_runs_accountId_fkey"
    FOREIGN KEY ("accountId")
    REFERENCES "telegram_accounts" ("id")
    ON DELETE RESTRICT
    ON UPDATE CASCADE;

View File

@@ -0,0 +1,11 @@
-- AlterTable
-- Adds ten nullable progress/activity columns to "ingestion_runs" in a single
-- statement. All are nullable with no default, so existing rows read NULL.
ALTER TABLE "ingestion_runs"
    ADD COLUMN "currentActivity" TEXT,
    ADD COLUMN "currentChannel"  TEXT,
    ADD COLUMN "currentFile"     TEXT,
    ADD COLUMN "currentFileNum"  INTEGER,
    ADD COLUMN "currentStep"     TEXT,
    ADD COLUMN "downloadPercent" INTEGER,
    ADD COLUMN "downloadedBytes" BIGINT,
    ADD COLUMN "lastActivityAt"  TIMESTAMP(3),
    ADD COLUMN "totalBytes"      BIGINT,
    ADD COLUMN "totalFiles"      INTEGER;

View File

@@ -0,0 +1,3 @@
-- AlterTable
-- Adds two nullable preview columns to "packages": raw bytes ("previewData")
-- and a BIGINT message id ("previewMsgId"). Existing rows read NULL for both.
ALTER TABLE "packages"
    ADD COLUMN "previewData"  BYTEA,
    ADD COLUMN "previewMsgId" BIGINT;

View File

@@ -0,0 +1,12 @@
/*
  Warnings -- this migration is destructive:

  - Column `apiHash` is dropped from the `telegram_accounts` table. All the data in the column will be lost.
  - Column `apiId` is dropped from the `telegram_accounts` table. All the data in the column will be lost.
  - Column `sessionPath` is dropped from the `telegram_accounts` table. All the data in the column will be lost.
*/
-- AlterTable
-- Removes the three columns in one statement; back up any values that are
-- still needed before applying.
ALTER TABLE "telegram_accounts"
    DROP COLUMN "apiHash",
    DROP COLUMN "apiId",
    DROP COLUMN "sessionPath";

View File

@@ -0,0 +1,29 @@
-- AlterTable
-- Adds nullable "creator" and "sourceTopicId" to "packages"; existing rows
-- read NULL for both.
ALTER TABLE "packages"
    ADD COLUMN "creator"       TEXT,
    ADD COLUMN "sourceTopicId" BIGINT;

-- AlterTable
-- Adds the "isForum" flag to "telegram_channels"; existing rows get false.
ALTER TABLE "telegram_channels"
    ADD COLUMN "isForum" BOOLEAN NOT NULL DEFAULT false;
-- CreateTable
-- Per-topic progress rows attached to an "account_channel_map" entry via
-- "accountChannelMapId". "topicName" and "lastProcessedMessageId" are optional.
CREATE TABLE "topic_progress" (
    "id"                     TEXT   NOT NULL,
    "accountChannelMapId"    TEXT   NOT NULL,
    "topicId"                BIGINT NOT NULL,
    "topicName"              TEXT,
    "lastProcessedMessageId" BIGINT,

    CONSTRAINT "topic_progress_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
-- NOTE(review): ("accountChannelMapId") is the leading column of the unique
-- index created next, so this index may be redundant -- confirm with EXPLAIN
-- before dropping it.
CREATE INDEX "topic_progress_accountChannelMapId_idx"
    ON "topic_progress" ("accountChannelMapId");

-- CreateIndex
-- At most one progress row per (account-channel mapping, topic) pair.
CREATE UNIQUE INDEX "topic_progress_accountChannelMapId_topicId_key"
    ON "topic_progress" ("accountChannelMapId", "topicId");

-- CreateIndex
CREATE INDEX "packages_creator_idx"
    ON "packages" ("creator");
-- AddForeignKey
-- Deleting an "account_channel_map" row also deletes its topic progress rows.
ALTER TABLE "topic_progress"
    ADD CONSTRAINT "topic_progress_accountChannelMapId_fkey"
    FOREIGN KEY ("accountChannelMapId")
    REFERENCES "account_channel_map" ("id")
    ON DELETE CASCADE
    ON UPDATE CASCADE;

View File

@@ -349,3 +349,189 @@ model UserSettings {
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
}
// ───────────────────────────────────────
// Telegram ingestion models
// ───────────────────────────────────────
// Values for TelegramAccount.authState (which defaults to PENDING).
enum AuthState {
  PENDING
  AWAITING_CODE
  AWAITING_PASSWORD
  AUTHENTICATED
  EXPIRED
}
// Values for TelegramChannel.type.
enum ChannelType {
  SOURCE
  DESTINATION
}
// Values for AccountChannelMap.role (which defaults to READER).
enum ChannelRole {
  READER
  WRITER
}
// Values for Package.archiveType.
enum ArchiveType {
  ZIP
  RAR
}
// Values for IngestionRun.status (which defaults to RUNNING).
enum IngestionStatus {
  RUNNING
  COMPLETED
  FAILED
  CANCELLED
}
// Telegram account record, stored in the "telegram_accounts" table.
// NOTE(review): authCode is an optional plain String; it presumably holds a
// login code -- confirm it is cleared once authentication completes.
model TelegramAccount {
  id          String    @id @default(cuid())
  phone       String    @unique
  displayName String?
  isActive    Boolean   @default(true)
  authState   AuthState @default(PENDING)
  authCode    String?
  lastSeenAt  DateTime?
  createdAt   DateTime  @default(now())
  updatedAt   DateTime  @updatedAt

  // Back-relations
  channelMaps   AccountChannelMap[]
  ingestionRuns IngestionRun[]

  @@index([isActive])
  @@map("telegram_accounts")
}
// Telegram channel record, stored in the "telegram_channels" table.
// telegramId is unique and separate from the cuid primary key.
model TelegramChannel {
  id         String      @id @default(cuid())
  telegramId BigInt      @unique
  title      String
  type       ChannelType
  isForum    Boolean     @default(false)
  isActive   Boolean     @default(true)
  createdAt  DateTime    @default(now())
  updatedAt  DateTime    @updatedAt

  // Back-relations
  accountMaps AccountChannelMap[]
  packages    Package[]

  @@index([type, isActive])
  @@map("telegram_channels")
}
// Link between a TelegramAccount and a TelegramChannel, stored in the
// "account_channel_map" table. Each (accountId, channelId) pair is unique, and
// rows are removed when either side is deleted (onDelete: Cascade).
// NOTE(review): @@index([accountId]) is a leading-column prefix of the
// @@unique([accountId, channelId]) index and may be redundant -- confirm.
model AccountChannelMap {
  id                     String      @id @default(cuid())
  accountId              String
  channelId              String
  role                   ChannelRole @default(READER)
  lastProcessedMessageId BigInt?
  createdAt              DateTime    @default(now())

  // Relations
  account       TelegramAccount @relation(fields: [accountId], references: [id], onDelete: Cascade)
  channel       TelegramChannel @relation(fields: [channelId], references: [id], onDelete: Cascade)
  topicProgress TopicProgress[]

  @@unique([accountId, channelId])
  @@index([accountId])
  @@index([channelId])
  @@map("account_channel_map")
}
// Archive package record, stored in the "packages" table. contentHash is
// unique, so a given hash can be stored only once.
// NOTE(review): destChannelId is indexed but has no @relation to
// TelegramChannel (unlike sourceChannelId) -- confirm that is intentional.
model Package {
  id              String      @id @default(cuid())
  contentHash     String      @unique
  fileName        String
  fileSize        BigInt
  archiveType     ArchiveType
  creator         String?
  sourceChannelId String
  sourceMessageId BigInt
  sourceTopicId   BigInt?
  destChannelId   String?
  destMessageId   BigInt?
  isMultipart     Boolean     @default(false)
  partCount       Int         @default(1)
  fileCount       Int         @default(0)
  // Raw bytes of a JPEG thumbnail taken from a nearby Telegram photo
  previewData     Bytes?
  // Telegram message ID of the photo that was matched
  previewMsgId    BigInt?
  indexedAt       DateTime    @default(now())
  createdAt       DateTime    @default(now())

  // Relations
  sourceChannel  TelegramChannel @relation(fields: [sourceChannelId], references: [id])
  files          PackageFile[]
  ingestionRun   IngestionRun?   @relation(fields: [ingestionRunId], references: [id])
  ingestionRunId String?

  @@index([sourceChannelId])
  @@index([destChannelId])
  @@index([fileName])
  @@index([indexedAt])
  @@index([archiveType])
  @@index([creator])
  @@map("packages")
}
// Single file entry belonging to a Package, stored in the "package_files"
// table. Entries are deleted together with their package (onDelete: Cascade).
model PackageFile {
  id               String  @id @default(cuid())
  packageId        String
  path             String
  fileName         String
  extension        String?
  compressedSize   BigInt  @default(0)
  uncompressedSize BigInt  @default(0)
  crc32            String?

  // Relations
  package Package @relation(fields: [packageId], references: [id], onDelete: Cascade)

  @@index([packageId])
  @@index([extension])
  @@index([fileName])
  @@map("package_files")
}
// One ingestion run for a TelegramAccount, stored in the "ingestion_runs"
// table. New runs start as RUNNING with every counter at 0.
model IngestionRun {
  id              String          @id @default(cuid())
  accountId       String
  status          IngestionStatus @default(RUNNING)
  startedAt       DateTime        @default(now())
  finishedAt      DateTime?
  messagesScanned Int             @default(0)
  zipsFound       Int             @default(0)
  zipsDuplicate   Int             @default(0)
  zipsIngested    Int             @default(0)
  errorMessage    String?

  // Live activity tracking: the worker writes these fields in real time.
  // Human-readable text, e.g. "Downloading pack.zip (part 2/5)"
  currentActivity String?
  // Machine-readable step key
  currentStep     String?
  // Title of the channel being processed
  currentChannel  String?
  // File that is currently being processed
  currentFile     String?
  // Index of the current archive set (1-indexed)
  currentFileNum  Int?
  // Total number of archive sets found
  totalFiles      Int?
  // Bytes downloaded so far for the current download
  downloadedBytes BigInt?
  // Total size of the current download
  totalBytes      BigInt?
  // Progress percentage, 0-100
  downloadPercent Int?
  // Time the activity fields were last updated
  lastActivityAt  DateTime?

  // Relations
  account  TelegramAccount @relation(fields: [accountId], references: [id])
  packages Package[]

  @@index([accountId])
  @@index([status])
  @@index([startedAt])
  @@map("ingestion_runs")
}
// Per-topic progress for one AccountChannelMap entry, stored in the
// "topic_progress" table. Each (accountChannelMapId, topicId) pair is unique,
// and rows are deleted with their parent mapping (onDelete: Cascade).
// NOTE(review): @@index([accountChannelMapId]) is a leading-column prefix of
// the @@unique index and may be redundant -- confirm.
model TopicProgress {
  id                     String  @id @default(cuid())
  accountChannelMapId    String
  topicId                BigInt
  topicName              String?
  lastProcessedMessageId BigInt?

  // Relations
  accountChannelMap AccountChannelMap @relation(fields: [accountChannelMapId], references: [id], onDelete: Cascade)

  @@unique([accountChannelMapId, topicId])
  @@index([accountChannelMapId])
  @@map("topic_progress")
}