addd TG integration

This commit is contained in:
xCyanGrizzly
2026-03-02 11:57:17 +01:00
parent b427193d17
commit 4d0df6b1a4
35 changed files with 4436 additions and 242 deletions

162
worker/src/tdlib/chats.ts Normal file
View File

@@ -0,0 +1,162 @@
import type { Client } from "tdl";
import { childLogger } from "../util/logger.js";
import { config } from "../util/config.js";
const log = childLogger("chats");
export interface TelegramChatInfo {
chatId: bigint;
title: string;
type: "channel" | "supergroup" | "group" | "private" | "other";
isForum: boolean;
memberCount?: number;
}
/**
* Fetch all chats the account is a member of.
* Uses TDLib's getChats to load the chat list, then getChat for details.
* Filters to channels and supergroups only (groups/privates are not useful for ingestion).
*/
export async function getAccountChats(
client: Client
): Promise<TelegramChatInfo[]> {
const chats: TelegramChatInfo[] = [];
// Load main chat list — TDLib loads in batches
let offsetOrder = "9223372036854775807"; // max int64 as string
let offsetChatId = 0;
let hasMore = true;
while (hasMore) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const result = (await client.invoke({
_: "getChats",
chat_list: { _: "chatListMain" },
limit: 100,
})) as { chat_ids: number[] };
if (!result.chat_ids || result.chat_ids.length === 0) {
break;
}
for (const chatId of result.chat_ids) {
try {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const chat = (await client.invoke({
_: "getChat",
chat_id: chatId,
})) as any;
const chatType = chat.type?._;
let type: TelegramChatInfo["type"] = "other";
let isForum = false;
if (chatType === "chatTypeSupergroup") {
// Get supergroup details to check if it's a channel or group
try {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const sg = (await client.invoke({
_: "getSupergroup",
supergroup_id: chat.type.supergroup_id,
})) as any;
type = sg.is_channel ? "channel" : "supergroup";
isForum = sg.is_forum ?? false;
} catch {
type = "supergroup";
}
} else if (chatType === "chatTypeBasicGroup") {
type = "group";
} else if (chatType === "chatTypePrivate" || chatType === "chatTypeSecret") {
type = "private";
}
// Only include channels and supergroups
if (type === "channel" || type === "supergroup") {
chats.push({
chatId: BigInt(chatId),
title: chat.title ?? `Chat ${chatId}`,
type,
isForum,
});
}
} catch (err) {
log.warn({ chatId, err }, "Failed to get chat details, skipping");
}
}
// getChats with chatListMain returns all chats at once in newer TDLib versions
// So we break after the first batch
hasMore = false;
await sleep(config.apiDelayMs);
}
log.info(
{ total: chats.length },
"Fetched channels/supergroups from Telegram"
);
return chats;
}
/**
* Generate an invite link for a chat. The account must be an admin or have
* invite link permissions.
*/
export async function generateInviteLink(
client: Client,
chatId: bigint
): Promise<string> {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const result = (await client.invoke({
_: "createChatInviteLink",
chat_id: Number(chatId),
name: "DragonsStash Auto-Join",
creates_join_request: false,
})) as any;
const link = result.invite_link as string;
log.info({ chatId: chatId.toString(), link }, "Generated invite link");
return link;
}
/**
* Create a new supergroup (private group) via TDLib.
* Returns the chat ID and title.
*/
export async function createSupergroup(
client: Client,
title: string
): Promise<{ chatId: bigint; title: string }> {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const result = (await client.invoke({
_: "createNewSupergroupChat",
title,
is_forum: false,
is_channel: false,
description: "DragonsStash archive destination — all accounts write here",
})) as any;
const chatId = BigInt(result.id);
log.info({ chatId: chatId.toString(), title }, "Created new supergroup");
return { chatId, title: result.title ?? title };
}
/**
* Join a chat using an invite link.
*/
export async function joinChatByInviteLink(
client: Client,
inviteLink: string
): Promise<void> {
await client.invoke({
_: "joinChatByInviteLink",
invite_link: inviteLink,
});
log.info({ inviteLink }, "Joined chat by invite link");
}
function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}

View File

@@ -1,5 +1,5 @@
import type { Client } from "tdl";
import { readFile, rename, stat } from "fs/promises";
import { readFile, rename, copyFile, unlink, stat } from "fs/promises";
import { config } from "../util/config.js";
import { childLogger } from "../util/logger.js";
import { isArchiveAttachment } from "../archive/detect.js";
@@ -69,19 +69,26 @@ export interface ChannelScanResult {
}
/**
* Fetch messages from a channel since a given message ID.
* Fetch messages from a channel, stopping once we've scanned past the
* last-processed boundary (with one page of lookback for multipart safety).
* Collects both archive attachments AND photo messages (for preview matching).
* Returns messages in chronological order (oldest first).
*
* When `lastProcessedMessageId` is null (first run), scans everything.
* The worker applies a post-grouping filter to skip fully-processed sets,
* and keeps `packageExistsBySourceMessage` as a safety net.
*/
export async function getChannelMessages(
client: Client,
chatId: bigint,
fromMessageId?: bigint | null,
lastProcessedMessageId?: bigint | null,
limit = 100
): Promise<ChannelScanResult> {
const archives: TelegramMessage[] = [];
const photos: TelegramPhoto[] = [];
let currentFromId = fromMessageId ? Number(fromMessageId) : 0;
const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
let currentFromId = 0;
// eslint-disable-next-line no-constant-condition
while (true) {
@@ -114,8 +121,6 @@ export async function getChannelMessages(
const photo = msg.content?.photo;
const caption = msg.content?.caption?.text ?? "";
if (photo?.sizes && photo.sizes.length > 0) {
// Pick the smallest size for thumbnail (type "s" or "m")
// TDLib photo sizes are ordered from smallest to largest
const smallest = photo.sizes[0];
photos.push({
id: BigInt(msg.id),
@@ -128,13 +133,22 @@ export async function getChannelMessages(
}
currentFromId = result.messages[result.messages.length - 1].id;
// Stop scanning once we've gone past the boundary (this page is the lookback)
if (boundary && currentFromId < boundary) break;
if (result.messages.length < 100) break;
// Rate limit delay
await sleep(config.apiDelayMs);
}
// Return in chronological order (oldest first)
log.info(
{ chatId: chatId.toString(), archives: archives.length, photos: photos.length },
"Channel scan complete"
);
// Reverse to chronological order (oldest first) so worker processes old→new
return {
archives: archives.reverse(),
photos: photos.reverse(),
@@ -380,8 +394,23 @@ async function verifyAndMove(
"File verified and complete"
);
// Move from TDLib's cache to our temp directory
await rename(localPath, destPath);
// Move from TDLib's cache to our temp directory.
// Use rename first (fast, same filesystem), fall back to copy+delete
// when source and destination are on different filesystems (EXDEV).
try {
await rename(localPath, destPath);
} catch (err: unknown) {
if ((err as NodeJS.ErrnoException).code === "EXDEV") {
log.debug(
{ fileId, fileName },
"Cross-device rename — falling back to copy + unlink"
);
await copyFile(localPath, destPath);
await unlink(localPath);
} else {
throw err;
}
}
}
function sleep(ms: number): Promise<void> {

View File

@@ -125,29 +125,43 @@ export async function getForumTopicList(
}
/**
* Fetch messages from a specific forum topic (thread).
* Uses getMessageThreadHistory to scan within a topic.
* Fetch messages from a specific forum topic (thread), stopping once
* we've scanned past the last-processed boundary (with one page of lookback).
* Uses searchChatMessages with message_thread_id to scan within a topic.
*
* Returns messages in chronological order (oldest first).
*
* When `lastProcessedMessageId` is null (first run), scans everything.
* The worker applies a post-grouping filter to skip fully-processed sets,
* and keeps `packageExistsBySourceMessage` as a safety net.
*/
export async function getTopicMessages(
client: Client,
chatId: bigint,
topicId: bigint,
fromMessageId?: bigint | null,
lastProcessedMessageId?: bigint | null,
limit = 100
): Promise<ChannelScanResult> {
const archives: TelegramMessage[] = [];
const photos: TelegramPhoto[] = [];
let currentFromId = fromMessageId ? Number(fromMessageId) : 0;
const boundary = lastProcessedMessageId ? Number(lastProcessedMessageId) : null;
let currentFromId = 0;
// eslint-disable-next-line no-constant-condition
while (true) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const result = (await client.invoke({
_: "getMessageThreadHistory",
_: "searchChatMessages",
chat_id: Number(chatId),
message_id: Number(topicId),
query: "",
message_thread_id: Number(topicId),
from_message_id: currentFromId,
offset: 0,
limit: Math.min(limit, 100),
filter: null,
sender_id: null,
saved_messages_topic_id: 0,
})) as {
messages?: {
id: number;
@@ -206,11 +220,21 @@ export async function getTopicMessages(
}
currentFromId = result.messages[result.messages.length - 1].id;
// Stop scanning once we've gone past the boundary (this page is the lookback)
if (boundary && currentFromId < boundary) break;
if (result.messages.length < 100) break;
await sleep(config.apiDelayMs);
}
log.info(
{ chatId: chatId.toString(), topicId: topicId.toString(), archives: archives.length, photos: photos.length },
"Topic scan complete"
);
// Reverse to chronological order (oldest first) so worker processes old→new
return {
archives: archives.reverse(),
photos: photos.reverse(),