-- Schema for the message logging system.
-- The whole script runs in one transaction, so it applies entirely or not at all.
BEGIN;

-- One row per guild that has (or previously had) message logging configured.
-- webhook_url is nullable.
CREATE TABLE logging_guilds(
  guildid BIGINT PRIMARY KEY,
  webhook_url TEXT,
  created_at TIMESTAMPTZ DEFAULT NOW(),
  _timestamp TIMESTAMPTZ DEFAULT NOW()
);

-- update_timestamp_column() is not defined in this script and must already exist.
-- NOTE(review): presumably it refreshes _timestamp on update -- confirm.
CREATE TRIGGER logging_guilds_timestamp BEFORE UPDATE ON logging_guilds
  FOR EACH ROW EXECUTE FUNCTION update_timestamp_column();


-- Metadata for each logged message. Rows are removed together with their
-- guild row (ON DELETE CASCADE). deleted_at is nullable.
CREATE TABLE logged_messages(
  messageid BIGINT PRIMARY KEY,
  guildid BIGINT NOT NULL REFERENCES logging_guilds ON DELETE CASCADE,
  channelid BIGINT NOT NULL,
  userid BIGINT NOT NULL,
  created_at TIMESTAMPTZ NOT NULL,
  deleted_at TIMESTAMPTZ,
  _timestamp TIMESTAMPTZ DEFAULT NOW()
);

CREATE TRIGGER logged_messages_timestamp BEFORE UPDATE ON logged_messages
  FOR EACH ROW EXECUTE FUNCTION update_timestamp_column();

-- Content snapshots of a message; a message may have several states.
-- States are removed together with their message (ON DELETE CASCADE).
CREATE TABLE message_states(
  stateid INTEGER PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
  messageid BIGINT NOT NULL REFERENCES logged_messages ON DELETE CASCADE,
  content TEXT NOT NULL,
  embeds_raw TEXT,
  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Supports looking up the states of a given message.
CREATE INDEX message_states_messageid ON message_states (messageid);

-- One row per attachment; permalink is nullable.
-- NOTE(review): content_type is NOT NULL here -- confirm that every writer
-- always supplies a value for it.
CREATE TABLE logged_attachments(
  attachment_id BIGINT PRIMARY KEY,
  proxy_url TEXT NOT NULL,
  url TEXT NOT NULL,
  content_type TEXT NOT NULL,
  filesize INTEGER NOT NULL,
  filename TEXT NOT NULL,
  permalink TEXT,
  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Link table between message states and attachments.
-- Links are removed when either side is deleted (ON DELETE CASCADE).
-- No primary key or uniqueness constraint is declared on the pair.
CREATE TABLE logged_messages_attachments(
  stateid INTEGER NOT NULL REFERENCES message_states(stateid) ON DELETE CASCADE,
  attachment_id BIGINT NOT NULL REFERENCES logged_attachments(attachment_id) ON DELETE CASCADE
);
-- Supports fetching the attachments linked to a given state.
CREATE INDEX logged_messages_attachments_stateid ON logged_messages_attachments (stateid);

COMMIT;
async def setup(bot):
    """
    Register the message logging cog on the given bot.

    The cog module is imported inside the function rather than at module
    level, so importing this package alone does not import `.cog`.
    """
    from .cog import LogCog

    cog = LogCog(bot)
    await bot.add_cog(cog)
def validate_message(self, message: discord.Message) -> bool:
    """
    Check whether the given message should be logged.

    A message is logged only when all of the following hold:
      - it was sent in a guild,
      - that guild's id is a key of ``self.logging_guilds``,
      - its author is not a bot,
      - it has non-empty ``system_content`` or at least one attachment.

    Returns:
        ``True`` if the message should be logged, ``False`` otherwise.
        Always a real ``bool``, never the last operand of an ``and`` chain
        (a Guild, ``None``, a string or a list).
    """
    guild = message.guild
    if guild is None or guild.id not in self.logging_guilds:
        return False
    if message.author.bot:
        return False
    return bool(message.system_content or message.attachments)
async def save_attachments(self, *attachments: discord.Attachment) -> list[LogAttachment]:
    """
    Save the given attachment files into LogAttachments.

    Attachments which already have a LogAttachment row are reused.
    Each new attachment of at most 25MB is downloaded and re-sent through
    ``self.attachment_hook``, and the URL of the re-sent copy is stored as
    its permalink. A row is created for every new attachment, whether or
    not a permalink could be obtained.

    Returns:
        One LogAttachment per given attachment, in the order given.

    Notes:
        Some options here for either saving attachment into db
        Or sending it to another channel and saving the link
        Or saving it to disk, recording the path for upload.
        All of these are complicated by webhook upload limitations.
        Nitro users can send attachments that are vastly larger than
        we are allowed to upload (10MB or 25MB probably)

        Or we could save to disk, and re-serve via webserver,
        and just send the links.

        For now what we do is we try to send the attachments via webhook
        if they are under 25MB, and save a permalink that way.
    """
    if not attachments:
        return []

    existing = await LogAttachment.fetch_where(
        attachment_id=[attach.id for attach in attachments]
    )
    existingids = {row.attachment_id: row for row in existing}

    # Keyed by id so an attachment passed twice is only created once.
    to_create = {
        attach.id: attach for attach in attachments
        if attach.id not in existingids
    }

    # Download everything small enough to be re-uploaded.
    max_size = 25 * 10**6
    files = {}  # attachment_id -> discord.File
    for attachment in to_create.values():
        if attachment.size > max_size:
            # Skipping because too large
            continue
        try:
            files[attachment.id] = await attachment.to_file()
        except discord.HTTPException:
            logger.warning(
                "Failed to download attachment '%s'",
                attachment.id,
                exc_info=True
            )

    permalinks = {}  # attachment_id -> permalink
    if files:
        name_to_id = {to_create[aid].filename: aid for aid in files}
        batch_failed = False

        # Results of a batch upload are matched back by filename, so only
        # batch when the filenames are unambiguous.
        if len(name_to_id) == len(files):
            try:
                result = await self.attachment_hook.send(files=list(files.values()), wait=True)
            except discord.HTTPException:
                batch_failed = True
                logger.warning(
                    "Failed to hooksave attachments %s in a single message",
                    list(files),
                    exc_info=True
                )
            else:
                for result_attach in result.attachments:
                    # .get: a returned filename we do not recognise must not
                    # abort the save; that file is retried individually below.
                    aid = name_to_id.get(result_attach.filename)
                    if aid is not None:
                        permalinks[aid] = result_attach.url

        # Retrying one file whose only upload just failed would repeat the
        # exact same request, so skip that case.
        if not (batch_failed and len(files) == 1):
            for aid, file in files.items():
                if aid in permalinks:
                    continue
                # Rewind the buffer: a previous batch attempt may have
                # already read it to the end.
                file.reset()
                try:
                    result = await self.attachment_hook.send(file=file, wait=True)
                    permalinks[aid] = result.attachments[0].url
                except discord.HTTPException:
                    logger.warning(
                        "Failed to hooksave attachment '%s'",
                        aid,
                        exc_info=True
                    )

    created = {}  # attachment_id -> LogAttachment
    for attachment in to_create.values():
        created[attachment.id] = await LogAttachment.create(
            attachment_id=attachment.id,
            proxy_url=attachment.proxy_url,
            url=attachment.url,
            # discord may not report a content type, but the column is
            # NOT NULL, so fall back to the generic binary type.
            content_type=attachment.content_type or 'application/octet-stream',
            filesize=attachment.size,
            filename=attachment.filename,
            permalink=permalinks.get(attachment.id),
            created_at=discord.utils.snowflake_time(attachment.id)
        )

    return [existingids.get(attach.id) or created[attach.id] for attach in attachments]
@LionCog.listener('on_raw_message_edit')
async def on_raw_message_update(self, payload: discord.RawMessageUpdateEvent):
    """
    Record a new state for an edited message and report the change.

    A new state is saved when the message text or its set of attachment
    ids differs from the last saved state, or when no state existed yet.
    ``send_state_updated`` is only called when there was a previous state
    to compare against and something changed.
    """
    message = payload.message
    if not self.validate_message(message):
        return

    async with self.message_lock(payload.message_id):
        # Get last state of message if it exists.
        # Create it if we missed it and the message is cached.
        # The cached message is validated first because
        # save_message_state asserts validity.
        old_state = await self.get_message_state(payload.message_id)
        cached = payload.cached_message
        if old_state is None and cached and self.validate_message(cached):
            old_state = await self.save_message_state(cached)

        if not old_state:
            # Nothing to compare against: record the current state silently.
            await self.save_message_state(message)
            return

        # States store system_content, so compare against system_content;
        # comparing against raw content would misreport messages whose
        # system_content differs from their content.
        if old_state.content != message.system_content:
            changed = True
        else:
            rows = await self.data.logged_messages_attachments.select_where(stateid=old_state.stateid)
            old_aids = {row['attachment_id'] for row in rows}
            new_aids = {att.id for att in message.attachments}
            # Attachments were added or removed
            changed = bool(old_aids.symmetric_difference(new_aids))

        if changed:
            new_state = await self.save_message_state(message)
            await self.send_state_updated(old_state, new_state)
MessageState, new_state: MessageState): + """ + Inform the logging webhook that a message state has changed. + + This occurs either when the content is changed, or attachments are changed. + - Message edited + - Message attachment added + - Message attachment removed + + Nitro users may also send messages up to 4k chars long, + The edits of which exceed our 4096 embed description limit + And possibly our total of 6k chars as well. + """ + # Get the associated LoggingGuild + message = await LoggedMessage.fetch(old_state.messageid) + if message is None: + return + webhook = self.logging_guilds[message.guildid] + + args: list[MessageArgs] = [] + # List of embeds to add author, message link/created info + needs_dec: list[discord.Embed] = [] + # List of embeds which need the messageid set in the footer + needs_footer: list[discord.Embed] = [] + + # Check content change + if old_state.content != new_state.content: + if len(old_state.content) + len(new_state.content) > 4040: + desc = "Message too long to display! See attached diff file." 
+ else: + desc = f"{old_state.content}\n\n-->\n\n{new_state.content}" + + embed = discord.Embed( + title=f"Message edited in <#{message.channelid}>", + description=desc, + timestamp=new_state.created_at + ) + needs_dec.append(embed) + needs_footer.append(embed) + file = diff_file(old_state.content, new_state.content, filename=f"{message.messageid}.diff") + args.append( + MessageArgs( + embed=embed, + file=file, + ) + ) + + # Get attachments to check for differences + query = self.data.logged_messages_attachments.select_where(stateid=[old_state.stateid, new_state.stateid]) + query.join(self.data.logged_attachments.name, join_type=JOINTYPE.LEFT, using=('attachment_id',)) + query.select('stateid', 'attachment_id', 'proxy_url', 'content_type', 'filename', 'permalink') + query.select( + stateid='stateid', + attachment_id='attachment_id', + proxy_url='proxy_url', + content_type='content_type', + filename='filename', + permalink='permalink' + ) + results = await query + before_attach = {} + after_attach = {} + for row in results: + mapper = before_attach if row['stateid'] == old_state.stateid else after_attach + mapper[row['attachment_id']] = row + + # Check attachments added or removed + added = [row for aid, row in after_attach.items() if aid not in before_attach] + removed = [row for aid, row in before_attach.items() if aid not in after_attach] + for sublist, op in ((added, 'added'), (removed, 'removed')): + if sublist: + links, image_urls = self._format_attachments(*sublist) + embed = discord.Embed( + title=f"Attachment(s) {op} in <#{message.channelid}>", + description=', '.join(links) + ) + needs_dec.append(embed) + + embeds = [embed] + if len(image_urls) == 1: + embed.set_image(url=image_urls[0][1]) + elif len(image_urls) > 1: + for name, url in image_urls: + next_embed = discord.Embed( + title=name + ).set_image(url=url) + embeds.append(next_embed) + + needs_footer.extend(embeds) + args.append( + MessageArgs(embeds=embeds) + ) + + for embed in needs_footer: + 
async def _logmessage_decorate(self, message: LoggedMessage, *embeds: discord.Embed):
    """
    Add author and location information to each of the given embeds.

    The author is looked up in the guild's member cache and, failing that,
    fetched from the API. When the author is found, each embed gets an
    author line and an "Author" field; otherwise the author line only
    shows the stored user id. Every embed also gets a "Location" field
    with the message's relative creation time and a jump link.

    The embeds are modified in place; nothing is returned.
    """
    guild = self.bot.get_guild(message.guildid)
    author = guild.get_member(message.userid) if guild else None
    if author is None and guild is not None:
        # Not cached: ask the API, and keep the result.
        try:
            author = await guild.fetch_member(message.userid)
        except discord.NotFound:
            # The user is no longer in the guild; fall back to the id.
            author = None
        except discord.HTTPException:
            # Decoration is best-effort: a failed lookup must not stop
            # the log entry itself from being sent.
            logger.warning(
                "Failed to fetch member '%s' in guild '%s'",
                message.userid,
                message.guildid,
                exc_info=True
            )
            author = None

    if author:
        if author.joined_at:
            joined = discord.utils.format_dt(author.joined_at, 'R')
            joinedstr = f"Joined {joined}"
        else:
            joinedstr = ""
        for embed in embeds:
            embed.set_author(
                name=f"Sent By: {author.display_name}",
                icon_url=author.display_avatar.url,
            )
            embed.add_field(
                name="Author",
                value=f"{author.mention}\nID: {author.id}\n{joinedstr}"
            )
    else:
        for embed in embeds:
            embed.set_author(name=f"Author ID: {message.userid}")

    created = discord.utils.format_dt(message.created_at, 'R')
    jump_url = jumpto(message.guildid, message.channelid, message.messageid)
    locationstr = f"Sent {created}\n[Click to Jump]({jump_url})"

    for embed in embeds:
        embed.add_field(
            name="Location",
            value=locationstr
        )
@logging_group.command(
    name='enable',
    description="Enable message logging and set the webhook url to use."
)
async def logging_enable(self, ctx: LionContext, webhook_url: str):
    """
    Enable message logging for this guild using the given webhook.

    A test embed is posted to the webhook first. Only if that succeeds is
    the URL stored for the guild and the webhook registered in
    ``self.logging_guilds``.

    Does nothing outside a guild, or if the invoking member lacks the
    ``manage_guild`` permission.
    """
    if not ctx.guild:
        return
    if not ctx.author.guild_permissions.manage_guild:
        return

    # from_url raises ValueError for a string that is not a webhook URL.
    try:
        webhook = discord.Webhook.from_url(webhook_url, client=self.bot)
    except ValueError:
        await ctx.error_reply("The given webhook URL is not valid!")
        return

    # Only the test post is guarded, so a failure elsewhere (e.g. replying
    # to the user) is not misreported as a webhook problem.
    embed = discord.Embed(
        title="Testing",
        description="Testing logging webhook, feel free to delete.",
    )
    try:
        await webhook.send(embed=embed, wait=True)
    except discord.HTTPException:
        await ctx.error_reply("Could not post to the given webhook!")
        return

    existing = await LoggingGuild.fetch(ctx.guild.id)
    if existing:
        await existing.update(webhook_url=webhook_url)
    else:
        await LoggingGuild.create(guildid=ctx.guild.id, webhook_url=webhook_url)

    self.logging_guilds[ctx.guild.id] = webhook
    await ctx.reply("Message logging enabled!")
def diff_file(before: str, after: str, filename: str = "changes.diff") -> discord.File:
    """
    Generates a diff from two strings and returns it as a discord.File.

    The diff is a line-by-line ``difflib.ndiff`` of the two strings,
    encoded as UTF-8. If the diff is empty, the file contains
    "No changes." instead.

    Args:
        before (str): The original string content.
        after (str): The modified string content.
        filename (str, optional): The name for the output file. Defaults to "changes.diff".

    Returns:
        discord.File: A file object ready to be sent in a Discord message.
    """
    diff_lines = difflib.ndiff(
        before.splitlines(),
        after.splitlines(),
    )
    diff_text = "\n".join(diff_lines) or "No changes."

    # Deliberately not wrapped in `with`: the returned File reads from this
    # buffer later, when it is sent. Closing the buffer before returning
    # would hand the caller a File backed by a closed stream.
    buffer = BytesIO(diff_text.encode('utf-8'))
    return discord.File(buffer, filename=filename)