From 970e652fdcc61dd62530cc687a287758ec6f77a8 Mon Sep 17 00:00:00 2001 From: Conatum Date: Fri, 22 Sep 2023 08:38:59 +0300 Subject: [PATCH] feat(meta): Add a system status reporter. --- src/bot.py | 23 +++++ src/meta/LionBot.py | 32 +++++++ src/meta/monitor.py | 139 +++++++++++++++++++++++++++++++ src/modules/pomodoro/cog.py | 14 ++++ src/modules/schedule/cog.py | 49 +++++++++++ src/modules/sysadmin/exec_cog.py | 18 +++- 6 files changed, 272 insertions(+), 3 deletions(-) create mode 100644 src/meta/monitor.py diff --git a/src/bot.py b/src/bot.py index 8fda839d..49dceb11 100644 --- a/src/bot.py +++ b/src/bot.py @@ -9,6 +9,7 @@ from meta import LionBot, conf, sharding, appname, shard_talk from meta.app import shardname from meta.logger import log_context, log_action_stack, setup_main_logger from meta.context import ctx_bot +from meta.monitor import ComponentMonitor, StatusLevel, ComponentStatus from data import Database @@ -29,6 +30,25 @@ logger = logging.getLogger(__name__) db = Database(conf.data['args']) +async def _data_monitor() -> ComponentStatus: + """ + Component monitor callback for the database. + """ + data = { + 'stats': str(db.pool.get_stats()) + } + if not db.pool._opened: + level = StatusLevel.WAITING + info = "(WAITING) Database Pool is not opened." + elif db.pool._closed: + level = StatusLevel.ERRORED + info = "(ERROR) Database Pool is closed." 
+ else: + level = StatusLevel.OKAY + info = "(OK) Database Pool statistics: {stats}" + return ComponentStatus(level, info, info, data) + + async def main(): log_action_stack.set(("Initialising",)) logger.info("Initialising StudyLion") @@ -73,6 +93,9 @@ async def main(): chunk_guilds_at_startup=False, ) as lionbot: ctx_bot.set(lionbot) + lionbot.system_monitor.add_component( + ComponentMonitor('Database', _data_monitor) + ) try: log_context.set(f"APP: {appname}") logger.info("StudyLion initialised, starting!", extra={'action': 'Starting'}) diff --git a/src/meta/LionBot.py b/src/meta/LionBot.py index 4e842859..ca699ffb 100644 --- a/src/meta/LionBot.py +++ b/src/meta/LionBot.py @@ -21,6 +21,7 @@ from .context import context from .LionContext import LionContext from .LionTree import LionTree from .errors import HandledException, SafeCancellation +from .monitor import SystemMonitor, ComponentMonitor, StatusLevel, ComponentStatus if TYPE_CHECKING: from core import CoreCog @@ -48,9 +49,40 @@ class LionBot(Bot): self.core: Optional['CoreCog'] = None self.translator = translator + self.system_monitor = SystemMonitor() + self.monitor = ComponentMonitor('LionBot', self._monitor_status) + self.system_monitor.add_component(self.monitor) + self._locks = WeakValueDictionary() self._running_events = set() + async def _monitor_status(self): + if self.is_closed(): + level = StatusLevel.ERRORED + info = "(ERROR) Websocket is closed" + data = {} + elif self.is_ws_ratelimited(): + level = StatusLevel.WAITING + info = "(WAITING) Websocket is ratelimited" + data = {} + elif not self.is_ready(): + level = StatusLevel.STARTING + info = "(STARTING) Not yet ready" + data = {} + else: + level = StatusLevel.OKAY + info = ( + "(OK) " + "Logged in with {guild_count} guilds, " + "websocket latency {latency}, and {events} running events." 
+ ) + data = { + 'guild_count': len(self.guilds), + 'latency': self.latency, + 'events': len(self._running_events), + } + return ComponentStatus(level, info, info, data) + async def setup_hook(self) -> None: log_context.set(f"APP: {self.application_id}") await self.app_ipc.connect() diff --git a/src/meta/monitor.py b/src/meta/monitor.py new file mode 100644 index 00000000..474c51f4 --- /dev/null +++ b/src/meta/monitor.py @@ -0,0 +1,139 @@ +import logging +import asyncio +from enum import IntEnum +from collections import deque, ChainMap +import datetime as dt + +logger = logging.getLogger(__name__) + + +class StatusLevel(IntEnum): + ERRORED = -2 + UNSURE = -1 + WAITING = 0 + STARTING = 1 + OKAY = 2 + + @property + def symbol(self): + return symbols[self] + + +symbols = { + StatusLevel.ERRORED: '🟥', + StatusLevel.UNSURE: '🟧', + StatusLevel.WAITING: '⬜', + StatusLevel.STARTING: '🟫', + StatusLevel.OKAY: '🟩', +} + + +class ComponentStatus: + def __init__(self, level: StatusLevel, short_formatstr: str, long_formatstr: str, data: 'dict | None' = None): + self.level = level + self.short_formatstr = short_formatstr + self.long_formatstr = long_formatstr + self.data = {} if data is None else data + self.created_at = dt.datetime.now(tz=dt.timezone.utc) + + def format_args(self): + extra = { + 'created_at': self.created_at, + 'level': self.level, + 'symbol': self.level.symbol, + } + return ChainMap(extra, self.data) + + @property + def short(self): + return self.short_formatstr.format(**self.format_args()) + + @property + def long(self): + return self.long_formatstr.format(**self.format_args()) + + +class ComponentMonitor: + _name = None + + def __init__(self, name=None, callback=None): + self._callback = callback + self.name = name or self._name + if not self.name: + raise ValueError("ComponentMonitor must have a name") + + async def _make_status(self, *args, **kwargs): + if self._callback is not None: + return await self._callback(*args, **kwargs) + else: + raise NotImplementedError + + async def status(self) 
-> ComponentStatus: + try: + status = await self._make_status() + except Exception as e: + logger.exception( + f"Status callback for component '{self.name}' failed. This should not happen." + ) + status = ComponentStatus( + level=StatusLevel.UNSURE, + short_formatstr="Status callback for '{name}' failed with error '{error}'", + long_formatstr="Status callback for '{name}' failed with error '{error}'", + data={ + 'name': self.name, + 'error': repr(e) + } + ) + return status + + +class SystemMonitor: + def __init__(self): + self.components = {} + self.recent = deque(maxlen=10) + + def add_component(self, component: ComponentMonitor): + self.components[component.name] = component + return component + + async def request(self): + """ + Request status from each component. + """ + tasks = { + name: asyncio.create_task(comp.status()) + for name, comp in self.components.items() + } + await asyncio.gather(*tasks.values()) + status = { + name: await fut for name, fut in tasks.items() + } + self.recent.append(status) + return status + + async def _format_summary(self, status_dict: dict[str, ComponentStatus]): + """ + Format a one line summary from a status dict. + """ + freq = {level: 0 for level in StatusLevel} + for status in status_dict.values(): + freq[status.level] += 1 + + summary = '\t'.join(f"{level.symbol} {count}" for level, count in freq.items() if count) + return summary + + async def _format_overview(self, status_dict: dict[str, ComponentStatus]): + """ + Format an overview (one line per component) from a status dict. 
+ """ + lines = [] + for name, status in status_dict.items(): + lines.append(f"{status.level.symbol} {name}: {status.short}") + summary = await self._format_summary(status_dict) + return '\n'.join((summary, *lines)) + + async def get_summary(self): + return await self._format_summary(await self.request()) + + async def get_overview(self): + return await self._format_overview(await self.request()) diff --git a/src/modules/pomodoro/cog.py b/src/modules/pomodoro/cog.py index f2a5cc41..fd25b318 100644 --- a/src/modules/pomodoro/cog.py +++ b/src/modules/pomodoro/cog.py @@ -10,6 +10,7 @@ from discord import app_commands as appcmds from meta import LionCog, LionBot, LionContext from meta.logger import log_wrap from meta.sharding import THIS_SHARD +from meta.monitor import ComponentMonitor, ComponentStatus, StatusLevel from utils.lib import utc_now from wards import low_management_ward @@ -42,12 +43,25 @@ class TimerCog(LionCog): self.bot = bot self.data = bot.db.load_registry(TimerData()) self.settings = TimerSettings() + self.monitor = ComponentMonitor('TimerCog', self._monitor) + self.timer_options = TimerOptions() self.ready = False self.timers = defaultdict(dict) + async def _monitor(self): + if not self.ready: + level = StatusLevel.STARTING + info = "(STARTING) Not ready. {timers} timers loaded." + else: + level = StatusLevel.OKAY + info = "(OK) {timers} timers loaded." 
+ data = dict(timers=len(self.timers)) + return ComponentStatus(level, info, info, data) + async def cog_load(self): + self.bot.system_monitor.add_component(self.monitor) await self.data.init() self.bot.core.guild_config.register_model_setting(self.settings.PomodoroChannel) diff --git a/src/modules/schedule/cog.py b/src/modules/schedule/cog.py index 75d56642..39b60960 100644 --- a/src/modules/schedule/cog.py +++ b/src/modules/schedule/cog.py @@ -13,6 +13,7 @@ from meta import LionCog, LionBot, LionContext from meta.logger import log_wrap from meta.errors import UserInputError, ResponseTimedOut from meta.sharding import THIS_SHARD +from meta.monitor import ComponentMonitor, ComponentStatus, StatusLevel from utils.lib import utc_now, error_embed from utils.ui import Confirm from utils.data import MULTIVALUE_IN, MEMBERS @@ -38,6 +39,10 @@ class ScheduleCog(LionCog): self.bot = bot self.data: ScheduleData = bot.db.load_registry(ScheduleData()) self.settings = ScheduleSettings() + self.monitor = ComponentMonitor( + 'ScheduleCog', + self._monitor + ) # Whether we are ready to take events self.initialised = asyncio.Event() @@ -57,12 +62,56 @@ class ScheduleCog(LionCog): self.session_channels = self.settings.SessionChannels._cache + async def _monitor(self): + nowid = self.nowid + now = None + now_lock = self.slotlock(nowid) + if not self.initialised.is_set(): + level = StatusLevel.STARTING + info = ( + "(STARTING) " + "Not ready. " + "Spawn task is {spawn}. " + "Spawn lock is {spawn_lock}. " + "Active slots {active}." + ) + elif nowid not in self.active_slots: + level = StatusLevel.UNSURE + info = ( + "(UNSURE) " + "Setup, but current slotid {nowid} not active. " + "Spawn task is {spawn}. " + "Spawn lock is {spawn_lock}. " + "Now lock is {now_lock}. " + "Active slots {active}." + ) + else: + now = self.active_slots[nowid] + level = StatusLevel.OKAY + info = ( + "(OK) " + "Running current slot {now}. " + "Spawn lock is {spawn_lock}. " + "Now lock is {now_lock}. 
" + "Active slots {active}." + ) + data = { + 'spawn': self.spawn_task, + 'spawn_lock': self.spawn_lock, + 'active': self.active_slots, + 'nowid': nowid, + 'now_lock': now_lock, + 'now': now, + } + return ComponentStatus(level, info, info, data) + @property def nowid(self): now = utc_now() return time_to_slotid(now) async def cog_load(self): + self.bot.system_monitor.add_component(self.monitor) await self.data.init() # Update the session channel cache diff --git a/src/modules/sysadmin/exec_cog.py b/src/modules/sysadmin/exec_cog.py index 63b5c151..351219c5 100644 --- a/src/modules/sysadmin/exec_cog.py +++ b/src/modules/sysadmin/exec_cog.py @@ -186,6 +186,17 @@ def mk_print(fp: io.StringIO) -> Callable[..., None]: return _print +def mk_status_printer(bot, printer): + async def _status(details=False): + if details: + status = await bot.system_monitor.get_overview() + else: + status = await bot.system_monitor.get_summary() + printer(status) + return status + return _status + + @log_wrap(action="Code Exec") async def _async(to_eval: str, style='exec'): newline = '\n' * ('\n' in to_eval) @@ -202,6 +213,7 @@ async def _async(to_eval: str, style='exec'): scope['ctx'] = ctx = context.get() scope['bot'] = ctx_bot.get() scope['print'] = _print # type: ignore + scope['print_status'] = mk_status_printer(scope['bot'], _print) try: if ctx and ctx.message: @@ -297,7 +309,7 @@ class Exec(LionCog): file = discord.File(fp, filename=f"output-{target}.md") await ctx.reply(file=file) elif result: - await ctx.reply(f"```md{result}```") + await ctx.reply(f"```md\n{result}```") else: await ctx.reply("Command completed, and had no output.") else: @@ -351,7 +363,7 @@ class Exec(LionCog): except asyncio.TimeoutError: return if ctx.interaction: - await ctx.interaction.response.defer(thinking=True, ephemeral=True) + await ctx.interaction.response.defer(thinking=True) if target is not None: if target not in shard_talk.peers: embed = discord.Embed(description=f"Unknown peer {target}", 
colour=discord.Colour.red()) @@ -376,7 +388,7 @@ class Exec(LionCog): await ctx.reply(file=file) else: # Send as message - await ctx.reply(f"```md\n{output}```", ephemeral=True) + await ctx.reply(f"```md\n{output}```") asyncall_cmd.autocomplete('target')(_peer_acmpl)