feat(meta): Add a system status reporter.
This commit is contained in:
23
src/bot.py
23
src/bot.py
@@ -9,6 +9,7 @@ from meta import LionBot, conf, sharding, appname, shard_talk
|
||||
from meta.app import shardname
|
||||
from meta.logger import log_context, log_action_stack, setup_main_logger
|
||||
from meta.context import ctx_bot
|
||||
from meta.monitor import ComponentMonitor, StatusLevel, ComponentStatus
|
||||
|
||||
from data import Database
|
||||
|
||||
@@ -29,6 +30,25 @@ logger = logging.getLogger(__name__)
|
||||
db = Database(conf.data['args'])
|
||||
|
||||
|
||||
async def _data_monitor() -> ComponentStatus:
    """
    Status callback for the 'Database' component monitor.

    Reads the module-level `db.pool` and reports WAITING while the pool
    is not opened, ERRORED once it is closed, and OKAY otherwise.
    The stringified pool statistics are always attached as `stats`,
    although only the OKAY message interpolates them.
    """
    pool = db.pool
    payload = {'stats': str(pool.get_stats())}

    if not pool._opened:
        outcome = (StatusLevel.WAITING, "(WAITING) Database Pool is not opened.")
    elif pool._closed:
        outcome = (StatusLevel.ERRORED, "(ERROR) Database Pool is closed.")
    else:
        outcome = (StatusLevel.OKAY, "(OK) Database Pool statistics: {stats}")

    severity, message = outcome
    # The same template is used for both the short and the long form.
    return ComponentStatus(severity, message, message, payload)
|
||||
|
||||
|
||||
async def main():
|
||||
log_action_stack.set(("Initialising",))
|
||||
logger.info("Initialising StudyLion")
|
||||
@@ -73,6 +93,9 @@ async def main():
|
||||
chunk_guilds_at_startup=False,
|
||||
) as lionbot:
|
||||
ctx_bot.set(lionbot)
|
||||
lionbot.system_monitor.add_component(
|
||||
ComponentMonitor('Database', _data_monitor)
|
||||
)
|
||||
try:
|
||||
log_context.set(f"APP: {appname}")
|
||||
logger.info("StudyLion initialised, starting!", extra={'action': 'Starting'})
|
||||
|
||||
@@ -21,6 +21,7 @@ from .context import context
|
||||
from .LionContext import LionContext
|
||||
from .LionTree import LionTree
|
||||
from .errors import HandledException, SafeCancellation
|
||||
from .monitor import SystemMonitor, ComponentMonitor, StatusLevel, ComponentStatus
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from core import CoreCog
|
||||
@@ -48,9 +49,40 @@ class LionBot(Bot):
|
||||
self.core: Optional['CoreCog'] = None
|
||||
self.translator = translator
|
||||
|
||||
self.system_monitor = SystemMonitor()
|
||||
self.monitor = ComponentMonitor('LionBot', self._monitor_status)
|
||||
self.system_monitor.add_component(self.monitor)
|
||||
|
||||
self._locks = WeakValueDictionary()
|
||||
self._running_events = set()
|
||||
|
||||
async def _monitor_status(self):
    """
    Status callback for the 'LionBot' component monitor.

    The checks run in this order, and the first match wins:
      1. the client is closed          -> ERRORED
      2. the websocket is ratelimited  -> WAITING
      3. the client is not yet ready   -> STARTING
      4. otherwise                     -> OKAY, reporting the guild count,
         websocket latency and number of running events.

    Returns a ComponentStatus whose short and long templates are identical.
    """
    if self.is_closed():
        level = StatusLevel.ERRORED
        info = "(ERROR) Websocket is closed"
        data = {}
    elif self.is_ws_ratelimited():
        level = StatusLevel.WAITING
        info = "(WAITING) Websocket is ratelimited"
        data = {}
    elif not self.is_ready():
        level = StatusLevel.STARTING
        info = "(STARTING) Not yet ready"
        data = {}
    else:
        level = StatusLevel.OKAY
        # The second fragment previously began with ", " which rendered
        # as "guilds, , websocket latency"; the stray comma is removed.
        info = (
            "(OK) "
            "Logged in with {guild_count} guilds, "
            "websocket latency {latency}, and {events} running events."
        )
        data = {
            'guild_count': len(self.guilds),
            'latency': self.latency,
            'events': len(self._running_events),
        }
    return ComponentStatus(level, info, info, data)
|
||||
|
||||
async def setup_hook(self) -> None:
|
||||
log_context.set(f"APP: {self.application_id}")
|
||||
await self.app_ipc.connect()
|
||||
|
||||
139
src/meta/monitor.py
Normal file
139
src/meta/monitor.py
Normal file
@@ -0,0 +1,139 @@
|
||||
import logging
|
||||
import asyncio
|
||||
from enum import IntEnum
|
||||
from collections import deque, ChainMap
|
||||
import datetime as dt
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class StatusLevel(IntEnum):
    """
    Status level reported by a component.

    Integer values ascend from ERRORED (-2) to OKAY (2).
    """
    ERRORED = -2
    UNSURE = -1
    WAITING = 0
    STARTING = 1
    OKAY = 2

    @property
    def symbol(self):
        """
        The display symbol registered for this level in `symbols`.
        """
        return symbols[self]


# Display symbol for each status level, read by `StatusLevel.symbol`.
symbols = dict((
    (StatusLevel.ERRORED, '🟥'),
    (StatusLevel.UNSURE, '🟧'),
    (StatusLevel.WAITING, '⬜'),
    (StatusLevel.STARTING, '🟫'),
    (StatusLevel.OKAY, '🟩'),
))
|
||||
|
||||
|
||||
class ComponentStatus:
    """
    A single status report produced by a component monitor.

    Holds the status level, a short and a long format string, and the
    data used to fill those format strings. The creation time is
    recorded as a timezone-aware UTC datetime.
    """
    def __init__(
        self,
        level: 'StatusLevel',
        short_formatstr: str,
        long_formatstr: str,
        data: 'dict | None' = None,
    ):
        """
        Parameters
        ----------
        level:
            Status level of the component.
        short_formatstr:
            `str.format` template rendered by the `short` property.
        long_formatstr:
            `str.format` template rendered by the `long` property.
        data:
            Values available to both templates. Defaults to a fresh empty
            dict per instance (a literal `{}` default would be shared
            between every instance created without `data`).
        """
        self.level = level
        self.short_formatstr = short_formatstr
        self.long_formatstr = long_formatstr
        self.data = {} if data is None else data
        self.created_at = dt.datetime.now(tz=dt.timezone.utc)

    def format_args(self):
        """
        Mapping of the fields available to the format strings.

        The built-in keys `created_at`, `level` and `symbol` are searched
        first, so they shadow same-named keys in `data`.
        """
        extra = {
            'created_at': self.created_at,
            'level': self.level,
            'symbol': self.level.symbol,
        }
        return ChainMap(extra, self.data)

    @property
    def short(self):
        """
        The short format string rendered with `format_args()`.
        """
        return self.short_formatstr.format(**self.format_args())

    @property
    def long(self):
        """
        The long format string rendered with `format_args()`.
        """
        return self.long_formatstr.format(**self.format_args())
|
||||
|
||||
|
||||
class ComponentMonitor:
    """
    A named source of `ComponentStatus` reports.

    The report is produced by the `callback` passed at construction.
    When no callback is given, `_make_status` raises NotImplementedError
    (presumably meant to be overridden by subclasses).
    The class attribute `_name` is used when no `name` argument is given.
    """
    _name = None

    def __init__(self, name=None, callback=None):
        self.name = name if name else self._name
        self._callback = callback
        if not self.name:
            raise ValueError("ComponentMonitor must have a name")

    async def _make_status(self, *args, **kwargs):
        """
        Await the configured callback and return its result.
        """
        if self._callback is None:
            raise NotImplementedError
        return await self._callback(*args, **kwargs)

    async def status(self) -> ComponentStatus:
        """
        Produce the current status of this component.

        Any `Exception` raised while making the status is logged and
        converted into an UNSURE status naming the component and the error.
        """
        try:
            return await self._make_status()
        except Exception as e:
            logger.exception(
                f"Status callback for component '{self.name}' failed. This should not happen."
            )
            template = "Status callback for '{name}' failed with error '{error}'"
            return ComponentStatus(
                level=StatusLevel.UNSURE,
                short_formatstr=template,
                long_formatstr=template,
                data={
                    'name': self.name,
                    'error': repr(e)
                }
            )
|
||||
|
||||
|
||||
class SystemMonitor:
    """
    Registry of component monitors, with helpers to poll and render them.

    `components` maps component name to monitor.
    `recent` keeps the ten most recent results of `request()`.
    """
    def __init__(self):
        self.components = {}
        self.recent = deque(maxlen=10)

    def add_component(self, component: ComponentMonitor):
        """
        Register a component under its name and return it.

        A component already registered under the same name is replaced.
        """
        self.components[component.name] = component
        return component

    async def request(self):
        """
        Poll every registered component concurrently.

        Returns a dict mapping component name to its status, and also
        appends that dict to `recent`.
        """
        names = []
        pending = []
        for name, comp in self.components.items():
            names.append(name)
            pending.append(asyncio.create_task(comp.status()))

        # gather() returns results in the order the awaitables were passed.
        results = await asyncio.gather(*pending)
        snapshot = dict(zip(names, results))
        self.recent.append(snapshot)
        return snapshot

    async def _format_summary(self, status_dict: dict[str, ComponentStatus]):
        """
        Render a one-line, tab-separated count of components per level.

        Levels with no components are omitted.
        """
        tally = dict.fromkeys(StatusLevel, 0)
        for status in status_dict.values():
            tally[status.level] += 1

        parts = [f"{level.symbol} {count}" for level, count in tally.items() if count]
        return '\t'.join(parts)

    async def _format_overview(self, status_dict: dict[str, ComponentStatus]):
        """
        Render the summary line followed by one line per component.
        """
        body = [
            f"{status.level.symbol} {name}: {status.short}"
            for name, status in status_dict.items()
        ]
        header = await self._format_summary(status_dict)
        return '\n'.join([header, *body])

    async def get_summary(self):
        """
        Poll all components and return the one-line summary.
        """
        current = await self.request()
        return await self._format_summary(current)

    async def get_overview(self):
        """
        Poll all components and return the per-component overview.
        """
        current = await self.request()
        return await self._format_overview(current)
|
||||
@@ -10,6 +10,7 @@ from discord import app_commands as appcmds
|
||||
from meta import LionCog, LionBot, LionContext
|
||||
from meta.logger import log_wrap
|
||||
from meta.sharding import THIS_SHARD
|
||||
from meta.monitor import ComponentMonitor, ComponentStatus, StatusLevel
|
||||
from utils.lib import utc_now
|
||||
|
||||
from wards import low_management_ward
|
||||
@@ -42,12 +43,25 @@ class TimerCog(LionCog):
|
||||
self.bot = bot
|
||||
self.data = bot.db.load_registry(TimerData())
|
||||
self.settings = TimerSettings()
|
||||
self.monitor = ComponentMonitor('TimerCog', self._monitor)
|
||||
|
||||
self.timer_options = TimerOptions()
|
||||
|
||||
self.ready = False
|
||||
self.timers = defaultdict(dict)
|
||||
|
||||
async def _monitor(self):
    """
    Status callback for the 'TimerCog' component monitor.

    Reports STARTING until `self.ready` is set, and OKAY afterwards.
    Both messages include `len(self.timers)`.
    """
    # NOTE(review): self.timers is a defaultdict(dict), so this counts its
    # top-level keys; confirm that is the intended "timers loaded" figure.
    payload = {'timers': len(self.timers)}
    if self.ready:
        severity = StatusLevel.OKAY
        message = "(OK) {timers} timers loaded."
    else:
        severity = StatusLevel.STARTING
        message = "(STARTING) Not ready. {timers} timers loaded."
    return ComponentStatus(severity, message, message, payload)
|
||||
|
||||
async def cog_load(self):
|
||||
self.bot.system_monitor.add_component(self.monitor)
|
||||
await self.data.init()
|
||||
|
||||
self.bot.core.guild_config.register_model_setting(self.settings.PomodoroChannel)
|
||||
|
||||
@@ -13,6 +13,7 @@ from meta import LionCog, LionBot, LionContext
|
||||
from meta.logger import log_wrap
|
||||
from meta.errors import UserInputError, ResponseTimedOut
|
||||
from meta.sharding import THIS_SHARD
|
||||
from meta.monitor import ComponentMonitor, ComponentStatus, StatusLevel
|
||||
from utils.lib import utc_now, error_embed
|
||||
from utils.ui import Confirm
|
||||
from utils.data import MULTIVALUE_IN, MEMBERS
|
||||
@@ -38,6 +39,10 @@ class ScheduleCog(LionCog):
|
||||
self.bot = bot
|
||||
self.data: ScheduleData = bot.db.load_registry(ScheduleData())
|
||||
self.settings = ScheduleSettings()
|
||||
self.monitor = ComponentMonitor(
|
||||
'ScheduleCog',
|
||||
self._monitor
|
||||
)
|
||||
|
||||
# Whether we are ready to take events
|
||||
self.initialised = asyncio.Event()
|
||||
@@ -57,12 +62,56 @@ class ScheduleCog(LionCog):
|
||||
|
||||
self.session_channels = self.settings.SessionChannels._cache
|
||||
|
||||
async def _monitor(self):
    """
    Status callback for the 'ScheduleCog' component monitor.

    Reports STARTING while `self.initialised` is not set, OKAY when the
    slot id for the current time is among `self.active_slots`, and UNSURE
    when the cog is initialised but that slot is not active.
    The spawn task, spawn lock, current slot lock and active slots are
    attached as data for every level.
    """
    slot_id = self.nowid
    current_slot = None
    slot_lock = self.slotlock(slot_id)

    if not self.initialised.is_set():
        severity = StatusLevel.STARTING
        template = (
            "(STARTING) "
            "Not ready. "
            "Spawn task is {spawn}. "
            "Spawn lock is {spawn_lock}. "
            "Active slots {active}."
        )
    elif slot_id in self.active_slots:
        current_slot = self.active_slots[slot_id]
        severity = StatusLevel.OKAY
        template = (
            "(OK) "
            "Running current slot {now}. "
            "Spawn lock is {spawn_lock}. "
            "Now lock is {now_lock}. "
            "Active slots {active}."
        )
    else:
        severity = StatusLevel.UNSURE
        template = (
            "(UNSURE) "
            "Setup, but current slotid {nowid} not active. "
            "Spawn task is {spawn}. "
            "Spawn lock is {spawn_lock}. "
            "Now lock is {now_lock}. "
            "Active slots {active}."
        )

    payload = {
        'spawn': self.spawn_task,
        'spawn_lock': self.spawn_lock,
        'active': self.active_slots,
        'nowid': slot_id,
        'now_lock': slot_lock,
        'now': current_slot,
    }
    return ComponentStatus(severity, template, template, payload)
|
||||
|
||||
@property
def nowid(self):
    """
    Slot id of the present moment: `time_to_slotid` applied to `utc_now()`.
    """
    return time_to_slotid(utc_now())
|
||||
|
||||
async def cog_load(self):
|
||||
self.bot.system_monitor.add_component(self.monitor)
|
||||
await self.data.init()
|
||||
|
||||
# Update the session channel cache
|
||||
|
||||
@@ -186,6 +186,17 @@ def mk_print(fp: io.StringIO) -> Callable[..., None]:
|
||||
return _print
|
||||
|
||||
|
||||
def mk_status_printer(bot, printer):
    """
    Build a coroutine function that prints the bot's system status.

    The returned `_status(details=False)` fetches the overview from
    `bot.system_monitor` when `details` is truthy, and the summary
    otherwise. It passes the text to `printer` and also returns it.
    """
    async def _status(details=False):
        monitor = bot.system_monitor
        fetch = monitor.get_overview if details else monitor.get_summary
        report = await fetch()
        printer(report)
        return report

    return _status
|
||||
|
||||
|
||||
@log_wrap(action="Code Exec")
|
||||
async def _async(to_eval: str, style='exec'):
|
||||
newline = '\n' * ('\n' in to_eval)
|
||||
@@ -202,6 +213,7 @@ async def _async(to_eval: str, style='exec'):
|
||||
scope['ctx'] = ctx = context.get()
|
||||
scope['bot'] = ctx_bot.get()
|
||||
scope['print'] = _print # type: ignore
|
||||
scope['print_status'] = mk_status_printer(scope['bot'], _print)
|
||||
|
||||
try:
|
||||
if ctx and ctx.message:
|
||||
@@ -297,7 +309,7 @@ class Exec(LionCog):
|
||||
file = discord.File(fp, filename=f"output-{target}.md")
|
||||
await ctx.reply(file=file)
|
||||
elif result:
|
||||
await ctx.reply(f"```md{result}```")
|
||||
await ctx.reply(f"```md\n{result}```")
|
||||
else:
|
||||
await ctx.reply("Command completed, and had no output.")
|
||||
else:
|
||||
@@ -351,7 +363,7 @@ class Exec(LionCog):
|
||||
except asyncio.TimeoutError:
|
||||
return
|
||||
if ctx.interaction:
|
||||
await ctx.interaction.response.defer(thinking=True, ephemeral=True)
|
||||
await ctx.interaction.response.defer(thinking=True)
|
||||
if target is not None:
|
||||
if target not in shard_talk.peers:
|
||||
embed = discord.Embed(description=f"Unknown peer {target}", colour=discord.Colour.red())
|
||||
@@ -376,7 +388,7 @@ class Exec(LionCog):
|
||||
await ctx.reply(file=file)
|
||||
else:
|
||||
# Send as message
|
||||
await ctx.reply(f"```md\n{output}```", ephemeral=True)
|
||||
await ctx.reply(f"```md\n{output}```")
|
||||
|
||||
asyncall_cmd.autocomplete('target')(_peer_acmpl)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user