feat(meta): Add a system status reporter.

This commit is contained in:
2023-09-22 08:38:59 +03:00
parent ff0bb17d29
commit 970e652fdc
6 changed files with 272 additions and 3 deletions

View File

@@ -9,6 +9,7 @@ from meta import LionBot, conf, sharding, appname, shard_talk
from meta.app import shardname
from meta.logger import log_context, log_action_stack, setup_main_logger
from meta.context import ctx_bot
from meta.monitor import ComponentMonitor, StatusLevel, ComponentStatus
from data import Database
@@ -29,6 +30,25 @@ logger = logging.getLogger(__name__)
db = Database(conf.data['args'])
async def _data_monitor() -> ComponentStatus:
    """
    Status callback reporting on the module-level database pool.

    Reads `db.pool.get_stats()` for the status data, then inspects the
    pool's `_opened` and `_closed` flags to choose the level:

    - WAITING when the pool has not been opened,
    - ERRORED when the pool has been closed,
    - OKAY otherwise (the message embeds the stringified pool statistics).

    The same message is used for both the short and long description.
    """
    pool = db.pool
    payload = {'stats': str(pool.get_stats())}

    if not pool._opened:
        level, info = StatusLevel.WAITING, "(WAITING) Database Pool is not opened."
    elif pool._closed:
        level, info = StatusLevel.ERRORED, "(ERROR) Database Pool is closed."
    else:
        level, info = StatusLevel.OKAY, "(OK) Database Pool statistics: {stats}"

    return ComponentStatus(level, info, info, payload)
async def main():
log_action_stack.set(("Initialising",))
logger.info("Initialising StudyLion")
@@ -73,6 +93,9 @@ async def main():
chunk_guilds_at_startup=False,
) as lionbot:
ctx_bot.set(lionbot)
lionbot.system_monitor.add_component(
ComponentMonitor('Database', _data_monitor)
)
try:
log_context.set(f"APP: {appname}")
logger.info("StudyLion initialised, starting!", extra={'action': 'Starting'})

View File

@@ -21,6 +21,7 @@ from .context import context
from .LionContext import LionContext
from .LionTree import LionTree
from .errors import HandledException, SafeCancellation
from .monitor import SystemMonitor, ComponentMonitor, StatusLevel, ComponentStatus
if TYPE_CHECKING:
from core import CoreCog
@@ -48,9 +49,40 @@ class LionBot(Bot):
self.core: Optional['CoreCog'] = None
self.translator = translator
self.system_monitor = SystemMonitor()
self.monitor = ComponentMonitor('LionBot', self._monitor_status)
self.system_monitor.add_component(self.monitor)
self._locks = WeakValueDictionary()
self._running_events = set()
async def _monitor_status(self):
    """
    Status callback describing the state of the bot client itself.

    The first matching condition decides the reported level:

    - ERRORED when `self.is_closed()` is true,
    - WAITING when `self.is_ws_ratelimited()` is true,
    - STARTING when `self.is_ready()` is false,
    - OKAY otherwise, with the guild count, websocket latency and number
      of running events supplied as format data.

    Returns a `ComponentStatus` whose short and long descriptions share
    the same format string.
    """
    data = {}
    if self.is_closed():
        level = StatusLevel.ERRORED
        info = "(ERROR) Websocket is closed"
    elif self.is_ws_ratelimited():
        level = StatusLevel.WAITING
        info = "(WAITING) Websocket is ratelimited"
    elif not self.is_ready():
        level = StatusLevel.STARTING
        info = "(STARTING) Not yet ready"
    else:
        level = StatusLevel.OKAY
        # The previous literal rendered as "guilds, , websocket latency"
        # because both adjacent fragments carried their own comma.
        info = (
            "(OK) "
            "Logged in with {guild_count} guilds, "
            "websocket latency {latency}, and {events} running events."
        )
        data = {
            'guild_count': len(self.guilds),
            'latency': self.latency,
            'events': len(self._running_events),
        }
    return ComponentStatus(level, info, info, data)
async def setup_hook(self) -> None:
log_context.set(f"APP: {self.application_id}")
await self.app_ipc.connect()

139
src/meta/monitor.py Normal file
View File

@@ -0,0 +1,139 @@
import logging
import asyncio
from enum import IntEnum
from collections import deque, ChainMap
import datetime as dt
logger = logging.getLogger(__name__)
class StatusLevel(IntEnum):
    """
    Health level reported by a monitored component.

    Members are integers, so levels can be compared and sorted; the
    values increase from ERRORED (lowest) to OKAY (highest).
    """
    ERRORED = -2
    UNSURE = -1
    WAITING = 0
    STARTING = 1
    OKAY = 2

    @property
    def symbol(self):
        """Display symbol for this level, looked up in the module-level `symbols` table."""
        return symbols[self]


# Display symbol for every StatusLevel member.
# Defined after the class because the keys are the enum members themselves;
# `StatusLevel.symbol` only reads this table when it is accessed.
symbols = {
    StatusLevel.ERRORED: '🟥',
    StatusLevel.UNSURE: '🟧',
    # Previously an empty string, which left WAITING components without any
    # marker in summaries and overviews. A white square keeps it in line
    # with the other coloured squares.
    StatusLevel.WAITING: '⬜',
    StatusLevel.STARTING: '🟫',
    StatusLevel.OKAY: '🟩',
}
class ComponentStatus:
    """
    A component's status at the moment the object was created.

    Parameters
    ----------
    level:
        The reported level; its `symbol` attribute is exposed to the format strings.
    short_formatstr:
        `str.format` template used by the `short` property.
    long_formatstr:
        `str.format` template used by the `long` property.
    data:
        Optional mapping of extra values available to both templates.
        When omitted, each instance gets its own empty dict.
    """

    def __init__(self, level: StatusLevel, short_formatstr: str, long_formatstr: str, data: "dict | None" = None):
        self.level = level
        self.short_formatstr = short_formatstr
        self.long_formatstr = long_formatstr
        # A literal `{}` default would be one dict shared by every instance
        # created without `data`, so mutating one status would leak into the rest.
        self.data = {} if data is None else data
        self.created_at = dt.datetime.now(tz=dt.timezone.utc)

    def format_args(self):
        """
        Build the mapping handed to the format strings.

        The built-in keys (`created_at`, `level`, `symbol`) come first in the
        ChainMap, so they take precedence over same-named keys in `data`.
        """
        extra = {
            'created_at': self.created_at,
            'level': self.level,
            'symbol': self.level.symbol,
        }
        return ChainMap(extra, self.data)

    @property
    def short(self):
        """The short description, formatted with `format_args()`."""
        return self.short_formatstr.format(**self.format_args())

    @property
    def long(self):
        """The long description, formatted with `format_args()`."""
        return self.long_formatstr.format(**self.format_args())
class ComponentMonitor:
    """
    Named wrapper around an async callback that produces a component's status.

    The name comes from the `name` argument, falling back to the class
    attribute `_name`; a ValueError is raised when neither is set.
    """
    _name = None

    def __init__(self, name=None, callback=None):
        self._callback = callback
        self.name = name or self._name
        if not self.name:
            raise ValueError("ComponentMonitor must have a name")

    async def _make_status(self, *args, **kwargs):
        """
        Await the configured callback and return its result.

        Raises NotImplementedError when no callback was provided.
        """
        callback = self._callback
        if callback is None:
            raise NotImplementedError
        return await callback(*args, **kwargs)

    async def status(self) -> ComponentStatus:
        """
        Return the component's current status.

        Any Exception raised while producing the status is logged and
        converted into an UNSURE status carrying the component name and
        the repr of the error, so this method does not propagate it.
        """
        try:
            return await self._make_status()
        except Exception as error:
            logger.exception(
                f"Status callback for component '{self.name}' failed. This should not happen."
            )
            template = "Status callback for '{name}' failed with error '{error}'"
            return ComponentStatus(
                level=StatusLevel.UNSURE,
                short_formatstr=template,
                long_formatstr=template,
                data={
                    'name': self.name,
                    'error': repr(error),
                },
            )
class SystemMonitor:
    """
    Registry of component monitors that can poll them all and render the results.

    `components` maps component name to monitor; `recent` keeps the last
    ten status dicts produced by `request()`.
    """

    def __init__(self):
        self.components = {}
        self.recent = deque(maxlen=10)

    def add_component(self, component: ComponentMonitor):
        """Register `component` under its name (replacing any same-named entry) and return it."""
        self.components[component.name] = component
        return component

    async def request(self):
        """
        Request status from each component.

        Every component's `status()` is scheduled as its own task. The
        resulting name -> status dict is appended to `recent` and returned.
        """
        names = list(self.components)
        pending = [
            asyncio.create_task(self.components[name].status())
            for name in names
        ]
        # gather returns results in the order the tasks were passed in.
        results = await asyncio.gather(*pending)
        status = dict(zip(names, results))
        self.recent.append(status)
        return status

    async def _format_summary(self, status_dict: dict[str, ComponentStatus]):
        """
        Format a one line summary from a status dict.

        Counts the statuses at each level and joins "<symbol> <count>"
        entries with tabs, in StatusLevel order, omitting levels with no
        components.
        """
        tally = dict.fromkeys(StatusLevel, 0)
        for status in status_dict.values():
            tally[status.level] += 1
        entries = [
            f"{level.symbol} {count}"
            for level, count in tally.items()
            if count
        ]
        return '\t'.join(entries)

    async def _format_overview(self, status_dict: dict[str, ComponentStatus]):
        """
        Format an overview (one line per component) from a status dict.

        The first line is the summary; each following line shows the
        component's level symbol, name and short description.
        """
        lines = [
            f"{status.level.symbol} {name}: {status.short}"
            for name, status in status_dict.items()
        ]
        summary = await self._format_summary(status_dict)
        return '\n'.join([summary, *lines])

    async def get_summary(self):
        """Poll every component and return the one line summary."""
        status = await self.request()
        return await self._format_summary(status)

    async def get_overview(self):
        """Poll every component and return the multi-line overview."""
        status = await self.request()
        return await self._format_overview(status)

View File

@@ -10,6 +10,7 @@ from discord import app_commands as appcmds
from meta import LionCog, LionBot, LionContext
from meta.logger import log_wrap
from meta.sharding import THIS_SHARD
from meta.monitor import ComponentMonitor, ComponentStatus, StatusLevel
from utils.lib import utc_now
from wards import low_management_ward
@@ -42,12 +43,25 @@ class TimerCog(LionCog):
self.bot = bot
self.data = bot.db.load_registry(TimerData())
self.settings = TimerSettings()
self.monitor = ComponentMonitor('TimerCog', self._monitor)
self.timer_options = TimerOptions()
self.ready = False
self.timers = defaultdict(dict)
async def _monitor(self):
    """
    Status callback for the timer cog.

    Reports OKAY when `self.ready` is truthy and STARTING otherwise; in
    both cases the number of entries in `self.timers` is supplied to the
    message as `timers`.
    """
    if self.ready:
        level, info = StatusLevel.OKAY, "(OK) {timers} timers loaded."
    else:
        level, info = StatusLevel.STARTING, "(STARTING) Not ready. {timers} timers loaded."
    return ComponentStatus(level, info, info, {'timers': len(self.timers)})
async def cog_load(self):
self.bot.system_monitor.add_component(self.monitor)
await self.data.init()
self.bot.core.guild_config.register_model_setting(self.settings.PomodoroChannel)

View File

@@ -13,6 +13,7 @@ from meta import LionCog, LionBot, LionContext
from meta.logger import log_wrap
from meta.errors import UserInputError, ResponseTimedOut
from meta.sharding import THIS_SHARD
from meta.monitor import ComponentMonitor, ComponentStatus, StatusLevel
from utils.lib import utc_now, error_embed
from utils.ui import Confirm
from utils.data import MULTIVALUE_IN, MEMBERS
@@ -38,6 +39,10 @@ class ScheduleCog(LionCog):
self.bot = bot
self.data: ScheduleData = bot.db.load_registry(ScheduleData())
self.settings = ScheduleSettings()
self.monitor = ComponentMonitor(
'ScheduleCog',
self._monitor
)
# Whether we are ready to take events
self.initialised = asyncio.Event()
@@ -57,12 +62,56 @@ class ScheduleCog(LionCog):
self.session_channels = self.settings.SessionChannels._cache
async def _monitor(self):
    """
    Status callback for the schedule cog.

    Levels, in order of precedence:

    - STARTING while `self.initialised` is not set,
    - OKAY when the current slot id is a key of `self.active_slots`,
    - UNSURE when initialised but the current slot id is not active.

    The spawn task, spawn lock, active slots, current slot id, the lock
    returned by `self.slotlock` for that id, and the current slot (None
    unless OKAY) are all passed as format data.
    """
    slot_id = self.nowid
    current = None
    slot_lock = self.slotlock(slot_id)

    if not self.initialised.is_set():
        level = StatusLevel.STARTING
        info = (
            "(STARTING) "
            "Not ready. "
            "Spawn task is {spawn}. "
            "Spawn lock is {spawn_lock}. "
            "Active slots {active}."
        )
    elif slot_id in self.active_slots:
        current = self.active_slots[slot_id]
        level = StatusLevel.OKAY
        info = (
            "(OK) "
            "Running current slot {now}. "
            "Spawn lock is {spawn_lock}. "
            "Now lock is {now_lock}. "
            "Active slots {active}."
        )
    else:
        level = StatusLevel.UNSURE
        info = (
            "(UNSURE) "
            "Setup, but current slotid {nowid} not active. "
            "Spawn task is {spawn}. "
            "Spawn lock is {spawn_lock}. "
            "Now lock is {now_lock}. "
            "Active slots {active}."
        )

    data = dict(
        spawn=self.spawn_task,
        spawn_lock=self.spawn_lock,
        active=self.active_slots,
        nowid=slot_id,
        now_lock=slot_lock,
        now=current,
    )
    return ComponentStatus(level, info, info, data)
@property
def nowid(self):
    """Slot id that `time_to_slotid` yields for the time returned by `utc_now()`."""
    return time_to_slotid(utc_now())
async def cog_load(self):
self.bot.system_monitor.add_component(self.monitor)
await self.data.init()
# Update the session channel cache

View File

@@ -186,6 +186,17 @@ def mk_print(fp: io.StringIO) -> Callable[..., None]:
return _print
def mk_status_printer(bot, printer):
    """
    Build an async helper that fetches the bot's system status and prints it.

    The returned coroutine function takes `details` (default False): when
    truthy it awaits `bot.system_monitor.get_overview()`, otherwise
    `bot.system_monitor.get_summary()`. The resulting text is passed to
    `printer` and also returned.
    """
    async def _status(details=False):
        monitor = bot.system_monitor
        fetch = monitor.get_overview if details else monitor.get_summary
        status = await fetch()
        printer(status)
        return status

    return _status
@log_wrap(action="Code Exec")
async def _async(to_eval: str, style='exec'):
newline = '\n' * ('\n' in to_eval)
@@ -202,6 +213,7 @@ async def _async(to_eval: str, style='exec'):
scope['ctx'] = ctx = context.get()
scope['bot'] = ctx_bot.get()
scope['print'] = _print # type: ignore
scope['print_status'] = mk_status_printer(scope['bot'], _print)
try:
if ctx and ctx.message:
@@ -297,7 +309,7 @@ class Exec(LionCog):
file = discord.File(fp, filename=f"output-{target}.md")
await ctx.reply(file=file)
elif result:
await ctx.reply(f"```md{result}```")
await ctx.reply(f"```md\n{result}```")
else:
await ctx.reply("Command completed, and had no output.")
else:
@@ -351,7 +363,7 @@ class Exec(LionCog):
except asyncio.TimeoutError:
return
if ctx.interaction:
await ctx.interaction.response.defer(thinking=True, ephemeral=True)
await ctx.interaction.response.defer(thinking=True)
if target is not None:
if target not in shard_talk.peers:
embed = discord.Embed(description=f"Unknown peer {target}", colour=discord.Colour.red())
@@ -376,7 +388,7 @@ class Exec(LionCog):
await ctx.reply(file=file)
else:
# Send as message
await ctx.reply(f"```md\n{output}```", ephemeral=True)
await ctx.reply(f"```md\n{output}```")
asyncall_cmd.autocomplete('target')(_peer_acmpl)