feat(meta): Add a system status reporter.

This commit is contained in:
2023-09-22 08:38:59 +03:00
parent ff0bb17d29
commit 970e652fdc
6 changed files with 272 additions and 3 deletions

View File

@@ -9,6 +9,7 @@ from meta import LionBot, conf, sharding, appname, shard_talk
from meta.app import shardname from meta.app import shardname
from meta.logger import log_context, log_action_stack, setup_main_logger from meta.logger import log_context, log_action_stack, setup_main_logger
from meta.context import ctx_bot from meta.context import ctx_bot
from meta.monitor import ComponentMonitor, StatusLevel, ComponentStatus
from data import Database from data import Database
@@ -29,6 +30,25 @@ logger = logging.getLogger(__name__)
db = Database(conf.data['args']) db = Database(conf.data['args'])
async def _data_monitor() -> ComponentStatus:
    """
    Status callback reporting on the module-level database pool.

    The pool statistics are always collected (as a string) and attached to
    the returned status as ``stats``. The level is derived from the pool's
    ``_opened`` / ``_closed`` flags:

    - not opened -> WAITING
    - closed     -> ERRORED
    - otherwise  -> OKAY (the message interpolates ``{stats}``)
    """
    pool = db.pool
    payload = {'stats': str(pool.get_stats())}

    # NOTE(review): `_opened` and `_closed` are private attributes of the
    # pool object; confirm they remain available across pool library upgrades.
    if not pool._opened:
        level, message = StatusLevel.WAITING, "(WAITING) Database Pool is not opened."
    elif pool._closed:
        level, message = StatusLevel.ERRORED, "(ERROR) Database Pool is closed."
    else:
        level, message = StatusLevel.OKAY, "(OK) Database Pool statistics: {stats}"

    # The same template is used for both the short and the long form.
    return ComponentStatus(level, message, message, payload)
async def main(): async def main():
log_action_stack.set(("Initialising",)) log_action_stack.set(("Initialising",))
logger.info("Initialising StudyLion") logger.info("Initialising StudyLion")
@@ -73,6 +93,9 @@ async def main():
chunk_guilds_at_startup=False, chunk_guilds_at_startup=False,
) as lionbot: ) as lionbot:
ctx_bot.set(lionbot) ctx_bot.set(lionbot)
lionbot.system_monitor.add_component(
ComponentMonitor('Database', _data_monitor)
)
try: try:
log_context.set(f"APP: {appname}") log_context.set(f"APP: {appname}")
logger.info("StudyLion initialised, starting!", extra={'action': 'Starting'}) logger.info("StudyLion initialised, starting!", extra={'action': 'Starting'})

View File

@@ -21,6 +21,7 @@ from .context import context
from .LionContext import LionContext from .LionContext import LionContext
from .LionTree import LionTree from .LionTree import LionTree
from .errors import HandledException, SafeCancellation from .errors import HandledException, SafeCancellation
from .monitor import SystemMonitor, ComponentMonitor, StatusLevel, ComponentStatus
if TYPE_CHECKING: if TYPE_CHECKING:
from core import CoreCog from core import CoreCog
@@ -48,9 +49,40 @@ class LionBot(Bot):
self.core: Optional['CoreCog'] = None self.core: Optional['CoreCog'] = None
self.translator = translator self.translator = translator
self.system_monitor = SystemMonitor()
self.monitor = ComponentMonitor('LionBot', self._monitor_status)
self.system_monitor.add_component(self.monitor)
self._locks = WeakValueDictionary() self._locks = WeakValueDictionary()
self._running_events = set() self._running_events = set()
async def _monitor_status(self):
    """
    Status callback for the bot client itself.

    Checks, in order, whether the client is closed (ERRORED), whether the
    websocket is ratelimited (WAITING), and whether the client is not yet
    ready (STARTING). If none apply the bot is OKAY and the message reports
    the guild count, websocket latency and number of running events.

    Returns a ComponentStatus using the same template for the short and
    long forms.
    """
    data = {}
    if self.is_closed():
        level = StatusLevel.ERRORED
        info = "(ERROR) Websocket is closed"
    elif self.is_ws_ratelimited():
        level = StatusLevel.WAITING
        info = "(WAITING) Websocket is ratelimited"
    elif not self.is_ready():
        level = StatusLevel.STARTING
        info = "(STARTING) Not yet ready"
    else:
        level = StatusLevel.OKAY
        # The pieces are joined by implicit concatenation, so exactly one of
        # two adjacent fragments may carry the separating comma.
        info = (
            "(OK) "
            "Logged in with {guild_count} guilds, "
            "websocket latency {latency}, and {events} running events."
        )
        data = {
            'guild_count': len(self.guilds),
            'latency': self.latency,
            'events': len(self._running_events),
        }
    return ComponentStatus(level, info, info, data)
async def setup_hook(self) -> None: async def setup_hook(self) -> None:
log_context.set(f"APP: {self.application_id}") log_context.set(f"APP: {self.application_id}")
await self.app_ipc.connect() await self.app_ipc.connect()

139
src/meta/monitor.py Normal file
View File

@@ -0,0 +1,139 @@
import logging
import asyncio
from enum import IntEnum
from collections import deque, ChainMap
import datetime as dt
logger = logging.getLogger(__name__)
class StatusLevel(IntEnum):
    """
    Health level reported by a monitored component.

    Values increase from ERRORED (-2) up to OKAY (2); as an IntEnum the
    members can be compared and sorted numerically.
    """
    ERRORED = -2
    UNSURE = -1
    WAITING = 0
    STARTING = 1
    OKAY = 2

    @property
    def symbol(self):
        """Marker character used when rendering this level in status output."""
        return symbols[self]


# Display marker for each level. Defined after the class because the keys are
# the enum members themselves; `StatusLevel.symbol` resolves it at call time.
symbols = {
    StatusLevel.ERRORED: '🟥',
    StatusLevel.UNSURE: '🟧',
    # Every level needs a visible marker, otherwise waiting components show
    # up in summaries as a bare count.
    # NOTE(review): white square chosen as the neutral marker; confirm glyph.
    StatusLevel.WAITING: '⬜',
    StatusLevel.STARTING: '🟫',
    StatusLevel.OKAY: '🟩',
}
class ComponentStatus:
    """
    Snapshot of a single component's status.

    Parameters
    ----------
    level:
        The StatusLevel being reported.
    short_formatstr, long_formatstr:
        `str.format` templates for the brief and detailed descriptions.
        They may reference any key of `data`, plus `created_at`, `level`
        and `symbol`.
    data:
        Values made available to the templates. Defaults to a fresh empty
        dict per instance.
    """

    def __init__(self, level: StatusLevel, short_formatstr: str, long_formatstr: str, data: 'dict | None' = None):
        self.level = level
        self.short_formatstr = short_formatstr
        self.long_formatstr = long_formatstr
        # A `{}` default would be one dict shared by every instance created
        # without `data`; build a new one each time instead.
        self.data = data if data is not None else {}
        # Timezone-aware creation time (UTC).
        self.created_at = dt.datetime.now(tz=dt.timezone.utc)

    def format_args(self):
        """
        Return the mapping used to fill the format strings.

        The built-in keys (`created_at`, `level`, `symbol`) take precedence
        over same-named keys in `data`.
        """
        extra = {
            'created_at': self.created_at,
            'level': self.level,
            'symbol': self.level.symbol,
        }
        return ChainMap(extra, self.data)

    @property
    def short(self):
        """The short description, formatted with `format_args()`."""
        return self.short_formatstr.format(**self.format_args())

    @property
    def long(self):
        """The long description, formatted with `format_args()`."""
        return self.long_formatstr.format(**self.format_args())
class ComponentMonitor:
    """
    A named source of status reports for one component.

    The status is produced either by the async `callback` given at
    construction, or by a subclass overriding `_make_status`. The name comes
    from the `name` argument, falling back to the class attribute `_name`.
    """
    _name = None

    def __init__(self, name=None, callback=None):
        self._callback = callback
        self.name = name if name else self._name
        if not self.name:
            raise ValueError("ComponentMonitor must have a name")

    async def _make_status(self, *args, **kwargs):
        """Await the configured callback; raise NotImplementedError if there is none."""
        if self._callback is None:
            raise NotImplementedError
        return await self._callback(*args, **kwargs)

    async def status(self) -> ComponentStatus:
        """
        Produce the current status of this component.

        Any exception raised while building the status is logged and
        converted into an UNSURE status describing the failure.
        """
        try:
            return await self._make_status()
        except Exception as exc:
            logger.exception(
                f"Status callback for component '{self.name}' failed. This should not happen."
            )
            # Short and long forms share the same failure template.
            failure_template = "Status callback for '{name}' failed with error '{error}'"
            return ComponentStatus(
                level=StatusLevel.UNSURE,
                short_formatstr=failure_template,
                long_formatstr=failure_template,
                data={'name': self.name, 'error': repr(exc)},
            )
class SystemMonitor:
    """
    Registry of component monitors with helpers to poll and render them.

    `components` maps component name to monitor; `recent` keeps the last
    ten status dicts returned by `request()`.
    """

    def __init__(self):
        self.components = {}
        self.recent = deque(maxlen=10)

    def add_component(self, component: ComponentMonitor):
        """Register `component` under its name and return it."""
        self.components[component.name] = component
        return component

    async def request(self):
        """
        Request status from each component.

        All components are polled concurrently. The resulting
        name -> status dict is appended to `recent` and returned.
        """
        registered = list(self.components.items())
        pending = [asyncio.create_task(monitor.status()) for _, monitor in registered]
        results = await asyncio.gather(*pending)
        status = {name: result for (name, _), result in zip(registered, results)}
        self.recent.append(status)
        return status

    async def _format_summary(self, status_dict: dict[str, ComponentStatus]):
        """
        Format a one line summary from a status dict.

        Emits "<symbol> <count>" for every level that occurs at least once,
        in StatusLevel definition order, separated by tabs.
        """
        tally = dict.fromkeys(StatusLevel, 0)
        for component_status in status_dict.values():
            tally[component_status.level] += 1
        parts = [f"{level.symbol} {count}" for level, count in tally.items() if count]
        return '\t'.join(parts)

    async def _format_overview(self, status_dict: dict[str, ComponentStatus]):
        """
        Format an overview (one line per component) from a status dict.

        The first line is the summary, followed by one
        "<symbol> <name>: <short status>" line per component.
        """
        lines = [
            f"{status.level.symbol} {name}: {status.short}"
            for name, status in status_dict.items()
        ]
        summary = await self._format_summary(status_dict)
        return '\n'.join([summary] + lines)

    async def get_summary(self):
        """Poll every component and return the one line summary."""
        statuses = await self.request()
        return await self._format_summary(statuses)

    async def get_overview(self):
        """Poll every component and return the multi-line overview."""
        statuses = await self.request()
        return await self._format_overview(statuses)

View File

@@ -10,6 +10,7 @@ from discord import app_commands as appcmds
from meta import LionCog, LionBot, LionContext from meta import LionCog, LionBot, LionContext
from meta.logger import log_wrap from meta.logger import log_wrap
from meta.sharding import THIS_SHARD from meta.sharding import THIS_SHARD
from meta.monitor import ComponentMonitor, ComponentStatus, StatusLevel
from utils.lib import utc_now from utils.lib import utc_now
from wards import low_management_ward from wards import low_management_ward
@@ -42,12 +43,25 @@ class TimerCog(LionCog):
self.bot = bot self.bot = bot
self.data = bot.db.load_registry(TimerData()) self.data = bot.db.load_registry(TimerData())
self.settings = TimerSettings() self.settings = TimerSettings()
self.monitor = ComponentMonitor('TimerCog', self._monitor)
self.timer_options = TimerOptions() self.timer_options = TimerOptions()
self.ready = False self.ready = False
self.timers = defaultdict(dict) self.timers = defaultdict(dict)
async def _monitor(self):
    """
    Status callback for the timer cog.

    Reports OKAY once `self.ready` is set and STARTING before that. Both
    messages interpolate `{timers}`, the current size of `self.timers`.
    """
    if self.ready:
        level = StatusLevel.OKAY
        info = "(OK) {timers} timers loaded."
    else:
        level = StatusLevel.STARTING
        info = "(STARTING) Not ready. {timers} timers loaded."

    # Same template for the short and the long form.
    return ComponentStatus(level, info, info, {'timers': len(self.timers)})
async def cog_load(self): async def cog_load(self):
self.bot.system_monitor.add_component(self.monitor)
await self.data.init() await self.data.init()
self.bot.core.guild_config.register_model_setting(self.settings.PomodoroChannel) self.bot.core.guild_config.register_model_setting(self.settings.PomodoroChannel)

View File

@@ -13,6 +13,7 @@ from meta import LionCog, LionBot, LionContext
from meta.logger import log_wrap from meta.logger import log_wrap
from meta.errors import UserInputError, ResponseTimedOut from meta.errors import UserInputError, ResponseTimedOut
from meta.sharding import THIS_SHARD from meta.sharding import THIS_SHARD
from meta.monitor import ComponentMonitor, ComponentStatus, StatusLevel
from utils.lib import utc_now, error_embed from utils.lib import utc_now, error_embed
from utils.ui import Confirm from utils.ui import Confirm
from utils.data import MULTIVALUE_IN, MEMBERS from utils.data import MULTIVALUE_IN, MEMBERS
@@ -38,6 +39,10 @@ class ScheduleCog(LionCog):
self.bot = bot self.bot = bot
self.data: ScheduleData = bot.db.load_registry(ScheduleData()) self.data: ScheduleData = bot.db.load_registry(ScheduleData())
self.settings = ScheduleSettings() self.settings = ScheduleSettings()
self.monitor = ComponentMonitor(
'ScheduleCog',
self._monitor
)
# Whether we are ready to take events # Whether we are ready to take events
self.initialised = asyncio.Event() self.initialised = asyncio.Event()
@@ -57,12 +62,56 @@ class ScheduleCog(LionCog):
self.session_channels = self.settings.SessionChannels._cache self.session_channels = self.settings.SessionChannels._cache
async def _monitor(self):
    """
    Status callback for the schedule cog.

    - STARTING while `self.initialised` is not set.
    - OKAY when the current slot id is present in `self.active_slots`.
    - UNSURE when initialised but the current slot id is not active.

    The spawn task, spawn lock, active slots, current slot id, its lock and
    the current slot (None unless OKAY) are attached for the templates.
    """
    slot_id = self.nowid
    # NOTE(review): presumably `slotlock` returns the lock guarding this
    # slot id; confirm whether calling it here has side effects.
    slot_lock = self.slotlock(slot_id)
    current = None

    if not self.initialised.is_set():
        level = StatusLevel.STARTING
        info = (
            "(STARTING) "
            "Not ready. "
            "Spawn task is {spawn}. "
            "Spawn lock is {spawn_lock}. "
            "Active slots {active}."
        )
    elif slot_id in self.active_slots:
        current = self.active_slots[slot_id]
        level = StatusLevel.OKAY
        info = (
            "(OK) "
            "Running current slot {now}. "
            "Spawn lock is {spawn_lock}. "
            "Now lock is {now_lock}. "
            "Active slots {active}."
        )
    else:
        level = StatusLevel.UNSURE
        info = (
            "(UNSURE) "
            "Setup, but current slotid {nowid} not active. "
            "Spawn task is {spawn}. "
            "Spawn lock is {spawn_lock}. "
            "Now lock is {now_lock}. "
            "Active slots {active}."
        )

    details = {
        'spawn': self.spawn_task,
        'spawn_lock': self.spawn_lock,
        'active': self.active_slots,
        'nowid': slot_id,
        'now_lock': slot_lock,
        'now': current,
    }
    return ComponentStatus(level, info, info, details)
@property @property
def nowid(self): def nowid(self):
now = utc_now() now = utc_now()
return time_to_slotid(now) return time_to_slotid(now)
async def cog_load(self): async def cog_load(self):
self.bot.system_monitor.add_component(self.monitor)
await self.data.init() await self.data.init()
# Update the session channel cache # Update the session channel cache

View File

@@ -186,6 +186,17 @@ def mk_print(fp: io.StringIO) -> Callable[..., None]:
return _print return _print
def mk_status_printer(bot, printer):
    """
    Build an async helper that prints the bot's system status.

    The returned coroutine function takes `details` (default False): when
    true it fetches the per-component overview from `bot.system_monitor`,
    otherwise the one line summary. The text is passed to `printer` and
    also returned.
    """
    async def _status(details=False):
        monitor = bot.system_monitor
        fetch = monitor.get_overview if details else monitor.get_summary
        report = await fetch()
        printer(report)
        return report

    return _status
@log_wrap(action="Code Exec") @log_wrap(action="Code Exec")
async def _async(to_eval: str, style='exec'): async def _async(to_eval: str, style='exec'):
newline = '\n' * ('\n' in to_eval) newline = '\n' * ('\n' in to_eval)
@@ -202,6 +213,7 @@ async def _async(to_eval: str, style='exec'):
scope['ctx'] = ctx = context.get() scope['ctx'] = ctx = context.get()
scope['bot'] = ctx_bot.get() scope['bot'] = ctx_bot.get()
scope['print'] = _print # type: ignore scope['print'] = _print # type: ignore
scope['print_status'] = mk_status_printer(scope['bot'], _print)
try: try:
if ctx and ctx.message: if ctx and ctx.message:
@@ -297,7 +309,7 @@ class Exec(LionCog):
file = discord.File(fp, filename=f"output-{target}.md") file = discord.File(fp, filename=f"output-{target}.md")
await ctx.reply(file=file) await ctx.reply(file=file)
elif result: elif result:
await ctx.reply(f"```md{result}```") await ctx.reply(f"```md\n{result}```")
else: else:
await ctx.reply("Command completed, and had no output.") await ctx.reply("Command completed, and had no output.")
else: else:
@@ -351,7 +363,7 @@ class Exec(LionCog):
except asyncio.TimeoutError: except asyncio.TimeoutError:
return return
if ctx.interaction: if ctx.interaction:
await ctx.interaction.response.defer(thinking=True, ephemeral=True) await ctx.interaction.response.defer(thinking=True)
if target is not None: if target is not None:
if target not in shard_talk.peers: if target not in shard_talk.peers:
embed = discord.Embed(description=f"Unknown peer {target}", colour=discord.Colour.red()) embed = discord.Embed(description=f"Unknown peer {target}", colour=discord.Colour.red())
@@ -376,7 +388,7 @@ class Exec(LionCog):
await ctx.reply(file=file) await ctx.reply(file=file)
else: else:
# Send as message # Send as message
await ctx.reply(f"```md\n{output}```", ephemeral=True) await ctx.reply(f"```md\n{output}```")
asyncall_cmd.autocomplete('target')(_peer_acmpl) asyncall_cmd.autocomplete('target')(_peer_acmpl)