feat(meta): Add a system status reporter.
This commit is contained in:
23
src/bot.py
23
src/bot.py
@@ -9,6 +9,7 @@ from meta import LionBot, conf, sharding, appname, shard_talk
|
|||||||
from meta.app import shardname
|
from meta.app import shardname
|
||||||
from meta.logger import log_context, log_action_stack, setup_main_logger
|
from meta.logger import log_context, log_action_stack, setup_main_logger
|
||||||
from meta.context import ctx_bot
|
from meta.context import ctx_bot
|
||||||
|
from meta.monitor import ComponentMonitor, StatusLevel, ComponentStatus
|
||||||
|
|
||||||
from data import Database
|
from data import Database
|
||||||
|
|
||||||
@@ -29,6 +30,25 @@ logger = logging.getLogger(__name__)
|
|||||||
db = Database(conf.data['args'])
|
db = Database(conf.data['args'])
|
||||||
|
|
||||||
|
|
||||||
|
async def _data_monitor() -> ComponentStatus:
    """
    Component monitor callback for the database.

    Collects the connection pool statistics (as a string, under the
    ``stats`` key) and derives the status level from the pool's
    ``_opened`` / ``_closed`` flags:

    - not yet opened -> ``StatusLevel.WAITING``
    - closed         -> ``StatusLevel.ERRORED``
    - otherwise      -> ``StatusLevel.OKAY``

    The same format string is used for the short and the long form.
    """
    pool = db.pool
    # Statistics are gathered up front so they accompany every status level.
    payload = {'stats': str(pool.get_stats())}

    if not pool._opened:
        message = "(WAITING) Database Pool is not opened."
        return ComponentStatus(StatusLevel.WAITING, message, message, payload)

    if pool._closed:
        message = "(ERROR) Database Pool is closed."
        return ComponentStatus(StatusLevel.ERRORED, message, message, payload)

    message = "(OK) Database Pool statistics: {stats}"
    return ComponentStatus(StatusLevel.OKAY, message, message, payload)
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
log_action_stack.set(("Initialising",))
|
log_action_stack.set(("Initialising",))
|
||||||
logger.info("Initialising StudyLion")
|
logger.info("Initialising StudyLion")
|
||||||
@@ -73,6 +93,9 @@ async def main():
|
|||||||
chunk_guilds_at_startup=False,
|
chunk_guilds_at_startup=False,
|
||||||
) as lionbot:
|
) as lionbot:
|
||||||
ctx_bot.set(lionbot)
|
ctx_bot.set(lionbot)
|
||||||
|
lionbot.system_monitor.add_component(
|
||||||
|
ComponentMonitor('Database', _data_monitor)
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
log_context.set(f"APP: {appname}")
|
log_context.set(f"APP: {appname}")
|
||||||
logger.info("StudyLion initialised, starting!", extra={'action': 'Starting'})
|
logger.info("StudyLion initialised, starting!", extra={'action': 'Starting'})
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ from .context import context
|
|||||||
from .LionContext import LionContext
|
from .LionContext import LionContext
|
||||||
from .LionTree import LionTree
|
from .LionTree import LionTree
|
||||||
from .errors import HandledException, SafeCancellation
|
from .errors import HandledException, SafeCancellation
|
||||||
|
from .monitor import SystemMonitor, ComponentMonitor, StatusLevel, ComponentStatus
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from core import CoreCog
|
from core import CoreCog
|
||||||
@@ -48,9 +49,40 @@ class LionBot(Bot):
|
|||||||
self.core: Optional['CoreCog'] = None
|
self.core: Optional['CoreCog'] = None
|
||||||
self.translator = translator
|
self.translator = translator
|
||||||
|
|
||||||
|
self.system_monitor = SystemMonitor()
|
||||||
|
self.monitor = ComponentMonitor('LionBot', self._monitor_status)
|
||||||
|
self.system_monitor.add_component(self.monitor)
|
||||||
|
|
||||||
self._locks = WeakValueDictionary()
|
self._locks = WeakValueDictionary()
|
||||||
self._running_events = set()
|
self._running_events = set()
|
||||||
|
|
||||||
|
async def _monitor_status(self):
    """
    Component monitor callback describing the state of the bot client.

    The checks are made in order, and the first one that matches wins:

    - client closed          -> ``StatusLevel.ERRORED``
    - websocket ratelimited  -> ``StatusLevel.WAITING``
    - client not yet ready   -> ``StatusLevel.STARTING``
    - otherwise              -> ``StatusLevel.OKAY``, reporting the guild
      count, the websocket latency and the number of running events.

    Returns a ``ComponentStatus`` whose short and long format strings
    are identical.
    """
    # Only the OK state carries any format data.
    data = {}
    if self.is_closed():
        level = StatusLevel.ERRORED
        info = "(ERROR) Websocket is closed"
    elif self.is_ws_ratelimited():
        level = StatusLevel.WAITING
        info = "(WAITING) Websocket is ratelimited"
    elif not self.is_ready():
        level = StatusLevel.STARTING
        info = "(STARTING) Not yet ready"
    else:
        level = StatusLevel.OKAY
        # Adjacent literals are concatenated verbatim, so exactly one of
        # them may carry the ", " separator between the clauses.
        info = (
            "(OK) "
            "Logged in with {guild_count} guilds, "
            "websocket latency {latency}, and {events} running events."
        )
        data = {
            'guild_count': len(self.guilds),
            'latency': self.latency,
            'events': len(self._running_events),
        }
    return ComponentStatus(level, info, info, data)
|
||||||
|
|
||||||
async def setup_hook(self) -> None:
|
async def setup_hook(self) -> None:
|
||||||
log_context.set(f"APP: {self.application_id}")
|
log_context.set(f"APP: {self.application_id}")
|
||||||
await self.app_ipc.connect()
|
await self.app_ipc.connect()
|
||||||
|
|||||||
139
src/meta/monitor.py
Normal file
139
src/meta/monitor.py
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
from enum import IntEnum
|
||||||
|
from collections import deque, ChainMap
|
||||||
|
import datetime as dt
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class StatusLevel(IntEnum):
    """
    Health level reported by a monitored component.

    Being an ``IntEnum``, levels compare numerically: ``ERRORED`` is the
    lowest value and ``OKAY`` the highest.
    """
    ERRORED = -2
    UNSURE = -1
    WAITING = 0
    STARTING = 1
    OKAY = 2

    @property
    def symbol(self):
        """The display glyph registered for this level in ``symbols``."""
        return symbols[self]


# Glyph shown for each level. Defined after the enum because the keys are
# its members; ``StatusLevel.symbol`` only looks this up when accessed.
symbols = dict((
    (StatusLevel.ERRORED, '🟥'),
    (StatusLevel.UNSURE, '🟧'),
    (StatusLevel.WAITING, '⬜'),
    (StatusLevel.STARTING, '🟫'),
    (StatusLevel.OKAY, '🟩'),
))
|
||||||
|
|
||||||
|
|
||||||
|
class ComponentStatus:
    """
    Snapshot of one component's status, stamped with its creation time.

    Parameters
    ----------
    level:
        The status level; its ``symbol`` is exposed to the format strings.
    short_formatstr, long_formatstr:
        ``str.format`` templates rendered by the ``short`` and ``long``
        properties respectively.
    data:
        Extra values made available to the templates. When omitted, each
        instance gets its own empty dict.
    """

    def __init__(
        self,
        level: "StatusLevel",
        short_formatstr: str,
        long_formatstr: str,
        data: "dict | None" = None,
    ):
        self.level = level
        self.short_formatstr = short_formatstr
        self.long_formatstr = long_formatstr
        # A literal `{}` default would be evaluated once and then shared
        # (and mutated) across every instance created without `data`.
        self.data = {} if data is None else data
        self.created_at = dt.datetime.now(tz=dt.timezone.utc)

    def format_args(self):
        """
        Build the mapping handed to the format strings.

        The built-in keys (``created_at``, ``level``, ``symbol``) are
        placed first in the chain, so they take precedence over entries
        of the same name in ``data``.
        """
        extra = {
            'created_at': self.created_at,
            'level': self.level,
            'symbol': self.level.symbol,
        }
        return ChainMap(extra, self.data)

    @property
    def short(self):
        """The short format string rendered with ``format_args()``."""
        return self.short_formatstr.format(**self.format_args())

    @property
    def long(self):
        """The long format string rendered with ``format_args()``."""
        return self.long_formatstr.format(**self.format_args())
|
||||||
|
|
||||||
|
|
||||||
|
class ComponentMonitor:
    """
    Named source of ``ComponentStatus`` reports for one component.

    The status is produced either by the ``callback`` coroutine function
    given at construction, or by a subclass overriding ``_make_status``.
    """
    # Fallback name used when none is passed to the constructor.
    _name = None

    def __init__(self, name=None, callback=None):
        resolved = name if name else self._name
        self._callback = callback
        self.name = resolved
        if not resolved:
            raise ValueError("ComponentMonitor must have a name")

    async def _make_status(self, *args, **kwargs):
        """
        Produce the raw status by awaiting the callback.

        Raises ``NotImplementedError`` when no callback was provided.
        """
        if self._callback is None:
            raise NotImplementedError
        return await self._callback(*args, **kwargs)

    async def status(self) -> "ComponentStatus":
        """
        Return the component's current status.

        Any ``Exception`` raised while producing the status is logged and
        converted into an ``UNSURE`` status carrying the component name
        and the ``repr`` of the error, rather than propagated.
        """
        try:
            return await self._make_status()
        except Exception as e:
            logger.exception(
                f"Status callback for component '{self.name}' failed. This should not happen."
            )
            failure = "Status callback for '{name}' failed with error '{error}'"
            return ComponentStatus(
                level=StatusLevel.UNSURE,
                short_formatstr=failure,
                long_formatstr=failure,
                data={
                    'name': self.name,
                    'error': repr(e)
                }
            )
|
||||||
|
|
||||||
|
|
||||||
|
class SystemMonitor:
    """
    Registry of component monitors that can poll them all at once.

    ``components`` maps component name to monitor, and ``recent`` keeps
    the last ten status snapshots produced by ``request()``.
    """

    def __init__(self):
        self.components = {}
        self.recent = deque(maxlen=10)

    def add_component(self, component: "ComponentMonitor"):
        """
        Register ``component`` under its name and return it.

        A component already registered under the same name is replaced.
        """
        self.components[component.name] = component
        return component

    async def request(self):
        """
        Request status from each component.

        All components are polled concurrently. The resulting
        ``{name: status}`` dict is appended to ``recent`` and returned.
        """
        # Snapshot the registry so names and results stay aligned.
        registered = list(self.components.items())
        results = await asyncio.gather(
            *(component.status() for _, component in registered)
        )
        snapshot = {
            name: result for (name, _), result in zip(registered, results)
        }
        self.recent.append(snapshot)
        return snapshot

    async def _format_summary(self, status_dict: "dict[str, ComponentStatus]"):
        """
        Format a one line summary from a status dict.

        Produces tab-separated ``<symbol> <count>`` entries in
        ``StatusLevel`` definition order, omitting levels with no
        components.
        """
        counts = dict.fromkeys(StatusLevel, 0)
        for status in status_dict.values():
            counts[status.level] += 1

        return '\t'.join(
            f"{level.symbol} {count}" for level, count in counts.items() if count
        )

    async def _format_overview(self, status_dict: "dict[str, ComponentStatus]"):
        """
        Format an overview (one line per component) from a status dict.

        The first line is the one line summary.
        """
        lines = [
            f"{status.level.symbol} {name}: {status.short}"
            for name, status in status_dict.items()
        ]
        summary = await self._format_summary(status_dict)
        return '\n'.join([summary, *lines])

    async def get_summary(self):
        """Poll every component and return the one line summary."""
        snapshot = await self.request()
        return await self._format_summary(snapshot)

    async def get_overview(self):
        """Poll every component and return the per-component overview."""
        snapshot = await self.request()
        return await self._format_overview(snapshot)
|
||||||
@@ -10,6 +10,7 @@ from discord import app_commands as appcmds
|
|||||||
from meta import LionCog, LionBot, LionContext
|
from meta import LionCog, LionBot, LionContext
|
||||||
from meta.logger import log_wrap
|
from meta.logger import log_wrap
|
||||||
from meta.sharding import THIS_SHARD
|
from meta.sharding import THIS_SHARD
|
||||||
|
from meta.monitor import ComponentMonitor, ComponentStatus, StatusLevel
|
||||||
from utils.lib import utc_now
|
from utils.lib import utc_now
|
||||||
|
|
||||||
from wards import low_management_ward
|
from wards import low_management_ward
|
||||||
@@ -42,12 +43,25 @@ class TimerCog(LionCog):
|
|||||||
self.bot = bot
|
self.bot = bot
|
||||||
self.data = bot.db.load_registry(TimerData())
|
self.data = bot.db.load_registry(TimerData())
|
||||||
self.settings = TimerSettings()
|
self.settings = TimerSettings()
|
||||||
|
self.monitor = ComponentMonitor('TimerCog', self._monitor)
|
||||||
|
|
||||||
self.timer_options = TimerOptions()
|
self.timer_options = TimerOptions()
|
||||||
|
|
||||||
self.ready = False
|
self.ready = False
|
||||||
self.timers = defaultdict(dict)
|
self.timers = defaultdict(dict)
|
||||||
|
|
||||||
|
async def _monitor(self):
    """
    Component monitor callback for the timer cog.

    Reports ``STARTING`` until ``self.ready`` is set and ``OKAY``
    afterwards. Both messages include ``len(self.timers)`` as
    ``{timers}``.
    """
    payload = {'timers': len(self.timers)}
    if self.ready:
        level, template = StatusLevel.OKAY, "(OK) {timers} timers loaded."
    else:
        level, template = (
            StatusLevel.STARTING,
            "(STARTING) Not ready. {timers} timers loaded.",
        )
    return ComponentStatus(level, template, template, payload)
|
||||||
|
|
||||||
async def cog_load(self):
|
async def cog_load(self):
|
||||||
|
self.bot.system_monitor.add_component(self.monitor)
|
||||||
await self.data.init()
|
await self.data.init()
|
||||||
|
|
||||||
self.bot.core.guild_config.register_model_setting(self.settings.PomodoroChannel)
|
self.bot.core.guild_config.register_model_setting(self.settings.PomodoroChannel)
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ from meta import LionCog, LionBot, LionContext
|
|||||||
from meta.logger import log_wrap
|
from meta.logger import log_wrap
|
||||||
from meta.errors import UserInputError, ResponseTimedOut
|
from meta.errors import UserInputError, ResponseTimedOut
|
||||||
from meta.sharding import THIS_SHARD
|
from meta.sharding import THIS_SHARD
|
||||||
|
from meta.monitor import ComponentMonitor, ComponentStatus, StatusLevel
|
||||||
from utils.lib import utc_now, error_embed
|
from utils.lib import utc_now, error_embed
|
||||||
from utils.ui import Confirm
|
from utils.ui import Confirm
|
||||||
from utils.data import MULTIVALUE_IN, MEMBERS
|
from utils.data import MULTIVALUE_IN, MEMBERS
|
||||||
@@ -38,6 +39,10 @@ class ScheduleCog(LionCog):
|
|||||||
self.bot = bot
|
self.bot = bot
|
||||||
self.data: ScheduleData = bot.db.load_registry(ScheduleData())
|
self.data: ScheduleData = bot.db.load_registry(ScheduleData())
|
||||||
self.settings = ScheduleSettings()
|
self.settings = ScheduleSettings()
|
||||||
|
self.monitor = ComponentMonitor(
|
||||||
|
'ScheduleCog',
|
||||||
|
self._monitor
|
||||||
|
)
|
||||||
|
|
||||||
# Whether we are ready to take events
|
# Whether we are ready to take events
|
||||||
self.initialised = asyncio.Event()
|
self.initialised = asyncio.Event()
|
||||||
@@ -57,12 +62,56 @@ class ScheduleCog(LionCog):
|
|||||||
|
|
||||||
self.session_channels = self.settings.SessionChannels._cache
|
self.session_channels = self.settings.SessionChannels._cache
|
||||||
|
|
||||||
|
async def _monitor(self):
    """
    Component monitor callback for the schedule cog.

    - ``STARTING`` while ``self.initialised`` is not set.
    - ``OKAY`` when the current slot id is present in ``active_slots``.
    - ``UNSURE`` when initialised but the current slot id is not active.

    The format data always carries the spawn task, spawn lock, active
    slots, current slot id, current slot lock, and the current slot
    (``None`` unless the status is ``OKAY``).
    """
    slot_id = self.nowid
    slot_lock = self.slotlock(slot_id)
    current = None

    if not self.initialised.is_set():
        level = StatusLevel.STARTING
        template = (
            "(STARTING) "
            "Not ready. "
            "Spawn task is {spawn}. "
            "Spawn lock is {spawn_lock}. "
            "Active slots {active}."
        )
    elif slot_id in self.active_slots:
        current = self.active_slots[slot_id]
        level = StatusLevel.OKAY
        template = (
            "(OK) "
            "Running current slot {now}. "
            "Spawn lock is {spawn_lock}. "
            "Now lock is {now_lock}. "
            "Active slots {active}."
        )
    else:
        level = StatusLevel.UNSURE
        template = (
            "(UNSURE) "
            "Setup, but current slotid {nowid} not active. "
            "Spawn task is {spawn}. "
            "Spawn lock is {spawn_lock}. "
            "Now lock is {now_lock}. "
            "Active slots {active}."
        )

    payload = {
        'spawn': self.spawn_task,
        'spawn_lock': self.spawn_lock,
        'active': self.active_slots,
        'nowid': slot_id,
        'now_lock': slot_lock,
        'now': current,
    }
    return ComponentStatus(level, template, template, payload)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def nowid(self):
|
def nowid(self):
|
||||||
now = utc_now()
|
now = utc_now()
|
||||||
return time_to_slotid(now)
|
return time_to_slotid(now)
|
||||||
|
|
||||||
async def cog_load(self):
|
async def cog_load(self):
|
||||||
|
self.bot.system_monitor.add_component(self.monitor)
|
||||||
await self.data.init()
|
await self.data.init()
|
||||||
|
|
||||||
# Update the session channel cache
|
# Update the session channel cache
|
||||||
|
|||||||
@@ -186,6 +186,17 @@ def mk_print(fp: io.StringIO) -> Callable[..., None]:
|
|||||||
return _print
|
return _print
|
||||||
|
|
||||||
|
|
||||||
|
def mk_status_printer(bot, printer):
    """
    Build a coroutine function that prints the bot's system status.

    The returned ``_status(details=False)`` fetches the summary from
    ``bot.system_monitor`` (or the overview when ``details`` is truthy),
    passes the text to ``printer``, and returns it.
    """
    async def _status(details=False):
        monitor = bot.system_monitor
        fetch = monitor.get_overview if details else monitor.get_summary
        report = await fetch()
        printer(report)
        return report

    return _status
|
||||||
|
|
||||||
|
|
||||||
@log_wrap(action="Code Exec")
|
@log_wrap(action="Code Exec")
|
||||||
async def _async(to_eval: str, style='exec'):
|
async def _async(to_eval: str, style='exec'):
|
||||||
newline = '\n' * ('\n' in to_eval)
|
newline = '\n' * ('\n' in to_eval)
|
||||||
@@ -202,6 +213,7 @@ async def _async(to_eval: str, style='exec'):
|
|||||||
scope['ctx'] = ctx = context.get()
|
scope['ctx'] = ctx = context.get()
|
||||||
scope['bot'] = ctx_bot.get()
|
scope['bot'] = ctx_bot.get()
|
||||||
scope['print'] = _print # type: ignore
|
scope['print'] = _print # type: ignore
|
||||||
|
scope['print_status'] = mk_status_printer(scope['bot'], _print)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if ctx and ctx.message:
|
if ctx and ctx.message:
|
||||||
@@ -297,7 +309,7 @@ class Exec(LionCog):
|
|||||||
file = discord.File(fp, filename=f"output-{target}.md")
|
file = discord.File(fp, filename=f"output-{target}.md")
|
||||||
await ctx.reply(file=file)
|
await ctx.reply(file=file)
|
||||||
elif result:
|
elif result:
|
||||||
await ctx.reply(f"```md{result}```")
|
await ctx.reply(f"```md\n{result}```")
|
||||||
else:
|
else:
|
||||||
await ctx.reply("Command completed, and had no output.")
|
await ctx.reply("Command completed, and had no output.")
|
||||||
else:
|
else:
|
||||||
@@ -351,7 +363,7 @@ class Exec(LionCog):
|
|||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
return
|
return
|
||||||
if ctx.interaction:
|
if ctx.interaction:
|
||||||
await ctx.interaction.response.defer(thinking=True, ephemeral=True)
|
await ctx.interaction.response.defer(thinking=True)
|
||||||
if target is not None:
|
if target is not None:
|
||||||
if target not in shard_talk.peers:
|
if target not in shard_talk.peers:
|
||||||
embed = discord.Embed(description=f"Unknown peer {target}", colour=discord.Colour.red())
|
embed = discord.Embed(description=f"Unknown peer {target}", colour=discord.Colour.red())
|
||||||
@@ -376,7 +388,7 @@ class Exec(LionCog):
|
|||||||
await ctx.reply(file=file)
|
await ctx.reply(file=file)
|
||||||
else:
|
else:
|
||||||
# Send as message
|
# Send as message
|
||||||
await ctx.reply(f"```md\n{output}```", ephemeral=True)
|
await ctx.reply(f"```md\n{output}```")
|
||||||
|
|
||||||
asyncall_cmd.autocomplete('target')(_peer_acmpl)
|
asyncall_cmd.autocomplete('target')(_peer_acmpl)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user