rewrite: Restructure to include GUI.

This commit is contained in:
2022-12-23 06:44:32 +02:00
parent 2b93354248
commit f328324747
224 changed files with 8 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
from .cog import Analytics
async def setup(bot):
    """
    Create the Analytics cog for ``bot`` and add it with ``bot.add_cog``.
    """
    analytics_cog = Analytics(bot)
    await bot.add_cog(analytics_cog)

177
src/analytics/cog.py Normal file
View File

@@ -0,0 +1,177 @@
import logging
import discord
from discord.ext.commands import Bot, Cog, HybridCommand, HybridCommandError
from discord.ext.commands.errors import CommandInvokeError, CheckFailure
from discord.app_commands.errors import CommandInvokeError as appCommandInvokeError
from meta import LionCog, LionBot, LionContext
from meta.app import shard_talk, appname
from meta.errors import HandledException, SafeCancellation
from meta.logger import log_wrap
from utils.lib import utc_now
from .data import AnalyticsData
from .events import (
CommandStatus, CommandEvent, command_event_handler,
GuildAction, GuildEvent, guild_event_handler,
VoiceAction, VoiceEvent, voice_event_handler
)
from .snapshot import shard_snapshot
logger = logging.getLogger(__name__)
# TODO: Client side might be better handled as a single connection fed by a queue?
# Maybe consider this again after the interactive REPL idea
# Or if it seems like this is giving an absurd amount of traffic
class Analytics(LionCog):
    """
    Cog that reports voice, guild, and command events to the analytics peer.

    Every listener builds the matching event tuple and sends it through
    ``shard_talk`` to the peer named by ``bot.config.analytics['appname']``.
    When that peer is not in ``shard_talk.peers`` the event is logged and dropped.
    """
    def __init__(self, bot: LionBot):
        self.bot = bot
        self.data = bot.db.load_registry(AnalyticsData())
        # Name of the peer that receives the analytics events.
        self.an_app = bot.config.analytics['appname']

        # Bind each module-level handler to shard_talk and keep its route for sending.
        self.talk_command_event = command_event_handler.bind(shard_talk).route
        self.talk_guild_event = guild_event_handler.bind(shard_talk).route
        self.talk_voice_event = voice_event_handler.bind(shard_talk).route
        self.talk_shard_snapshot = shard_talk.register_route()(shard_snapshot)

    async def cog_load(self):
        """Initialise the analytics data registry."""
        await self.data.init()

    async def _send_event(self, route, event) -> None:
        """
        Send ``event`` over ``route`` to the analytics peer without waiting for a reply.

        The event is logged and discarded when the peer is not currently known.
        """
        if self.an_app not in shard_talk.peers:
            logger.warning(f"Analytics peer not found, discarding event: {event}")
        else:
            await route(event).send(self.an_app, wait_for_reply=False)

    def _build_command_event(self, ctx: LionContext, status, error=None):
        """
        Build the CommandEvent describing the invocation in ``ctx``.

        ``execution_time`` is measured from the creation time of ``ctx.message``.
        """
        now = utc_now()
        return CommandEvent(
            appname=appname,
            cmdname=ctx.command.name if ctx.command else 'Unknown',
            cogname=ctx.cog.qualified_name if ctx.cog else None,
            userid=ctx.author.id,
            created_at=now,
            status=status,
            error=error,
            execution_time=(now - ctx.message.created_at).total_seconds(),
            guildid=ctx.guild.id if ctx.guild else None,
            ctxid=ctx.message.id
        )

    def _classify_error(self, error):
        """
        Map a command error to ``(status, err_type)``.

        Invocation wrappers are unwrapped (up to two levels) and the wrapped
        error decides the status: handled exceptions, safe cancellations, and
        Discord HTTP errors count as CANCELLED, anything else as FAILED.
        A bare CheckFailure counts as CANCELLED.

        The classification uses ``isinstance`` instead of re-raising, so the
        error object (its traceback and context) is left untouched.
        """
        wrappers = (HybridCommandError, CommandInvokeError, appCommandInvokeError)
        cancelling = (HandledException, SafeCancellation, discord.Forbidden, discord.HTTPException)

        if isinstance(error, wrappers):
            original = error.original
            # The recorded error is the first unwrapped level.
            err_type = repr(original)
            if isinstance(original, wrappers):
                original = original.original
            if isinstance(original, cancelling):
                return CommandStatus.CANCELLED, err_type
            return CommandStatus.FAILED, err_type

        if isinstance(error, CheckFailure):
            return CommandStatus.CANCELLED, repr(error)
        return CommandStatus.FAILED, repr(error)

    @LionCog.listener()
    @log_wrap(action='AnEvent')
    async def on_voice_state_update(self, member, before, after):
        """
        Send a voice event when a member joins or leaves voice.
        """
        if not before.channel and after.channel:
            # Member joined channel
            action = VoiceAction.JOINED
        elif before.channel and not after.channel:
            # Member left channel
            action = VoiceAction.LEFT
        else:
            # Member change state, we don't need to deal with that
            return

        event = VoiceEvent(
            appname=appname,
            userid=member.id,
            guildid=member.guild.id,
            action=action,
            created_at=utc_now()
        )
        await self._send_event(self.talk_voice_event, event)

    @LionCog.listener()
    @log_wrap(action='AnEvent')
    async def on_guild_join(self, guild):
        """
        Send guild join event.
        """
        event = GuildEvent(
            appname=appname,
            guildid=guild.id,
            action=GuildAction.JOINED,
            created_at=utc_now()
        )
        await self._send_event(self.talk_guild_event, event)

    @LionCog.listener()
    @log_wrap(action='AnEvent')
    async def on_guild_remove(self, guild):
        """
        Send guild leave event
        """
        event = GuildEvent(
            appname=appname,
            guildid=guild.id,
            action=GuildAction.LEFT,
            created_at=utc_now()
        )
        await self._send_event(self.talk_guild_event, event)

    @LionCog.listener()
    @log_wrap(action='AnEvent')
    async def on_command_completion(self, ctx: LionContext):
        """
        Send command completed successfully.
        """
        event = self._build_command_event(ctx, CommandStatus.COMPLETED)
        await self._send_event(self.talk_command_event, event)

    @LionCog.listener()
    @log_wrap(action='AnEvent')
    async def on_command_error(self, ctx: LionContext, error):
        """
        Send command failed.
        """
        status, err_type = self._classify_error(error)
        event = self._build_command_event(ctx, status, error=err_type)
        await self._send_event(self.talk_command_event, event)

189
src/analytics/data.py Normal file
View File

@@ -0,0 +1,189 @@
from enum import Enum
from data.registry import Registry
from data.adapted import RegisterEnum
from data.models import RowModel
from data.columns import Integer, String, Timestamp, Column
class CommandStatus(Enum):
    """
    Final status recorded for a command event.

    Schema
    ------
    CREATE TYPE analytics.CommandStatus AS ENUM(
        'COMPLETED',
        'CANCELLED',
        'FAILED'
    );
    """
    # Each value is a 1-tuple holding the enum label.
    # NOTE(review): presumably the form the database enum adapter expects; confirm.
    COMPLETED = ('COMPLETED',)
    CANCELLED = ('CANCELLED',)
    FAILED = ('FAILED',)
class GuildAction(Enum):
    """
    Whether a guild event records a join or a leave.

    Schema
    ------
    CREATE TYPE analytics.GuildAction AS ENUM(
        'JOINED',
        'LEFT'
    );
    """
    # Each value is a 1-tuple holding the enum label.
    JOINED = ('JOINED',)
    LEFT = ('LEFT',)
class VoiceAction(Enum):
    """
    Whether a voice event records a member joining or leaving voice.

    Schema
    ------
    CREATE TYPE analytics.VoiceAction AS ENUM(
        'JOINED',
        'LEFT'
    );
    """
    # Each value is a 1-tuple holding the enum label.
    JOINED = ('JOINED',)
    LEFT = ('LEFT',)
class AnalyticsData(Registry, name='analytics'):
    """
    Registry of the enums and row models in the ``analytics`` schema.
    """
    CommandStatus = RegisterEnum(CommandStatus, name="analytics.CommandStatus")
    GuildAction = RegisterEnum(GuildAction, name="analytics.GuildAction")
    VoiceAction = RegisterEnum(VoiceAction, name="analytics.VoiceAction")

    class Snapshots(RowModel):
        """
        Schema
        ------
        CREATE TABLE analytics.snapshots(
            snapshotid SERIAL PRIMARY KEY,
            appname TEXT NOT NULL REFERENCES bot_config (appname),
            guild_count INTEGER NOT NULL,
            member_count INTEGER NOT NULL,
            user_count INTEGER NOT NULL,
            in_voice INTEGER NOT NULL,
            created_at TIMESTAMPTZ NOT NULL DEFAULT (now() at time zone 'utc')
        );
        """
        _schema_ = 'analytics'
        _tablename_ = 'snapshots'

        snapshotid = Integer(primary=True)
        appname = String()
        guild_count = Integer()
        member_count = Integer()
        user_count = Integer()
        in_voice = Integer()
        created_at = Timestamp()

    class Events(RowModel):
        """
        Base event table; the other event tables below inherit from it.

        Schema
        ------
        CREATE TABLE analytics.events(
            eventid SERIAL PRIMARY KEY,
            appname TEXT NOT NULL REFERENCES bot_config (appname),
            ctxid BIGINT,
            guildid BIGINT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT (now() at time zone 'utc')
        );
        """
        _schema_ = 'analytics'
        _tablename_ = 'events'

        eventid = Integer(primary=True)
        appname = String()
        ctxid = Integer()
        guildid = Integer()
        created_at = Timestamp()

    class Commands(RowModel):
        """
        Schema
        ------
        CREATE TABLE analytics.commands(
            cmdname TEXT NOT NULL,
            cogname TEXT,
            userid BIGINT NOT NULL,
            status analytics.CommandStatus NOT NULL,
            execution_time REAL NOT NULL
        ) INHERITS (analytics.events);
        """
        _schema_ = 'analytics'
        _tablename_ = 'commands'

        eventid = Integer(primary=True)
        appname = String()
        ctxid = Integer()
        guildid = Integer()
        created_at = Timestamp()

        cmdname = String()
        cogname = String()
        userid = Integer()
        status: Column[CommandStatus] = Column()
        # NOTE(review): `error` is declared here but is not listed in the schema
        # docstring above; confirm the column exists in the table.
        error = String()
        execution_time: Column[float] = Column()

    class Guilds(RowModel):
        """
        Schema
        ------
        CREATE TABLE analytics.guilds(
            guildid BIGINT NOT NULL,
            action analytics.GuildAction NOT NULL
        ) INHERITS (analytics.events);
        """
        _schema_ = 'analytics'
        _tablename_ = 'guilds'

        eventid = Integer(primary=True)
        appname = String()
        ctxid = Integer()
        guildid = Integer()
        created_at = Timestamp()

        action: Column[GuildAction] = Column()

    class VoiceSession(RowModel):
        """
        Schema
        ------
        CREATE TABLE analytics.voice_sessions(
            userid BIGINT NOT NULL,
            action analytics.VoiceAction NOT NULL
        ) INHERITS (analytics.events);
        """
        _schema_ = 'analytics'
        _tablename_ = 'voice_sessions'

        eventid = Integer(primary=True)
        appname = String()
        ctxid = Integer()
        guildid = Integer()
        created_at = Timestamp()

        userid = Integer()
        # The column type is analytics.VoiceAction (see the schema above).
        action: Column[VoiceAction] = Column()

    class GuiRender(RowModel):
        """
        Schema
        ------
        CREATE TABLE analytics.gui_renders(
            cardname TEXT NOT NULL,
            duration INTEGER NOT NULL
        ) INHERITS (analytics.events);
        """
        _schema_ = 'analytics'
        _tablename_ = 'gui_renders'

        eventid = Integer(primary=True)
        appname = String()
        ctxid = Integer()
        guildid = Integer()
        created_at = Timestamp()

        cardname = String()
        duration = Integer()

180
src/analytics/events.py Normal file
View File

@@ -0,0 +1,180 @@
import asyncio
import datetime
import logging
from collections import namedtuple
from typing import NamedTuple, Optional, Generic, Type, TypeVar
from meta.ipc import AppRoute, AppClient
from meta.logger import logging_context, log_wrap
from data import RowModel
from .data import AnalyticsData, CommandStatus, VoiceAction, GuildAction
logger = logging.getLogger(__name__)
"""
TODO
Snapshot type? Incremental or manual?
Request snapshot route will require all shards to be online
Update batch size before release, or put it in the config
"""
T = TypeVar('T')


class EventHandler(Generic[T]):
    """
    Receive events on a named route and insert them into a table in batches.

    Incoming events are queued by ``handle_event``; the ``consumer`` task moves
    them into ``batch`` and flushes the batch through ``model.table.insert_many``
    using the field names of ``struct``.

    Parameters
    ----------
    route_name:
        Name under which the route is registered on the client.
    model:
        Row model whose table receives the events.
    struct:
        Event tuple type; its ``_fields`` are used as the insert columns.
    batchsize:
        The batch is flushed once it holds MORE than this many events,
        so ``0`` flushes after every event.
    """
    def __init__(self, route_name: str, model: Type[RowModel], struct: Type[T], batchsize: int = 20):
        self.model = model
        self.struct = struct

        self.batch_size = batchsize
        self.route_name = route_name

        self._route: Optional[AppRoute] = None
        self._client: Optional[AppClient] = None

        self.queue: asyncio.Queue[T] = asyncio.Queue()
        self.batch: list[T] = []
        self._consumer_task: Optional[asyncio.Task] = None

    @property
    def route(self):
        """The AppRoute feeding ``handle_event``, created on first access."""
        if self._route is None:
            self._route = AppRoute(self.handle_event, name=self.route_name)
        return self._route

    async def handle_event(self, data):
        """
        Queue an incoming event for the consumer.

        Uses ``put_nowait`` so that a full queue is reported and the event
        dropped, rather than blocking the route handler.
        """
        try:
            self.queue.put_nowait(data)
        except asyncio.QueueFull:
            logger.warning(
                f"Queue on event handler {self.route_name} is full! Discarding event {data}"
            )

    async def consumer(self):
        """
        Move queued events into the batch and flush it when it grows past ``batch_size``.

        On cancellation the remaining batch is flushed before the cancellation
        is re-raised. Any other error is logged and the loop continues; since
        ``process_batch`` only clears the batch after a successful insert, a
        failed batch stays pending and is retried with the next event.
        """
        with logging_context(action='consumer'):
            while True:
                try:
                    item = await self.queue.get()
                    self.batch.append(item)
                    if len(self.batch) > self.batch_size:
                        await self.process_batch()
                except asyncio.CancelledError:
                    # Try and process the last batch
                    logger.info(
                        f"Event handler {self.route_name} received cancellation signal! "
                        "Trying to process last batch."
                    )
                    if self.batch:
                        await self.process_batch()
                    raise
                except Exception:
                    logger.exception(
                        f"Event handler {self.route_name} received unhandled error."
                        " Ignoring and continuing cautiously."
                    )

    async def process_batch(self):
        """Insert the pending batch into the model's table, then clear it."""
        with logging_context(action='batch'):
            logger.debug("Processing Batch")
            # TODO: copy syntax might be more efficient here
            await self.model.table.insert_many(
                self.struct._fields,
                *map(tuple, self.batch)
            )
            self.batch.clear()

    def bind(self, client: AppClient):
        """
        Bind our route to the given client.

        Raises ValueError if the handler is already bound.
        """
        if self._client:
            raise ValueError("This EventHandler is already attached!")

        self._client = client
        self.route._client = client
        client.routes[self.route_name] = self.route
        logger.info(
            f"EventHandler {self.route_name} has attached to the ShardTalk client."
        )
        return self

    def unbind(self):
        """
        Unbind from the client.

        Raises ValueError if the handler is not bound.
        """
        if not self._client:
            raise ValueError("Not attached, cannot detach!")
        self._client.routes.pop(self.route_name, None)
        self._route = None
        # Forget the client so the handler can be bound again later.
        self._client = None
        return self

    async def attach(self, client: AppClient):
        """
        Attach to a ShardTalk client and start listening.
        """
        with logging_context(action=self.route_name):
            self.bind(client)
            self._consumer_task = asyncio.create_task(self.consumer())
            logger.info(
                f"EventHandler {self.route_name} is listening for incoming events."
            )
            return self

    async def detach(self):
        """
        Stop listening and detach from client.
        """
        self.unbind()
        if self._consumer_task and not self._consumer_task.done():
            self._consumer_task.cancel()
        self._consumer_task = None
        logger.info(
            f"EventHandler {self.route_name} has detached."
        )
        return self
class CommandEvent(NamedTuple):
    """
    One command invocation, as sent to the ``command_event`` route.

    The field names are used as the column names when the event is inserted.
    """
    appname: str
    cmdname: str
    userid: int
    created_at: datetime.datetime
    status: CommandStatus
    execution_time: float
    error: Optional[str] = None
    cogname: Optional[str] = None
    guildid: Optional[int] = None
    ctxid: Optional[int] = None
# Handler storing CommandEvent tuples through AnalyticsData.Commands.
command_event_handler: EventHandler[CommandEvent] = EventHandler(
    route_name='command_event',
    model=AnalyticsData.Commands,
    struct=CommandEvent,
    batchsize=1,
)
class GuildEvent(NamedTuple):
    """
    One guild join or leave, as sent to the ``guild_event`` route.

    The field names are used as the column names when the event is inserted.
    """
    appname: str
    guildid: int
    action: GuildAction
    created_at: datetime.datetime
# Handler storing GuildEvent tuples through AnalyticsData.Guilds.
# A batch size of 0 makes the consumer flush after every event.
guild_event_handler: EventHandler[GuildEvent] = EventHandler(
    route_name='guild_event',
    model=AnalyticsData.Guilds,
    struct=GuildEvent,
    batchsize=0,
)
class VoiceEvent(NamedTuple):
    """
    One member joining or leaving voice, as sent to the ``voice_event`` route.

    The field names are used as the column names when the event is inserted.
    """
    appname: str
    guildid: int
    userid: int
    action: VoiceAction
    created_at: datetime.datetime
# Handler storing VoiceEvent tuples through AnalyticsData.VoiceSession.
voice_event_handler: EventHandler[VoiceEvent] = EventHandler(
    route_name='voice_event',
    model=AnalyticsData.VoiceSession,
    struct=VoiceEvent,
    batchsize=5,
)

128
src/analytics/server.py Normal file
View File

@@ -0,0 +1,128 @@
import asyncio
import logging
from typing import Optional
from meta import conf, appname
from meta.logger import log_context, log_action_stack, logging_context, log_app, log_wrap
from meta.ipc import AppClient
from meta.app import appname_from_shard
from meta.sharding import shard_count
from data import Database
from .events import command_event_handler, guild_event_handler, voice_event_handler
from .snapshot import shard_snapshot, ShardSnapshot
from .data import AnalyticsData
logger = logging.getLogger(__name__)
# Apply the level configured for each logger named in the LOGGING_LEVELS config section.
for name in conf.config.options('LOGGING_LEVELS', no_defaults=True):
    logging.getLogger(name).setLevel(conf.logging_levels[name])
class AnalyticsServer:
    """
    Analytics application: consumes incoming events and stores periodic snapshots.

    The server connects its own AppClient, attaches the event handlers to it,
    and runs a loop which asks every shard for a snapshot and saves the totals.
    """
    # TODO: Move these to the config
    # How often to request snapshots
    snap_period = 120
    # How soon after a snapshot failure (e.g. not all shards online) to retry
    snap_retry_period = 10

    def __init__(self) -> None:
        self.db = Database(conf.data['args'])
        self.data = self.db.load_registry(AnalyticsData())

        self.event_handlers = [
            command_event_handler,
            guild_event_handler,
            voice_event_handler
        ]

        server_name = conf.analytics['appname']
        own_address = {
            'host': conf.analytics['server_host'],
            'port': int(conf.analytics['server_port'])
        }
        ipc_address = {
            'host': conf.appipc['server_host'],
            'port': int(conf.appipc['server_port'])
        }
        self.talk = AppClient(server_name, appname, own_address, ipc_address)
        self.talk_shard_snapshot = self.talk.register_route()(shard_snapshot)

        self._snap_task: Optional[asyncio.Task] = None

    async def attach_event_handlers(self):
        """Attach every event handler to our client, one after the other."""
        for event_handler in self.event_handlers:
            await event_handler.attach(self.talk)

    @log_wrap(action='Snap')
    async def take_snapshot(self):
        """
        Request a snapshot from every shard and store the summed totals.

        Returns True when a snapshot row was created, and False when a shard
        peer is missing or any shard failed to produce a snapshot.
        """
        # Every shard must be registered with our client before we ask
        missing = [
            appname_from_shard(shard_id)
            for shard_id in range(0, shard_count)
            if appname_from_shard(shard_id) not in self.talk.peers
        ]
        if missing:
            # We are missing peer(s)!
            logger.warning(
                f"Analytics could not take snapshot because peers are missing: {', '.join(missing)}"
            )
            return False

        # Everyone is here, ask for shard snapshots
        results = await self.talk_shard_snapshot().broadcast()

        # Any missing result or exception (e.g. concurrency) aborts the snapshot
        if any(result is None or isinstance(result, Exception) for result in results.values()):
            # This should essentially never happen
            # Either some of the shards could not make a snapshot (e.g. Discord client issues)
            # or they disconnected in the process.
            logger.warning(
                f"Analytics could not take snapshot because some peers failed! Partial snapshot: {results}"
            )
            return False

        # Sum each snapshot field over the shards, then store the totals.
        # TODO Possibly move this out into snapshots.py?
        totals = dict.fromkeys(ShardSnapshot._fields, 0)
        for shard_result in results.values():
            for key, count in shard_result._asdict().items():
                totals[key] += count

        row = await self.data.Snapshots.create(
            appname=appname,
            guild_count=totals['guild_count'],
            member_count=totals['member_count'],
            user_count=totals['user_count'],
            in_voice=totals['voice_count'],
        )
        logger.info(f"Created snapshot: {row.data!r}")
        return True

    @log_wrap(action='SnapLoop')
    async def snapshot_loop(self):
        """
        Take snapshots forever, waiting ``snap_period`` seconds after a success
        and ``snap_retry_period`` seconds after a failure or an error.

        The loop ends quietly when it is cancelled.
        """
        while True:
            try:
                if await self.take_snapshot():
                    delay = self.snap_period
                else:
                    logger.info("Snapshot failed, retrying after %d seconds", self.snap_retry_period)
                    delay = self.snap_retry_period
                await asyncio.sleep(delay)
            except asyncio.CancelledError:
                logger.info("Snapshot loop cancelled, closing.")
                return
            except Exception:
                logger.exception(
                    "Unhandled exception during snapshot loop. Ignoring and continuing cautiously."
                )
                await asyncio.sleep(self.snap_retry_period)

    async def run(self):
        """
        Connect the database and client, start the handlers and the snapshot
        loop, then wait on the handlers' consumer tasks.
        """
        log_action_stack.set(['Analytics'])
        log_app.set(conf.analytics['appname'])

        async with await self.db.connect():
            await self.talk.connect()
            await self.attach_event_handlers()
            self._snap_task = asyncio.create_task(self.snapshot_loop())
            consumer_tasks = [handler._consumer_task for handler in self.event_handlers]
            await asyncio.gather(*consumer_tasks)
# When run as a script, build the server and run it until it finishes.
if __name__ == '__main__':
    server = AnalyticsServer()
    asyncio.run(server.run())

28
src/analytics/snapshot.py Normal file
View File

@@ -0,0 +1,28 @@
from typing import NamedTuple
from meta.context import ctx_bot
class ShardSnapshot(NamedTuple):
    """
    Counts describing a single shard at one moment.
    """
    guild_count: int
    voice_count: int
    member_count: int
    user_count: int
async def shard_snapshot():
    """
    Build a ShardSnapshot of the current shard.

    Returns None when ``ctx_bot`` holds no bot or the bot is not ready yet.
    """
    bot = ctx_bot.get()
    if bot is None or not bot.is_ready():
        # No usable bot means no snapshot; fail quietly
        return None

    guilds = bot.guilds
    in_voice = 0
    member_total = 0
    unique_ids = set()
    for guild in guilds:
        # Members currently listed in any of the guild's voice channels
        for channel in guild.voice_channels:
            in_voice += len(channel.members)
        members = guild.members
        member_total += len(members)
        # A user sharing several guilds is only counted once here
        unique_ids.update(member.id for member in members)

    return ShardSnapshot(
        guild_count=len(guilds),
        voice_count=in_voice,
        member_count=member_total,
        user_count=len(unique_ids)
    )