#!/usr/bin/env python2
"""Draw one activity bar chart per CSV file named on the command line.

Expected input layout (as written by usertimes.c):

    line 1:  channel name, optionally prefixed with "<something>/" and "#"
    line 2:  nick
    rest:    one row per day: "<unix timestamp>,<count>,<count>,..."

For every input file a PNG named "<channel>-<nick>.png" is written to the
current directory, showing the total number of lines per day.
"""

import datetime
import sys


def read_activity(path):
    """Parse one activity CSV.

    Returns a tuple (channel, nick, rows) where channel has any leading
    path component and '#' removed, and rows is a list of integer lists
    ([timestamp, count, count, ...]) sorted by timestamp.

    Raises ValueError if a non-empty data row contains a non-integer field.
    """
    # "with" guarantees the file is closed even when a row fails to parse.
    with open(path, 'r') as f:
        channel = f.readline().strip().rsplit('/')[-1]
        nick = f.readline().strip()
        # Blank lines (e.g. a trailing empty line) carry no data; without
        # this filter int('') would abort the whole run.
        rows = [[int(field) for field in line.strip().split(',')]
                for line in f if line.strip()]

    if channel.startswith('#'):
        channel = channel[1:]

    rows.sort(key=lambda row: row[0])
    return channel, nick, rows


def plot_activity(channel, nick, rows):
    """Render rows as a lines-per-day bar chart and save it as a PNG."""
    # Imported here so that parsing can be used (and tested) without
    # matplotlib being installed.
    import pylab

    fig = pylab.figure(figsize=(30, 6))
    ax = fig.gca()
    # One labelled tick every third month (Mar, Jun, Sep, Dec).
    ax.xaxis.set_major_locator(pylab.MonthLocator(range(3, 13, 3)))
    ax.xaxis.set_major_formatter(pylab.DateFormatter('%b %Y'))
    pylab.grid(True)
    pylab.xlabel('Time')
    pylab.ylabel('Lines/day')
    pylab.title('Activity for {0} in #{1}'.format(nick, channel))
    pylab.bar([datetime.date.fromtimestamp(row[0]) for row in rows],
              [sum(row[1:]) for row in rows])

    pylab.savefig('{0}-{1}.png'.format(channel, nick))
    # Release the figure; otherwise every processed file stays in memory.
    pylab.close(fig)


def main(paths):
    """Graph every CSV file in paths."""
    for path in paths:
        channel, nick, rows = read_activity(path)
        print('Graphing activity for {0} in #{1}'.format(nick, channel))
        plot_activity(channel, nick, rows)


if __name__ == '__main__':
    main(sys.argv[1:])
b/usertimes.c new file mode 100644 index 0000000..6e1a2ea --- /dev/null +++ b/usertimes.c @@ -0,0 +1,284 @@ +#include +#include +#include +#include +#include +#include + +#include "regexset.h" +#include "channel.h" +#include "nick.h" +#include "config.h" +#include "word.h" + +#define NICK_BUFFER_SIZE 0x100 +#define TEXT_BUFFER_SIZE 0x400 +#define LINE_BUFFER_SIZE 0x400 +#define TIME_BUFFER_SIZE 0xf +#define DATE_BUFFER_SIZE 0x20 + +#define max(a, b) ((a) > (b) ? (a) : (b)) + +struct day_t { + time_t day; + unsigned long lines[24*4]; + struct day_t *next; +}; + +struct user_t { + const char *nick; + struct day_t *days; + unsigned long long words, characters, kicks, kicked, monolog_lines, monologs; + time_t seen_first, seen_last; +}; + +static inline void add_line(struct user_t *user, time_t now) { + time_t day_ut = now - (now % 86400); + + if(!user->days) { + user->days = malloc(sizeof(struct day_t)); + memset(user->days, 0, sizeof(struct day_t)); + user->days->day = day_ut; + } + + struct day_t *day = user->days; + + if(day->day != day_ut) { + day = malloc(sizeof(struct day_t)); + memset(day, 0, sizeof(struct day_t)); + day->day = day_ut; + day->next = user->days; + user->days = day; + } + + int time_i = (day_ut % 3600) / 900; + day->lines[time_i]++; +} + +static void export(struct channel_t *channel, struct user_t *user) { + char path[0xff]; + // ignore slashes in channel name (eg. 
network) + char *temp = strrchr(channel->name, '/'); + if(!temp) + temp = channel->name; + else + temp++; + // skip leading # + if(*temp == '#') + temp++; + snprintf(path, 0xff, "%s-%s.csv", temp, user->nick); + FILE *f = fopen(path, "w"); + + fprintf(f, "%s\n", channel->name); + fprintf(f, "%s\n", user->nick); + + struct day_t *day = user->days; + while(day) { + fprintf(f, "%lu", day->day); + for(int h = 0; h < 24; h++) { + for(int q = 0; q < 4; q++) { + fprintf(f, ",%lu", day->lines[h*4 + q]); + } + } + fprintf(f, "\n"); + day = day->next; + } + fclose(f); +} + +static inline char *parse_getline(char *buffer, int bufsize, FILE *f) { + char *r; + r = fgets(buffer, bufsize, f); + return r; +} + +int main(int argc, char **argv) { + if(argc != 2) { + printf("Usage: %s NICK\n", argv[0]); + return 1; + } + + /* Set locale. */ + setlocale(LC_CTYPE, ""); + + /* Regex sets must be initialized before config. */ + rs_init(); + channel_init(); + nick_init(); + if(!cfg_init()) { + /* Free any registered regex sets and channels when config fails. + Config will fail if a regex set fails to compile all parts. 
*/ + rs_free(); + channel_free(); + return 1; + } + + struct user_t user_ = {0}; + struct user_t *user = &user_; + user->nick = argv[1]; + + for(int chan_i = 0; chan_i < channel_get_count(); chan_i++) { + //word_init(); + struct channel_t *channel = channel_get(chan_i); + printf("Channel %s\n", channel->name); + struct channel_file_t *file = channel->files; + while(file) { + struct regexset_t *rs = file->rs; + FILE *f = fopen(file->path, "r"); + if(!f) { + fprintf(stderr, "\tFailed to open %s\n", file->path); + file = file->next; + continue; + } else + printf("\tParsing %s\n", file->path); + + char line[LINE_BUFFER_SIZE]; + const char *log_date_format, *day_date_format; + struct tm now; + struct tm now_global; + + if(rs->log_date_format) { + log_date_format = rs->log_date_format; + } else if(ircstats_config.log_date_format) { + log_date_format = ircstats_config.log_date_format; + } else { + log_date_format = NULL; + } + + if(rs->day_date_format) { + day_date_format = rs->day_date_format; + } else if(ircstats_config.day_date_format) { + day_date_format = ircstats_config.day_date_format; + } else { + day_date_format = NULL; + } + + while(parse_getline(line, LINE_BUFFER_SIZE, f)) { + int rc; + int ovector[30]; + + rc = pcre_exec(rs->text, rs->text_e, line, strlen(line), 0, 0, ovector, 30); + if(rc > 0) { + char nick[NICK_BUFFER_SIZE], text[TEXT_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; + pcre_copy_named_substring(rs->text, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); + pcre_copy_named_substring(rs->text, line, ovector, rc, "text", text, TEXT_BUFFER_SIZE); + pcre_copy_named_substring(rs->text, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); + pcre_copy_named_substring(rs->text, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); + char *realnick = nick_get(nick); + if(strcmp(realnick, argv[1]) != 0) { + continue; + } + + /* Calculate array index for lines. 
*/ + int hour, min, time_i; + hour = atoi(hour_s); + min = atoi(min_s); + time_i = hour*4 + min / 15; + + /* Count words. */ + /*wchar_t wtext[TEXT_BUFFER_SIZE]; + mbstowcs(wtext, text, TEXT_BUFFER_SIZE);*/ + + now = now_global; + + now.tm_hour = hour; + now.tm_min = min; + time_t now_ut = mktime(&now); + + //user->characters += wcslen(wtext); + add_line(user, now_ut); + //user->lines[time_i]++; + + if(user->seen_first == 0 || now_ut < user->seen_first) { + user->seen_first = now_ut; + } + user->seen_last = max(now_ut, user->seen_last); + + //channel->hours[time_i]++; + + /*wchar_t word[TEXT_BUFFER_SIZE]; + wchar_t *end = wcschr(wtext, '\0'); + *word = '\0'; + int len = 0; + for(wchar_t *pos = wtext; pos < end; pos++) { + if(iswblank(*pos)) { + if(len >= ircstats_config.wordlen_min) { + add_word(user, word, len); + } + len = 0; + *word = '\0'; + } else if(iswalpha(*pos)) { + word[len++] = towlower(*pos); + } else { + len = 0; + *word = '\0'; + } + } + if(len >= ircstats_config.wordlen_min) { + add_word(user, word, len); + }*/ + continue; + } + + rc = pcre_exec(rs->log_opened, rs->log_opened_e, line, strlen(line), 0, 0, ovector, 30); + if(rc > 0) { + char date[DATE_BUFFER_SIZE]; + + if(!log_date_format) { + continue; + } + + pcre_copy_named_substring(rs->log_opened, line, ovector, rc, "date", date, DATE_BUFFER_SIZE); + + if(!strptime(date, log_date_format, &now)) { + fprintf(stderr, "log fail: %s\n", date); + continue; + } + + now_global = now; + + continue; + } + + /* day_changed is optional */ + rc = rs->day_changed ? 
pcre_exec(rs->day_changed, rs->day_changed_e, line, strlen(line), 0, 0, ovector, 30) : 0; + if(rc > 0) { + char date[DATE_BUFFER_SIZE]; + + if(!day_date_format) { + continue; + } + + pcre_copy_named_substring(rs->day_changed, line, ovector, rc, "date", date, DATE_BUFFER_SIZE); + + if(!strptime(date, day_date_format, &now)) { + fprintf(stderr, "day fail: %s\n", date); + continue; + } + + now_global = now; + + continue; + } + } + + fclose(f); + file = file->next; + } + + export(channel, user); + + while(user->days) { + struct day_t *day = user->days; + user->days = day->next; + free(day); + } + } + + nick_free(); + cfg_free(); + channel_free(); + rs_free(); + + return 0; +} -- cgit v1.2.3