#include #include #include #include #include #include "parsing.h" #include "channel.h" #include "user.h" #include "word.h" #include "config.h" #include "pg.h" #define NICK_BUFFER_SIZE 0x100 #define TEXT_BUFFER_SIZE 0x400 #define LINE_BUFFER_SIZE 0x400 #define TIME_BUFFER_SIZE 0xf #define DATE_BUFFER_SIZE 0x20 #define max(a, b) ((a) > (b) ? (a) : (b)) static struct user_t *last_user = NULL; static int in_monolog = 0, monolog_len = 0; static struct tm now_global; static inline void add_word(struct user_t *user, wchar_t *word, int len) { user->words++; word[len] = '\0'; char mbword[TEXT_BUFFER_SIZE]; wcstombs(mbword, word, TEXT_BUFFER_SIZE); struct word_t *word_s = word_get(mbword); word_s->count++; } static inline char *parse_getline(char *buffer, int bufsize, FILE *f) { char *r; r = fgets(buffer, bufsize, f); return r; } static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *rs) { char line[LINE_BUFFER_SIZE]; const char *log_date_format, *day_date_format; struct tm now; if(rs->log_date_format) { log_date_format = rs->log_date_format; } else if(ircstats_config.log_date_format) { log_date_format = ircstats_config.log_date_format; } else { log_date_format = NULL; } if(rs->day_date_format) { day_date_format = rs->day_date_format; } else if(ircstats_config.day_date_format) { day_date_format = ircstats_config.day_date_format; } else { day_date_format = NULL; } while(parse_getline(line, LINE_BUFFER_SIZE, f)) { int rc; int ovector[30]; rc = pcre_exec(rs->text, rs->text_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], text[TEXT_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->text, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->text, line, ovector, rc, "text", text, TEXT_BUFFER_SIZE); pcre_copy_named_substring(rs->text, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->text, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); struct user_t *user = user_get(nick); if(user == last_user) { monolog_len++; if(!in_monolog && monolog_len >= ircstats_config.monolog_min) { in_monolog = 1; user->monologs++; /* Count first lines. */ user->monolog_lines += monolog_len; } else if(in_monolog) { user->monolog_lines++; } } else { last_user = user; in_monolog = 0; monolog_len = 1; } /* Calculate array index for lines. */ int hour, min, time_i; hour = atoi(hour_s); min = atoi(min_s); time_i = hour*4 + min / 15; /* Count words. */ wchar_t wtext[TEXT_BUFFER_SIZE]; mbstowcs(wtext, text, TEXT_BUFFER_SIZE); now = now_global; now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); user->characters += wcslen(wtext); user->lines[time_i]++; if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); channel->hours[time_i]++; wchar_t word[TEXT_BUFFER_SIZE]; wchar_t *end = wcschr(wtext, '\0'); *word = '\0'; int len = 0; for(wchar_t *pos = wtext; pos < end; pos++) { if(iswblank(*pos)) { if(len >= ircstats_config.wordlen_min) { add_word(user, word, len); } len = 0; *word = '\0'; } else if(iswalpha(*pos)) { word[len++] = towlower(*pos); } else { len = 0; *word = '\0'; } } if(len >= ircstats_config.wordlen_min) { add_word(user, word, len); } continue; } rc = pcre_exec(rs->join, rs->join_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->join, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->join, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->join, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); int hour, min; hour = atoi(hour_s); min = atoi(min_s); now = now_global; now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); struct user_t *user = user_get(nick); if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); continue; } rc = pcre_exec(rs->part, rs->part_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->part, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->part, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->part, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); int hour, min; hour = atoi(hour_s); min = atoi(min_s); now = now_global; now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); struct user_t *user = user_get(nick); if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); continue; } rc = pcre_exec(rs->quit, rs->quit_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->quit, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->quit, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->quit, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); int hour, min; hour = atoi(hour_s); min = atoi(min_s); now = now_global; now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); struct user_t *user = user_get(nick); if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); continue; } rc = pcre_exec(rs->nick_changed, rs->nick_changed_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char newnick[NICK_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->nick_changed, line, ovector, rc, "new_nick", newnick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->nick_changed, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->nick_changed, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); int hour, min; hour = atoi(hour_s); min = atoi(min_s); now = now_global; now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); struct user_t *user = user_get(newnick); if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); continue; } rc = pcre_exec(rs->log_opened, rs->log_opened_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char date[DATE_BUFFER_SIZE]; if(!log_date_format) { continue; } pcre_copy_named_substring(rs->log_opened, line, ovector, rc, "date", date, DATE_BUFFER_SIZE); if(!strptime(date, log_date_format, &now)) { fprintf(stderr, "log fail: %s\n", date); continue; } now_global = now; continue; } /* day_changed is optional */ rc = rs->day_changed ? pcre_exec(rs->day_changed, rs->day_changed_e, line, strlen(line), 0, 0, ovector, 30) : 0; if(rc > 0) { char date[DATE_BUFFER_SIZE]; if(!day_date_format) { continue; } pcre_copy_named_substring(rs->day_changed, line, ovector, rc, "date", date, DATE_BUFFER_SIZE); if(!strptime(date, day_date_format, &now)) { fprintf(stderr, "day fail: %s\n", date); continue; } now_global = now; continue; } rc = pcre_exec(rs->kick, rs->kick_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], victim[NICK_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->kick, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->kick, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->kick, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->kick, line, ovector, rc, "victim", victim, NICK_BUFFER_SIZE); int hour, min; hour = atoi(hour_s); min = atoi(min_s); now = now_global; now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); struct user_t *user = user_get(nick), *victim_user = user_get(victim); user->kicks++; victim_user->kicked++; if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); continue; } } } static void save_users(int channel_id) { for(int i = 0; i < USERS_MAX; i++) { struct user_t *user = &users[i]; if(!user->nick) continue; while(user) { if(!user->real_user && strcmp(user->nick, "")) pg_user_set(channel_id, user); user = user->next; } } } static void save_words(int channel_id) { for(int i = 0; i < WORDS_MAX; i++) { struct word_t *word = &words[i]; if(!word->name) continue; while(word) { pg_word_set(channel_id, word); word = word->next; } } } void process() { /* Parsing stuff goes here. */ for(int chan_i = 0; chan_i < channel_get_count(); chan_i++) { user_init(); word_init(); struct channel_t *channel = channel_get(chan_i); int channel_id = pg_channel_get(channel); pg_users_get(channel_id); pg_words_get(channel_id); printf("Channel %s\n", channel->name); struct channel_file_t *file = channel->files; while(file) { struct regexset_t *rs = file->rs; FILE *f = fopen(file->path, "r"); if(!f) { fprintf(stderr, "\tFailed to open %s\n", file->path); file = file->next; continue; } else printf("\tParsing %s\n", file->path); long pos = 0; time_t time = 0; pg_channel_file_get(channel_id, file, &pos, &time); fseek(f, pos, SEEK_SET); localtime_r(&time, &now_global); last_user = NULL; in_monolog = monolog_len = 0; process_file(f, channel, rs); pg_channel_file_set(channel_id, file, ftell(f), mktime(&now_global)); fclose(f); file = file->next; } save_users(channel_id); save_words(channel_id); user_free(); word_free(); } }