#include #include #include #include #include #include #include "parsing.h" #include "channel.h" #include "user.h" #include "word.h" #include "export_xml.h" #include "config.h" #define NICK_BUFFER_SIZE 0x100 #define TEXT_BUFFER_SIZE 0x400 #define LINE_BUFFER_SIZE 0x400 #define TIME_BUFFER_SIZE 0xf #define DATE_BUFFER_SIZE 0x20 #define max(a, b) ((a) > (b) ? (a) : (b)) static pthread_mutex_t file_mutex, user_mutex, word_mutex, channel_mutex, time_mutex; static struct user_t *last_user = NULL; static int in_monolog = 0, monolog_len = 0; static struct tm now_global; static inline void add_word(struct user_t *user, wchar_t *word, int len) { pthread_mutex_lock(&user_mutex); user->words++; pthread_mutex_unlock(&user_mutex); word[len] = '\0'; char mbword[TEXT_BUFFER_SIZE]; wcstombs(mbword, word, TEXT_BUFFER_SIZE); pthread_mutex_lock(&word_mutex); struct word_t *word_s = word_get(mbword); word_s->count++; pthread_mutex_unlock(&word_mutex); } static inline char *parse_getline(char *buffer, int bufsize, FILE *f) { char *r; pthread_mutex_lock(&file_mutex); r = fgets(buffer, bufsize, f); pthread_mutex_unlock(&file_mutex); return r; } static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *rs) { char line[LINE_BUFFER_SIZE]; const char *log_date_format, *day_date_format; struct tm now; if(rs->log_date_format) { log_date_format = rs->log_date_format; } else if(ircstats_config.log_date_format) { log_date_format = ircstats_config.log_date_format; } else { log_date_format = NULL; } if(rs->day_date_format) { day_date_format = rs->day_date_format; } else if(ircstats_config.day_date_format) { day_date_format = ircstats_config.day_date_format; } else { day_date_format = NULL; } while(parse_getline(line, LINE_BUFFER_SIZE, f)) { int rc; int ovector[30]; rc = pcre_exec(rs->text, rs->text_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], text[TEXT_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->text, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->text, line, ovector, rc, "text", text, TEXT_BUFFER_SIZE); pcre_copy_named_substring(rs->text, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->text, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); pthread_mutex_lock(&user_mutex); struct user_t *user = user_get(nick); if(user == last_user) { monolog_len++; if(!in_monolog && monolog_len >= ircstats_config.monolog_min) { in_monolog = 1; user->monologs++; /* Count first lines. */ user->monolog_lines += monolog_len; } else if(in_monolog) { user->monolog_lines++; } } else { last_user = user; in_monolog = 0; monolog_len = 1; } pthread_mutex_unlock(&user_mutex); /* Calculate array index for lines. */ int hour, min, time_i; hour = atoi(hour_s); min = atoi(min_s); time_i = hour*4 + min / 15; /* Count words. */ wchar_t wtext[TEXT_BUFFER_SIZE]; mbstowcs(wtext, text, TEXT_BUFFER_SIZE); pthread_mutex_lock(&time_mutex); now = now_global; pthread_mutex_unlock(&time_mutex); now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); pthread_mutex_lock(&user_mutex); user->characters += wcslen(wtext); user->lines[time_i]++; if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); pthread_mutex_unlock(&user_mutex); pthread_mutex_lock(&channel_mutex); channel->hours[time_i]++; pthread_mutex_unlock(&channel_mutex); wchar_t word[TEXT_BUFFER_SIZE]; wchar_t *end = wcschr(wtext, '\0'); *word = '\0'; int len = 0; for(wchar_t *pos = wtext; pos < end; pos++) { if(iswblank(*pos)) { if(len >= ircstats_config.wordlen_min) { add_word(user, word, len); } len = 0; *word = '\0'; } else if(iswalpha(*pos)) { word[len++] = towlower(*pos); } else { len = 0; *word = '\0'; } } if(len >= ircstats_config.wordlen_min) { add_word(user, word, len); } continue; } rc = pcre_exec(rs->join, rs->join_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->join, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->join, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->join, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); int hour, min; hour = atoi(hour_s); min = atoi(min_s); pthread_mutex_lock(&time_mutex); now = now_global; pthread_mutex_unlock(&time_mutex); now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); pthread_mutex_lock(&user_mutex); struct user_t *user = user_get(nick); if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); pthread_mutex_unlock(&user_mutex); continue; } rc = pcre_exec(rs->part, rs->part_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->part, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->part, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->part, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); int hour, min; hour = atoi(hour_s); min = atoi(min_s); pthread_mutex_lock(&time_mutex); now = now_global; pthread_mutex_unlock(&time_mutex); now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); pthread_mutex_lock(&user_mutex); struct user_t *user = user_get(nick); if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); pthread_mutex_unlock(&user_mutex); continue; } rc = pcre_exec(rs->quit, rs->quit_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->quit, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->quit, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->quit, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); int hour, min; hour = atoi(hour_s); min = atoi(min_s); pthread_mutex_lock(&time_mutex); now = now_global; pthread_mutex_unlock(&time_mutex); now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); pthread_mutex_lock(&user_mutex); struct user_t *user = user_get(nick); if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); pthread_mutex_unlock(&user_mutex); continue; } rc = pcre_exec(rs->nick_changed, rs->nick_changed_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char newnick[NICK_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->nick_changed, line, ovector, rc, "new_nick", newnick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->nick_changed, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->nick_changed, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); int hour, min; hour = atoi(hour_s); min = atoi(min_s); pthread_mutex_lock(&time_mutex); now = now_global; pthread_mutex_unlock(&time_mutex); now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); pthread_mutex_lock(&user_mutex); struct user_t *user = user_get(newnick); if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); pthread_mutex_unlock(&user_mutex); continue; } rc = pcre_exec(rs->log_opened, rs->log_opened_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char date[DATE_BUFFER_SIZE]; if(!log_date_format) { continue; } pcre_copy_named_substring(rs->log_opened, line, ovector, rc, "date", date, DATE_BUFFER_SIZE); /* TODO: Find a better way around this. * Locking early to allow setting of correct date before any other threads start setting time * before date is set. */ pthread_mutex_lock(&time_mutex); if(!strptime(date, log_date_format, &now)) { fprintf(stderr, "log fail: %s\n", date); pthread_mutex_unlock(&time_mutex); continue; } now_global = now; pthread_mutex_unlock(&time_mutex); continue; } /* day_changed is optional */ rc = rs->day_changed ? pcre_exec(rs->day_changed, rs->day_changed_e, line, strlen(line), 0, 0, ovector, 30) : 0; if(rc > 0) { char date[DATE_BUFFER_SIZE]; if(!day_date_format) { continue; } pcre_copy_named_substring(rs->day_changed, line, ovector, rc, "date", date, DATE_BUFFER_SIZE); /* See comment in log_opened parsing. */ pthread_mutex_lock(&time_mutex); if(!strptime(date, day_date_format, &now)) { fprintf(stderr, "day fail: %s\n", date); pthread_mutex_unlock(&time_mutex); continue; } now_global = now; pthread_mutex_unlock(&time_mutex); continue; } rc = pcre_exec(rs->kick, rs->kick_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], victim[NICK_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->kick, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->kick, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->kick, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->kick, line, ovector, rc, "victim", victim, NICK_BUFFER_SIZE); int hour, min; hour = atoi(hour_s); min = atoi(min_s); pthread_mutex_lock(&time_mutex); now = now_global; pthread_mutex_unlock(&time_mutex); now.tm_hour = hour; now.tm_min = min; time_t now_ut = mktime(&now); pthread_mutex_lock(&user_mutex); struct user_t *user = user_get(nick), *victim_user = user_get(victim); user->kicks++; victim_user->kicked++; if(user->seen_first == 0 || now_ut < user->seen_first) { user->seen_first = now_ut; } user->seen_last = max(now_ut, user->seen_last); pthread_mutex_unlock(&user_mutex); continue; } } } struct thread_arg_t { FILE *f; struct channel_t *channel; struct regexset_t *rs; }; static void *thread_func(void *arg) { struct thread_arg_t *ta = arg; process_file(ta->f, ta->channel, ta->rs); return NULL; } void process(int thread_n) { pthread_mutex_init(&file_mutex, NULL); pthread_mutex_init(&user_mutex, NULL); pthread_mutex_init(&word_mutex, NULL); pthread_mutex_init(&channel_mutex, NULL); pthread_mutex_init(&time_mutex, NULL); /* Parsing stuff goes here. */ for(int chan_i = 0; chan_i < channel_get_count(); chan_i++) { user_init(); word_init(); struct channel_t *channel = channel_get(chan_i); printf("Channel %s\n", channel->name); struct channel_file_t *file = channel->files; while(file) { struct regexset_t *rs = file->rs; FILE *f = fopen(file->path, "r"); if(!f) { fprintf(stderr, "\tFailed to open %s\n", file->path); file = file->next; continue; } else printf("\tParsing %s\n", file->path); last_user = NULL; in_monolog = monolog_len = 0; pthread_t *threads; threads = malloc(sizeof(pthread_t) * thread_n); struct thread_arg_t ta; ta.f = f; ta.channel = channel; ta.rs = rs; for(int i = 0; i < thread_n; i++) { pthread_create(&threads[i], NULL, thread_func, &ta); } for(int i = 0; i < thread_n; i++) { pthread_join(threads[i], NULL); } free(threads); fclose(f); file = file->next; } export_xml(channel, users); user_free(); word_free(); } pthread_mutex_destroy(&file_mutex); pthread_mutex_destroy(&time_mutex); pthread_mutex_destroy(&user_mutex); pthread_mutex_destroy(&word_mutex); pthread_mutex_destroy(&channel_mutex); }