#include #include #include #include #include #include "parsing.h" #include "channel.h" #include "user.h" #include "word.h" #include "export_xml.h" #include "config.h" #define NICK_BUFFER_SIZE 0x100 #define TEXT_BUFFER_SIZE 0x400 #define LINE_BUFFER_SIZE 0x400 #define TIME_BUFFER_SIZE 0xf static pthread_mutex_t user_mutex, word_mutex, channel_mutex; static struct user_t *last_user = NULL; static int in_monolog = 0, monolog_len = 0; static inline void add_word(struct user_t *user, wchar_t *word, int len) { pthread_mutex_lock(&user_mutex); user->words++; pthread_mutex_unlock(&user_mutex); word[len] = '\0'; char mbword[TEXT_BUFFER_SIZE]; wcstombs(mbword, word, TEXT_BUFFER_SIZE); pthread_mutex_lock(&word_mutex); struct word_t *word_s = word_get(mbword); word_s->count++; pthread_mutex_unlock(&word_mutex); } static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *rs) { char line[LINE_BUFFER_SIZE]; while(fgets(line, LINE_BUFFER_SIZE, f)) { int rc; int ovector[30]; rc = pcre_exec(rs->text, rs->text_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], text[TEXT_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; pcre_copy_named_substring(rs->text, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->text, line, ovector, rc, "text", text, TEXT_BUFFER_SIZE); pcre_copy_named_substring(rs->text, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); pcre_copy_named_substring(rs->text, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); pthread_mutex_lock(&user_mutex); struct user_t *user = user_get(nick); if(user == last_user) { monolog_len++; if(!in_monolog && monolog_len >= ircstats_config.monolog_min) { in_monolog = 1; user->monologs++; /* Count first lines. */ user->monolog_lines += monolog_len; } else if(in_monolog) { user->monolog_lines++; } } else { last_user = user; in_monolog = 0; monolog_len = 1; } pthread_mutex_unlock(&user_mutex); /* Calculate array index for lines. */ int hour, min, time_i; hour = atoi(hour_s); min = atoi(min_s); time_i = hour*4 + min / 15; /* Count words. */ wchar_t wtext[TEXT_BUFFER_SIZE]; mbstowcs(wtext, text, TEXT_BUFFER_SIZE); pthread_mutex_lock(&user_mutex); user->characters += wcslen(wtext); user->lines[time_i]++; pthread_mutex_unlock(&user_mutex); pthread_mutex_lock(&channel_mutex); channel->hours[time_i]++; pthread_mutex_unlock(&channel_mutex); wchar_t word[TEXT_BUFFER_SIZE]; wchar_t *end = wcschr(wtext, '\0'); *word = '\0'; int len = 0; for(wchar_t *pos = wtext; pos < end; pos++) { if(iswblank(*pos)) { if(len >= ircstats_config.wordlen_min) { add_word(user, word, len); } len = 0; *word = '\0'; } else if(iswalpha(*pos)) { word[len++] = towlower(*pos); } else { len = 0; *word = '\0'; } } if(len >= ircstats_config.wordlen_min) { add_word(user, word, len); } continue; } rc = pcre_exec(rs->join, rs->join_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE]; pcre_copy_named_substring(rs->join, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pthread_mutex_lock(&user_mutex); struct user_t *user = user_get(nick); pthread_mutex_unlock(&user_mutex); continue; } rc = pcre_exec(rs->kick, rs->kick_e, line, strlen(line), 0, 0, ovector, 30); if(rc > 0) { char nick[NICK_BUFFER_SIZE], victim[NICK_BUFFER_SIZE]; pcre_copy_named_substring(rs->kick, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); pcre_copy_named_substring(rs->kick, line, ovector, rc, "victim", victim, NICK_BUFFER_SIZE); pthread_mutex_lock(&user_mutex); struct user_t *user = user_get(nick), *victim_user = user_get(victim); user->kicks++; victim_user->kicked++; pthread_mutex_unlock(&user_mutex); continue; } } } struct thread_arg_t { FILE *f; struct channel_t *channel; struct regexset_t *rs; }; static void *thread_func(void *arg) { struct thread_arg_t *ta = arg; process_file(ta->f, ta->channel, ta->rs); return NULL; } void process(int thread_n) { pthread_mutex_init(&user_mutex, NULL); pthread_mutex_init(&word_mutex, NULL); pthread_mutex_init(&channel_mutex, NULL); /* Parsing stuff goes here. */ for(int chan_i = 0; chan_i < channel_get_count(); chan_i++) { user_init(); word_init(); struct channel_t *channel = channel_get(chan_i); printf("Channel %s\n", channel->name); struct channel_file_t *file = channel->files; while(file) { struct regexset_t *rs = file->rs; FILE *f = fopen(file->path, "r"); if(!f) { fprintf(stderr, "\tFailed to open %s\n", file->path); file = file->next; continue; } else printf("\tParsing %s\n", file->path); last_user = NULL; in_monolog = monolog_len = 0; pthread_t *threads; threads = malloc(sizeof(pthread_t) * thread_n); struct thread_arg_t ta; ta.f = f; ta.channel = channel; ta.rs = rs; for(int i = 0; i < thread_n; i++) { pthread_create(&threads[i], NULL, thread_func, &ta); } for(int i = 0; i < thread_n; i++) { pthread_join(threads[i], NULL); } free(threads); fclose(f); file = file->next; } export_xml(channel, users); user_free(); word_free(); } pthread_mutex_destroy(&user_mutex); pthread_mutex_destroy(&word_mutex); pthread_mutex_destroy(&channel_mutex); }