diff options
Diffstat (limited to 'parsing.c')
-rw-r--r-- | parsing.c | 168 |
1 files changed, 168 insertions, 0 deletions
diff --git a/parsing.c b/parsing.c new file mode 100644 index 0000000..58a5303 --- /dev/null +++ b/parsing.c @@ -0,0 +1,168 @@ +#include <stdio.h> +#include <string.h> +#include <wctype.h> +#include <wchar.h> +#include <pthread.h> + +#include "parsing.h" +#include "channel.h" +#include "user.h" +#include "word.h" +#include "export_xml.h" + +#define NICK_BUFFER_SIZE 0x100 +#define TEXT_BUFFER_SIZE 0x400 +#define LINE_BUFFER_SIZE 0x400 +#define TIME_BUFFER_SIZE 0xf + +pthread_mutex_t user_mutex, word_mutex; + +static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *rs) { + char line[LINE_BUFFER_SIZE]; + while(fgets(line, LINE_BUFFER_SIZE, f)) { + int rc; + int ovector[30]; + + rc = pcre_exec(rs->text, rs->text_e, line, strlen(line), 0, 0, ovector, 30); + if(rc > 0) { + char nick[NICK_BUFFER_SIZE], text[TEXT_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE]; + pcre_copy_named_substring(rs->text, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); + pcre_copy_named_substring(rs->text, line, ovector, rc, "text", text, TEXT_BUFFER_SIZE); + pcre_copy_named_substring(rs->text, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE); + pcre_copy_named_substring(rs->text, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE); + pthread_mutex_lock(&user_mutex); + struct user_t *user = user_get(nick); + pthread_mutex_unlock(&user_mutex); + + /* Calculate array index for lines. */ + int hour, min, time_i; + hour = atoi(hour_s); + min = atoi(min_s); + time_i = hour*4 + min / 15; + + user->lines[time_i]++; + channel->hours[time_i]++; + + /* Count words. */ + wchar_t wtext[TEXT_BUFFER_SIZE]; + mbstowcs(wtext, text, TEXT_BUFFER_SIZE); + user->characters += wcslen(wtext); + wchar_t word[TEXT_BUFFER_SIZE]; + wchar_t *end = wcschr(wtext, '\0'); + *word = '\0'; + int len = 0; + for(wchar_t *pos = wtext; pos < end; pos++) { + if(iswblank(*pos)) { + if(len) { + user->words++; + word[len] = '\0'; + char mbword[TEXT_BUFFER_SIZE]; + wcstombs(mbword, word, TEXT_BUFFER_SIZE); + pthread_mutex_lock(&word_mutex); + struct word_t *word_s = word_get(mbword); + pthread_mutex_unlock(&word_mutex); + word_s->count++; + } + len = 0; + *word = '\0'; + } else if(iswalpha(*pos)) { + word[len++] = towlower(*pos); + } else { + len = 0; + *word = '\0'; + } + } + if(len) { + user->words++; + word[len] = '\0'; + char mbword[TEXT_BUFFER_SIZE]; + wcstombs(mbword, word, TEXT_BUFFER_SIZE); + pthread_mutex_lock(&word_mutex); + struct word_t *word_s = word_get(mbword); + pthread_mutex_unlock(&word_mutex); + word_s->count++; + } + continue; + } + + rc = pcre_exec(rs->join, rs->join_e, line, strlen(line), 0, 0, ovector, 30); + if(rc > 0) { + char nick[NICK_BUFFER_SIZE]; + pcre_copy_named_substring(rs->join, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); + pthread_mutex_lock(&user_mutex); + struct user_t *user = user_get(nick); + pthread_mutex_unlock(&user_mutex); + continue; + } + + rc = pcre_exec(rs->kick, rs->kick_e, line, strlen(line), 0, 0, ovector, 30); + if(rc > 0) { + char nick[NICK_BUFFER_SIZE], victim[NICK_BUFFER_SIZE]; + pcre_copy_named_substring(rs->kick, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE); + pcre_copy_named_substring(rs->kick, line, ovector, rc, "victim", victim, NICK_BUFFER_SIZE); + pthread_mutex_lock(&user_mutex); + struct user_t *user = user_get(nick), + *victim_user = user_get(victim); + pthread_mutex_unlock(&user_mutex); + user->kicks++; + victim_user->kicked++; + continue; + } + } +} + +struct thread_arg_t { + FILE *f; + struct channel_t *channel; + struct regexset_t *rs; +}; + +static void *thread_func(void *arg) { + struct thread_arg_t *ta = arg; + process_file(ta->f, ta->channel, ta->rs); +} + +void process(int thread_n) { + pthread_mutex_init(&user_mutex, NULL); + pthread_mutex_init(&word_mutex, NULL); + /* Parsing stuff goes here. */ + for(int chan_i = 0; chan_i < channel_get_count(); chan_i++) { + user_init(); + word_init(); + struct channel_t *channel = channel_get(chan_i); + printf("Channel %s\n", channel->name); + struct channel_file_t *file = channel->files; + while(file) { + struct regexset_t *rs = file->rs; + FILE *f = fopen(file->path, "r"); + if(!f) { + fprintf(stderr, "\tFailed to open %s\n", file->path); + file = file->next; + continue; + } else + printf("\tParsing %s\n", file->path); + + pthread_t *threads; + threads = malloc(sizeof(pthread_t) * thread_n); + struct thread_arg_t ta; + ta.f = f; + ta.channel = channel; + ta.rs = rs; + for(int i = 0; i < thread_n; i++) { + pthread_create(&threads[i], NULL, thread_func, &ta); + } + for(int i = 0; i < thread_n; i++) { + pthread_join(threads[i], NULL); + } + free(threads); + + fclose(f); + file = file->next; + } + export_xml(channel, users); + user_free(); + word_free(); + } + pthread_mutex_destroy(&user_mutex); + pthread_mutex_destroy(&word_mutex); +} |