From a34ddc5dd68c4530c91853507868684a9bc4b45e Mon Sep 17 00:00:00 2001
From: Jon Bergli Heier
Date: Wed, 2 Jun 2010 17:52:32 +0200
Subject: Added a minimum word length config option, "wordlen_min". Also did some minor cleanup of the word add code in process_file.

---
 parsing.c | 39 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

(limited to 'parsing.c')

diff --git a/parsing.c b/parsing.c
index 1172cd1..4ef3fb7 100644
--- a/parsing.c
+++ b/parsing.c
@@ -21,6 +21,19 @@ static pthread_mutex_t user_mutex, word_mutex, channel_mutex;
 
 static struct user_t *last_user = NULL;
 static int in_monolog = 0, monolog_len = 0;
 
+static inline void add_word(struct user_t *user, wchar_t *word, int len) {
+	pthread_mutex_lock(&user_mutex);
+	user->words++;
+	pthread_mutex_unlock(&user_mutex);
+	word[len] = '\0';
+	char mbword[TEXT_BUFFER_SIZE];
+	wcstombs(mbword, word, TEXT_BUFFER_SIZE);
+	pthread_mutex_lock(&word_mutex);
+	struct word_t *word_s = word_get(mbword);
+	word_s->count++;
+	pthread_mutex_unlock(&word_mutex);
+}
+
 static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *rs) {
 	char line[LINE_BUFFER_SIZE];
@@ -79,17 +92,8 @@ static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *
 			int len = 0;
 			for(wchar_t *pos = wtext; pos < end; pos++) {
 				if(iswblank(*pos)) {
-					if(len) {
-						pthread_mutex_lock(&user_mutex);
-						user->words++;
-						pthread_mutex_unlock(&user_mutex);
-						word[len] = '\0';
-						char mbword[TEXT_BUFFER_SIZE];
-						wcstombs(mbword, word, TEXT_BUFFER_SIZE);
-						pthread_mutex_lock(&word_mutex);
-						struct word_t *word_s = word_get(mbword);
-						word_s->count++;
-						pthread_mutex_unlock(&word_mutex);
+					if(len >= ircstats_config.wordlen_min) {
+						add_word(user, word, len);
 					}
 					len = 0;
 					*word = '\0';
@@ -100,17 +104,8 @@ static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *
 					*word = '\0';
 				}
 			}
-			if(len) {
-				pthread_mutex_lock(&user_mutex);
-				user->words++;
-				pthread_mutex_unlock(&user_mutex);
-				word[len] = '\0';
-				char mbword[TEXT_BUFFER_SIZE];
-				wcstombs(mbword, word, TEXT_BUFFER_SIZE);
-				pthread_mutex_lock(&word_mutex);
-				struct word_t *word_s = word_get(mbword);
-				word_s->count++;
-				pthread_mutex_unlock(&word_mutex);
+			if(len >= ircstats_config.wordlen_min) {
+				add_word(user, word, len);
 			}
 			continue;
 		}
-- 
cgit v1.2.3