diff options
author | Jon Bergli Heier <snakebite@jvnv.net> | 2010-06-02 17:52:32 +0200 |
---|---|---|
committer | Jon Bergli Heier <snakebite@jvnv.net> | 2010-06-02 17:52:32 +0200 |
commit | a34ddc5dd68c4530c91853507868684a9bc4b45e (patch) | |
tree | dc50334c6a8181047bf9ba0a012c3aac93719c3b /parsing.c | |
parent | 6dd916eaedfd23a93ba7aa2e2b39511e7e907dec (diff) |
Added a minimum word length config option, "wordlen_min".
Also did some minor cleanup of the word add code in process_file.
Diffstat (limited to 'parsing.c')
-rw-r--r-- | parsing.c | 39 |
1 files changed, 17 insertions, 22 deletions
@@ -21,6 +21,19 @@ static pthread_mutex_t user_mutex, word_mutex, channel_mutex;
 static struct user_t *last_user = NULL;
 static int in_monolog = 0, monolog_len = 0;
 
+static inline void add_word(struct user_t *user, wchar_t *word, int len) {
+	pthread_mutex_lock(&user_mutex);
+	user->words++;
+	pthread_mutex_unlock(&user_mutex);
+	word[len] = '\0';
+	char mbword[TEXT_BUFFER_SIZE];
+	wcstombs(mbword, word, TEXT_BUFFER_SIZE);
+	pthread_mutex_lock(&word_mutex);
+	struct word_t *word_s = word_get(mbword);
+	word_s->count++;
+	pthread_mutex_unlock(&word_mutex);
+}
+
 static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *rs) {
 	char line[LINE_BUFFER_SIZE];
 
@@ -79,17 +92,8 @@ static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *
 			int len = 0;
 			for(wchar_t *pos = wtext; pos < end; pos++) {
 				if(iswblank(*pos)) {
-					if(len) {
-						pthread_mutex_lock(&user_mutex);
-						user->words++;
-						pthread_mutex_unlock(&user_mutex);
-						word[len] = '\0';
-						char mbword[TEXT_BUFFER_SIZE];
-						wcstombs(mbword, word, TEXT_BUFFER_SIZE);
-						pthread_mutex_lock(&word_mutex);
-						struct word_t *word_s = word_get(mbword);
-						word_s->count++;
-						pthread_mutex_unlock(&word_mutex);
+					if(len >= ircstats_config.wordlen_min) {
+						add_word(user, word, len);
 					}
 					len = 0;
 					*word = '\0';
@@ -100,17 +104,8 @@ static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *
 					*word = '\0';
 				}
 			}
-			if(len) {
-				pthread_mutex_lock(&user_mutex);
-				user->words++;
-				pthread_mutex_unlock(&user_mutex);
-				word[len] = '\0';
-				char mbword[TEXT_BUFFER_SIZE];
-				wcstombs(mbword, word, TEXT_BUFFER_SIZE);
-				pthread_mutex_lock(&word_mutex);
-				struct word_t *word_s = word_get(mbword);
-				word_s->count++;
-				pthread_mutex_unlock(&word_mutex);
+			if(len >= ircstats_config.wordlen_min) {
+				add_word(user, word, len);
 			}
 			continue;
 		}
|