diff options
author | Jon Bergli Heier <snakebite@jvnv.net> | 2010-06-02 17:52:32 +0200 |
---|---|---|
committer | Jon Bergli Heier <snakebite@jvnv.net> | 2010-06-02 17:52:32 +0200 |
commit | a34ddc5dd68c4530c91853507868684a9bc4b45e (patch) | |
tree | dc50334c6a8181047bf9ba0a012c3aac93719c3b | |
parent | 6dd916eaedfd23a93ba7aa2e2b39511e7e907dec (diff) |
Added a minimum word length config option, "wordlen_min".
Also did some minor cleanup of the word add code in process_file.
-rw-r--r-- | config.c | 4 | ||||
-rw-r--r-- | config.h | 2 | ||||
-rw-r--r-- | parsing.c | 39 |
3 files changed, 22 insertions, 23 deletions
```diff
@@ -35,6 +35,10 @@ int cfg_init() {
 		ircstats_config.monolog_min = 5;
 	}
 
+	if(!config_lookup_int(&config, "wordlen_min", &ircstats_config.wordlen_min)) {
+		ircstats_config.wordlen_min = 3;
+	}
+
 	config_setting_t *regexes_setting = config_lookup(&config, "regexes");
 	if(!config_setting_is_aggregate(regexes_setting)) {
 		fprintf(stderr, "Setting \"regexes\" must be an aggregate type.\n");
@@ -5,7 +5,7 @@ int cfg_init();
 void cfg_free();
 
 struct ircstats_config_t {
-	long int threads, monolog_min;
+	long int threads, monolog_min, wordlen_min;
 };
 
 extern struct ircstats_config_t ircstats_config;
@@ -21,6 +21,19 @@ static pthread_mutex_t user_mutex, word_mutex, channel_mutex;
 static struct user_t *last_user = NULL;
 static int in_monolog = 0, monolog_len = 0;
 
+static inline void add_word(struct user_t *user, wchar_t *word, int len) {
+	pthread_mutex_lock(&user_mutex);
+	user->words++;
+	pthread_mutex_unlock(&user_mutex);
+	word[len] = '\0';
+	char mbword[TEXT_BUFFER_SIZE];
+	wcstombs(mbword, word, TEXT_BUFFER_SIZE);
+	pthread_mutex_lock(&word_mutex);
+	struct word_t *word_s = word_get(mbword);
+	word_s->count++;
+	pthread_mutex_unlock(&word_mutex);
+}
+
 static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *rs) {
 	char line[LINE_BUFFER_SIZE];
 
@@ -79,17 +92,8 @@ static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *
 			int len = 0;
 			for(wchar_t *pos = wtext; pos < end; pos++) {
 				if(iswblank(*pos)) {
-					if(len) {
-						pthread_mutex_lock(&user_mutex);
-						user->words++;
-						pthread_mutex_unlock(&user_mutex);
-						word[len] = '\0';
-						char mbword[TEXT_BUFFER_SIZE];
-						wcstombs(mbword, word, TEXT_BUFFER_SIZE);
-						pthread_mutex_lock(&word_mutex);
-						struct word_t *word_s = word_get(mbword);
-						word_s->count++;
-						pthread_mutex_unlock(&word_mutex);
+					if(len >= ircstats_config.wordlen_min) {
+						add_word(user, word, len);
 					}
 					len = 0;
 					*word = '\0';
@@ -100,17 +104,8 @@ static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *
 					*word = '\0';
 				}
 			}
-			if(len) {
-				pthread_mutex_lock(&user_mutex);
-				user->words++;
-				pthread_mutex_unlock(&user_mutex);
-				word[len] = '\0';
-				char mbword[TEXT_BUFFER_SIZE];
-				wcstombs(mbword, word, TEXT_BUFFER_SIZE);
-				pthread_mutex_lock(&word_mutex);
-				struct word_t *word_s = word_get(mbword);
-				word_s->count++;
-				pthread_mutex_unlock(&word_mutex);
+			if(len >= ircstats_config.wordlen_min) {
+				add_word(user, word, len);
 			}
 			continue;
 		}
```