summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config.c4
-rw-r--r--config.h2
-rw-r--r--parsing.c39
3 files changed, 22 insertions, 23 deletions
diff --git a/config.c b/config.c
index 0ae6beb..2cb6aa6 100644
--- a/config.c
+++ b/config.c
@@ -35,6 +35,10 @@ int cfg_init() {
ircstats_config.monolog_min = 5;
}
+ if(!config_lookup_int(&config, "wordlen_min", &ircstats_config.wordlen_min)) {
+ ircstats_config.wordlen_min = 3;
+ }
+
config_setting_t *regexes_setting = config_lookup(&config, "regexes");
if(!config_setting_is_aggregate(regexes_setting)) {
fprintf(stderr, "Setting \"regexes\" must be an aggregate type.\n");
diff --git a/config.h b/config.h
index 3b42ee8..775d7eb 100644
--- a/config.h
+++ b/config.h
@@ -5,7 +5,7 @@ int cfg_init();
void cfg_free();
struct ircstats_config_t {
- long int threads, monolog_min;
+ long int threads, monolog_min, wordlen_min;
};
extern struct ircstats_config_t ircstats_config;
diff --git a/parsing.c b/parsing.c
index 1172cd1..4ef3fb7 100644
--- a/parsing.c
+++ b/parsing.c
@@ -21,6 +21,19 @@ static pthread_mutex_t user_mutex, word_mutex, channel_mutex;
static struct user_t *last_user = NULL;
static int in_monolog = 0, monolog_len = 0;
+static inline void add_word(struct user_t *user, wchar_t *word, int len) {
+ pthread_mutex_lock(&user_mutex);
+ user->words++;
+ pthread_mutex_unlock(&user_mutex);
+ word[len] = '\0';
+ char mbword[TEXT_BUFFER_SIZE];
+ wcstombs(mbword, word, TEXT_BUFFER_SIZE);
+ pthread_mutex_lock(&word_mutex);
+ struct word_t *word_s = word_get(mbword);
+ word_s->count++;
+ pthread_mutex_unlock(&word_mutex);
+}
+
static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *rs) {
char line[LINE_BUFFER_SIZE];
@@ -79,17 +92,8 @@ static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *
int len = 0;
for(wchar_t *pos = wtext; pos < end; pos++) {
if(iswblank(*pos)) {
- if(len) {
- pthread_mutex_lock(&user_mutex);
- user->words++;
- pthread_mutex_unlock(&user_mutex);
- word[len] = '\0';
- char mbword[TEXT_BUFFER_SIZE];
- wcstombs(mbword, word, TEXT_BUFFER_SIZE);
- pthread_mutex_lock(&word_mutex);
- struct word_t *word_s = word_get(mbword);
- word_s->count++;
- pthread_mutex_unlock(&word_mutex);
+ if(len >= ircstats_config.wordlen_min) {
+ add_word(user, word, len);
}
len = 0;
*word = '\0';
@@ -100,17 +104,8 @@ static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *
*word = '\0';
}
}
- if(len) {
- pthread_mutex_lock(&user_mutex);
- user->words++;
- pthread_mutex_unlock(&user_mutex);
- word[len] = '\0';
- char mbword[TEXT_BUFFER_SIZE];
- wcstombs(mbword, word, TEXT_BUFFER_SIZE);
- pthread_mutex_lock(&word_mutex);
- struct word_t *word_s = word_get(mbword);
- word_s->count++;
- pthread_mutex_unlock(&word_mutex);
+ if(len >= ircstats_config.wordlen_min) {
+ add_word(user, word, len);
}
continue;
}