summary | refs | log | tree | commit | diff
path: root/parsing.c
diff options
context:
space:
mode:
author: Jon Bergli Heier <snakebite@jvnv.net>, 2010-06-02 17:52:32 +0200
committer: Jon Bergli Heier <snakebite@jvnv.net>, 2010-06-02 17:52:32 +0200
commit: a34ddc5dd68c4530c91853507868684a9bc4b45e (patch)
tree: dc50334c6a8181047bf9ba0a012c3aac93719c3b /parsing.c
parent: 6dd916eaedfd23a93ba7aa2e2b39511e7e907dec (diff)
Added a minimum word length config option, "wordlen_min".
Also did some minor cleanup of the word add code in process_file.
Diffstat (limited to 'parsing.c')
-rw-r--r-- parsing.c 39
1 files changed, 17 insertions, 22 deletions
diff --git a/parsing.c b/parsing.c
index 1172cd1..4ef3fb7 100644
--- a/parsing.c
+++ b/parsing.c
@@ -21,6 +21,19 @@ static pthread_mutex_t user_mutex, word_mutex, channel_mutex;
static struct user_t *last_user = NULL;
static int in_monolog = 0, monolog_len = 0;
+/*
+ * Record one completed word spoken by `user`.
+ *
+ * Increments user->words under user_mutex, NUL-terminates `word` at index
+ * `len` (so `word` must have room for len + 1 wide characters and is
+ * modified in place), converts it to a multibyte string and increments the
+ * count of the entry returned by word_get() under word_mutex.
+ *
+ * If the word cannot be converted in the current locale it still counts
+ * towards the user's word total, but no word entry is touched.
+ */
+static inline void add_word(struct user_t *user, wchar_t *word, int len) {
+	pthread_mutex_lock(&user_mutex);
+	user->words++;
+	pthread_mutex_unlock(&user_mutex);
+
+	word[len] = L'\0';
+
+	/*
+	 * Convert into all but the last byte: wcstombs() writes no terminator
+	 * when it fills the whole span it was given, so one byte is kept back
+	 * to guarantee termination without splitting a multibyte character.
+	 */
+	char mbword[TEXT_BUFFER_SIZE];
+	size_t converted = wcstombs(mbword, word, sizeof mbword - 1);
+	if(converted == (size_t)-1) {
+		/* Unconvertible character: mbword's contents are unspecified. */
+		return;
+	}
+	mbword[converted] = '\0';
+
+	pthread_mutex_lock(&word_mutex);
+	struct word_t *word_s = word_get(mbword);
+	/* NOTE(review): word_get()'s failure mode is not visible here; guard defensively. */
+	if(word_s) {
+		word_s->count++;
+	}
+	pthread_mutex_unlock(&word_mutex);
+}
+
+
static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *rs) {
char line[LINE_BUFFER_SIZE];
@@ -79,17 +92,8 @@ static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *
int len = 0;
for(wchar_t *pos = wtext; pos < end; pos++) {
if(iswblank(*pos)) {
- if(len) {
- pthread_mutex_lock(&user_mutex);
- user->words++;
- pthread_mutex_unlock(&user_mutex);
- word[len] = '\0';
- char mbword[TEXT_BUFFER_SIZE];
- wcstombs(mbword, word, TEXT_BUFFER_SIZE);
- pthread_mutex_lock(&word_mutex);
- struct word_t *word_s = word_get(mbword);
- word_s->count++;
- pthread_mutex_unlock(&word_mutex);
+ if(len >= ircstats_config.wordlen_min) {
+ add_word(user, word, len);
}
len = 0;
*word = '\0';
@@ -100,17 +104,8 @@ static void process_file(FILE *f, struct channel_t *channel, struct regexset_t *
*word = '\0';
}
}
- if(len) {
- pthread_mutex_lock(&user_mutex);
- user->words++;
- pthread_mutex_unlock(&user_mutex);
- word[len] = '\0';
- char mbword[TEXT_BUFFER_SIZE];
- wcstombs(mbword, word, TEXT_BUFFER_SIZE);
- pthread_mutex_lock(&word_mutex);
- struct word_t *word_s = word_get(mbword);
- word_s->count++;
- pthread_mutex_unlock(&word_mutex);
+ if(len >= ircstats_config.wordlen_min) {
+ add_word(user, word, len);
}
continue;
}