From 3efd96ff79f4f5669c3422a1f592f09176c77121 Mon Sep 17 00:00:00 2001
From: Jon Bergli Heier <snakebite@jvnv.net>
Date: Sat, 15 Aug 2009 18:07:02 +0200
Subject: Added a hash table to keep track of words. Moved the sdbm hash
 function into sdbm.c. Init and free users and words inside the channel loop.
 Increased the size of the user hash table to 1000.

---
 Makefile |  2 +-
 main.c   | 20 +++++++++++++++-----
 sdbm.c   | 10 ++++++++++
 sdbm.h   |  6 ++++++
 user.c   | 10 +---------
 user.h   |  2 +-
 word.c   | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 word.h   | 17 +++++++++++++++++
 8 files changed, 100 insertions(+), 16 deletions(-)
 create mode 100644 sdbm.c
 create mode 100644 sdbm.h
 create mode 100644 word.c
 create mode 100644 word.h

diff --git a/Makefile b/Makefile
index 680b520..f3e182f 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ CFLAGS += $(shell pkg-config --cflags libconfig)
 CFLAGS += $(shell pcre-config --cflags)
 LDFLAGS += $(shell pkg-config --libs libconfig)
 LDFLAGS += $(shell pcre-config --libs)
-OBJECTS = main.o config.o regexset.o channel.o user.o
+OBJECTS = main.o config.o regexset.o channel.o user.o word.o sdbm.o
 TARGET = ircstats
 
 all: $(TARGET)
diff --git a/main.c b/main.c
index 300a424..f5b743b 100644
--- a/main.c
+++ b/main.c
@@ -6,6 +6,7 @@
 #include "regexset.h"
 #include "channel.h"
 #include "user.h"
+#include "word.h"
 
 #define NICK_BUFFER_SIZE 0x100
 #define TEXT_BUFFER_SIZE 0x400
@@ -22,10 +23,11 @@ int main(int argc, char **argv) {
 		channel_free();
 		return 1;
 	}
-	user_init();
 
 	/* Parsing stuff goes here. */
 	for(int chan_i = 0; chan_i < channel_get_count(); chan_i++) {
+		user_init();
+		word_init();
 		struct channel_t *channel = channel_get(chan_i);
 		printf("Channel %s\n", channel->name);
 		struct channel_file_t *file = channel->files;
@@ -59,9 +61,12 @@ int main(int argc, char **argv) {
 					int len = 0;
 					for(char *pos = text; pos < end; pos++) {
 						if(isblank(*pos)) {
-							if(len)
+							if(len) {
 								user->words++;
-							word[len] = '\0';
+								word[len] = '\0';
+								struct word_t *word_s = word_get(word);
+								word_s->count++;
+							}
 							len = 0;
 							*word = '\0';
 						} else if isalpha(*pos) {
@@ -71,8 +76,12 @@ int main(int argc, char **argv) {
 							*word = '\0';
 						}
 					}
-					if(len)
+					if(len) {
 						user->words++;
+						word[len] = '\0';
+						struct word_t *word_s = word_get(word);
+						word_s->count++;
+					}
 					continue;
 				}
 
@@ -88,9 +97,10 @@ int main(int argc, char **argv) {
 
 			file = file->next;
 		}
+		user_free();
+		word_free();
 	}
 
-	user_free();
 	cfg_free();
 	channel_free();
 	rs_free();
diff --git a/sdbm.c b/sdbm.c
new file mode 100644
index 0000000..8c97bd2
--- /dev/null
+++ b/sdbm.c
@@ -0,0 +1,10 @@
+#include "sdbm.h"
+
+/* sdbm string hash: each char of the NUL-terminated str is folded in as hash * 65599 + char. */
+unsigned long sdbm(char *str) {
+	unsigned long hash = 0;
+	for(const char *p = str; *p != '\0'; p++) {
+		hash = *p + (hash << 6) + (hash << 16) - hash;
+	}
+	return hash;
+}
diff --git a/sdbm.h b/sdbm.h
new file mode 100644
index 0000000..5f9228b
--- /dev/null
+++ b/sdbm.h
@@ -0,0 +1,6 @@
+#ifndef _SDBM_H_
+#define _SDBM_H_
+
+unsigned long sdbm(char *str); /* sdbm hash of the NUL-terminated string str; defined in sdbm.c */
+
+#endif
diff --git a/user.c b/user.c
index 064007b..a9e2927 100644
--- a/user.c
+++ b/user.c
@@ -2,6 +2,7 @@
 #include <string.h>
 
 #include "user.h"
+#include "sdbm.h"
 
 struct user_t *users;
 
@@ -10,15 +11,6 @@ void user_init() {
 	memset(users, 0, sizeof(struct user_t) * USERS_MAX);
 }
 
-static unsigned long sdbm(char *str) {
-	unsigned long hash = 0;
-	int c;
-	while(c = *str++) {
-		hash = c + (hash << 6) + (hash << 16) - hash;
-	}
-	return hash;
-}
-
 struct user_t *user_get(char *nick) {
 	unsigned long hash = sdbm(nick);
 	int index = hash % USERS_MAX;
diff --git a/user.h b/user.h
index b0e602b..0bf3686 100644
--- a/user.h
+++ b/user.h
@@ -1,7 +1,7 @@
 #ifndef _USER_H_
 #define _USER_H_
 
-#define USERS_MAX 100
+#define USERS_MAX 1000
 
 struct user_t {
 	unsigned long hash;
diff --git a/word.c b/word.c
new file mode 100644
index 0000000..bf1fada
--- /dev/null
+++ b/word.c
@@ -0,0 +1,49 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "word.h"
+#include "sdbm.h"
+
+struct word_t *words;
+
+void word_init() { /* allocate the zero-filled bucket array; a NULL name marks an unused bucket */
+	words = calloc(WORDS_MAX, sizeof *words);
+	if(!words) abort(); /* out of memory: word_get() would dereference NULL otherwise */
+}
+
+/* Return the counter entry for name, creating it with count 0 on first use; never NULL. */
+struct word_t *word_get(char *name) {
+	unsigned long hash = sdbm(name);
+	struct word_t *word = &words[hash % WORDS_MAX];
+
+	/* Walk the bucket chain; the hash is compared first so strcmp only runs on hash matches. */
+	while(word->name && (word->hash != hash || strcmp(name, word->name) != 0)) {
+		if(!word->next) { /* end of chain, no match: append a blank node to claim below */
+			word->next = calloc(1, sizeof *word);
+			if(!word->next) abort(); /* out of memory */
+		}
+		word = word->next;
+	}
+	if(!word->name) { /* unused bucket head or freshly appended node */
+		word->hash = hash;
+		word->name = strdup(name);
+		if(!word->name) abort(); /* out of memory; a NULL name would read as an unused node */
+		word->count = 0;
+		word->next = NULL;
+	}
+	return word;
+}
+
+/* Release every stored word name, every chained node, and finally the bucket array. */
+void word_free() {
+	for(int i = 0; i < WORDS_MAX; i++) {
+		free(words[i].name); /* bucket heads live in the array: free only their name */
+		for(struct word_t *word = words[i].next, *temp; word; word = temp) {
+			temp = word->next; /* saved before the node is released */
+			free(word->name);
+			free(word);
+		}
+	}
+	free(words);
+	words = NULL; /* do not leave a dangling pointer behind */
+}
diff --git a/word.h b/word.h
new file mode 100644
index 0000000..b0c5d15
--- /dev/null
+++ b/word.h
@@ -0,0 +1,17 @@
+#ifndef _WORD_H_
+#define _WORD_H_
+
+#define WORDS_MAX 10000 /* number of hash buckets; colliding words are chained, so this is not a cap on words */
+
+struct word_t {
+	unsigned long hash;	/* sdbm() hash of name */
+	char *name;		/* heap copy of the word (strdup); NULL while the slot is unused */
+	unsigned long count;	/* occurrences; word_get() starts it at 0, callers increment it */
+	struct word_t *next;	/* next entry chained in the same bucket, or NULL */
+};
+
+void word_init(); /* allocate the zeroed table of WORDS_MAX buckets */
+struct word_t *word_get(char *name); /* find the entry for name, adding it with count 0 if absent */
+void word_free(); /* free the chained entries and the table itself */
+
+#endif
-- 
cgit v1.2.3