From f7fbbc5428d601833168b1d6f60d7ef344f5cc88 Mon Sep 17 00:00:00 2001
From: Jon Bergli Heier
Date: Sat, 5 Sep 2009 15:28:08 +0200
Subject: Add locale and multibyte support.

Set LC_CTYPE to support locales.
Use wchar_t to handle multibyte strings.
Generate character tables for PCRE to support locales.
---
 main.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

(limited to 'main.c')

diff --git a/main.c b/main.c
index ee23264..6bed126 100644
--- a/main.c
+++ b/main.c
@@ -1,7 +1,9 @@
 #include
 #include
 #include
-#include
+#include
+#include
+#include
 
 #include "config.h"
 #include "regexset.h"
@@ -17,6 +19,9 @@
 #define TIME_BUFFER_SIZE 0xf
 
 int main(int argc, char **argv) {
+	/* Set locale. */
+	setlocale(LC_CTYPE, "");
+
 	/* Regex sets must be initialized before config. */
 	rs_init();
 	channel_init();
@@ -70,21 +75,25 @@ int main(int argc, char **argv) {
 			channel->hours[time_i]++;
 
 			/* Count words. */
-			char word[TEXT_BUFFER_SIZE];
-			char *end = strchr(text, '\0');
+			wchar_t wtext[TEXT_BUFFER_SIZE];
+			mbstowcs(wtext, text, TEXT_BUFFER_SIZE);
+			wchar_t word[TEXT_BUFFER_SIZE];
+			wchar_t *end = wcschr(wtext, '\0');
 			*word = '\0';
 			int len = 0;
-			for(char *pos = text; pos < end; pos++) {
-				if(isblank(*pos)) {
+			for(wchar_t *pos = wtext; pos < end; pos++) {
+				if(iswblank(*pos)) {
 					if(len) {
 						user->words++;
 						word[len] = '\0';
-						struct word_t *word_s = word_get(word);
+						char mbword[TEXT_BUFFER_SIZE];
+						wcstombs(mbword, word, TEXT_BUFFER_SIZE);
+						struct word_t *word_s = word_get(mbword);
 						word_s->count++;
 					}
 					len = 0;
 					*word = '\0';
-				} else if isalpha(*pos) {
+				} else if(iswalpha(*pos)) {
 					word[len++] = *pos;
 				} else {
 					len = 0;
@@ -94,7 +103,9 @@ int main(int argc, char **argv) {
 			if(len) {
 				user->words++;
 				word[len] = '\0';
-				struct word_t *word_s = word_get(word);
+				char mbword[TEXT_BUFFER_SIZE];
+				wcstombs(mbword, word, TEXT_BUFFER_SIZE);
+				struct word_t *word_s = word_get(mbword);
 				word_s->count++;
 			}
 			continue;
-- 
cgit v1.2.3