From f7fbbc5428d601833168b1d6f60d7ef344f5cc88 Mon Sep 17 00:00:00 2001 From: Jon Bergli Heier Date: Sat, 5 Sep 2009 15:28:08 +0200 Subject: Add locale and multibyte support. Set LC_CTYPE to support locales. Use wchar_t to handle multibyte strings. Generate character tables for PCRE to support locales. --- main.c | 27 +++++++++++++++++++-------- nick.c | 5 ++++- regexset.c | 8 ++++++-- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/main.c b/main.c index ee23264..6bed126 100644 --- a/main.c +++ b/main.c @@ -1,7 +1,9 @@ #include #include #include -#include <ctype.h> +#include <locale.h> +#include <wchar.h> +#include <wctype.h> #include "config.h" #include "regexset.h" @@ -17,6 +19,9 @@ #define TIME_BUFFER_SIZE 0xf int main(int argc, char **argv) { + /* Set locale. */ + setlocale(LC_CTYPE, ""); + /* Regex sets must be initialized before config. */ rs_init(); channel_init(); @@ -70,21 +75,25 @@ int main(int argc, char **argv) { channel->hours[time_i]++; /* Count words. */ - char word[TEXT_BUFFER_SIZE]; - char *end = strchr(text, '\0'); + wchar_t wtext[TEXT_BUFFER_SIZE]; + mbstowcs(wtext, text, TEXT_BUFFER_SIZE); + wchar_t word[TEXT_BUFFER_SIZE]; + wchar_t *end = wcschr(wtext, '\0'); *word = '\0'; int len = 0; - for(char *pos = text; pos < end; pos++) { - if(isblank(*pos)) { + for(wchar_t *pos = wtext; pos < end; pos++) { + if(iswblank(*pos)) { if(len) { user->words++; word[len] = '\0'; - struct word_t *word_s = word_get(word); + char mbword[TEXT_BUFFER_SIZE]; + wcstombs(mbword, word, TEXT_BUFFER_SIZE); + struct word_t *word_s = word_get(mbword); word_s->count++; } len = 0; *word = '\0'; - } else if isalpha(*pos) { + } else if(iswalpha(*pos)) { word[len++] = *pos; } else { len = 0; @@ -94,7 +103,9 @@ int main(int argc, char **argv) { if(len) { user->words++; word[len] = '\0'; - struct word_t *word_s = word_get(word); + char mbword[TEXT_BUFFER_SIZE]; + wcstombs(mbword, word, TEXT_BUFFER_SIZE); + struct word_t *word_s = word_get(mbword); word_s->count++; } continue; diff --git a/nick.c b/nick.c 
index 54a0b2f..181a3a1 100644 --- a/nick.c +++ b/nick.c @@ -6,9 +6,11 @@ #include "nick.h" struct nick_t *nicks; +const unsigned char *nick_pcre_tables; void nick_init() { nicks = NULL; + nick_pcre_tables = pcre_maketables(); } struct nick_t *nick_add(const char *name) { @@ -43,7 +45,7 @@ int nick_regex_add(struct nick_t *nick, const char *re_s) { const char *error; int erroffset; - nre->re = pcre_compile(re_s, 0, &error, &erroffset, NULL); + nre->re = pcre_compile(re_s, 0, &error, &erroffset, nick_pcre_tables); if(!nre->re) { /* Copied from re_error in regexset.c */ fprintf(stderr, "Nick RE failed to compile: %s\n", error); @@ -102,4 +104,5 @@ void nick_free() { nick = next; } free(nicks); + pcre_free((void*)nick_pcre_tables); } diff --git a/regexset.c b/regexset.c index c816217..5e2d002 100644 --- a/regexset.c +++ b/regexset.c @@ -7,10 +7,12 @@ struct regexset_t *regexes; int rs_count; +const unsigned char *rs_pcre_tables; void rs_init() { regexes = NULL; rs_count = 0; + rs_pcre_tables = pcre_maketables(); } static void re_error(const char *name, const char *pattern, const char *error, int erroffset) { @@ -34,7 +36,7 @@ struct regexset_t *rs_add(const char *text, const char *join) { const char *error; int erroffset; - rs->text = pcre_compile(text, 0, &error, &erroffset, NULL); + rs->text = pcre_compile(text, 0, &error, &erroffset, rs_pcre_tables); if(rs->text == NULL) { re_error("text", text, error, erroffset); return NULL; @@ -43,7 +45,7 @@ struct regexset_t *rs_add(const char *text, const char *join) { /* Free compiled patterns from this point when failing. */ - rs->join = pcre_compile(join, 0, &error, &erroffset, NULL); + rs->join = pcre_compile(join, 0, &error, &erroffset, rs_pcre_tables); if(rs->join == NULL) { pcre_free(rs->text); if(rs->text_e) @@ -71,4 +73,6 @@ void rs_free() { } free(regexes); regexes = NULL; + pcre_free((void*)rs_pcre_tables); + rs_pcre_tables = NULL; } -- cgit v1.2.3