diff options
author | Jon Bergli Heier <snakebite@jvnv.net> | 2009-09-05 15:28:08 +0200 |
---|---|---|
committer | Jon Bergli Heier <snakebite@jvnv.net> | 2009-09-05 15:28:08 +0200 |
commit | f7fbbc5428d601833168b1d6f60d7ef344f5cc88 (patch) | |
tree | e2ee9735d7af3e6d86b575f2f9b332804c941e71 | |
parent | 4e80effe0ec3edcef9e2f240864eacab9cdbb035 (diff) |
Add locale and multibyte support.
Set LC_CTYPE to support locales.
Use wchar_t to handle multibyte strings.
Generate character tables for PCRE to support locales.
-rw-r--r-- | main.c | 27 |
-rw-r--r-- | nick.c | 5 |
-rw-r--r-- | regexset.c | 8 |
3 files changed, 29 insertions, 11 deletions
@@ -1,7 +1,9 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <ctype.h> +#include <wctype.h> +#include <wchar.h> +#include <locale.h> #include "config.h" #include "regexset.h" @@ -17,6 +19,9 @@ #define TIME_BUFFER_SIZE 0xf int main(int argc, char **argv) { + /* Set locale. */ + setlocale(LC_CTYPE, ""); + /* Regex sets must be initialized before config. */ rs_init(); channel_init(); @@ -70,21 +75,25 @@ int main(int argc, char **argv) { channel->hours[time_i]++; /* Count words. */ - char word[TEXT_BUFFER_SIZE]; - char *end = strchr(text, '\0'); + wchar_t wtext[TEXT_BUFFER_SIZE]; + mbstowcs(wtext, text, TEXT_BUFFER_SIZE); + wchar_t word[TEXT_BUFFER_SIZE]; + wchar_t *end = wcschr(wtext, '\0'); *word = '\0'; int len = 0; - for(char *pos = text; pos < end; pos++) { - if(isblank(*pos)) { + for(wchar_t *pos = wtext; pos < end; pos++) { + if(iswblank(*pos)) { if(len) { user->words++; word[len] = '\0'; - struct word_t *word_s = word_get(word); + char mbword[TEXT_BUFFER_SIZE]; + wcstombs(mbword, word, TEXT_BUFFER_SIZE); + struct word_t *word_s = word_get(mbword); word_s->count++; } len = 0; *word = '\0'; - } else if isalpha(*pos) { + } else if(iswalpha(*pos)) { word[len++] = *pos; } else { len = 0; @@ -94,7 +103,9 @@ int main(int argc, char **argv) { if(len) { user->words++; word[len] = '\0'; - struct word_t *word_s = word_get(word); + char mbword[TEXT_BUFFER_SIZE]; + wcstombs(mbword, word, TEXT_BUFFER_SIZE); + struct word_t *word_s = word_get(mbword); word_s->count++; } continue; @@ -6,9 +6,11 @@ #include "nick.h" struct nick_t *nicks; +const unsigned char *nick_pcre_tables; void nick_init() { nicks = NULL; + nick_pcre_tables = pcre_maketables(); } struct nick_t *nick_add(const char *name) { @@ -43,7 +45,7 @@ int nick_regex_add(struct nick_t *nick, const char *re_s) { const char *error; int erroffset; - nre->re = pcre_compile(re_s, 0, &error, &erroffset, NULL); + nre->re = pcre_compile(re_s, 0, &error, &erroffset, nick_pcre_tables); if(!nre->re) { /* 
Copied from re_error in regexset.c */ fprintf(stderr, "Nick RE failed to compile: %s\n", error); @@ -102,4 +104,5 @@ void nick_free() { nick = next; } free(nicks); + pcre_free((void*)nick_pcre_tables); } @@ -7,10 +7,12 @@ struct regexset_t *regexes; int rs_count; +const unsigned char *rs_pcre_tables; void rs_init() { regexes = NULL; rs_count = 0; + rs_pcre_tables = pcre_maketables(); } static void re_error(const char *name, const char *pattern, const char *error, int erroffset) { @@ -34,7 +36,7 @@ struct regexset_t *rs_add(const char *text, const char *join) { const char *error; int erroffset; - rs->text = pcre_compile(text, 0, &error, &erroffset, NULL); + rs->text = pcre_compile(text, 0, &error, &erroffset, rs_pcre_tables); if(rs->text == NULL) { re_error("text", text, error, erroffset); return NULL; @@ -43,7 +45,7 @@ struct regexset_t *rs_add(const char *text, const char *join) { /* Free compiled patterns from this point when failing. */ - rs->join = pcre_compile(join, 0, &error, &erroffset, NULL); + rs->join = pcre_compile(join, 0, &error, &erroffset, rs_pcre_tables); if(rs->join == NULL) { pcre_free(rs->text); if(rs->text_e) @@ -71,4 +73,6 @@ void rs_free() { } free(regexes); regexes = NULL; + pcre_free((void*)rs_pcre_tables); + rs_pcre_tables = NULL; } |