summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jon Bergli Heier <snakebite@jvnv.net>, 2009-09-05 15:28:08 +0200
committer: Jon Bergli Heier <snakebite@jvnv.net>, 2009-09-05 15:28:08 +0200
commit: f7fbbc5428d601833168b1d6f60d7ef344f5cc88 (patch)
tree: e2ee9735d7af3e6d86b575f2f9b332804c941e71
parent: 4e80effe0ec3edcef9e2f240864eacab9cdbb035 (diff)
Add locale and multibyte support.
Set LC_CTYPE to support locales. Use wchar_t to handle multibyte strings. Generate character tables for PCRE to support locales.
-rw-r--r-- main.c 27
-rw-r--r-- nick.c 5
-rw-r--r-- regexset.c 8
3 files changed, 29 insertions, 11 deletions
diff --git a/main.c b/main.c
index ee23264..6bed126 100644
--- a/main.c
+++ b/main.c
@@ -1,7 +1,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <ctype.h>
+#include <wctype.h>
+#include <wchar.h>
+#include <locale.h>
#include "config.h"
#include "regexset.h"
@@ -17,6 +19,9 @@
#define TIME_BUFFER_SIZE 0xf
int main(int argc, char **argv) {
+ /* Set locale. */
+ setlocale(LC_CTYPE, "");
+
/* Regex sets must be initialized before config. */
rs_init();
channel_init();
@@ -70,21 +75,25 @@ int main(int argc, char **argv) {
channel->hours[time_i]++;
/* Count words. */
- char word[TEXT_BUFFER_SIZE];
- char *end = strchr(text, '\0');
+ wchar_t wtext[TEXT_BUFFER_SIZE];
+ mbstowcs(wtext, text, TEXT_BUFFER_SIZE);
+ wchar_t word[TEXT_BUFFER_SIZE];
+ wchar_t *end = wcschr(wtext, '\0');
*word = '\0';
int len = 0;
- for(char *pos = text; pos < end; pos++) {
- if(isblank(*pos)) {
+ for(wchar_t *pos = wtext; pos < end; pos++) {
+ if(iswblank(*pos)) {
if(len) {
user->words++;
word[len] = '\0';
- struct word_t *word_s = word_get(word);
+ char mbword[TEXT_BUFFER_SIZE];
+ wcstombs(mbword, word, TEXT_BUFFER_SIZE);
+ struct word_t *word_s = word_get(mbword);
word_s->count++;
}
len = 0;
*word = '\0';
- } else if isalpha(*pos) {
+ } else if(iswalpha(*pos)) {
word[len++] = *pos;
} else {
len = 0;
@@ -94,7 +103,9 @@ int main(int argc, char **argv) {
if(len) {
user->words++;
word[len] = '\0';
- struct word_t *word_s = word_get(word);
+ char mbword[TEXT_BUFFER_SIZE];
+ wcstombs(mbword, word, TEXT_BUFFER_SIZE);
+ struct word_t *word_s = word_get(mbword);
word_s->count++;
}
continue;
diff --git a/nick.c b/nick.c
index 54a0b2f..181a3a1 100644
--- a/nick.c
+++ b/nick.c
@@ -6,9 +6,11 @@
#include "nick.h"
struct nick_t *nicks;
+const unsigned char *nick_pcre_tables;
void nick_init() {
nicks = NULL;
+ nick_pcre_tables = pcre_maketables();
}
struct nick_t *nick_add(const char *name) {
@@ -43,7 +45,7 @@ int nick_regex_add(struct nick_t *nick, const char *re_s) {
const char *error;
int erroffset;
- nre->re = pcre_compile(re_s, 0, &error, &erroffset, NULL);
+ nre->re = pcre_compile(re_s, 0, &error, &erroffset, nick_pcre_tables);
if(!nre->re) {
/* Copied from re_error in regexset.c */
fprintf(stderr, "Nick RE failed to compile: %s\n", error);
@@ -102,4 +104,5 @@ void nick_free() {
nick = next;
}
free(nicks);
+ pcre_free((void*)nick_pcre_tables);
}
diff --git a/regexset.c b/regexset.c
index c816217..5e2d002 100644
--- a/regexset.c
+++ b/regexset.c
@@ -7,10 +7,12 @@
struct regexset_t *regexes;
int rs_count;
+const unsigned char *rs_pcre_tables;
void rs_init() {
regexes = NULL;
rs_count = 0;
+ rs_pcre_tables = pcre_maketables();
}
static void re_error(const char *name, const char *pattern, const char *error, int erroffset) {
@@ -34,7 +36,7 @@ struct regexset_t *rs_add(const char *text, const char *join) {
const char *error;
int erroffset;
- rs->text = pcre_compile(text, 0, &error, &erroffset, NULL);
+ rs->text = pcre_compile(text, 0, &error, &erroffset, rs_pcre_tables);
if(rs->text == NULL) {
re_error("text", text, error, erroffset);
return NULL;
@@ -43,7 +45,7 @@ struct regexset_t *rs_add(const char *text, const char *join) {
/* Free compiled patterns from this point when failing. */
- rs->join = pcre_compile(join, 0, &error, &erroffset, NULL);
+ rs->join = pcre_compile(join, 0, &error, &erroffset, rs_pcre_tables);
if(rs->join == NULL) {
pcre_free(rs->text);
if(rs->text_e)
@@ -71,4 +73,6 @@ void rs_free() {
}
free(regexes);
regexes = NULL;
+ pcre_free((void*)rs_pcre_tables);
+ rs_pcre_tables = NULL;
}