1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "config.h"
#include "regexset.h"
#include "channel.h"
#include "user.h"
#include "word.h"
#include "export_xml.h"
#include "nick.h"
#define NICK_BUFFER_SIZE 0x100
#define TEXT_BUFFER_SIZE 0x400
#define LINE_BUFFER_SIZE 0x400
#define TIME_BUFFER_SIZE 0xf
int main(int argc, char **argv) {
/* Regex sets must be initialized before config. */
rs_init();
channel_init();
nick_init();
if(!cfg_init()) {
/* Free any registered regex sets and channels when config fails.
Config will fail if a regex set fails to compile all parts. */
rs_free();
channel_free();
return 1;
}
/* Parsing stuff goes here. */
for(int chan_i = 0; chan_i < channel_get_count(); chan_i++) {
user_init();
word_init();
struct channel_t *channel = channel_get(chan_i);
printf("Channel %s\n", channel->name);
struct channel_file_t *file = channel->files;
while(file) {
struct regexset_t *rs = file->rs;
FILE *f = fopen(file->path, "r");
if(!f) {
fprintf(stderr, "\tFailed to open %s\n", file->path);
file = file->next;
continue;
} else
printf("\tParsing %s\n", file->path);
char line[LINE_BUFFER_SIZE];
while(fgets(line, LINE_BUFFER_SIZE, f)) {
int rc;
int ovector[30];
rc = pcre_exec(rs->text, rs->text_e, line, strlen(line), 0, 0, ovector, 30);
if(rc > 0) {
char nick[NICK_BUFFER_SIZE], text[TEXT_BUFFER_SIZE], hour_s[TIME_BUFFER_SIZE], min_s[TIME_BUFFER_SIZE];
pcre_copy_named_substring(rs->text, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE);
pcre_copy_named_substring(rs->text, line, ovector, rc, "text", text, TEXT_BUFFER_SIZE);
pcre_copy_named_substring(rs->text, line, ovector, rc, "hour", hour_s, TIME_BUFFER_SIZE);
pcre_copy_named_substring(rs->text, line, ovector, rc, "minute", min_s, TIME_BUFFER_SIZE);
struct user_t *user = user_get(nick);
/* Calculate array index for lines. */
int hour, min, time_i;
hour = atoi(hour_s);
min = atoi(min_s);
time_i = hour*4 + min / 15;
user->lines[time_i]++;
channel->hours[time_i]++;
/* Count words. */
char word[TEXT_BUFFER_SIZE];
char *end = strchr(text, '\0');
*word = '\0';
int len = 0;
for(char *pos = text; pos < end; pos++) {
if(isblank(*pos)) {
if(len) {
user->words++;
word[len] = '\0';
struct word_t *word_s = word_get(word);
word_s->count++;
}
len = 0;
*word = '\0';
} else if isalpha(*pos) {
word[len++] = *pos;
} else {
len = 0;
*word = '\0';
}
}
if(len) {
user->words++;
word[len] = '\0';
struct word_t *word_s = word_get(word);
word_s->count++;
}
continue;
}
rc = pcre_exec(rs->join, rs->join_e, line, strlen(line), 0, 0, ovector, 30);
if(rc > 0) {
char nick[NICK_BUFFER_SIZE];
pcre_copy_named_substring(rs->join, line, ovector, rc, "nick", nick, NICK_BUFFER_SIZE);
struct user_t *user = user_get(nick);
continue;
}
}
fclose(f);
file = file->next;
}
export_xml(channel, users);
user_free();
word_free();
}
nick_free();
cfg_free();
channel_free();
rs_free();
return 0;
}
|