Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
freebsd
GitHub Repository: freebsd/freebsd-ports-gnome
Path: blob/main/japanese/awffull/files/awffull-ja.diff
16135 views
1
--- src/awffull.c.orig 2008-12-13 11:28:35.000000000 +0900
2
+++ src/awffull.c 2008-12-31 16:43:45.000000000 +0900
3
@@ -37,6 +37,9 @@
4
/* STANDARD INCLUDES */
5
/*********************************************/
6
#include "awffull.h" /* main header */
7
+#ifdef HAVE_ICONV
8
+#include <iconv.h>
9
+#endif
10
11
/* internal function prototypes */
12
13
@@ -137,6 +140,10 @@ static char const ab_month_name[][4] = {
14
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
15
};
16
17
+#ifdef HAVE_ICONV
18
+iconv_t cd_from_sjis, cd_from_eucj;
19
+#endif
20
+
21
/*********************************************/
22
/* MAIN - start here */
23
/*********************************************/
24
@@ -339,6 +346,11 @@ main(int argc, char *argv[])
25
26
start_time = times(&mytms);
27
28
+#ifdef HAVE_ICONV
29
+ cd_from_sjis = iconv_open("UTF-8", "Shift_JIS");
30
+ cd_from_eucj = iconv_open("UTF-8", "EUC-JP");
31
+#endif
32
+
33
/*********************************************
34
* MAIN PROCESS LOOP - read through log file *
35
*********************************************/
36
@@ -801,9 +813,17 @@ main(int argc, char *argv[])
37
}
38
39
del_htabs();
40
+#ifdef HAVE_ICONV
41
+ iconv_close(cd_from_sjis);
42
+ iconv_close(cd_from_eucj);
43
+#endif
44
/* Whew, all done! Exit with completion status (0) */
45
exit(0);
46
} else {
47
+#ifdef HAVE_ICONV
48
+ iconv_close(cd_from_sjis);
49
+ iconv_close(cd_from_eucj);
50
+#endif
51
/* No valid records found... exit with error (1) */
52
VPRINT(VERBOSE1, "%s\n", _("No valid records found!"));
53
exit(1);
54
@@ -1740,6 +1760,26 @@ unescape(char *str)
55
if (!str)
56
return NULL; /* make sure strings valid */
57
58
+ /* First pass: decode Apache log escapes ("\xHH" -> byte, "\\" -> "\"); decoded values below 32 or equal to 127 become '_'. */
59
+ while (*cp1) {
60
+ if (*cp1 == '\\' && *(cp1 + 1) == 'x' &&
61
+ isxdigit(*(cp1 + 2)) && isxdigit(*(cp1 + 3))) {
62
+ *cp2 = from_hex(*(cp1 + 2)) * 16 + from_hex(*(cp1 + 3));
63
+ if ((*cp2 < 32) || (*cp2 == 127))
64
+ *cp2 = '_';
65
+ cp1 += 4;
66
+ cp2++;
67
+ } else if (*cp1 == '\\' && *(cp1 + 1) == '\\') {
68
+ *cp2 = '\\';
69
+ cp1 += 2;
70
+ cp2++;
71
+ } else {
72
+ *cp2++ = *cp1++;
73
+ }
74
+ }
75
+ *cp2 = *cp1;
76
+ cp1 = cp2 = str;
77
+
78
while (*cp1) {
79
if (*cp1 == '%') { /* Found an escape? */
80
cp1++;
81
@@ -1762,16 +1802,139 @@ unescape(char *str)
82
return str; /* return the string */
83
}
84
85
+#ifdef HAVE_ICONV
86
+
87
+/*********************************************/
88
+/* SCORE_XXX - score fit to a text encoding  */
89
+/*********************************************/
90
+
91
+int score_eucj(unsigned char *str) // Rate str as EUC-JP: +1 per printable ASCII byte, +2 per two-byte character; returns -1 if str is NULL or any byte is invalid.
92
+{
93
+ int stat=0; // 0 = at a character boundary, 1 = second byte of a two-byte character expected, 2 = byte after the 0x8e kana prefix expected
94
+ int score=0; // running total; replaced by -1 at the end if bad is set
95
+ int bad=0; // set once any byte breaks the expected pattern; scanning still runs to the end of the string
96
+ if(str==NULL) return -1;
97
+
98
+ for(; *str!=0;str++){
99
+ switch(stat){
100
+ case 0:
101
+ if(*str>= 0x20 && *str <= 0x7e) score++; // printable ASCII: +1
102
+ else if(*str >= 0xa1 && *str <= 0xfe) stat=1; // first byte of a two-byte kanji
103
+ else if(*str == 0x8f); // supplementary-kanji prefix: accepted, state and score unchanged
104
+ else if(*str == 0x8e) stat=2; // kana prefix: one kana byte must follow
105
+ else if(*str < 0x20); // control byte: accepted, adds nothing
106
+ else bad=1; // any other byte is invalid at a character boundary
107
+ break;
108
+ case 1:
109
+ if(*str >= 0xa1 && *str <= 0xfe) score += 2; // second byte of a two-byte kanji: +2 for the pair
110
+ else bad=1;
111
+ stat=0; // back to a character boundary whether or not the byte was valid
112
+ break;
113
+ case 2:
114
+ if(*str >= 0xa1 && *str <= 0xdf); // half-width kana byte: valid but adds 0 to the score
115
+ else bad=1;
116
+ stat=0;
117
+ break;
118
+ }
119
+ }
120
+ if(bad != 0) score = -1; // NOTE(review): stat is not checked here, so a string that ends mid-character is not marked bad - confirm intended
121
+ return score;
122
+}
123
+
124
+int score_sjis(unsigned char *str) // Rate str as Shift_JIS: +1 per printable ASCII byte, +2 per two-byte character; returns -1 if str is NULL or any byte is invalid.
125
+{
126
+ int stat=0; // 0 = at a character boundary, 1 = second byte of a two-byte character expected
127
+ int score=0; // running total; replaced by -1 at the end if bad is set
128
+ int bad=0; // set once any byte breaks the expected pattern; scanning still runs to the end of the string
129
+ if(str==NULL) return -1;
130
+
131
+ for(; *str != 0; str++){
132
+ switch(stat){
133
+ case 0:
134
+ if(*str>= 0x20 && *str <= 0x7e) score++;// printable ASCII: +1
135
+ else if((*str >= 0x81 && *str <= 0x9f) ||
136
+ (*str >= 0xe0 && *str <= 0xfc)) stat=1; // first byte of a two-byte Shift_JIS character
137
+ else if(*str >= 0xa1 && *str <= 0xdf); // single-byte kana: accepted, adds nothing
138
+ else if(*str < 0x20); // control byte: accepted, adds nothing
139
+ else bad=1; // any other byte is invalid at a character boundary
140
+ break;
141
+ case 1:
142
+ if((*str >= 0x40 && *str <= 0x7e) ||
143
+ (*str >= 0x80 && *str <= 0xfc)) score += 2; // second byte of a two-byte Shift_JIS character: +2 for the pair
144
+ else bad=1;
145
+ stat=0; // back to a character boundary whether or not the byte was valid
146
+ break;
147
+ }
148
+ }
149
+ if(bad != 0) score = -1; // NOTE(review): stat is not checked here, so a string that ends mid-character is not marked bad - confirm intended
150
+ return score;
151
+}
152
+
153
+int score_utf8(unsigned char *str) // Rate str as UTF-8: points are added per printable ASCII byte and per completed multi-byte sequence; returns -1 if str is NULL or any byte is invalid.
154
+{
155
+ int stat=0; // 0 = at a character boundary; 1 = last byte of a 2-byte sequence expected; 2,3 = 2nd/3rd byte of a 3-byte sequence; 4,5,6 = 2nd/3rd/4th byte of a 4-byte sequence
156
+ int score=0; // running total; replaced by -1 at the end if bad is set
157
+ int bad=0; // set once any byte breaks the expected pattern; scanning still runs to the end of the string
158
+ if(str==NULL) return -1;
159
+
160
+ for(; *str != 0; str++){
161
+ switch(stat){
162
+ case 0:
163
+ if(*str>= 0x20 && *str <= 0x7e) score++; // printable ASCII: +1
164
+ else if(*str >= 0xc0 && *str <= 0xdf) stat=1; // lead byte of a two-byte sequence (Greek etc.)
165
+ else if(*str >= 0xe0 && *str <= 0xef) stat=2; // lead byte of a three-byte sequence (kanji etc.)
166
+ else if(*str >= 0xf0 && *str <= 0xf7) stat=4; // lead byte of a four-byte sequence
167
+ else if(*str < 0x20); // control byte: accepted, adds nothing
168
+ else bad=1; // any other byte (including a stray 0x80-0xbf continuation byte) is invalid here
169
+ break;
170
+ case 1:
171
+ if(*str >= 0x80 && *str <= 0xbf) score++; // continuation byte completes the two-byte sequence: +1 (NOTE(review): only 1 here versus 3 and 4 for longer sequences - confirm intended)
172
+ else bad=1;
173
+ stat=0; // back to a character boundary whether or not the byte was valid
174
+ break;
175
+ case 2:
176
+ if(*str >= 0x80 && *str <= 0xbf) stat=3; // second byte of a three-byte sequence
177
+ else {bad=1; stat=0;}
178
+ break;
179
+ case 3:
180
+ if(*str >= 0x80 && *str <= 0xbf) score+=3; // third byte completes the three-byte sequence: +3
181
+ else bad=1;
182
+ stat=0;
183
+ break;
184
+ case 4:
185
+ case 5:
186
+ if(*str >= 0x80 && *str <= 0xbf) stat++; // second and third bytes of a four-byte sequence advance the state
187
+ else {bad=1; stat=0;}
188
+ break;
189
+ case 6:
190
+ if(*str >= 0x80 && *str <= 0xbf) score+=4; // fourth byte completes the four-byte sequence: +4
191
+ else bad=1;
192
+ stat=0;
193
+ break;
194
+ }
195
+ }
196
+ if(bad != 0) score = -1; // NOTE(review): stat is not checked here, so a string that ends mid-sequence is not marked bad - confirm intended
197
+ return score;
198
+}
199
+
200
+#endif
201
+
202
/*********************************************/
203
/* SRCH_STRING - get search strings from ref */
204
/*********************************************/
205
void
206
srch_string(char *refer, char *ptr)
207
{
208
- char tmpbuf[BUFSIZE];
209
- char srch[80] = "";
210
- char *cp1, *cp2, *cps;
211
+ unsigned char tmpbuf[BUFSIZE];
212
+ unsigned char srch[80] = "";
213
+ unsigned char *cp1, *cp2, *cps;
214
int sp_flg = 0;
215
+#ifdef HAVE_ICONV
216
+ int sjis, eucj, utf8;
217
+ unsigned char tmpbuf2[BUFSIZE];
218
+ unsigned char *cp3;
219
+ size_t inlen, outlen;
220
+#endif
221
222
/* Check if search engine referrer or return */
223
if ((cps = isinlist(search_list, refer)) == NULL)
224
@@ -1832,6 +1995,35 @@ srch_string(char *refer, char *ptr)
225
else
226
break;
227
228
+#ifdef HAVE_ICONV
229
+ utf8 = score_utf8(cp2);
230
+ sjis = score_sjis(cp2);
231
+ eucj = score_eucj(cp2);
232
+ if (sjis > utf8 && sjis > eucj) {
233
+ iconv(cd_from_sjis, NULL, 0, NULL, 0);
234
+ cp3 = cp2;
235
+ inlen = strlen(cp2) + 1;
236
+ cp1 = tmpbuf2;
237
+ outlen = sizeof(tmpbuf2);
238
+ if (iconv(cd_from_sjis,
239
+ (const char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
240
+ inlen == 0) {
241
+ cp2 = tmpbuf2;
242
+ }
243
+ } else if (eucj > utf8 && eucj > sjis) {
244
+ iconv(cd_from_eucj, NULL, 0, NULL, 0);
245
+ cp3 = cp2;
246
+ inlen = strlen(cp2) + 1;
247
+ cp1 = tmpbuf2;
248
+ outlen = sizeof(tmpbuf2);
249
+ if (iconv(cd_from_eucj,
250
+ (const char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
251
+ inlen == 0) {
252
+ cp2 = tmpbuf2;
253
+ }
254
+ }
255
+#endif
256
+
257
/* strip invalid chars */
258
cp1 = cp2;
259
while (*cp1 != '\0') {
260
@@ -2391,6 +2583,7 @@ cleanup_refer(char *refer, char *srchstr
261
262
/* unescape referrer */
263
unescape(refer);
264
+ unescape(refer); /* XXX */
265
266
/* fix referrer field */
267
cp1 = refer;
268
269