Path: blob/main/japanese/awffull/files/awffull-ja.diff
16135 views
Japanese search-string support for AWFFull (src/awffull.c).

When built with HAVE_ICONV, srch_string() scores each search string as
UTF-8, Shift_JIS and EUC-JP and converts it to UTF-8 when one of the two
legacy encodings scores strictly highest.  unescape() additionally decodes
the \xHH and \\ escapes before its %XX pass, and cleanup_refer() unescapes
the referrer twice.

NOTE(review): the leading whitespace of context and removed lines was
reconstructed and may not match upstream byte-for-byte; apply with
"patch -l" or regenerate the diff against the pristine source.

--- src/awffull.c.orig	2008-12-13 11:28:35.000000000 +0900
+++ src/awffull.c	2008-12-31 16:43:45.000000000 +0900
@@ -37,6 +37,9 @@
 /* STANDARD INCLUDES */
 /*********************************************/
 #include "awffull.h" /* main header */
+#ifdef HAVE_ICONV
+#include <iconv.h>
+#endif
 
 /* internal function prototypes */
 
@@ -137,6 +140,10 @@ static char const ab_month_name[][4] = {
     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
 };
 
+#ifdef HAVE_ICONV
+iconv_t cd_from_sjis, cd_from_eucj; /* (iconv_t)-1 when iconv_open() failed */
+#endif
+
 /*********************************************/
 /* MAIN - start here */
 /*********************************************/
@@ -339,6 +346,11 @@ main(int argc, char *argv[])
 
     start_time = times(&mytms);
 
+#ifdef HAVE_ICONV
+    cd_from_sjis = iconv_open("UTF-8", "Shift_JIS");
+    cd_from_eucj = iconv_open("UTF-8", "EUC-JP");
+#endif
+
     /*********************************************
      * MAIN PROCESS LOOP - read through log file *
      *********************************************/
@@ -801,9 +813,17 @@ main(int argc, char *argv[])
         }
 
         del_htabs();
+#ifdef HAVE_ICONV
+        if (cd_from_sjis != (iconv_t)-1) iconv_close(cd_from_sjis);
+        if (cd_from_eucj != (iconv_t)-1) iconv_close(cd_from_eucj);
+#endif
         /* Whew, all done! Exit with completion status (0) */
         exit(0);
     } else {
+#ifdef HAVE_ICONV
+        if (cd_from_sjis != (iconv_t)-1) iconv_close(cd_from_sjis);
+        if (cd_from_eucj != (iconv_t)-1) iconv_close(cd_from_eucj);
+#endif
         /* No valid records found... exit with error (1) */
         VPRINT(VERBOSE1, "%s\n", _("No valid records found!"));
         exit(1);
@@ -1740,6 +1760,26 @@ unescape(char *str)
     if (!str)
         return NULL; /* make sure strings valid */
 
+    /* Decode Apache log escapes: \xHH -> byte (control chars -> '_'), \\ -> backslash */
+    while (*cp1) {
+        if (*cp1 == '\\' && *(cp1 + 1) == 'x' &&
+            isxdigit(*(cp1 + 2)) && isxdigit(*(cp1 + 3))) {
+            *cp2 = from_hex(*(cp1 + 2)) * 16 + from_hex(*(cp1 + 3));
+            if ((*cp2 < 32) || (*cp2 == 127))
+                *cp2 = '_';
+            cp1 += 4;
+            cp2++;
+        } else if (*cp1 == '\\' && *(cp1 + 1) == '\\') {
+            *cp2 = '\\';
+            cp1 += 2;
+            cp2++;
+        } else {
+            *cp2++ = *cp1++;
+        }
+    }
+    *cp2 = *cp1; /* copy the terminator */
+    cp1 = cp2 = str; /* rewind for the %XX pass below */
+
     while (*cp1) {
         if (*cp1 == '%') { /* Found an escape? */
             cp1++;
@@ -1762,16 +1802,139 @@ unescape(char *str)
     return str; /* return the string */
 }
 
+#ifdef HAVE_ICONV
+
+/*********************************************/
+/* SCORE_XXX - encoding fit score, -1 = bad  */
+/*********************************************/
+
+int score_eucj(unsigned char *str)
+{
+    int stat=0;  /* 0: start of char, 1: kanji 2nd byte, 2: kana 2nd byte */
+    int score=0;
+    int bad=0;   /* set once a byte is invalid for EUC-JP */
+    if(str==NULL) return -1;
+
+    for(; *str!=0;str++){
+        switch(stat){
+        case 0:
+            if(*str>= 0x20 && *str <= 0x7e) score++; // ASCII
+            else if(*str >= 0xa1 && *str <= 0xfe) stat=1; // kanji, 1st byte
+            else if(*str == 0x8f); // supplementary-kanji prefix: skipped, scores 0
+            else if(*str == 0x8e) stat=2; // half-width kana prefix
+            else if(*str < 0x20); // control char: ignored
+            else bad=1;
+            break;
+        case 1:
+            if(*str >= 0xa1 && *str <= 0xfe) score += 2; // kanji, 2nd byte
+            else bad=1;
+            stat=0;
+            break;
+        case 2:
+            if(*str >= 0xa1 && *str <= 0xdf); // half-width kana: scores 0
+            else bad=1;
+            stat=0;
+            break;
+        }
+    }
+    if(bad != 0) score = -1;
+    return score;
+}
+
+int score_sjis(unsigned char *str)
+{
+    int stat=0;  /* 0: start of char, 1: expecting the 2nd byte */
+    int score=0;
+    int bad=0;   /* set once a byte is invalid for Shift_JIS */
+    if(str==NULL) return -1;
+
+    for(; *str != 0; str++){
+        switch(stat){
+        case 0:
+            if(*str>= 0x20 && *str <= 0x7e) score++; // ASCII
+            else if((*str >= 0x81 && *str <= 0x9f) ||
+                    (*str >= 0xe0 && *str <= 0xfc)) stat=1; // double-byte, 1st byte
+            else if(*str >= 0xa1 && *str <= 0xdf); // half-width kana: scores 0
+            else if(*str < 0x20); // control char: ignored
+            else bad=1;
+            break;
+        case 1:
+            if((*str >= 0x40 && *str <= 0x7e) ||
+               (*str >= 0x80 && *str <= 0xfc)) score += 2; // double-byte, 2nd byte
+            else bad=1;
+            stat=0;
+            break;
+        }
+    }
+    if(bad != 0) score = -1;
+    return score;
+}
+
+int score_utf8(unsigned char *str)
+{
+    int stat=0;  /* 0: start; 1: 2-byte tail; 2-3: 3-byte tail; 4-6: 4-byte tail */
+    int score=0;
+    int bad=0;   /* set once a byte is invalid for UTF-8 */
+    if(str==NULL) return -1;
+
+    for(; *str != 0; str++){
+        switch(stat){
+        case 0:
+            if(*str>= 0x20 && *str <= 0x7e) score++; // ASCII
+            else if(*str >= 0xc0 && *str <= 0xdf) stat=1; // 2-byte lead (Greek etc.)
+            else if(*str >= 0xe0 && *str <= 0xef) stat=2; // 3-byte lead (kanji etc.)
+            else if(*str >= 0xf0 && *str <= 0xf7) stat=4; // 4-byte lead
+            else if(*str < 0x20); // control char: ignored
+            else bad=1;
+            break;
+        case 1:
+            if(*str >= 0x80 && *str <= 0xbf) score++;
+            else bad=1;
+            stat=0;
+            break;
+        case 2:
+            if(*str >= 0x80 && *str <= 0xbf) stat=3; // 3-byte form, 2nd byte
+            else {bad=1; stat=0;}
+            break;
+        case 3:
+            if(*str >= 0x80 && *str <= 0xbf) score+=3; // 3-byte form, 3rd byte
+            else bad=1;
+            stat=0;
+            break;
+        case 4:
+        case 5:
+            if(*str >= 0x80 && *str <= 0xbf) stat++;
+            else {bad=1; stat=0;}
+            break;
+        case 6:
+            if(*str >= 0x80 && *str <= 0xbf) score+=4;
+            else bad=1;
+            stat=0;
+            break;
+        }
+    }
+    if(bad != 0) score = -1;
+    return score;
+}
+
+#endif
+
 /*********************************************/
 /* SRCH_STRING - get search strings from ref */
 /*********************************************/
 void
 srch_string(char *refer, char *ptr)
 {
-    char tmpbuf[BUFSIZE];
-    char srch[80] = "";
-    char *cp1, *cp2, *cps;
+    unsigned char tmpbuf[BUFSIZE];
+    unsigned char srch[80] = "";
+    unsigned char *cp1, *cp2, *cps;
     int sp_flg = 0;
+#ifdef HAVE_ICONV
+    int sjis, eucj, utf8;
+    unsigned char tmpbuf2[BUFSIZE];
+    unsigned char *cp3;
+    size_t inlen, outlen;
+#endif
 
     /* Check if search engine referrer or return */
     if ((cps = isinlist(search_list, refer)) == NULL)
@@ -1832,6 +1995,35 @@ srch_string(char *refer, char *ptr)
         else
             break;
 
+#ifdef HAVE_ICONV
+    utf8 = score_utf8(cp2);
+    sjis = score_sjis(cp2);
+    eucj = score_eucj(cp2);
+    if (cd_from_sjis != (iconv_t)-1 && sjis > utf8 && sjis > eucj) {
+        iconv(cd_from_sjis, NULL, NULL, NULL, NULL); /* back to the initial state */
+        cp3 = cp2;
+        inlen = strlen((char *)cp2) + 1; /* +1 so the NUL is converted too */
+        cp1 = tmpbuf2;
+        outlen = sizeof(tmpbuf2);
+        if (iconv(cd_from_sjis,
+                  (const char **)&cp3, &inlen, (char **)&cp1, &outlen) != (size_t)-1 &&
+            inlen == 0) {
+            cp2 = tmpbuf2;
+        }
+    } else if (cd_from_eucj != (iconv_t)-1 && eucj > utf8 && eucj > sjis) {
+        iconv(cd_from_eucj, NULL, NULL, NULL, NULL); /* back to the initial state */
+        cp3 = cp2;
+        inlen = strlen((char *)cp2) + 1; /* +1 so the NUL is converted too */
+        cp1 = tmpbuf2;
+        outlen = sizeof(tmpbuf2);
+        if (iconv(cd_from_eucj,
+                  (const char **)&cp3, &inlen, (char **)&cp1, &outlen) != (size_t)-1 &&
+            inlen == 0) {
+            cp2 = tmpbuf2;
+        }
+    }
+#endif
+
     /* strip invalid chars */
     cp1 = cp2;
     while (*cp1 != '\0') {
@@ -2391,6 +2583,7 @@ cleanup_refer(char *refer, char *srchstr
 
     /* unescape referrer */
     unescape(refer);
+    unescape(refer); /* XXX second pass, presumably for double-escaped referrers */
 
     /* fix referrer field */
     cp1 = refer;