Re: Re:請問該如何把big5轉成utf8?

看板Programming作者 (爽過頭)時間18年前 (2007/01/05 14:57), 編輯推噓0(000)
留言0則, 0人參與, 最新討論串1/1
libiconv 有 win32 版本。 有點複雜, 貼一段 sample code 給你參考。 我在 win xp, linux, mac 上測過。 #define OUTBUF_SIZE 32768 char convert_str[OUTBUF_SIZE]; int convert_str_len=0; // big5 -> utf8 iconv_t cd; cd = iconv_open ("UTF-8", "BIG5"); process_block(cd, filename, strlen(filename) ); convert_str 存放轉換後的編碼。 這是從 glibc-2.5/iconv/iconv_prog.c 擷取出來的 code. // ref glibc-2.5/iconv/iconv_prog.c int process_block (iconv_t cd, ICONV_CONST char *addr, size_t len) { //#define OUTBUF_SIZE 32768 const char *start = addr; char outbuf[OUTBUF_SIZE]; char *outptr; size_t outlen; size_t n; int ret = 0; //char *outbuf=new char [OUTBUF_SIZE]; convert_str_len=0; while (len > 0) { outptr = outbuf; outlen = OUTBUF_SIZE; n = iconv (cd, &addr, &len, &outptr, &outlen); if (n == (size_t) -1 && omit_invalid && errno == EILSEQ) { ret = 1; if (len == 0) n = 0; else errno = E2BIG; } if (outptr != outbuf) { /* We have something to write out. */ int errno_save = errno; convert_str_len=outptr - outbuf; qDebug("convert_str_len: %d", convert_str_len); memcpy(convert_str, outbuf, convert_str_len); convert_str[convert_str_len]=0; #if 0 if (fwrite (outbuf, 1, outptr - outbuf, output) < (size_t) (outptr - outbuf) || ferror (output)) { /* Error occurred while printing the result. */ //error (0, 0, _("\ conversion stopped due to problem in writing the output")); return -1; } #endif errno = errno_save; } if (n != (size_t) -1) { /* All the input test is processed. For state-dependent character sets we have to flush the state now. */ outptr = outbuf; outlen = OUTBUF_SIZE; n = iconv (cd, NULL, NULL, &outptr, &outlen); if (outptr != outbuf) { /* We have something to write out. */ int errno_save = errno; memcpy(convert_str+convert_str_len, outbuf, outptr - outbuf); convert_str_len+=outptr - outbuf; qDebug("xxx convert_str_len: %d", convert_str_len); convert_str[convert_str_len]=0; #if 0 if (fwrite (outbuf, 1, outptr - outbuf, output) < (size_t) (outptr - outbuf) || ferror (output)) { /* Error occurred while printing the result. */ //error (0, 0, _("\ conversion stopped due to problem in writing the output")); return -1; } #endif errno = errno_save; } if (n != (size_t) -1) break; if (omit_invalid && errno == EILSEQ) { ret = 1; break; } } if (errno != E2BIG) { /* iconv() ran into a problem. */ switch (errno) { case EILSEQ: if (! omit_invalid) ;//error (0, 0, _("illegal input sequence at position %ld"), (long int) (addr - start)); break; case EINVAL: //error (0, 0, _("\ incomplete character or shift sequence at end of buffer")); break; case EBADF: //error (0, 0, _("internal error (illegal descriptor)")); break; default: //error (0, 0, _("unknown iconv() error %d"), errno); break; } return -1; } } return ret; } ※ 引述《kornelius.bbs@bbs.cs.nctu.edu.tw (柯娜麗絲)》之銘言: : 我想請問該如何將big5的中文轉成utf8呢? : ※ 引述《tester.bbs@bbs.csie.ncu.edu.tw (try or test)》之銘言: : > 6. 常用字其實很少, 可以用常用字過濾比用詞典簡單. 那個十萬筆幾乎都是常用 : > 字組成的. : > 7. 漢字怎麼會淪落到靠洋公司來替使用漢字的人解決問題 ? -- ※ 發信站: 批踢踢實業坊(ptt.cc) ◆ From: 218.168.158.135
文章代碼(AID): #15dVRctX (Programming)
文章代碼(AID): #15dVRctX (Programming)