Re: Re:請問該如何把big5轉成utf8?
libiconv 有 win32 版本。
有點複雜, 貼一段 sample code 給你參考。
我在 win xp, linux, mac 上測過。
#define OUTBUF_SIZE 32768
char convert_str[OUTBUF_SIZE];
int convert_str_len=0;
// big5 -> utf8
iconv_t cd;
cd = iconv_open ("UTF-8", "BIG5");
process_block(cd, filename, strlen(filename) );
轉換完成後，convert_str 存放轉換後的 UTF-8 字串，其長度 (bytes) 記錄在 convert_str_len。
這是從 glibc-2.5/iconv/iconv_prog.c 擷取出來的 code.
// ref glibc-2.5/iconv/iconv_prog.c
int process_block (iconv_t cd, ICONV_CONST char *addr, size_t len)
{
//#define OUTBUF_SIZE 32768
const char *start = addr;
char outbuf[OUTBUF_SIZE];
char *outptr;
size_t outlen;
size_t n;
int ret = 0;
//char *outbuf=new char [OUTBUF_SIZE];
convert_str_len=0;
while (len > 0)
{
outptr = outbuf;
outlen = OUTBUF_SIZE;
n = iconv (cd, &addr, &len, &outptr, &outlen);
if (n == (size_t) -1 && omit_invalid && errno == EILSEQ)
{
ret = 1;
if (len == 0)
n = 0;
else
errno = E2BIG;
}
if (outptr != outbuf)
{
/* We have something to write out. */
int errno_save = errno;
convert_str_len=outptr - outbuf;
qDebug("convert_str_len: %d", convert_str_len);
memcpy(convert_str, outbuf, convert_str_len);
convert_str[convert_str_len]=0;
#if 0
if (fwrite (outbuf, 1, outptr - outbuf, output)
< (size_t) (outptr - outbuf)
|| ferror (output))
{
/* Error occurred while printing the result. */
//error (0, 0, _("\ conversion stopped due to problem in writing
the output"));
return -1;
}
#endif
errno = errno_save;
}
if (n != (size_t) -1)
{
/* All the input test is processed. For state-dependent
character sets we have to flush the state now. */
outptr = outbuf;
outlen = OUTBUF_SIZE;
n = iconv (cd, NULL, NULL, &outptr, &outlen);
if (outptr != outbuf)
{
/* We have something to write out. */
int errno_save = errno;
memcpy(convert_str+convert_str_len, outbuf, outptr - outbuf);
convert_str_len+=outptr - outbuf;
qDebug("xxx convert_str_len: %d", convert_str_len);
convert_str[convert_str_len]=0;
#if 0
if (fwrite (outbuf, 1, outptr - outbuf, output)
< (size_t) (outptr - outbuf)
|| ferror (output))
{
/* Error occurred while printing the result. */
//error (0, 0, _("\ conversion stopped due to problem in writing
the output"));
return -1;
}
#endif
errno = errno_save;
}
if (n != (size_t) -1)
break;
if (omit_invalid && errno == EILSEQ)
{
ret = 1;
break;
}
}
if (errno != E2BIG)
{
/* iconv() ran into a problem. */
switch (errno)
{
case EILSEQ:
if (! omit_invalid)
;//error (0, 0, _("illegal input sequence at position %ld"),
(long int) (addr - start));
break;
case EINVAL:
//error (0, 0, _("\ incomplete character or shift sequence at end
of buffer"));
break;
case EBADF:
//error (0, 0, _("internal error (illegal descriptor)"));
break;
default:
//error (0, 0, _("unknown iconv() error %d"), errno);
break;
}
return -1;
}
}
return ret;
}
※ 引述《kornelius.bbs@bbs.cs.nctu.edu.tw (柯娜麗絲)》之銘言:
: 我想請問該如何將big5的中文轉成utf8呢?
: ※ 引述《tester.bbs@bbs.csie.ncu.edu.tw (try or test)》之銘言:
: > 6. 常用字其實很少, 可以用常用字過濾比用詞典簡單. 那個十萬筆幾乎都是常用
: > 字組成的.
: > 7. 漢字怎麼會淪落到靠洋公司來替使用漢字的人解決問題 ?
--
※ 發信站: 批踢踢實業坊(ptt.cc)
◆ From: 218.168.158.135
Programming 近期熱門文章
PTT數位生活區 即時熱門文章