Bug 523298 - win_iconv can't convert from UTF-8 to GB18030 (or vice versa)

2008-03-19  Tor Lillqvist  <tml@novell.com>

	Bug 523298 - win_iconv can't convert from UTF-8 to GB18030 (or vice versa)

	* glib/win_iconv.c: Fixes for code page 54936 (GB18030)
	(mbtowc_flags): New function. Check if a code page is one of those
	for which the dwFlags parameter to MultiByteToWideChar() must be
	zero. Return 0 or MB_ERR_INVALID_CHARS.
	(mbcs_mblen): New function for multi-byte (more than two bytes for
	some characters) code pages. Only handles 54936 for now.
	(make_csconv): Use it for 54936.
	(kernel_mbtowc): Use mbtowc_flags().


svn path=/trunk/; revision=6741
This commit is contained in:
Tor Lillqvist 2008-03-20 02:40:23 +00:00 committed by Tor Lillqvist
parent 43db4fb1dd
commit 908d3ef0d4
2 changed files with 71 additions and 4 deletions

View File

@ -1,3 +1,16 @@
2008-03-19 Tor Lillqvist <tml@novell.com>
Bug 523298 - win_iconv can't convert from UTF-8 to GB18030 (or vice versa)
* glib/win_iconv.c: Fixes for code page 54936 (GB18030)
(mbtowc_flags): New function. Check if a code page is one of those
for which the dwFlags parameter to MultiByteToWideChar() must be
zero. Return 0 or MB_ERR_INVALID_CHARS.
(mbcs_mblen): New function for multi-byte (more than two bytes for
some characters) code pages. Only handles 54936 for now.
(make_csconv): Use it for 54936.
(kernel_mbtowc): Use mbtowc_flags().
2008-03-18 Sebastian Dröge <slomo@circular-chaos.org>
Bug 522292 - Gives warnings in glib/gutils.h with GCC in C99 mode

View File

@ -137,6 +137,7 @@ static int name_to_codepage(const char *name);
static uint utf16_to_ucs4(const ushort *wbuf);
static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize);
static int is_unicode(int codepage);
static int mbtowc_flags(int codepage);
static int must_use_null_useddefaultchar(int codepage);
static void check_utf_bom(rec_iconv_t *cd, ushort *wbuf, int *wbufsize);
static char *strrstr(const char *str, const char *token);
@ -152,6 +153,7 @@ static HMODULE hlastdll; /* keep dll loaded for efficiency (unnecessary?) */
static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize);
@ -925,15 +927,16 @@ make_csconv(const char *_name)
cv.mblen = eucjp_mblen;
}
else if (IsValidCodePage(cv.codepage)
&& GetCPInfoEx(cv.codepage, 0, &cpinfoex) != 0
&& (cpinfoex.MaxCharSize == 1 || cpinfoex.MaxCharSize == 2))
&& GetCPInfoEx(cv.codepage, 0, &cpinfoex) != 0)
{
cv.mbtowc = kernel_mbtowc;
cv.wctomb = kernel_wctomb;
if (cpinfoex.MaxCharSize == 1)
cv.mblen = sbcs_mblen;
else
else if (cpinfoex.MaxCharSize == 2)
cv.mblen = dbcs_mblen;
else
cv.mblen = mbcs_mblen;
}
else
{
@ -1013,6 +1016,35 @@ is_unicode(int codepage)
codepage == 65000 || codepage == 65001);
}
/*
* Check if codepage is one of those for which the dwFlags parameter
* to MultiByteToWideChar() must be zero. Return zero or
* MB_ERR_INVALID_CHARS. The docs in Platform SDK for for Windows
* Server 2003 R2 claims that also codepage 65001 is one of these, but
* that doesn't seem to be the case. The MSDN docs for MSVS2008 leave
* out 65001 (UTF-8), and that indeed seems to be the case on XP, it
* works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting
* from UTF-8.
*/
static int
mbtowc_flags(int codepage)
{
return (codepage == 50220 || codepage == 50221 ||
codepage == 50222 || codepage == 50225 ||
codepage == 50227 || codepage == 50229 ||
codepage == 52936 || codepage == 54936 ||
(codepage >= 57002 && codepage <= 57011) ||
codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS;
}
/*
* Check if codepage is one those for which the lpUsedDefaultChar
* parameter to WideCharToMultiByte() must be NULL. The docs in
* Platform SDK for for Windows Server 2003 R2 claims that this is the
* list below, while the MSDN docs for MSVS2008 claim that it is only
* for 65000 (UTF-7) and 65001 (UTF-8). This time the earlier Platform
* SDK seems to be correct, at least for XP.
*/
static int
must_use_null_useddefaultchar(int codepage)
{
@ -1220,6 +1252,28 @@ dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
return len;
}
static int
mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
{
int len = 0;
if (cv->codepage == 54936) {
if (buf[0] <= 0x7F) len = 1;
else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
bufsize >= 2 &&
((buf[1] >= 0x40 && buf[1] <= 0x7E) ||
(buf[1] >= 0x80 && buf[1] <= 0xFE))) len = 2;
else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
bufsize >= 4 &&
buf[1] >= 0x30 && buf[1] <= 0x39) len = 4;
else
return_error(EINVAL);
return len;
}
else
return_error(EINVAL);
}
static int
utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize)
{
@ -1280,7 +1334,7 @@ kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wb
len = cv->mblen(cv, buf, bufsize);
if (len == -1)
return -1;
*wbufsize = MultiByteToWideChar(cv->codepage, MB_ERR_INVALID_CHARS,
*wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage),
(const char *)buf, len, (wchar_t *)wbuf, *wbufsize);
if (*wbufsize == 0)
return_error(EILSEQ);