Handle two-byte JIS-family (non-UTF-8) multibyte locales.

When the codeset name contains "JIS" and MB_CUR_MAX is 2, printf, read,
ansicstr() and read_a_line() copy each complete multibyte character
verbatim, so that none of its bytes is interpreted on its own (for
example a trail byte that equals a backslash).  Invalid or truncated
sequences fall back to ordinary byte-by-byte processing.

---
 bashintl.h          |    4 ++++
 builtins/printf.def |   23 +++++++++++++++++++++++
 builtins/read.def   |   69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/sh/strtrans.c   |   23 +++++++++++++++++++++++
 locale.c            |   30 ++++++++++++++++++++++++++++++
 parse.y             |   47 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 195 insertions(+), 1 deletion(-)

--- bashintl.h
+++ bashintl.h	2018-11-29 08:14:56.165762022 +0000
@@ -43,6 +43,10 @@
 #  undef HAVE_SETLOCALE
 #endif
 
+/* Cached multibyte locale state; defined once in locale.c. */
+extern int bash_mbcs_non_utf8;
+extern int bash_mb_cur_max;
+
 #if !defined (HAVE_SETLOCALE)
 #  define setlocale(cat, loc)
 #endif
--- builtins/printf.def
+++ builtins/printf.def	2018-11-29 08:18:21.445909015 +0000
@@ -247,6 +247,8 @@ printf_builtin (list)
 #if defined (HANDLE_MULTIBYTE)
   char mbch[25];		/* 25 > MB_LEN_MAX, plus can handle 4-byte UTF-8 and large Unicode characters*/
   int mbind, mblen;
+  size_t charlen;
+  mbstate_t mbs;
 #endif
 #if defined (ARRAY_VARS)
   int arrayflags;
@@ -334,6 +336,27 @@ printf_builtin (list)
 	  precision = fieldwidth = 0;
 	  have_fieldwidth = have_precision = 0;
 
+#if defined (HANDLE_MULTIBYTE)
+	  /* Emit a complete multibyte character unchanged so that none of
+	     its bytes is taken for the backslash tested below. */
+	  if (bash_mbcs_non_utf8)
+	    {
+	      memset (&mbs, 0, sizeof (mbs));
+	      charlen = mbrlen (fmt, bash_mb_cur_max, &mbs);
+	      if (charlen > 1 && charlen != (size_t)-2 && charlen != (size_t)-1)
+		{
+		  while (charlen > 0)
+		    {
+		      PC (*fmt);
+		      fmt++;
+		      charlen--;
+		    }
+		  fmt--;
+		  continue;
+		}
+	    }
+#endif
+
 	  if (*fmt == '\\')
 	    {
 	      fmt++;
--- builtins/read.def
+++ builtins/read.def	2018-11-29 08:14:56.165762022 +0000
@@ -188,6 +188,12 @@ read_builtin (list)
   char c;
   char *input_string, *orig_input_string, *ifs_chars, *prompt, *arrayname;
   char *e, *t, *t1, *ps2, *tofree;
+#if defined (HANDLE_MULTIBYTE)
+  static char mbch[25];
+  size_t charlen;
+  int mblen;
+  mbstate_t mbs;
+#endif
   struct stat tsb;
   SHELL_VAR *var;
   TTYSTRUCT ttattrs, ttset;
@@ -548,6 +554,9 @@ read_builtin (list)
 #endif
 
   ps2 = 0;
+#if defined (HANDLE_MULTIBYTE)
+  charlen = 0;
+#endif
   for (print_ps2 = eof = retval = 0;;)
     {
       CHECK_ALRM;
@@ -589,14 +598,42 @@ read_builtin (list)
 
       reading = 1;
       CHECK_ALRM;
+#if defined (HANDLE_MULTIBYTE)
+      charlen = 0;
+      mblen = 0;
+      if (bash_mbcs_non_utf8)
+	{
+	  /* Read byte by byte until mbrlen stops reporting an incomplete
+	     character or bash_mb_cur_max bytes have been collected. */
+	  do
+	    {
+	      if (unbuffered_read)
+		retval = posixly_correct ? zreadintr (fd, &c, 1) : zread (fd, &c, 1);
+	      else
+		retval = posixly_correct ? zreadcintr (fd, &c) : zreadc (fd, &c);
+
+	      if (retval <= 0)
+		break;
+
+	      mbch[mblen++] = c;
+	      memset (&mbs, 0, sizeof (mbs));
+	      charlen = mbrlen (mbch, mblen, &mbs);
+	    }
+	  while (charlen == (size_t)-2 && mblen < bash_mb_cur_max);
+	}
+      else
+	{
+#endif
       if (unbuffered_read == 2)
 	retval = posixly_correct ? zreadintr (fd, &c, 1) : zreadn (fd, &c, nchars - nr);
       else if (unbuffered_read)
 	retval = posixly_correct ? zreadintr (fd, &c, 1) : zread (fd, &c, 1);
       else
 	retval = posixly_correct ? zreadcintr (fd, &c) : zreadc (fd, &c);
+#if defined (HANDLE_MULTIBYTE)
+	}
+#endif
       reading = 0;
-
       if (retval <= 0)
 	{
 	  if (retval < 0 && errno == EINTR)
@@ -639,6 +676,24 @@ read_builtin (list)
 	    }
 	}
 
+#if defined (HANDLE_MULTIBYTE)
+      if (bash_mbcs_non_utf8)
+	{
+	  if (charlen == (size_t)-1 || charlen == (size_t)-2)
+	    {
+	      /* Invalid or truncated sequence.  mbrlen's error values compare
+		 greater than 1, so they must never reach the copy loop at
+		 add_char.  Keep a lead byte that was read ahead of C as-is
+		 and let C take the ordinary single-byte path. */
+	      if (mblen > 1)
+		input_string[i++] = mbch[0];
+	      charlen = 1;
+	    }
+	  else if (charlen > 1)
+	    goto add_char;
+	}
+#endif
+
       /* If the next character is to be accepted verbatim, a backslash
 	 newline pair still disappears from the input. */
       if (pass_next)
@@ -681,7 +736,19 @@ read_builtin (list)
 	}
 
 add_char:
+#if defined (HANDLE_MULTIBYTE)
+      if (bash_mbcs_non_utf8 && charlen > 1)
+	{
+	  for (mblen = 0; mblen < charlen; mblen++)
+	    input_string[i++] = mbch[mblen];
+	}
+      else
+	{
+#endif
       input_string[i++] = c;
+#if defined (HANDLE_MULTIBYTE)
+	}
+#endif
       CHECK_ALRM;
 
 #if defined (HANDLE_MULTIBYTE)
--- lib/sh/strtrans.c
+++ lib/sh/strtrans.c	2018-11-29 08:14:56.165762022 +0000
@@ -29,6 +29,7 @@
 #include <chartypes.h>
 
 #include "shell.h"
+#include "bashintl.h"
 
 #include "shmbchar.h"
 #include "shmbutil.h"
@@ -55,6 +56,10 @@ ansicstr (string, len, flags, sawc, rlen
   int c, temp;
   char *ret, *r, *s;
   unsigned long v;
+#if defined (HANDLE_MULTIBYTE)
+  size_t charlen;
+  mbstate_t mbs;
+#endif
 
   if (string == 0 || *string == '\0')
     return ((char *)NULL);
@@ -69,6 +74,24 @@ ansicstr (string, len, flags, sawc, rlen
 #endif
   for (r = ret, s = string; s && *s; )
     {
+#if defined (HANDLE_MULTIBYTE)
+      /* Copy a complete multibyte character unchanged so that none of its
+	 bytes is taken for the backslash tested below. */
+      if (bash_mbcs_non_utf8)
+	{
+	  memset (&mbs, 0, sizeof (mbs));
+	  charlen = mbrlen (s, bash_mb_cur_max, &mbs);
+	  if (charlen > 1 && charlen != (size_t)-2 && charlen != (size_t)-1)
+	    {
+	      while (charlen > 0)
+		{
+		  *r++ = *s++;
+		  charlen--;
+		}
+	      continue;
+	    }
+	}
+#endif
       c = *s++;
       if (c != '\\' || *s == '\0')
 	*r++ = c;
--- locale.c
+++ locale.c	2018-11-29 08:14:56.165762022 +0000
@@ -52,6 +52,10 @@ int locale_mb_cur_max;	/* value of MB_CU
 int locale_shiftstates;
 
 extern int dump_translatable_strings, dump_po_strings;
+
+/* Cached by set_mbcs_values (); declared extern in bashintl.h. */
+int bash_mb_cur_max = 1;
+int bash_mbcs_non_utf8 = 0;
 
 /* The current locale when the program begins */
 static char *default_locale;
@@ -76,6 +80,27 @@ static int reset_locale_vars __P((void))
 static void locale_setblanks __P((void));
 static int locale_isutf8 __P((char *));
 
+static void
+set_mbcs_values ()
+{
+  /* Cache MB_CUR_MAX and whether the codeset is a two-byte JIS variant, so
+     per-character loops can test plain variables. */
+#if defined (HANDLE_MULTIBYTE)
+  bash_mb_cur_max = MB_CUR_MAX;
+#  if defined (HAVE_LANGINFO_CODESET)
+  if (strcasestr (nl_langinfo (CODESET), "JIS") != 0 && bash_mb_cur_max == 2)
+    bash_mbcs_non_utf8 = 1;
+  else
+    bash_mbcs_non_utf8 = 0;
+#  else
+  bash_mbcs_non_utf8 = 0;
+#  endif
+#else
+  bash_mb_cur_max = 1;
+  bash_mbcs_non_utf8 = 0;
+#endif
+}
+
 /* Set the value of default_locale and make the current locale the
    system default locale.  This should be called very early in main(). */
 void
@@ -94,6 +119,8 @@ set_default_locale ()
   default_locale = setlocale (LC_ALL, "");
   if (default_locale)
     default_locale = savestring (default_locale);
+
+  set_mbcs_values ();
 #endif /* HAVE_SETLOCALE */
   bindtextdomain (PACKAGE, LOCALEDIR);
   textdomain (PACKAGE);
@@ -150,6 +177,7 @@ set_default_locale_vars ()
 	setlocale (LC_TIME, lc_all);
 #  endif /* LC_TIME */
 
+  set_mbcs_values ();
 #endif /* HAVE_SETLOCALE */
 
   val = get_string_value ("TEXTDOMAIN");
@@ -240,6 +268,7 @@ set_locale_var (var, value)
       if (lc_all == 0 || *lc_all == '\0')
 	{
 	  x = setlocale (LC_CTYPE, get_locale_var ("LC_CTYPE"));
+	  set_mbcs_values ();
 	  locale_setblanks ();
 	  locale_mb_cur_max = MB_CUR_MAX;
 	  /* if setlocale() returns NULL, the locale is not changed */
@@ -388,6 +417,7 @@ reset_locale_vars ()
   t = setlocale (LC_TIME, get_locale_var ("LC_TIME"));
 #  endif
 
+  set_mbcs_values ();
   locale_setblanks ();
   locale_mb_cur_max = MB_CUR_MAX;
   if (x)
--- parse.y
+++ parse.y	2018-11-29 08:14:56.165762022 +0000
@@ -2005,7 +2005,13 @@ read_a_line (remove_quoted_newline)
 {
   static char *line_buffer = (char *)NULL;
   static int buffer_size = 0;
   int indx, c, peekc, pass_next;
+#if defined (HANDLE_MULTIBYTE)
+  size_t charlen;
+  mbstate_t mbs;
+  int chari, nraw, mbdone;
+  static char char_buffer[10];
+#endif
 
 #if defined (READLINE)
   if (no_line_editing && SHOULD_PROMPT ())
@@ -2020,7 +2026,48 @@ read_a_line (remove_quoted_newline)
       /* Allow immediate exit if interrupted during input. */
       QUIT;
 
+#if defined (HANDLE_MULTIBYTE)
+      if (!bash_mbcs_non_utf8)
+	c = yy_getc ();
+      else
+	{
+	  /* Gather bytes while mbrlen reports an incomplete character.  EOF
+	     is kept out of char_buffer: narrowed to char it could not be
+	     told apart from a 0xFF data byte. */
+	  chari = 0;
+	  charlen = 0;
+	  do
+	    {
+	      c = yy_getc ();
+	      if (c == EOF)
+		break;
+
+	      char_buffer[chari++] = c;
+	      memset (&mbs, 0, sizeof (mbs));
+	      charlen = mbrlen (char_buffer, chari, &mbs);
+	    }
+	  while (charlen == (size_t)-2 && chari < (int)sizeof (char_buffer));
+
+	  /* mbrlen's error values compare greater than 1, so exclude them
+	     explicitly.  A NUL (0) or a single byte (1) is not a multibyte
+	     character either: C then takes the ordinary path below. */
+	  mbdone = c != EOF && charlen > 1
+		   && charlen != (size_t)-1 && charlen != (size_t)-2;
+
+	  /* Store verbatim every byte of a complete character, or whatever
+	     was read before EOF; otherwise only the bytes preceding C. */
+	  nraw = (mbdone || c == EOF) ? chari : chari - 1;
+	  for (chari = 0; chari < nraw; chari++)
+	    {
+	      RESIZE_MALLOCED_BUFFER (line_buffer, indx, 2, buffer_size, 128);
+	      line_buffer[indx++] = char_buffer[chari];
+	    }
+	  if (mbdone)
+	    continue;
+	}
+#else
       c = yy_getc ();
+#endif
 
       /* Ignore null bytes in input. */
       if (c == 0)