--- generic/tclCmdMZ.c.orig +++ generic/tclCmdMZ.c @@ -2629,7 +2629,7 @@ StringEqualCmd( */ objv += objc-2; - match = TclStringCmp(objv[0], objv[1], 0, nocase, reqlength); + match = TclStringCmp(objv[0], objv[1], 1, nocase, reqlength); Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1)); return TCL_OK; } @@ -2702,8 +2702,8 @@ TclStringCmp( Tcl_Obj *value2Ptr, int checkEq, /* comparison is only for equality */ int nocase, /* comparison is not case sensitive */ - int reqlength) /* requested length; -1 to compare whole - * strings */ + int reqlength) /* requested length in characters; -1 to + * compare whole strings */ { const char *s1, *s2; int empty, length, match, s1len, s2len; @@ -2731,10 +2731,10 @@ TclStringCmp( } else if ((value1Ptr->typePtr == &tclStringType) && (value2Ptr->typePtr == &tclStringType)) { /* - * Do a unicode-specific comparison if both of the args are of String + * Do a Unicode-specific comparison if both of the args are of String * type. If the char length == byte length, we can do a memcmp. In * benchmark testing this proved the most efficient check between the - * unicode and string comparison operations. + * Unicode and string comparison operations. */ if (nocase) { @@ -2748,6 +2748,9 @@ TclStringCmp( && (value1Ptr->bytes != NULL) && (s2len == value2Ptr->length) && (value2Ptr->bytes != NULL)) { + /* each byte represents one character so s1l3n, s2l3n, and + * reqlength are in both bytes and characters + */ s1 = value1Ptr->bytes; s2 = value2Ptr->bytes; memCmpFn = memcmp; @@ -2756,14 +2759,17 @@ TclStringCmp( s2 = (char *) Tcl_GetUnicode(value2Ptr); if ( #if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX != 4) - 1 + 1 #else - checkEq + checkEq #endif /* WORDS_BIGENDIAN */ - ) { + ) { memCmpFn = memcmp; s1len *= sizeof(Tcl_UniChar); s2len *= sizeof(Tcl_UniChar); + if (reqlength > 0) { + reqlength *= sizeof(Tcl_UniChar); + } } else { memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; } @@ -2805,7 +2811,7 @@ TclStringCmp( s2 = TclGetStringFromObj(value2Ptr, &s2len); } - if (!nocase && checkEq) { + if (!nocase && checkEq && reqlength < 0) { /* * When we have equal-length we can check only for (in)equality. * We can use memcmp() in all (n)eq cases because we don't need to @@ -2826,24 +2832,28 @@ TclStringCmp( s1len = Tcl_NumUtfChars(s1, s1len); s2len = Tcl_NumUtfChars(s2, s2len); memCmpFn = (memCmpFn_t) - (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); + (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); } } } + /* At this point s1len, s2len, and reqlength should by now have been + * adjusted so that they are all in the units expected by the selected + * comparison function. + */ + length = (s1len < s2len) ? s1len : s2len; if (reqlength > 0 && reqlength < length) { length = reqlength; } else if (reqlength < 0) { /* - * The requested length is negative, so we ignore it by setting it to - * length + 1 so we correct the match var. + * The requested length is negative, so ignore it by setting it to + * length + 1 to correct the match var. */ - reqlength = length + 1; } - if (checkEq && (s1len != s2len)) { + if (checkEq && reqlength < 0 && (s1len != s2len)) { match = 1; /* This will be reversed below. */ } else { /* --- tests/stringComp.test.orig +++ tests/stringComp.test @@ -100,7 +100,7 @@ foreach {tname tbody tresult tcode} { {unicode} {string compare \334 \u00fc} -1 {} {unicode} {string compare \334\334\334\374\374 \334\334\334\334\334} 1 {} {high bit} { - # This test will fail if the underlying comparison + # This test fails if the underlying comparison # is using signed chars instead of unsigned chars. # (like SunOS's default memcmp thus the compat/memcmp.c) string compare "\x80" "@" @@ -156,10 +156,10 @@ foreach {tname tbody tresult tcode} { {-nocase null strings} { string compare -nocase foo "" } 1 {} - {with length, unequal strings} { + {with length, unequal strings, partial first string} { string compare -length 2 abc abde } 0 {} - {with length, unequal strings} { + {with length, unequal strings 2, full first string} { string compare -length 2 ab abde } 0 {} {with NUL character vs. other ASCII} {