2 * Copyright (c) 2008 Secure Endpoints Inc.
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37 /* This is part of the Microsoft Internationalized Domain Name
39 #include <normalization.h>
41 /* TODO: All the normalization and conversion code should NUL
42 terminate destination strings. */
/* Function pointer for the "NormalizeString" entry point.  It starts out
   NULL; cm_InitNormalization() looks the symbol up by name with
   GetProcAddress(). */
45 (WINAPI *pNormalizeString)( __in NORM_FORM NormForm,
46 __in_ecount(cwSrcLength) LPCWSTR lpSrcString,
48 __out_ecount(cwDstLength) LPWSTR lpDstString,
49 __in int cwDstLength ) = NULL;
/* Function pointer for the "IsNormalizedString" entry point.  It starts out
   NULL and is looked up the same way as pNormalizeString. */
52 (WINAPI *pIsNormalizedString)( __in NORM_FORM NormForm,
53 __in_ecount(cwLength) LPCWSTR lpString,
54 __in int cwLength ) = NULL;
/* Name of the DLL handed to LoadLibrary() by cm_InitNormalization(). */
57 #define NLSDLLNAME "Normaliz.dll"
/* Element count of the fixed-size stack buffers (wchar_t and char) used by
   the conversion routines in this file; also the upper bound given to
   StringCchLengthW() in cm_NormalizeUtf16StringToUtf8(). */
58 #define NLSMAXCCH 1024
/* Normalization form passed to every call made through pNormalizeString and
   pIsNormalizedString. */
61 #define AFS_NORM_FORM NormalizationC
/* Load NLSDLLNAME with LoadLibrary() and look up the "NormalizeString" and
   "IsNormalizedString" entry points with GetProcAddress().

   The function begins by testing pNormalizeString against NULL; what that
   branch does is not visible here, but it is presumably the "already
   initialized" shortcut.

   Returns non-zero only when both pNormalizeString and pIsNormalizedString
   are non-NULL.

   NOTE(review): LoadLibrary() is documented to return NULL on failure, so
   the comparison against INVALID_HANDLE_VALUE below would not detect a
   failed load.  The GetProcAddress() calls would then be made with a NULL
   module handle.  Confirm and compare against NULL instead. */
63 long cm_InitNormalization(void)
67 if (pNormalizeString != NULL)
70 h_Nls = LoadLibrary(NLSDLLNAME);
71 if (h_Nls == INVALID_HANDLE_VALUE) {
76 (int (WINAPI *)( NORM_FORM, LPCWSTR,
78 GetProcAddress(h_Nls, "NormalizeString");
82 (WINAPI *)( NORM_FORM, LPCWSTR, int ))
83 GetProcAddress(h_Nls, "IsNormalizedString");
85 return (pNormalizeString && pIsNormalizedString);
88 /* \brief Normalize a UTF-16 string.
90 If the supplied destination buffer is insufficient or NULL, then a
91 new buffer will be allocated to hold the normalized string.
93 \param[in] src : Source UTF-16 string. Length is specified in
96 \param[in] cch_src : The character count in cch_src is assumed to
97 be tight and include the terminating NULL character if there is
98 one. If the NULL is absent, the resulting string will not be
101 \param[out] ext_dest : The destination buffer. Can be NULL, in
102 which case *pcch_dest MUST be 0.
104 \param[in,out] pcch_dest : On entry *pcch_dest contains a count of
105 characters in the destination buffer. On exit, it will contain
106 a count of characters that were copied to the destination
109 Returns a pointer to the buffer containing the normalized string or
110 NULL if the call was unsuccessful. If the returned destination
111 buffer is different from the supplied buffer and non-NULL, it
112 should be freed using free().
/* Implementation notes, limited to what the visible lines of the body show:

   - Both normalization entry points are asserted to be non-NULL, and the
     first branch re-checks them at run time as well.
   - cch_src can be recomputed as wcslen(src) + 1 (the guarding condition is
     not visible; presumably a "length unknown" sentinel).
   - Copy path: taken when IsNormalizedString reports that src is already in
     AFS_NORM_FORM, or when pNormalizeString is unavailable.  The source is
     copied verbatim with memcpy() and *pcch_dest is set to cch_src.  A
     buffer of cch_src wide characters is malloc()ed when ext_dest is NULL
     or *pcch_dest is smaller than cch_src.
   - Normalize path: NormalizeString is called inside a bounded retry loop
     (tries-- > 0).  On ERROR_INSUFFICIENT_BUFFER, or when only a size
     estimate came back, cch_dest is recomputed with NLSERRCCH characters of
     slack and a new buffer is malloc()ed.  A previously allocated internal
     buffer (dest != ext_dest) is tested for before that; presumably it is
     released there.

   NOTE(review): wcslen() returns size_t and is narrowed to int here.
   NOTE(review): no NULL check of the malloc() results is visible in this
   excerpt -- confirm against the full body. */
115 NormalizeUtf16String(const wchar_t * src, int cch_src, wchar_t * ext_dest, int *pcch_dest)
118 assert (pNormalizeString != NULL && pIsNormalizedString != NULL);
122 cch_src = wcslen(src) + 1;
124 if ((pIsNormalizedString && (*pIsNormalizedString)(AFS_NORM_FORM, src, cch_src)) ||
125 (!pNormalizeString)) {
127 if (ext_dest == NULL || *pcch_dest < cch_src) {
128 ext_dest = malloc(cch_src * sizeof(wchar_t));
129 *pcch_dest = cch_src;
132 /* No need to or unable to normalize. Just copy the string.
133 Note that the string is not NUL terminated if the source
134 string is not NUL terminated. */
137 memcpy(ext_dest, src, cch_src * sizeof(wchar_t));
138 *pcch_dest = cch_src;
150 int cch_dest = *pcch_dest;
154 while (tries-- > 0) {
156 rv = (*pNormalizeString)(AFS_NORM_FORM, src, cch_src, dest, cch_dest);
158 if (rv <= 0 && (gle = GetLastError()) != ERROR_SUCCESS) {
159 if (gle == ERROR_INSUFFICIENT_BUFFER) {
161 /* The buffer wasn't big enough. We are going to
162 try allocating one. */
164 cch_dest = (-rv) + NLSERRCCH;
168 /* Something else is wrong */
172 } else if (rv < 0) { /* rv < 0 && gle == ERROR_SUCCESS */
174 /* Technically not one of the expected outcomes */
177 } else { /* rv > 0 || (rv == 0 && gle == ERROR_SUCCESS) */
179 /* Possibly succeeded */
181 if (rv == 0) { /* Succeeded and the return string is empty */
187 /* Nope. We only calculated the required size of the buffer */
189 cch_dest = rv + NLSERRCCH;
197 /* Can't NUL terminate */
198 cch_dest = max(rv,cch_dest) + NLSERRCCH;
207 if (dest != ext_dest && dest)
209 dest = malloc(cch_dest * sizeof(wchar_t));
214 if (dest != ext_dest && dest)
222 /*! \brief Normalize a Unicode string into a newly allocated buffer
224 The input string will be normalized using NFC.
226 \param[in] s UTF-16 string to be normalized.
228 \param[in] cch_src The number of characters in the input string. If
229 this is -1, then the input string is assumed to be NUL
232 \param[out] pcch_dest Receives the number of characters copied to
233 the output buffer. Note that the character count is the number
234 of wchar_t characters copied, and not the count of Unicode code
235 points. This includes the terminating NUL if cch_src was -1 or
236 included the terminating NUL.
238 \return A newly allocated buffer holding the normalized string or
239 NULL if the call failed.
/* Wrapper around NormalizeUtf16String().  It passes NULL as the destination
   buffer, so the callee has to allocate the result, and then stores the
   resulting character count in *pcch_dest.

   NOTE(review): NormalizeUtf16String() documents that *pcch_dest MUST be 0
   when the destination is NULL.  The initialisation of cch_dest is not
   visible in this excerpt -- confirm it is zeroed before the call. */
241 cm_normchar_t * cm_NormalizeStringAlloc(const cm_unichar_t * s, int cch_src, int *pcch_dest)
246 r = NormalizeUtf16String(s, cch_src, NULL, &cch_dest);
249 *pcch_dest = cch_dest;
/* Normalize s into the caller supplied buffer dest by delegating to
   NormalizeUtf16String(), with tcch carrying the buffer size in and the
   result length out.

   The comment inside the body shows there is a dedicated path for "the
   supplied buffer was insufficient".  NormalizeUtf16String() returns a
   different, heap-allocated buffer in that case; presumably that buffer is
   released and a failure value returned -- confirm against the full body. */
254 int cm_NormalizeString(const cm_unichar_t * s, int cch_src,
255 cm_normchar_t * dest, int cch_dest)
260 r = NormalizeUtf16String(s, cch_src, dest, &tcch);
263 /* The supplied buffer was insufficient */
271 /*! \brief Convert a UTF-16 string to a UTF-8 string using a newly allocated buffer
273 \param[in] s UTF-16 source string
275 \param[in] cch_src Number of characters in \a s. This can be set to
276 -1 if \a s is NUL terminated.
278 \param[out] pcch_dest Receives a count of characters that were
279 copied to the target buffer.
281 \return A newly allocated buffer holding the UTF-8 string.
/* Two-pass conversion.  The first WideCharToMultiByte() call, made with a
   NULL destination and size 0, measures the UTF-8 byte count.  A buffer of
   cch_dest + 1 bytes is then malloc()ed and the second call performs the
   conversion into it.  *pcch_dest is written on two different paths in the
   visible lines.

   NOTE(review): the buffer has one byte more than the measured length,
   presumably for a terminator written on a line not visible here.
   NOTE(review): neither a NULL check of malloc() nor a check of the second
   WideCharToMultiByte() return value is visible in this excerpt. */
284 cm_utf8char_t * cm_Utf16ToUtf8Alloc(const cm_unichar_t * s, int cch_src, int *pcch_dest)
287 cm_utf8char_t * dest;
289 cch_dest = WideCharToMultiByte(CP_UTF8, 0, s, cch_src, NULL, 0, NULL, FALSE);
293 *pcch_dest = cch_dest;
297 dest = malloc((cch_dest + 1) * sizeof(cm_utf8char_t));
299 WideCharToMultiByte(CP_UTF8, 0, s, cch_src, dest, cch_dest, NULL, FALSE);
303 *pcch_dest = cch_dest;
308 int cm_Utf16ToUtf8(const cm_unichar_t * src, int cch_src,
309 cm_utf8char_t * dest, int cch_dest)
311 return WideCharToMultiByte(CP_UTF8, 0, src, cch_src, dest, cch_dest, NULL, FALSE);
/* Copy a UTF-16 string into dest without any conversion.  Two copy
   strategies are visible:

   - StringCchCopyW() followed by "wcslen(dest) + 1", i.e. a NUL terminated
     copy whose reported length includes the terminator;
   - memcpy() of min(cch_src, cch_dest) characters, i.e. a counted copy that
     is truncated to the destination size.

   The condition selecting between them is not visible; presumably it is
   the "cch_src == -1" sentinel used elsewhere in this file.

   NOTE(review): the StringCchCopyW() result is not checked on the visible
   line, so a truncated copy would be reported like a successful one. */
314 int cm_Utf16ToUtf16(const cm_unichar_t * src, int cch_src,
315 cm_unichar_t * dest, int cch_dest)
318 StringCchCopyW(dest, cch_dest, src);
319 return wcslen(dest) + 1;
321 int cch_conv = min(cch_src, cch_dest);
322 memcpy(dest, src, cch_conv * sizeof(cm_unichar_t));
327 /* \brief Normalize a UTF-16 string into a UTF-8 string.
329 \param[in] src : Source string.
331 \param[in] cch_src : Count of characters in src. If the count includes the
332 NULL terminator, then the resulting string will be NULL
333 terminated. If it is -1, then src is assumed to be NULL
336 \param[out] adest : Destination buffer.
338 \param[in] cch_adest : Number of characters in the destination buffer.
340 Returns the number of characters stored into adest. This will
341 include the terminating NULL if cch_src included the terminating
342 NULL or was -1. If this is 0, then the operation was unsuccessful.
/* Implementation notes, limited to what the visible lines show:

   - StringCchLengthW() is called with a bound of NLSMAXCCH characters.  Its
     failure is tested for, so a source without a NUL in the first NLSMAXCCH
     characters reaches that failure branch.
   - The string is normalized with NormalizeUtf16String() into the stack
     buffer nbuf (NLSMAXCCH characters).  The callee may return a different,
     heap-allocated buffer instead.
   - The normalized text is converted to UTF-8 with WideCharToMultiByte()
     and cch_adest is overwritten with its return value.
   - The "normalized != nbuf && normalized" test singles out a
     heap-allocated result; presumably it is free()d on the next line. */
344 long cm_NormalizeUtf16StringToUtf8(const wchar_t * src, int cch_src,
345 char * adest, int cch_adest)
350 if (FAILED(StringCchLengthW(src, NLSMAXCCH, &cch)))
357 wchar_t nbuf[NLSMAXCCH];
358 wchar_t * normalized;
359 int cch_norm = NLSMAXCCH;
361 normalized = NormalizeUtf16String(src, cch_src, nbuf, &cch_norm);
363 cch_adest = WideCharToMultiByte(CP_UTF8, 0, normalized, cch_norm,
364 adest, cch_adest, NULL, 0);
366 if (normalized != nbuf && normalized)
/* Marker bit for entries of the sanitization table.  0x1000 lies above the
   byte range 0x00-0xff, so it never collides with the character value
   stored in the low bits of an entry. */
379 #define ESCVAL 0x1000
/* Build a table entry for character c that carries the ESCVAL marker. */
380 #define Esc(c) (ESCVAL + (short)(c))
/* Non-zero when table entry c carries the ESCVAL marker. */
381 #define IS_ESCAPED(c) (((c) & ESCVAL) == ESCVAL)
383 /* \brief Character sanitization map for CP-1252
385 The following map indicates which characters should be escaped in
386 the CP-1252 character map. Characters that are documented as
387 illegal characters in a file name are marked as escaped. Escaped
388 characters are marked using the ::Esc macro defined above. The
389 following exceptions apply:
391 - Path delimiters '\\' and '/' are NOT escaped because the
392 sanitization map applies to paths. While those characters are
393 illegal in filenames, they are legal in paths.
395 - Wildcard characters '*' and '?' ARE escaped. The document
396 referred below does not specify these characters as invalid.
397 Since no other escape mechanism exists, names containing
398 wildcards are indistinguishable from actual wildcards used in SMB
401 - Reserved names are not and cannot be represented in this map.
404 CON, PRN, AUX, NUL, COM1, COM2, COM3, COM4, COM5, COM6, COM7,
405 COM8, COM9, LPT1, LPT2, LPT3, LPT4, LPT5, LPT6, LPT7, LPT8, LPT9,
408 - Characters 0x80, 0x81, 0x8d, 0x8e, 0x8f, 0x90, 0x9d, 0x9e, 0x9f
409 are also escaped because they are unused in CP-1252 and hence
410 cannot be converted to a Unicode string.
412 Reserved names with extensions are also invalid. (i.e. NUL.txt)
414 \note The only bit we are actually interested in from the following
415 table is the ESCVAL bit. However, the characters themselves are
416 included for ease of maintenance.
418 \see "Naming a File" topic in the Windows SDK.
/* 256-entry lookup table, one entry per byte value and 16 entries per row.
   An entry written as Esc(x) has the ESCVAL bit set and marks byte x as
   "must be escaped"; a plain entry is copied through unchanged.

   Escaped in this table: 0x00-0x1f, '"', '*', ':', '<', '>', '?', '|',
   0x7f, 0x80, 0x81, 0x8d, 0x8e, 0x8f, 0x90, 0x9d and 0x9e.

   NOTE(review): the descriptive comment preceding this table lists 0x9f as
   escaped, but the entry for 0x9f below is plain.  One of the two is wrong.
   NOTE(review): 0x80, 0x8e and 0x9e appear to be assigned characters in
   CP-1252 (euro sign, Z/z with caron), unlike 0x81, 0x8d, 0x8f, 0x90 and
   0x9d.  Confirm whether escaping them is intended. */
420 static const short sanitized_escapes_1252[] = {
421 Esc(0x00),Esc(0x01),Esc(0x02),Esc(0x03),Esc(0x04),Esc(0x05),Esc(0x06),Esc(0x07),
422 Esc(0x08),Esc(0x09),Esc(0x0a),Esc(0x0b),Esc(0x0c),Esc(0x0d),Esc(0x0e),Esc(0x0f),
423 Esc(0x10),Esc(0x11),Esc(0x12),Esc(0x13),Esc(0x14),Esc(0x15),Esc(0x16),Esc(0x17),
424 Esc(0x18),Esc(0x19),Esc(0x1a),Esc(0x1b),Esc(0x1c),Esc(0x1d),Esc(0x1e),Esc(0x1f),
425 ' ','!',Esc('"'),'#','$','%','&','\'','(',')',Esc('*'),'+',',','-','.','/',
426 '0','1','2','3','4','5','6','7','8','9',Esc(':'),';',Esc('<'),'=',Esc('>'),Esc('?'),
427 '@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
428 'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_',
429 '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
430 'p','q','r','s','t','u','v','w','x','y','z','{',Esc('|'),'}','~',Esc(0x7f),
431 Esc(0x80),Esc(0x81),0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,Esc(0x8d),Esc(0x8e),Esc(0x8f),
432 Esc(0x90),0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,Esc(0x9d),Esc(0x9e),0x9f,
433 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
434 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
435 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
436 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
437 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
438 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
/* Copy src into odest, rewriting the bytes that sanitized_escapes_1252
   marks as escaped.

   The loop runs while source characters remain (cch_src > 0), the current
   byte is not NUL, and destination space remains (cch_dest > 0).  For an
   escaped byte, the visible lines emit its two lower-case hexadecimal
   digits, high nibble first.  Any prefix character written before them is
   on a line not visible here.

   After the loop there is a branch for "source and destination space both
   remain", which is the case when the loop stopped on a NUL byte;
   presumably it writes a terminator.

   Returns the number of bytes written, computed as dest - odest.

   NOTE(review): the table is indexed with *src, a plain char.  Where char
   is signed, every byte >= 0x80 yields a negative index and reads before
   the start of the table.  Confirm and index through an unsigned char. */
441 static int sanitize_bytestring(const char * src, int cch_src,
442 char * odest, int cch_dest)
445 while (cch_src > 0 && *src && cch_dest > 0) {
449 rc = sanitized_escapes_1252[*src];
450 if (IS_ESCAPED(rc)) {
451 static const char hex[] =
452 {'0','1','2','3','4','5','6','7',
453 '8','9','a','b','c','d','e','f'};
461 *dest++ = hex[(((int)*src) >> 4) & 0x0f];
462 *dest++ = hex[(((int)*src) & 0x0f)];
474 if (cch_src > 0 && cch_dest > 0) {
478 return (int)(dest - odest);
/* Convert a UTF-8 byte string to UTF-16 and normalize the result into the
   caller supplied buffer dest.  Visible steps:

   1. Edge cases are handled first; one of them is an empty source
      (*src == '\0').
   2. cch_src can be recomputed as strlen(src) + 1 (the guarding condition
      is not visible; presumably "cch_src == -1").
   3. The source is converted as strict UTF-8 (MB_ERR_INVALID_CHARS) into
      the stack buffer wsrcbuf, which holds NLSMAXCCH wide characters.
   4. When the last error is ERROR_NO_UNICODE_TRANSLATION, the source is
      assumed to be CP-1252 instead.  It is passed through
      sanitize_bytestring() into an NLSMAXCCH-byte buffer and converted
      again with code page 1252.  A sanitized length of 0 is tested for
      separately.
   5. The wide string is normalized with NormalizeUtf16String() into dest.
   6. A branch labelled "the buffer was insufficient" tests
      dest != NULL && cch_dest > 1.

   "cch_src * sizeof(char)" is numerically just cch_src, since sizeof(char)
   is 1.

   NOTE(review): wsrcbuf and sanitized are fixed at NLSMAXCCH elements, so
   longer inputs cannot be converted by this routine.  How that case is
   reported is not visible here. */
485 long cm_NormalizeUtf8StringToUtf16(const char * src, int cch_src,
486 wchar_t * dest, int cch_dest)
488 wchar_t wsrcbuf[NLSMAXCCH];
493 /* Get some edge cases out first, so we don't have to worry about
494 cch_src being 0 etc. */
497 } else if (*src == '\0') {
504 cch_src = strlen(src) + 1;
507 cch = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src,
508 cch_src * sizeof(char), wsrcbuf, NLSMAXCCH);
511 if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) {
512 char sanitized[NLSMAXCCH];
515 /* If src doesn't have a unicode translation, then it
516 wasn't valid UTF-8. In this case, we assume that src
517 is CP-1252 and then try to convert again. But before
518 that, we use a translation table to "sanitize" the
521 cch_sanitized = sanitize_bytestring(src, cch_src, sanitized,
522 sizeof(sanitized)/sizeof(char));
524 if (cch_sanitized == 0) {
531 cch = MultiByteToWideChar(1252, 0, sanitized,
532 cch_sanitized * sizeof(char), wsrcbuf, NLSMAXCCH);
534 /* Well, that didn't work either. Something is very wrong. */
546 wnorm = NormalizeUtf16String(wsrcbuf, cch, dest, &cch_norm);
555 /* The buffer was insufficient */
556 if (dest != NULL && cch_dest > 1) {
/* Allocating variant of the UTF-8 to normalized UTF-16 conversion.  The
   visible steps match cm_NormalizeUtf8StringToUtf16():

   - strict UTF-8 conversion into the stack buffer wsrcbuf (NLSMAXCCH wide
     characters);
   - on ERROR_NO_UNICODE_TRANSLATION, a fallback that sanitizes the bytes
     with sanitize_bytestring() and converts them as code page 1252.

   The difference is that NormalizeUtf16String() is called with a NULL
   destination, so the callee allocates the result buffer.  The resulting
   length is stored in *pcch_dest.

   NOTE(review): NormalizeUtf16String() documents that *pcch_dest MUST be 0
   when the destination is NULL.  The initialisation of cch_norm is not
   visible in this excerpt. */
567 cm_normchar_t *cm_NormalizeUtf8StringToUtf16Alloc(const cm_utf8char_t * src, int cch_src,
570 wchar_t wsrcbuf[NLSMAXCCH];
575 /* Get some edge cases out first, so we don't have to worry about
576 cch_src being 0 etc. */
579 } else if (*src == '\0') {
584 cch_src = strlen(src) + 1;
587 cch = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src,
588 cch_src * sizeof(char), wsrcbuf, NLSMAXCCH);
591 if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) {
592 char sanitized[NLSMAXCCH];
595 /* If src doesn't have a unicode translation, then it
596 wasn't valid UTF-8. In this case, we assume that src
597 is CP-1252 and then try to convert again. But before
598 that, we use a translation table to "sanitize" the
601 cch_sanitized = sanitize_bytestring(src, cch_src, sanitized,
602 sizeof(sanitized)/sizeof(char));
604 if (cch_sanitized == 0) {
611 cch = MultiByteToWideChar(1252, 0, sanitized,
612 cch_sanitized * sizeof(char), wsrcbuf, NLSMAXCCH);
614 /* Well, that didn't work either. Something is very wrong. */
626 wnorm = NormalizeUtf16String(wsrcbuf, cch, NULL, &cch_norm);
635 *pcch_dest = cch_norm;
/* Convert a UTF-8 byte string to UTF-16 directly into the caller supplied
   buffer dest (cch_dest wide characters).  No normalization call is visible
   in this routine.

   cch_src can be recomputed as strlen(src) + 1 (the guarding condition is
   not visible; presumably "cch_src == -1").  The source is first converted
   as strict UTF-8 (MB_ERR_INVALID_CHARS).  When the last error is
   ERROR_NO_UNICODE_TRANSLATION, it is sanitized with sanitize_bytestring()
   into an NLSMAXCCH-byte stack buffer and converted again as code page
   1252.

   NOTE(review): the sanitized buffer is fixed at NLSMAXCCH bytes.  Escaping
   can lengthen the text, so long or heavily escaped inputs may not fit.
   Confirm how a 0 return from sanitize_bytestring() is reported to the
   caller. */
640 int cm_Utf8ToUtf16(const cm_utf8char_t * src, int cch_src,
641 cm_unichar_t * dest, int cch_dest)
646 cch_src = strlen(src) + 1;
649 cch = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src,
650 cch_src * sizeof(char), dest, cch_dest);
653 if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) {
654 char sanitized[NLSMAXCCH];
657 /* If src doesn't have a unicode translation, then it
658 wasn't valid UTF-8. In this case, we assume that src
659 is CP-1252 and then try to convert again. But before
660 that, we use a translation table to "sanitize" the
663 cch_sanitized = sanitize_bytestring(src, cch_src, sanitized,
664 sizeof(sanitized)/sizeof(char));
666 if (cch_sanitized == 0) {
673 cch = MultiByteToWideChar(1252, 0, sanitized,
674 cch_sanitized * sizeof(char), dest, cch_dest);
676 /* Well, that didn't work either. Something is very wrong. */
/* Allocating UTF-8 to UTF-16 conversion.  Visible structure:

   - A sizing call to MultiByteToWideChar() (NULL destination, size 0) with
     strict UTF-8 decoding measures the required length.
   - When the last error is ERROR_NO_UNICODE_TRANSLATION, the source is
     sanitized with sanitize_bytestring() and measured again as code page
     1252.  In that branch, cch + 1 wide characters are malloc()ed and the
     sanitized text is converted into the new buffer.
   - Otherwise cch + 1 wide characters are malloc()ed and the original
     source is converted as strict UTF-8.

   ustr starts out NULL, so presumably NULL is what failure paths return.

   NOTE(review): the extra element in "cch + 1" is presumably for a
   terminator written on a line not visible here.  No malloc() NULL check
   is visible in this excerpt. */
693 cm_unichar_t * cm_Utf8ToUtf16Alloc(const cm_utf8char_t * src, int cch_src, int *pcch_dest)
695 cm_unichar_t * ustr = NULL;
699 cch_src = strlen(src) + 1;
702 cch = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src,
703 cch_src * sizeof(char), NULL, 0);
706 if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) {
707 char sanitized[NLSMAXCCH];
710 /* If src doesn't have a unicode translation, then it
711 wasn't valid UTF-8. In this case, we assume that src
712 is CP-1252 and then try to convert again. But before
713 that, we use a translation table to "sanitize" the
716 cch_sanitized = sanitize_bytestring(src, cch_src, sanitized,
717 sizeof(sanitized)/sizeof(char));
719 if (cch_sanitized == 0) {
726 cch = MultiByteToWideChar(1252, 0, sanitized,
727 cch_sanitized * sizeof(char), NULL, 0);
729 /* Well, that didn't work either. Something is very wrong. */
736 ustr = malloc((cch + 1) * sizeof(wchar_t));
738 cch = MultiByteToWideChar(1252, 0, sanitized,
739 cch_sanitized * sizeof(char), ustr, cch);
746 ustr = malloc((cch + 1) * sizeof(wchar_t));
748 cch = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src,
749 cch_src * sizeof(char), ustr, cch);
761 /* \brief Normalize a UTF-8 string.
763 \param[in] src String to normalize.
765 \param[in] cch_src : Count of characters in src. If this value is
766 -1, then src is assumed to be NULL terminated. The translated
767 string will be NULL terminated only if this is -1 or the count
768 includes the terminating NULL.
770 \param[out] adest : Destination string. Only considered valid if
771 \a cch_adest is non-zero.
773 \param[in] cch_adest : Number of characters in the destination
774 string. If this is zero, then the return value is the number
777 \return If \a cch_adest is non-zero, then the return value is the
778 number of bytes stored into adest. If \a cch_adest is zero,
779 then the return value is the number of bytes required. In both
780 cases, the return value is 0 if the call was unsuccessful.
/* Implementation notes, limited to what the visible lines show.  The string
   makes a round trip through UTF-16:

   1. src is converted as strict UTF-8 into the stack buffer wsrcbuf
      (NLSMAXCCH wide characters).  When the last error is
      ERROR_NO_UNICODE_TRANSLATION, the bytes are sanitized with
      sanitize_bytestring() and converted again as code page 1252.
   2. The wide string is normalized by NormalizeUtf16String() with a NULL
      destination, so wnorm is a buffer allocated by the callee.
   3. wnorm is converted back to UTF-8 into adest with
      WideCharToMultiByte().

   NOTE(review): wnorm is heap allocated by step 2.  Its release is not
   visible in this excerpt -- confirm every exit path frees it. */
782 long cm_NormalizeUtf8String(const char * src, int cch_src,
783 char * adest, int cch_adest)
785 wchar_t wsrcbuf[NLSMAXCCH];
790 /* Get some edge cases out first, so we don't have to worry about
791 cch_src being 0 etc. */
794 } else if (*src == '\0') {
801 cch_src = strlen(src) + 1;
804 cch = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src,
805 cch_src * sizeof(char), wsrcbuf, NLSMAXCCH);
808 if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) {
809 char sanitized[NLSMAXCCH];
812 /* If src doesn't have a unicode translation, then it
813 wasn't valid UTF-8. In this case, we assume that src
814 is CP-1252 and then try to convert again. But before
815 that, we use a translation table to "sanitize" the
818 cch_sanitized = sanitize_bytestring(src, cch_src, sanitized,
819 sizeof(sanitized)/sizeof(char));
821 if (cch_sanitized == 0) {
828 cch = MultiByteToWideChar(1252, 0, sanitized,
829 cch_sanitized * sizeof(char), wsrcbuf, NLSMAXCCH);
831 /* Well, that didn't work either. Something is very wrong. */
843 wnorm = NormalizeUtf16String(wsrcbuf, cch, NULL, &cch_norm);
851 cch = WideCharToMultiByte(CP_UTF8, 0, wnorm,
852 cch_norm, adest, cch_adest * sizeof(char),
861 /*! \brief Case insensitive comparison with specific length
863 \param[in] str1 First string to compare. Assumed to be encoded in UTF-8.
865 \param[in] str2 Second string to compare. Assumed to be encoded in UTF-8.
867 \param[in] n Max byte count.
/* Implementation notes: both inputs are converted as strict UTF-8 into
   stack buffers of NLSMAXCCH wide characters and then compared with
   CompareStringW(LOCALE_INVARIANT, NORM_IGNORECASE).  NULL pointers are
   handled before any conversion.

   NOTE(review): n is passed to MultiByteToWideChar() as the byte count.  A
   positive count makes the API process exactly that many bytes, so a
   string shorter than n would be read past its terminator.  A count that
   cuts a multi-byte sequence in half fails under MB_ERR_INVALID_CHARS.
   Confirm how callers choose n and what the failure paths return. */
870 int cm_strnicmp_utf8(const char * str1, const char * str2, int n)
872 wchar_t wstr1[NLSMAXCCH];
875 wchar_t wstr2[NLSMAXCCH];
878 /* first check for NULL pointers (assume NULL < "") */
884 } else if (str2 == NULL) {
888 len1 = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str1, n, wstr1, NLSMAXCCH);
896 len2 = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str2, n, wstr2, NLSMAXCCH);
904 rv = CompareStringW(LOCALE_INVARIANT, NORM_IGNORECASE, wstr1, len1, wstr2, len2);
/* Case insensitive comparison of two UTF-16 strings, with each length
   measured by StringCchLengthW() using len as the upper bound.  NULL
   pointers are handled first.  The measured prefixes are compared with
   CompareStringW(LOCALE_INVARIANT, NORM_IGNORECASE).

   NOTE(review): StringCchLengthW() is documented to fail when no NUL is
   found within the first len characters.  A string longer than len would
   then reach the FAILED() branch rather than being compared by its first
   len characters.  Confirm what that branch does. */
915 int cm_strnicmp_utf16(const cm_unichar_t * str1, const cm_unichar_t * str2, int len)
921 /* first check for NULL pointers */
927 } else if (str2 == NULL) {
931 if (FAILED(StringCchLengthW(str1, len, &cch1)))
934 if (FAILED(StringCchLengthW(str2, len, &cch2)))
937 rv = CompareStringW(LOCALE_INVARIANT, NORM_IGNORECASE, str1, cch1, str2, cch2);
/* Case insensitive comparison of two NUL terminated UTF-16 strings.  NULL
   pointers are handled first.  Otherwise the strings are compared with
   CompareStringW(LOCALE_INVARIANT, NORM_IGNORECASE), passing -1 for both
   lengths so the API determines them from the terminators.  How rv is
   mapped to the function's return value is not visible here. */
948 int cm_stricmp_utf16(const cm_unichar_t * str1, const cm_unichar_t * str2)
952 /* first check for NULL pointers */
958 } else if (str2 == NULL) {
962 rv = CompareStringW(LOCALE_INVARIANT, NORM_IGNORECASE, str1, -1, str2, -1);
/* Lower-case a NUL terminated UTF-16 string in place using
   LCMapStringW(LOCALE_INVARIANT, LCMAP_LOWERCASE).  The same pointer is
   passed as source and destination.  len is wcslen(str) + 1, so the
   terminator is part of the mapped range.  Handling of rv and the return
   value are on lines not visible here. */
973 cm_unichar_t *cm_strlwr_utf16(cm_unichar_t * str)
978 len = wcslen(str) + 1;
979 rv = LCMapStringW(LOCALE_INVARIANT, LCMAP_LOWERCASE, str, len, str, len);
/* Upper-case a NUL terminated UTF-16 string in place using
   LCMapStringW(LOCALE_INVARIANT, LCMAP_UPPERCASE).  The same pointer is
   passed as source and destination.  len is wcslen(str) + 1, so the
   terminator is part of the mapped range.  Handling of rv and the return
   value are on lines not visible here. */
989 cm_unichar_t *cm_strupr_utf16(cm_unichar_t * str)
994 len = wcslen(str) + 1;
995 rv = LCMapStringW(LOCALE_INVARIANT, LCMAP_UPPERCASE, str, len, str, len);
/* Case insensitive comparison of two NUL terminated UTF-8 strings.  NULL
   pointers are handled first.  Each string is then converted as strict
   UTF-8 (MB_ERR_INVALID_CHARS, length -1) into a stack buffer of NLSMAXCCH
   wide characters, and the results are compared with
   CompareStringW(LOCALE_INVARIANT, NORM_IGNORECASE).

   With a source length of -1, the lengths returned by
   MultiByteToWideChar() include the terminating NUL, so len1 and len2
   cover the terminators as well.

   NOTE(review): strings that are not valid UTF-8, or that need more than
   NLSMAXCCH wide characters, make the conversion fail.  The handling of
   that case is not visible in this excerpt. */
1006 int cm_stricmp_utf8(const char * str1, const char * str2)
1008 wchar_t wstr1[NLSMAXCCH];
1011 wchar_t wstr2[NLSMAXCCH];
1014 /* first check for NULL pointers */
1020 } else if (str2 == NULL) {
1024 len1 = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str1, -1, wstr1, NLSMAXCCH);
1032 len2 = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str2, -1, wstr2, NLSMAXCCH);
1040 rv = CompareStringW(LOCALE_INVARIANT, NORM_IGNORECASE, wstr1, len1, wstr2, len2);
/* Upper-case a UTF-16 buffer of cbstr BYTES.  The whole buffer
   (cbstr / sizeof(wchar_t) characters) is mapped with
   LCMapStringW(LOCALE_INVARIANT, LCMAP_UPPERCASE) into the stack buffer
   wstrd (NLSMAXCCH characters).  The result is then copied back over wstr
   with StringCbCopyW(), bounded by cbstr.

   NOTE(review): the mapped range is the full buffer, not the string
   length, so characters after the terminator are processed as well.  A
   buffer larger than NLSMAXCCH characters does not fit in wstrd.
   NOTE(review): neither the LCMapStringW() result (stored in len) nor the
   StringCbCopyW() result is examined on the visible lines.  If the mapping
   fails, wstrd may be copied back uninitialised -- confirm. */
1052 wchar_t * strupr_utf16(wchar_t * wstr, size_t cbstr)
1054 wchar_t wstrd[NLSMAXCCH];
1057 len = cbstr / sizeof(wchar_t);
1058 len = LCMapStringW(LOCALE_INVARIANT, LCMAP_UPPERCASE, wstr, len, wstrd, NLSMAXCCH);
1059 StringCbCopyW(wstr, cbstr, wstrd);
/* Upper-case a NUL terminated UTF-8 string in place; cbstr is the size of
   the str buffer in bytes.  Visible steps:

   1. str is converted as strict UTF-8 to UTF-16 in wstr (NLSMAXCCH
      characters).  With a source length of -1, the returned len includes
      the terminator.
   2. wstr is mapped to upper case into wstrd with
      LCMapStringW(LOCALE_INVARIANT, LCMAP_UPPERCASE).
   3. wstrd is converted back to UTF-8 into str, bounded by cbstr.

   NOTE(review): upper-casing can change the UTF-8 byte length, so step 3
   can fail when the result no longer fits in cbstr bytes.  The handling of
   len after each step is not visible in this excerpt.
   NOTE(review): cbstr is a size_t passed where the API takes an int. */
1065 char * strupr_utf8(char * str, size_t cbstr)
1067 wchar_t wstr[NLSMAXCCH];
1068 wchar_t wstrd[NLSMAXCCH];
1071 len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str, -1, wstr, NLSMAXCCH);
1075 len = LCMapStringW(LOCALE_INVARIANT, LCMAP_UPPERCASE, wstr, len, wstrd, NLSMAXCCH);
1077 len = WideCharToMultiByte(CP_UTF8, 0, wstrd, -1, str, cbstr, NULL, FALSE);
/*! \brief Advance to the start of the next UTF-8 encoded character

    The lead byte at \a c determines how many continuation bytes are
    expected: none for ASCII, one for 0xC0-0xDF, two for 0xE0-0xEF and
    three for 0xF0-0xFF.  A byte that is not a valid lead byte (a stray
    continuation byte) is stepped over on its own.

    Continuation bytes are only consumed while they really are
    continuation bytes (10xxxxxx).  A truncated or malformed sequence
    therefore no longer makes the function step over the following
    character or past the terminating NUL.  For well-formed input the
    result is the same as stepping by the length implied by the lead
    byte.

    \param[in] c Pointer to the first byte of a character in a NUL
        terminated UTF-8 string.

    \return Pointer to the byte following the character at \a c.  As with
        any single-byte character, a NUL at \a c is stepped over.
 */
char * char_next_utf8(const char * c)
{
    const unsigned char * p = (const unsigned char *) c;
    int n_trail;

    if ((*p & 0x80) == 0)
        return (char *) c + 1;

    switch (*p & 0xf0) {
    case 0xc0:
    case 0xd0:
        n_trail = 1;
        break;

    case 0xe0:
        n_trail = 2;
        break;

    case 0xf0:
        n_trail = 3;
        break;

    default:
        /* 0x80-0xBF: continuation byte in lead position */
        n_trail = 0;
        break;
    }

    p++;
    /* A NUL never matches 10xxxxxx, so this cannot run past the
       terminator. */
    while (n_trail-- > 0 && (*p & 0xc0) == 0x80)
        p++;

    return (char *) p;
}
/* Step backwards to the start of the previous UTF-8 encoded character.

   CH reads the byte at c as an unsigned char.  The visible lines first test
   whether that byte is ASCII (high bit clear), then loop while it is a
   continuation byte (top two bits are 10).  The pointer adjustments
   themselves are on lines not visible here.

   NOTE(review): no lower bound is visible in the loop condition.  On
   malformed input made only of continuation bytes, the scan could run
   before the start of the buffer -- confirm against the full body. */
1108 char * char_prev_utf8(const char * c)
1110 #define CH (*((const unsigned char *)c))
1114 if ((CH & 0x80) == 0)
1117 while ((CH & 0xc0) == 0x80)
/*! \brief Advance to the start of the next UTF-16 encoded character

    A high surrogate (0xD800-0xDBFF) that is followed by a low surrogate
    (0xDC00-0xDFFF) forms one character of two code units.  Everything
    else, including an unpaired high surrogate, is stepped over as a
    single code unit.  Checking the trail unit keeps an unpaired high
    surrogate at the end of a string from moving the result past the
    terminating NUL.

    \param[in] c Pointer to the first code unit of a character in a NUL
        terminated UTF-16 string.

    \return Pointer to the code unit following the character at \a c.
 */
wchar_t * char_next_utf16(const wchar_t * c)
{
    unsigned short lead = (unsigned short) c[0];

    if (lead >= 0xd800 && lead <= 0xdbff) {
        /* c[0] is not the terminator here, so c[1] is still inside a NUL
           terminated string. */
        unsigned short trail = (unsigned short) c[1];

        if (trail >= 0xdc00 && trail <= 0xdfff)
            return (wchar_t *) c + 2;
    }

    return (wchar_t *) c + 1;
}
/*! \brief Step back to the start of the previous UTF-16 encoded character

    The code unit immediately before \a c is examined.  If it is a low
    surrogate (0xDC00-0xDFFF) that is preceded by a high surrogate
    (0xD800-0xDBFF), the two units form one character and the result
    points at the high surrogate.  Otherwise the result points at the
    single preceding unit.  A lone low surrogate therefore no longer makes
    the function step back over an unrelated character.

    \param[in] c Pointer into a UTF-16 string, with at least one code
        unit before it.  When that unit is a low surrogate, the unit
        before it is read as well, so it must belong to the same buffer.

    \return Pointer to the first code unit of the character preceding
        \a c.
 */
wchar_t * char_prev_utf16(const wchar_t * c)
{
    unsigned short unit = (unsigned short) *(--c);

    if (unit >= 0xdc00 && unit <= 0xdfff) {
        unsigned short before = (unsigned short) c[-1];

        if (before >= 0xd800 && before <= 0xdbff)
            return (wchar_t *) (c - 1);
    }

    return (wchar_t *) c;
}
/*! \brief Find the start of the UTF-16 encoded character containing a code unit

    If \a c points at a low surrogate (0xDC00-0xDFFF) that is preceded by
    a high surrogate (0xD800-0xDBFF), \a c is the second half of a pair
    and the result points at the high surrogate.  In every other case
    \a c already points at the start of a character and is returned
    unchanged.  A lone low surrogate is treated as a character of its own
    instead of being attached to whatever precedes it.

    \param[in] c Pointer to a code unit inside a UTF-16 string.  When
        that unit is a low surrogate, the unit before it is read, so it
        must belong to the same buffer.

    \return Pointer to the first code unit of the character that
        contains \a c.
 */
wchar_t * char_this_utf16(const wchar_t * c)
{
    unsigned short unit = (unsigned short) *c;

    if (unit >= 0xdc00 && unit <= 0xdfff) {
        unsigned short before = (unsigned short) c[-1];

        if (before >= 0xd800 && before <= 0xdbff)
            return (wchar_t *) (c - 1);
    }

    return (wchar_t *) c;
}