windows-unicode-support-20080509
[openafs.git] / src / WINNT / afsd / cm_utils.c
index 8b26f50..fc1d8fb 100644 (file)
@@ -69,6 +69,9 @@
 #include <osi.h>
 #include <rx/rx.h>
 
+#define STRSAFE_NO_DEPRECATE
+#include <strsafe.h>
+
 
 static osi_once_t cm_utilsOnce;
 
@@ -359,3 +362,276 @@ void cm_FreeSpace(cm_space_t *tsp)
        cm_spaceListp = tsp;
         lock_ReleaseWrite(&cm_utilsLock);
 }
+
+/* This is part of the Microsoft Internationalized Domain Name
+   Mitigation APIs. */
+#include <normalization.h>
+/* Pointer to NormalizeString(), looked up by name at run time in
+   cm_InitNormalization().  Stays NULL until that lookup succeeds. */
+int
+(WINAPI *pNormalizeString)( __in NORM_FORM NormForm,
+                            __in_ecount(cwSrcLength) LPCWSTR lpSrcString,
+                            __in int cwSrcLength,
+                            __out_ecount(cwDstLength) LPWSTR lpDstString,
+                            __in int cwDstLength ) = NULL;
+/* Pointer to IsNormalizedString(), looked up the same way.  It is
+   resolved independently, so it can be NULL even when
+   pNormalizeString is set. */
+BOOL
+(WINAPI *pIsNormalizedString)( __in NORM_FORM NormForm,
+                               __in_ecount(cwLength) LPCWSTR lpString,
+                               __in int cwLength ) = NULL;
+
+/* NLSDLLNAME : DLL from which the two functions above are loaded.
+   NLSMAXCCH  : Size, in characters, of the fixed-size buffers used by
+                the normalization wrappers in this file.
+   NLSERRCCH  : Extra characters added on top of a reported size
+                estimate before a destination buffer is allocated. */
+#define NLSDLLNAME "Normaliz.dll"
+#define NLSMAXCCH  1024
+#define NLSERRCCH  8
+/* Normalization form passed to both entry points above. */
+#define AFS_NORM_FORM NormalizationC
+
+/* \brief Load the string normalization entry points.
+
+   Loads NLSDLLNAME and looks up NormalizeString() and
+   IsNormalizedString() by name, storing the results in
+   pNormalizeString and pIsNormalizedString.  The library is not
+   unloaded here, so the stored pointers remain usable after return.
+
+   Returns 0 immediately if pNormalizeString is already set, and 1 if
+   the library could not be loaded.  Otherwise returns non-zero if
+   both entry points were found and 0 if either one is missing.
+
+   NOTE(review): the final return value has the opposite sense from
+   the two early returns (0 = already initialized, 1 = load failure).
+   It is left unchanged because callers are not visible from here;
+   confirm which convention they expect.
+*/
+long cm_InitNormalization(void)
+{
+    HMODULE h_Nls;
+
+    if (pNormalizeString != NULL)
+        return 0;
+
+    /* LoadLibrary() signals failure by returning NULL, not
+       INVALID_HANDLE_VALUE. */
+    h_Nls = LoadLibrary(NLSDLLNAME);
+    if (h_Nls == NULL) {
+        afsi_log("Can't load " NLSDLLNAME ": LastError=%d", GetLastError());
+        return 1;
+    }
+
+    /* GetProcAddress() returns a generic FARPROC; cast it to the
+       actual signature of each entry point. */
+    pNormalizeString =
+        (int (WINAPI *)(NORM_FORM, LPCWSTR, int, LPWSTR, int))
+        GetProcAddress(h_Nls, "NormalizeString");
+
+    pIsNormalizedString =
+        (BOOL (WINAPI *)(NORM_FORM, LPCWSTR, int))
+        GetProcAddress(h_Nls, "IsNormalizedString");
+
+    return (pNormalizeString && pIsNormalizedString);
+}
+
+/* \brief Normalize a UTF-16 string.
+
+   If the supplied destination buffer is
+   insufficient or NULL, then a new buffer will be allocated to hold
+   the normalized string.
+
+   \param[in] src : Source UTF-16 string.  Length is specified in
+       cch_src.
+
+   \param[in] cch_src : The character count in cch_src is assumed to
+       be tight and include the terminating NULL character if there is
+       one.  If the NULL is absent, the resulting string will not be
+       NULL terminated.
+
+   \param[out] ext_dest : The destination buffer.  Can be NULL, in
+       which case *pcch_dest MUST be NULL.
+
+   \param[in,out] pcch_dest : On entry *pcch_dest contains a count of
+       characters in the destination buffer.  On exit, it will contain
+       a count of characters that were copied to the destination
+       buffer.
+
+   Returns a pointer to the buffer containing the normalized string or
+   NULL if the call was unsuccessful.  If the returned destination
+   buffer is different fron the supplied buffer and non-NULL, it
+   should be freed using free().
+*/
+static wchar_t * 
+NormalizeUtf16String(const wchar_t * src, int cch_src, wchar_t * ext_dest, int *pcch_dest)
+{
+    if ((pIsNormalizedString && (*pIsNormalizedString)(AFS_NORM_FORM, src, cch_src)) ||
+        (!pNormalizeString)) {
+
+        int rv;
+        DWORD gle;
+        int tries = 10;
+        wchar_t * dest;
+        int cch_dest = *pcch_dest;
+
+        dest = ext_dest;
+
+        while (tries-- > 0) {
+
+            rv = (*pNormalizeString)(AFS_NORM_FORM, src, cch_src, dest, cch_dest);
+
+            if (rv <= 0 && (gle = GetLastError()) != ERROR_SUCCESS) {
+#ifdef DEBUG
+                osi_Log1(afsd_logp, "NormalizeUtf16String error = %d", gle);
+#endif
+                if (gle == ERROR_INSUFFICIENT_BUFFER) {
+
+                    /* The buffer wasn't big enough.  We are going to
+                       try allocating one. */
+
+                    cch_dest = (-rv) + NLSERRCCH;
+                    goto cont;
+
+                } else {
+                    /* Something else is wrong */
+                    break;
+                }
+
+            } else if (rv < 0) { /* rv < 0 && gle == ERROR_SUCCESS */
+
+                /* Technically not one of the expected outcomes */
+                break;
+
+            } else {            /* rv > 0 || (rv == 0 && gle == ERROR_SUCCESS) */
+
+                /* Possibly succeeded */
+
+                if (rv == 0) { /* Succeeded and the return string is empty */
+                    *pcch_dest = 0;
+                    return dest;
+                }
+
+                if (cch_dest == 0) {
+                    /* Nope.  We only calculated the required size of the buffer */
+
+                    cch_dest = rv + NLSERRCCH;
+                    goto cont;
+                }
+
+                *pcch_dest = rv;
+
+                /* Success! */
+                return dest;
+            }
+
+        cont:
+            if (dest != ext_dest && dest)
+                free(dest);
+            dest = malloc(cch_dest * sizeof(wchar_t));
+        }
+
+        /* Failed */
+
+        if (dest != ext_dest && dest)
+            free(dest);
+
+        *pcch_dest = 0;
+        return NULL;
+    } else {
+
+        /* No need to or unable to normalize.  Just copy the string */
+        if (SUCCEEDED(StringCchCopyNW(ext_dest, *pcch_dest, src, cch_src))) {
+            *pcch_dest = cch_src;
+            return ext_dest;
+        } else {
+            *pcch_dest = 0;
+            return NULL;
+        }
+    }
+}
+
+/* \brief Normalize a UTF-16 string into a UTF-8 string.
+
+   \param[in] src : Source string.
+
+   \param[in] cch_src : Count of characters in src. If the count includes the
+       NULL terminator, then the resulting string will be NULL
+       terminated.  If it is negative, then src is assumed to be NULL
+       terminated and must be shorter than NLSMAXCCH characters.
+
+   \param[out] adest : Destination buffer.
+
+   \param[in] cch_adest : Number of characters in the destination buffer.
+
+   Returns the number of characters stored into adest. This will
+   include the terminating NULL if cch_src included the terminating
+   NULL or was negative.  If this is 0, then the operation was
+   unsuccessful.
+ */
+long cm_NormalizeUtf16StringToUtf8(const wchar_t * src, int cch_src,
+                                   char * adest, int cch_adest)
+{
+    wchar_t nbuf[NLSMAXCCH];
+    wchar_t * normalized;
+    int cch_norm = NLSMAXCCH;
+    int cch_out;
+
+    if (cch_src < 0) {
+        size_t cch;
+
+        /* The return value is a character count, so an over-long or
+           unterminated source is reported as 0 like every other
+           failure rather than as an error code. */
+        if (FAILED(StringCchLengthW(src, NLSMAXCCH, &cch)))
+            return 0;
+
+        /* Include the terminator so that the output is terminated. */
+        cch_src = (int) cch + 1;
+    }
+
+    normalized = NormalizeUtf16String(src, cch_src, nbuf, &cch_norm);
+    if (normalized == NULL)
+        return 0;
+
+    cch_out = WideCharToMultiByte(CP_UTF8, 0, normalized, cch_norm,
+                                  adest, cch_adest, NULL, NULL);
+
+    /* The helper hands back either nbuf or a heap buffer; only the
+       latter is ours to free. */
+    if (normalized != nbuf)
+        free(normalized);
+
+    return cch_out;
+}
+
+
+/* \brief Normalize a UTF-8 string.
+
+   \param[in] src String to normalize.
+
+   \param[in] cch_src : Count of characters in src.  If this value is
+       -1, then src is assumed to be NULL terminated.  The translated
+       string will be NULL terminated only if this is -1 or the count
+       includes the terminating NULL.
+
+   \param[out] adest : Destination string.
+
+   \param[in] cch_adest : Number of characters in the destination
+       string.
+
+   Returns the number of characters stored into adest or 0 if the call
+   was unsuccessful.
+ */
+long cm_NormalizeUtf8String(const char * src, int cch_src,
+                            char * adest, int cch_adest)
+{
+    wchar_t wsrcbuf[NLSMAXCCH];
+    wchar_t *wnorm;
+    int cch;
+    int cch_norm;
+
+    /* Get some edge cases out first, so we don't have to worry about
+       cch_src being 0 etc. */
+    if (cch_src == 0) {
+        return 0;
+    } else if (*src == '\0') {
+        *adest = '\0';
+        return 1;
+    }
+
+    cch = MultiByteToWideChar(CP_UTF8, 0, src, cch_src * sizeof(char),
+                             wsrcbuf, NLSMAXCCH);
+
+    if (cch == 0) {
+#ifdef DEBUG
+        DebugBreak();
+#endif
+        return 0;
+    }
+
+    cch_norm = 0;
+    wnorm = NormalizeUtf16String(wsrcbuf, cch, NULL, &cch_norm);
+    if (wnorm == NULL) {
+#ifdef DEBUG
+        DebugBreak();
+#endif
+        return 0;
+    }
+
+    cch = WideCharToMultiByte(CP_UTF8, 0, wnorm, cch_norm,
+                              adest, cch_adest * sizeof(char),
+                              NULL, FALSE);
+
+    if (wnorm)
+        free(wnorm);
+
+    return cch;
+}