Logo Search packages:      
Sourcecode: icu version File versions  Download package

U_STABLE UChar32* U_EXPORT2 u_strToUTF32WithSub ( UChar32 *  dest,
int32_t  destCapacity,
int32_t *  pDestLength,
const UChar *  src,
int32_t  srcLength,
UChar32  subchar,
int32_t *  pNumSubstitutions,
UErrorCode *  pErrorCode
)

Convert a UTF-16 string to UTF-32. If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.

Same as u_strToUTF32() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().

Parameters:
dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity The size of the buffer (number of UChar32s). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
pDestLength A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
src The original source string
srcLength The length of the original string. If -1, then src must be zero-terminated.
subchar The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. Set to 0 if no substitutions occur or subchar<0. pNumSubstitutions can be NULL.
pErrorCode Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns:
The pointer to destination buffer.
See also:
u_strToUTF32

u_strFromUTF32WithSub

Stable: ICU 4.2

Definition at line 148 of file ustrtrns.c.

References NULL, U16_GET_SUPPLEMENTARY, U16_IS_SURROGATE, U16_IS_SURROGATE_LEAD, U16_IS_TRAIL, U_FAILURE, U_ILLEGAL_ARGUMENT_ERROR, U_INVALID_CHAR_FOUND, and U_IS_SURROGATE.

                                     {
    /*
     * Convert the UTF-16 string src into UTF-32 code points in dest.
     *
     * - Returns NULL immediately if *pErrorCode already indicates a failure.
     * - Returns NULL with U_ILLEGAL_ARGUMENT_ERROR for inconsistent arguments
     *   (NULL src with non-zero length, srcLength < -1, negative capacity,
     *   NULL dest with positive capacity, subchar above U+10FFFF or a
     *   surrogate code point).
     * - An unpaired surrogate is replaced by subchar when subchar >= 0;
     *   otherwise the function sets U_INVALID_CHAR_FOUND and returns NULL
     *   without updating *pDestLength.
     * - On success *pDestLength (if not NULL) receives the full required
     *   length, even when dest was too small, and *pNumSubstitutions
     *   (if not NULL) receives the number of substitutions made.
     * - Returns dest on success.
     */
    const UChar *srcLimit;
    UChar32 ch;
    UChar ch2;
    UChar32 *destLimit;
    UChar32 *pDest;
    int32_t reqLength;
    int32_t numSubstitutions;

    /* args check */
    if(U_FAILURE(*pErrorCode)){
        return NULL;
    }
    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
    ) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if(pNumSubstitutions != NULL) {
        *pNumSubstitutions = 0;
    }

    pDest = dest;
    /*
     * dest is allowed to be NULL when pre-flighting (destCapacity==0).
     * Pointer arithmetic on a null pointer is undefined in C, so only
     * compute the limit from a real buffer.
     */
    destLimit = (dest != NULL) ? (dest + destCapacity) : NULL;
    reqLength = 0;       /* counts only the units that did not fit into dest */
    numSubstitutions = 0;

    if(srcLength < 0) {
        /* simple loop for conversion of a NUL-terminated BMP string */
        while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) {
            ++src;
            if(pDest < destLimit) {
                *pDest++ = ch;
            } else {
                ++reqLength;
            }
        }
        srcLimit = src;
        if(ch != 0) {
            /* "complicated" case, find the end of the remaining string */
            while(*++srcLimit != 0) {}
        }
    } else {
        /* src may be NULL here only when srcLength==0; avoid NULL + 0 */
        srcLimit = (src != NULL) ? (src + srcLength) : NULL;
    }

    /* convert with length */
    while(src < srcLimit) {
        ch = *src++;
        if(!U16_IS_SURROGATE(ch)) {
            /* write or count ch below */
        } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
            /* well-formed surrogate pair: consume the trail unit and combine */
            ++src;
            ch = U16_GET_SUPPLEMENTARY(ch, ch2);
        } else if((ch = subchar) < 0) {
            /* unpaired surrogate and no substitution character requested */
            *pErrorCode = U_INVALID_CHAR_FOUND;
            return NULL;
        } else {
            /* unpaired surrogate replaced by subchar (already stored in ch) */
            ++numSubstitutions;
        }
        if(pDest < destLimit) {
            *pDest++ = ch;
        } else {
            ++reqLength;
        }
    }

    /*
     * Total length = units that overflowed + units actually written.
     * When dest is NULL nothing was written, and subtracting null
     * pointers is not well-defined in C, so skip the subtraction.
     */
    if(pDest != NULL) {
        reqLength += (int32_t)(pDest - dest);
    }
    if(pDestLength) {
        *pDestLength = reqLength;
    }
    if(pNumSubstitutions != NULL) {
        *pNumSubstitutions = numSubstitutions;
    }

    /*
     * Terminate the buffer.
     * NOTE(review): u_terminateUChar32s is defined elsewhere; presumably it
     * also reports a too-small buffer through pErrorCode -- confirm.
     */
    u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);

    return dest;
}


Generated by  Doxygen 1.6.0   Back to index