Logo Search packages:      
Sourcecode: icu version File versions  Download package

U_DRAFT char* U_EXPORT2 u_strToJavaModifiedUTF8 ( char *  dest,
int32_t  destCapacity,
int32_t *  pDestLength,
const UChar *  src,
int32_t  srcLength,
UErrorCode pErrorCode 
)

Convert a 16-bit Unicode string to Java Modified UTF-8. See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8

This function behaves according to the documentation for Java DataOutput.writeUTF() except that it does not encode the output length in the destination buffer and does not have an output length restriction. See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String)

The input string need not be well-formed UTF-16. (Therefore there is no subchar parameter.)

Parameters:
dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity The size of the buffer (number of chars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
pDestLength A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
src The original source string
srcLength The length of the original string. If -1, then src must be zero-terminated.
pErrorCode Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns:
The pointer to destination buffer. ICU 4.4
See also:
u_strToUTF8WithSub

u_strFromJavaModifiedUTF8WithSub

Definition at line 1502 of file ustrtrns.c.

References NULL, U_FAILURE, U_ILLEGAL_ARGUMENT_ERROR, and u_strlen().

                                {
    int32_t reqLength=0;
    uint32_t ch=0,ch2=0;
    uint8_t *pDest = (uint8_t *)dest;
    uint8_t *pDestLimit = pDest + destCapacity;
    const UChar *pSrcLimit;
    int32_t count;

    /* args check */
    if(U_FAILURE(*pErrorCode)){
        return NULL;
    }
    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
        (dest==NULL && destCapacity!=0) || destCapacity<0
    ) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if(srcLength==-1) {
        /* Convert NUL-terminated ASCII, then find the string length. */
        while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) {
            *pDest++ = (uint8_t)ch;
            ++src;
        }
        if(ch == 0) {
            reqLength=(int32_t)(pDest - (uint8_t *)dest);
            if(pDestLength) {
                *pDestLength = reqLength;
            }

            /* Terminate the buffer */
            u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
            return dest;
        }
        srcLength = u_strlen(src);
    }

    /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
    pSrcLimit = src+srcLength;
    for(;;) {
        count = (int32_t)(pDestLimit - pDest);
        srcLength = (int32_t)(pSrcLimit - src);
        if(count >= srcLength && srcLength > 0 && *src <= 0x7f) {
            /* fast ASCII loop */
            const UChar *prevSrc = src;
            int32_t delta;
            while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) {
                *pDest++=(uint8_t)ch;
                ++src;
            }
            delta = (int32_t)(src - prevSrc);
            count -= delta;
            srcLength -= delta;
        }
        /*
         * Each iteration of the inner loop progresses by at most 3 UTF-8
         * bytes and one UChar.
         */
        count /= 3;
        if(count > srcLength) {
            count = srcLength; /* min(remaining dest/3, remaining src) */
        }
        if(count < 3) {
            /*
             * Too much overhead if we get near the end of the string,
             * continue with the next loop.
             */
            break;
        }
        do {
            ch=*src++;
            if(ch <= 0x7f && ch != 0) {
                *pDest++ = (uint8_t)ch;
            } else if(ch <= 0x7ff) {
                *pDest++=(uint8_t)((ch>>6)|0xc0);
                *pDest++=(uint8_t)((ch&0x3f)|0x80);
            } else {
                *pDest++=(uint8_t)((ch>>12)|0xe0);
                *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
                *pDest++=(uint8_t)((ch&0x3f)|0x80);
            }
        } while(--count > 0);
    }

    while(src<pSrcLimit) {
        ch=*src++;
        if(ch <= 0x7f && ch != 0) {
            if(pDest<pDestLimit) {
                *pDest++ = (uint8_t)ch;
            } else {
                reqLength = 1;
                break;
            }
        } else if(ch <= 0x7ff) {
            if((pDestLimit - pDest) >= 2) {
                *pDest++=(uint8_t)((ch>>6)|0xc0);
                *pDest++=(uint8_t)((ch&0x3f)|0x80);
            } else {
                reqLength = 2;
                break;
            }
        } else {
            if((pDestLimit - pDest) >= 3) {
                *pDest++=(uint8_t)((ch>>12)|0xe0);
                *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
                *pDest++=(uint8_t)((ch&0x3f)|0x80);
            } else {
                reqLength = 3;
                break;
            }
        }
    }
    while(src<pSrcLimit) {
        ch=*src++;
        if(ch <= 0x7f && ch != 0) {
            ++reqLength;
        } else if(ch<=0x7ff) {
            reqLength+=2;
        } else {
            reqLength+=3;
        }
    }

    reqLength+=(int32_t)(pDest - (uint8_t *)dest);
    if(pDestLength){
        *pDestLength = reqLength;
    }

    /* Terminate the buffer */
    u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
    return dest;
}


Generated by  Doxygen 1.6.0   Back to index