Logo Search packages:      
Sourcecode: icu version File versions  Download package

int32_t SpoofImpl::confusableLookup ( UChar32  inChar,
int32_t  tableMask,
UChar *  destBuf 
) const

Get the confusable skeleton transform for a single code point. The result is a string with a length between 1 and 18.

Parameters:
tableMask bit flag specifying which confusable table to use. One of USPOOF_SL_TABLE_FLAG, USPOOF_MA_TABLE_FLAG, etc.
Returns:
The length in UTF-16 code units of the substition string.

Definition at line 127 of file uspoof_impl.cpp.

References U16_APPEND_UNSAFE.

Referenced by uspoof_getSkeleton().

                                                                                           {

    // Binary search the spoof data key table for the inChar
    int32_t  *low   = fSpoofData->fCFUKeys;
    int32_t  *mid   = NULL;
    int32_t  *limit = low + fSpoofData->fRawData->fCFUKeysSize;
    UChar32   midc;
    do {
        int32_t delta = ((int32_t)(limit-low))/2;
        mid = low + delta;
        midc = *mid & 0x1fffff;
        if (inChar == midc) {
            goto foundChar;
        } else if (inChar < midc) {
            limit = mid;
        } else {
            low = mid;
        }
    } while (low < limit-1);
    mid = low;
    midc = *mid & 0x1fffff;
    if (inChar != midc) {
        // Char not found.  It maps to itself.
        int i = 0;
        U16_APPEND_UNSAFE(destBuf, i, inChar)
        return i;
    } 
  foundChar:
    int32_t keyFlags = *mid & 0xff000000;
    if ((keyFlags & tableMask) == 0) {
        // We found the right key char, but the entry doesn't pertain to the
        //  table we need.  See if there is an adjacent key that does
        if (keyFlags & USPOOF_KEY_MULTIPLE_VALUES) {
            int32_t *altMid;
            for (altMid = mid-1; (*altMid&0x00ffffff) == inChar; altMid--) {
                keyFlags = *altMid & 0xff000000;
                if (keyFlags & tableMask) {
                    mid = altMid;
                    goto foundKey;
                }
            }
            for (altMid = mid+1; (*altMid&0x00ffffff) == inChar; altMid++) {
                keyFlags = *altMid & 0xff000000;
                if (keyFlags & tableMask) {
                    mid = altMid;
                    goto foundKey;
                }
            }
        }
        // No key entry for this char & table.
        // The input char maps to itself.
        int i = 0;
        U16_APPEND_UNSAFE(destBuf, i, inChar)
        return i;
    }

  foundKey:
    int32_t  stringLen = USPOOF_KEY_LENGTH_FIELD(keyFlags) + 1;
    int32_t keyTableIndex = (int32_t)(mid - fSpoofData->fCFUKeys);

    // Value is either a UChar  (for strings of length 1) or
    //                 an index into the string table (for longer strings)
    uint16_t value = fSpoofData->fCFUValues[keyTableIndex];
    if (stringLen == 1) {
        destBuf[0] = value;
        return 1;
    }

    // String length of 4 from the above lookup is used for all strings of length >= 4.
    // For these, get the real length from the string lengths table,
    //   which maps string table indexes to lengths.
    //   All strings of the same length are stored contiguously in the string table.
    //   'value' from the lookup above is the starting index for the desired string.

    int32_t ix;
    if (stringLen == 4) {
        int32_t stringLengthsLimit = fSpoofData->fRawData->fCFUStringLengthsSize;
        for (ix = 0; ix < stringLengthsLimit; ix++) {
            if (fSpoofData->fCFUStringLengths[ix].fLastString >= value) {
                stringLen = fSpoofData->fCFUStringLengths[ix].fStrLength;
                break;
            }
        }
        U_ASSERT(ix < stringLengthsLimit);
    }

    U_ASSERT(value + stringLen < fSpoofData->fRawData->fCFUStringTableLen);
    UChar *src = &fSpoofData->fCFUStrings[value];
    for (ix=0; ix<stringLen; ix++) {
        destBuf[ix] = src[ix];
    }
    return stringLen;
}


Generated by  Doxygen 1.6.0   Back to index