Logo Search packages:      
Sourcecode: icu version File versions  Download package

UnicodeSet & UnicodeSet::add ( UChar32  c  ) 

Adds the specified character to this set if it is not already present. If this set already contains the specified character, the call leaves this set unchanged. A frozen set will not be modified. ICU 2.0

Adds the specified character to this set if it is not already present. If this set already contains the specified character, the call leaves this set unchanged.

Definition at line 873 of file uniset.cpp.

References findCodePoint(), isBogus(), isFrozen(), releasePattern(), U_FAILURE, and U_ZERO_ERROR.

                                     {
    // find smallest i such that c < list[i]
    // if odd, then it is IN the set
    // if even, then it is OUT of the set
    int32_t i = findCodePoint(pinCodePoint(c));

    // already in set?
    if ((i & 1) != 0  || isFrozen() || isBogus()) return *this;

    // HIGH is 0x110000
    // assert(list[len-1] == HIGH);

    // empty = [HIGH]
    // [start_0, limit_0, start_1, limit_1, HIGH]

    // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
    //                             ^
    //                             list[i]

    // i == 0 means c is before the first range

#ifdef DEBUG_US_ADD
    printf("Add of ");
    dump(c);
    printf(" found at %d", i);
    printf(": ");
    dump(list, len);
    printf(" => ");
#endif

    if (c == list[i]-1) {
        // c is before start of next range
        list[i] = c;
        // if we touched the HIGH mark, then add a new one
        if (c == (UNICODESET_HIGH - 1)) {
            UErrorCode status = U_ZERO_ERROR;
            ensureCapacity(len+1, status);
            if (U_FAILURE(status)) {
                return *this; // There is no way to report this error :-(
            }
            list[len++] = UNICODESET_HIGH;
        }
        if (i > 0 && c == list[i-1]) {
            // collapse adjacent ranges

            // [..., start_k-1, c, c, limit_k, ..., HIGH]
            //                     ^
            //                     list[i]

            //for (int32_t k=i-1; k<len-2; ++k) {
            //    list[k] = list[k+2];
            //}
            UChar32* dst = list + i - 1;
            UChar32* src = dst + 2;
            UChar32* srclimit = list + len;
            while (src < srclimit) *(dst++) = *(src++);

            len -= 2;
        }
    }

    else if (i > 0 && c == list[i-1]) {
        // c is after end of prior range
        list[i-1]++;
        // no need to check for collapse here
    }

    else {
        // At this point we know the new char is not adjacent to
        // any existing ranges, and it is not 10FFFF.


        // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
        //                             ^
        //                             list[i]

        // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH]
        //                             ^
        //                             list[i]

        UErrorCode status = U_ZERO_ERROR;
        ensureCapacity(len+2, status);
        if (U_FAILURE(status)) {
            return *this; // There is no way to report this error :-(
        }

        //for (int32_t k=len-1; k>=i; --k) {
        //    list[k+2] = list[k];
        //}
        UChar32* src = list + len;
        UChar32* dst = src + 2;
        UChar32* srclimit = list + i;
        while (src > srclimit) *(--dst) = *(--src);

        list[i] = c;
        list[i+1] = c+1;
        len += 2;
    }

#ifdef DEBUG_US_ADD
    dump(list, len);
    printf("\n");

    for (i=1; i<len; ++i) {
        if (list[i] <= list[i-1]) {
            // Corrupt array!
            printf("ERROR: list has been corrupted\n");
            exit(1);
        }
    }
#endif

    releasePattern();
    return *this;
}


Generated by  Doxygen 1.6.0   Back to index