Logo Search packages:      
Sourcecode: icu version File versions  Download package

UnicodeSet & UnicodeSet::closeOver ( int32_t  attribute  ) 

Close this set over the given attribute. For the attribute USET_CASE, the result is to modify this set so that:

1. For each character or string 'a' in this set, all strings or characters 'b' such that foldCase(a) == foldCase(b) are added to this set.

2. For each string 'e' in the resulting set, if e != foldCase(e), 'e' will be removed.

Example: [aq\u00DF{Bc}{bC}{Fi}] => [aAqQ\u00DF\uFB01{ss}{bc}{fi}]

(Here foldCase(x) refers to the operation u_strFoldCase, and a == b denotes that the contents are the same, not pointer comparison.)

A frozen set will not be modified.

Parameters:
attribute bitmask for attributes to close over. Currently only the USET_CASE bit is supported. Any undefined bits are ignored.
Returns:
a reference to this set. ICU 4.2

Definition at line 1383 of file uniset_props.cpp.

References add(), BreakIterator::createWordInstance(), UVector::elementAt(), UnicodeString::foldCase(), UnicodeString::getBuffer(), getRangeCount(), getRangeEnd(), getRangeStart(), isBogus(), isFrozen(), UnicodeString::length(), NULL, UVector::removeAllElements(), UVector::size(), strings, U_NAMESPACE_END, U_SUCCESS, U_ZERO_ERROR, USET_ADD_CASE_MAPPINGS, and USET_CASE_INSENSITIVE.

Referenced by applyPattern(), and UnicodeSetTest::TestCloseOver().

                                                   {
    if (isFrozen() || isBogus()) {
        return *this;
    }
    if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
        UErrorCode status = U_ZERO_ERROR;
        const UCaseProps *csp = ucase_getSingleton(&status);
        if (U_SUCCESS(status)) {
            UnicodeSet foldSet(*this);
            UnicodeString str;
            USetAdder sa = {
                (USet *)&foldSet,
                _set_add,
                _set_addRange,
                _set_addString,
                NULL, // don't need remove()
                NULL // don't need removeRange()
            };

            // start with input set to guarantee inclusion
            // USET_CASE: remove strings because the strings will actually be reduced (folded);
            //            therefore, start with no strings and add only those needed
            if (attribute & USET_CASE_INSENSITIVE) {
                foldSet.strings->removeAllElements();
            }

            int32_t n = getRangeCount();
            UChar32 result;
            const UChar *full;
            int32_t locCache = 0;

            for (int32_t i=0; i<n; ++i) {
                UChar32 start = getRangeStart(i);
                UChar32 end   = getRangeEnd(i);

                if (attribute & USET_CASE_INSENSITIVE) {
                    // full case closure
                    for (UChar32 cp=start; cp<=end; ++cp) {
                        ucase_addCaseClosure(csp, cp, &sa);
                    }
                } else {
                    // add case mappings
                    // (does not add long s for regular s, or Kelvin for k, for example)
                    for (UChar32 cp=start; cp<=end; ++cp) {
                        result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullFolding(csp, cp, &full, 0);
                        addCaseMapping(foldSet, result, full, str);
                    }
                }
            }
            if (strings != NULL && strings->size() > 0) {
                if (attribute & USET_CASE_INSENSITIVE) {
                    for (int32_t j=0; j<strings->size(); ++j) {
                        str = *(const UnicodeString *) strings->elementAt(j);
                        str.foldCase();
                        if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) {
                            foldSet.add(str); // does not map to code points: add the folded string itself
                        }
                    }
                } else {
                    Locale root("");
#if !UCONFIG_NO_BREAK_ITERATION
                    BreakIterator *bi = BreakIterator::createWordInstance(root, status);
#endif
                    if (U_SUCCESS(status)) {
                        const UnicodeString *pStr;

                        for (int32_t j=0; j<strings->size(); ++j) {
                            pStr = (const UnicodeString *) strings->elementAt(j);
                            (str = *pStr).toLower(root);
                            foldSet.add(str);
#if !UCONFIG_NO_BREAK_ITERATION
                            (str = *pStr).toTitle(bi, root);
                            foldSet.add(str);
#endif
                            (str = *pStr).toUpper(root);
                            foldSet.add(str);
                            (str = *pStr).foldCase();
                            foldSet.add(str);
                        }
                    }
#if !UCONFIG_NO_BREAK_ITERATION
                    delete bi;
#endif
                }
            }
            *this = foldSet;
        }
    }
    return *this;
}


Generated by  Doxygen 1.6.0   Back to index