Logo Search packages:      
Sourcecode: icu version File versions  Download package

UMatchDegree UnicodeSet::matches ( const Replaceable text,
int32_t &  offset,
int32_t  limit,
UBool  incremental 
) [virtual]

Implement UnicodeMatcher::matches() ICU 2.4

Implementation of UnicodeMatcher::matches(). Always matches the longest possible multichar string.

Reimplemented from UnicodeFilter.

Definition at line 624 of file uniset.cpp.

References UnicodeString::charAt(), Replaceable::charAt(), contains(), UVector::elementAt(), UnicodeString::length(), UnicodeFilter::matches(), matchRest(), UVector::size(), U_ETHER, U_MATCH, U_MISMATCH, and U_PARTIAL_MATCH.

                                                    {
    if (offset == limit) {
        // Strings, if any, have length != 0, so we don't worry
        // about them here.  If we ever allow zero-length strings
        // we much check for them here.
        if (contains(U_ETHER)) {
            return incremental ? U_PARTIAL_MATCH : U_MATCH;
        } else {
            return U_MISMATCH;
        }
    } else {
        if (strings->size() != 0) { // try strings first

            // might separate forward and backward loops later
            // for now they are combined

            // TODO Improve efficiency of this, at least in the forward
            // direction, if not in both.  In the forward direction we
            // can assume the strings are sorted.

            int32_t i;
            UBool forward = offset < limit;

            // firstChar is the leftmost char to match in the
            // forward direction or the rightmost char to match in
            // the reverse direction.
            UChar firstChar = text.charAt(offset);

            // If there are multiple strings that can match we
            // return the longest match.
            int32_t highWaterLength = 0;

            for (i=0; i<strings->size(); ++i) {
                const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);

                //if (trial.length() == 0) {
                //    return U_MATCH; // null-string always matches
                //}
                // assert(trial.length() != 0); // We ensure this elsewhere

                UChar c = trial.charAt(forward ? 0 : trial.length() - 1);

                // Strings are sorted, so we can optimize in the
                // forward direction.
                if (forward && c > firstChar) break;
                if (c != firstChar) continue;

                int32_t matchLen = matchRest(text, offset, limit, trial);

                if (incremental) {
                    int32_t maxLen = forward ? limit-offset : offset-limit;
                    if (matchLen == maxLen) {
                        // We have successfully matched but only up to limit.
                        return U_PARTIAL_MATCH;
                    }
                }

                if (matchLen == trial.length()) {
                    // We have successfully matched the whole string.
                    if (matchLen > highWaterLength) {
                        highWaterLength = matchLen;
                    }
                    // In the forward direction we know strings
                    // are sorted so we can bail early.
                    if (forward && matchLen < highWaterLength) {
                        break;
                    }
                    continue;
                }
            }

            // We've checked all strings without a partial match.
            // If we have full matches, return the longest one.
            if (highWaterLength != 0) {
                offset += forward ? highWaterLength : -highWaterLength;
                return U_MATCH;
            }
        }
        return UnicodeFilter::matches(text, offset, limit, incremental);
    }
}


Generated by  Doxygen 1.6.0   Back to index