Logo Search packages:      
Sourcecode: icu version File versions

strmatch.h

/*
* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   07/23/01    aliu        Creation.
**********************************************************************
*/
#ifndef STRMATCH_H
#define STRMATCH_H

#include "unicode/unistr.h"
#include "unicode/unifunct.h"
#include "unicode/unimatch.h"
#include "unicode/unirepl.h"

U_NAMESPACE_BEGIN

class TransliterationRuleData;

/**
 * An object that matches a fixed input string, implementing the
 * UnicodeMatcher API.  This object also implements the
 * UnicodeReplacer API, allowing it to emit the matched text as
 * output.  Since the match text may contain flexible match elements,
 * such as UnicodeSets, the emitted text is not the match pattern, but
 * instead a substring of the actual matched text.  Following
 * convention, the output text is the leftmost match seen up to this
 * point.
 *
 * A StringMatcher may represent a segment, in which case it has a
 * positive segment number.  This affects how the matcher converts
 * itself to a pattern but does not otherwise affect its function.
 *
 * A StringMatcher that is not a segment should not be used as a
 * UnicodeReplacer.
 */
00037 class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public UnicodeReplacer {

 public:

    /**
     * Construct a matcher that matches the given pattern string.
     * @param theString the pattern to be matched, possibly containing
     * stand-ins that represent nested UnicodeMatcher objects.
     * @param segmentNum the segment number from 1..n, or 0 if this is
     * not a segment.
     * @param theData context object mapping stand-ins to
     * UnicodeMatcher objects.
     */
    StringMatcher(const UnicodeString& string,
                  int32_t start,
                  int32_t limit,
                  int32_t segmentNum,
                  const TransliterationRuleData& data);

    StringMatcher(const StringMatcher& o);
        
    /**
     * Destructor
     */
    virtual ~StringMatcher();

    /**
     * Implement UnicodeFunctor
     */
    virtual UnicodeFunctor* clone() const;

    /**
     * UnicodeFunctor API.  Cast 'this' to a UnicodeMatcher* pointer
     * and return the pointer.
     */
    virtual UnicodeMatcher* toMatcher() const;

    /**
     * UnicodeFunctor API.  Cast 'this' to a UnicodeReplacer* pointer
     * and return the pointer.
     */
    virtual UnicodeReplacer* toReplacer() const;

    /**
     * Implement UnicodeMatcher
     */
    virtual UMatchDegree matches(const Replaceable& text,
                                 int32_t& offset,
                                 int32_t limit,
                                 UBool incremental);

    /**
     * Implement UnicodeMatcher
     */
    virtual UnicodeString& toPattern(UnicodeString& result,
                                     UBool escapeUnprintable = FALSE) const;

    /**
     * Implement UnicodeMatcher
     */
    virtual UBool matchesIndexValue(uint8_t v) const;

    /**
     * Implement UnicodeFunctor
     */
    virtual void setData(const TransliterationRuleData*);

    /**
     * Replace characters in 'text' from 'start' to 'limit' with the
     * output text of this object.  Update the 'cursor' parameter to
     * give the cursor position and return the length of the
     * replacement text.
     *
     * @param text the text to be matched
     * @param start inclusive start index of text to be replaced
     * @param limit exclusive end index of text to be replaced;
     * must be greater than or equal to start
     * @param cursor output parameter for the cursor position.
     * Not all replacer objects will update this, but in a complete
     * tree of replacer objects, representing the entire output side
     * of a transliteration rule, at least one must update it.
     * @return the number of 16-bit code units in the text replacing
     * the characters at offsets start..(limit-1) in text
     */
    virtual int32_t replace(Replaceable& text,
                            int32_t start,
                            int32_t limit,
                            int32_t& cursor);

    /**
     * Returns a string representation of this replacer.  If the
     * result of calling this function is passed to the appropriate
     * parser, typically TransliteratorParser, it will produce another
     * replacer that is equal to this one.
     * @param result the string to receive the pattern.  Previous
     * contents will be deleted.
     * @param escapeUnprintable if TRUE then convert unprintable
     * character to their hex escape representations, \\uxxxx or
     * \\Uxxxxxxxx.  Unprintable characters are defined by
     * Utility.isUnprintable().
     * @return a reference to 'result'.
     */
    virtual UnicodeString& toReplacerPattern(UnicodeString& result,
                                             UBool escapeUnprintable) const;

    /**
     * Remove any match data.  This must be called before performing a
     * set of matches with this segment.
     */
    void resetMatch();

 private:

    /**
     * The text to be matched.
     */
00153     UnicodeString pattern;

    /**
     * Context object that maps stand-ins to matcher and replacer
     * objects.
     */
00159     const TransliterationRuleData* data;

    /**
     * The segment number, 1-based, or 0 if not a segment.
     */
00164     int32_t segmentNumber;

    /**
     * Start offset, in the match text, of the <em>rightmost</em>
     * match.
     */
00170     int32_t matchStart;

    /**
     * Limit offset, in the match text, of the <em>rightmost</em>
     * match.
     */
00176     int32_t matchLimit;
};

U_NAMESPACE_END

#endif

Generated by  Doxygen 1.6.0   Back to index