Logo Search packages:      
Sourcecode: icu version File versions

rbbi_tbl.h

/*
**********************************************************************
*   Copyright (C) 1999 IBM Corp. All rights reserved.
**********************************************************************
*   Date        Name        Description
*   11/11/99    rgillam     Complete port from Java.
**********************************************************************
*/

#ifndef RBBI_TBL_H
#define RBBI_TBL_H

#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/brkiter.h"
#include "unicode/udata.h"
#include "filestrm.h"

/*
 * Forward-declare the C struct type CompactByteArray so that this header can
 * hold a pointer to it without pulling in its full definition.  The typedef is
 * skipped when UCMP8_H is already defined (presumably because ucmp8.h, which
 * would supply the real declaration, has already been included -- confirm).
 */
U_CDECL_BEGIN
#ifndef UCMP8_H
typedef struct _CompactByteArray CompactByteArray;
#endif
U_CDECL_END

U_NAMESPACE_BEGIN

/*
 * Forward declarations of the two iterator classes that are named as friends
 * of RuleBasedBreakIteratorTables further down in this header.
 */
class RuleBasedBreakIterator;
class DictionaryBasedBreakIterator;

/**
 * This class contains the internal static tables that are used by the
 * RuleBasedBreakIterator.  Once created, these tables are immutable,
 * so they can be shared among all break iterators using a particular
 * set of rules.  This class uses a reference-counting scheme to
 * manage the sharing.
 *
 * @author Richard Gillam
 */
00040 class RuleBasedBreakIteratorTables {

private:
    /**
     * The number of RuleBasedBreakIterators using this object.
     */
00046     int16_t refCount;

protected:
    /**
     * Whether or not we own the storage for the tables (the tables may be
     * stored in a memory-mapped file)
     */
00053     UBool ownTables;

private:
    /**
     * The textual description that was used to create these tables
     */
00059     UnicodeString description;

    /**
     * A table that indexes from character values to character category numbers
     */
00064     CompactByteArray* charCategoryTable;

    /**
     * The table of state transitions used for forward iteration
     */
00069     int16_t* stateTable;

    /**
     * The table of state transitions used to sync up the iterator with the
     * text in backwards and random-access iteration
     */
00075     int16_t* backwardsStateTable;

    /**
     * A list of flags indicating which states in the state table are accepting
     * ("end") states
     */
00081     int8_t* endStates;

    /**
     * A list of flags indicating which states in the state table are
     * lookahead states (states which turn lookahead on and off)
     */
00087     int8_t* lookaheadStates;

    /**
     * The number of character categories (and, thus, the number of columns in
     * the state tables)
     */
00093     int32_t numCategories;

    //=======================================================================
    // constructor
    //=======================================================================

    /**
     * Creates a tables object, adopting all of the tables that are passed in.
     */
protected:
    RuleBasedBreakIteratorTables();
    
    RuleBasedBreakIteratorTables(UDataMemory* memory);
    UDataMemory *fMemory;

private:
    /**
     * The copy constructor is declared private and is a no-op.
     * THIS CLASS MAY NOT BE COPIED.
     */
    RuleBasedBreakIteratorTables(const RuleBasedBreakIteratorTables& that);

    //=======================================================================
    // boilerplate
    //=======================================================================

protected:
    /**
     * Destructor
     */
    virtual ~RuleBasedBreakIteratorTables();

private:
    /**
     * The assignment operator is declared private and is a no-op.
     * THIS CLASS MAY NOT BE COPIED.
     */
    RuleBasedBreakIteratorTables& operator=(const RuleBasedBreakIteratorTables& that);

    /**
     * Equality operator.  Returns TRUE if both tables objects are of the
     * same class, have the same behavior, and iterate over the same text.
     */
    virtual UBool operator==(const RuleBasedBreakIteratorTables& that) const;

    /**
     * Not-equal operator.  If operator== returns TRUE, this returns FALSE,
     * and vice versa.
     */
    UBool operator!=(const RuleBasedBreakIteratorTables& that) const;

    /**
     * Compute a hash code for these tables
     * @return A hash code
     */
    virtual int32_t hashCode(void) const;

    /**
     * Returns the description used to create these tables
     */
    const UnicodeString& getRules(void) const;

    //=======================================================================
    // reference counting
    //=======================================================================
    
    /**
     * increments the reference count.
     */
    void addReference(void);

    /**
     * decrements the reference count and deletes the object if it reaches zero
     */
    void removeReference(void);

protected:
    //=======================================================================
    // implementation
    //=======================================================================
    /**
     * Looks up a character's category (i.e., its category for breaking purposes,
     * not its Unicode category)
     */
    virtual int32_t lookupCategory(UChar c, BreakIterator* bi) const;

    /**
     * Given a current state and a character category, looks up the
     * next state to transition to in the state table.
     */
    virtual int32_t lookupState(int32_t state, int32_t category) const;

    /**
     * Given a current state and a character category, looks up the
     * next state to transition to in the backwards state table.
     */
    virtual int32_t lookupBackwardState(int32_t state, int32_t category) const;

    /**
     * Returns true if the specified state is an accepting state.
     */
    virtual UBool isEndState(int32_t state) const;

    /**
     * Returns true if the specified state is a lookahead state.
     */
    virtual UBool isLookaheadState(int32_t state) const;

#ifdef RBBI_DEBUG
    //
    // Print out state table and character classes.
    //    For debugging only.
    //
    void debugDumpTables() const;
#endif

    friend class RuleBasedBreakIterator;
    friend class DictionaryBasedBreakIterator;
};

/**
 * Not-equal operator: the logical negation of operator==, so it follows
 * whatever comparison the (virtual) operator== performs.
 */
inline UBool
RuleBasedBreakIteratorTables::operator!=(const RuleBasedBreakIteratorTables& that) const {
    return !operator==(that);
}

/**
 * Returns the stored rule description.  The result is a reference to a
 * member of this object, not a copy.
 */
inline const UnicodeString&
RuleBasedBreakIteratorTables::getRules(void) const {
    return description;
}

inline void
00224 RuleBasedBreakIteratorTables::addReference(void) {
    ++refCount;
}

inline void
00229 RuleBasedBreakIteratorTables::removeReference(void) {
    if (--refCount <= 0)
        delete this;
}
U_NAMESPACE_END

#endif

Generated by  Doxygen 1.6.0   Back to index