Logo Search packages:      
Sourcecode: icu version File versions

Unicode Class Reference

#include <unicode.h>

List of all members.


Detailed Description

This class is deprecated and will be removed. Use the C API, see uchar.h and utf.h. The Unicode class is a pure 1:1 wrapper for the functions and macros there.

Old documentation:

The Unicode class allows you to query the properties associated with individual Unicode character values.

The Unicode character information, provided implicitly by the Unicode Standard, includes information about the script (for example, symbols or control characters) to which the character belongs, as well as semantic information such as whether a character is a digit, or whether it is uppercase, lowercase, or uncased.

Do not subclass.

Deprecated:
To be removed after 2002-sep-30; use the C API, see uchar.h and utf.h.

Definition at line 55 of file unicode.h.


Public Types

enum  {
  MIN_VALUE = 0, MAX_VALUE = 0x10ffff, MAX_CHAR_LENGTH = UTF_MAX_CHAR_LENGTH, MIN_RADIX = 2,
  MAX_RADIX = 36
}
enum  ECellWidths { ZERO_WIDTH = 0, HALF_WIDTH = 1, FULL_WIDTH = 2, NEUTRAL = 3 }
enum  EDirectionProperty {
  LEFT_TO_RIGHT = 0, RIGHT_TO_LEFT = 1, EUROPEAN_NUMBER = 2, EUROPEAN_NUMBER_SEPARATOR = 3,
  EUROPEAN_NUMBER_TERMINATOR = 4, ARABIC_NUMBER = 5, COMMON_NUMBER_SEPARATOR = 6, BLOCK_SEPARATOR = 7,
  SEGMENT_SEPARATOR = 8, WHITE_SPACE_NEUTRAL = 9, OTHER_NEUTRAL = 10, LEFT_TO_RIGHT_EMBEDDING = 11,
  LEFT_TO_RIGHT_OVERRIDE = 12, RIGHT_TO_LEFT_ARABIC = 13, RIGHT_TO_LEFT_EMBEDDING = 14, RIGHT_TO_LEFT_OVERRIDE = 15,
  POP_DIRECTIONAL_FORMAT = 16, DIR_NON_SPACING_MARK = 17, BOUNDARY_NEUTRAL = 18
}
enum  EUnicodeGeneralTypes {
  UNASSIGNED = 0, UPPERCASE_LETTER = 1, LOWERCASE_LETTER = 2, TITLECASE_LETTER = 3,
  MODIFIER_LETTER = 4, OTHER_LETTER = 5, NON_SPACING_MARK = 6, ENCLOSING_MARK = 7,
  COMBINING_SPACING_MARK = 8, DECIMAL_DIGIT_NUMBER = 9, LETTER_NUMBER = 10, OTHER_NUMBER = 11,
  SPACE_SEPARATOR = 12, LINE_SEPARATOR = 13, PARAGRAPH_SEPARATOR = 14, CONTROL = 15,
  FORMAT = 16, PRIVATE_USE = 17, SURROGATE = 18, DASH_PUNCTUATION = 19,
  START_PUNCTUATION = 20, END_PUNCTUATION = 21, CONNECTOR_PUNCTUATION = 22, OTHER_PUNCTUATION = 23,
  MATH_SYMBOL = 24, CURRENCY_SYMBOL = 25, MODIFIER_SYMBOL = 26, OTHER_SYMBOL = 27,
  INITIAL_PUNCTUATION = 28, FINAL_PUNCTUATION = 29, GENERAL_TYPES_COUNT = 30
}
enum  EUnicodeScript {
  kBasicLatin = UBLOCK_BASIC_LATIN, kLatin1Supplement, kLatinExtendedA, kLatinExtendedB,
  kIPAExtension, kSpacingModifier, kCombiningDiacritical, kGreek,
  kCyrillic, kArmenian, kHebrew, kArabic,
  kSyriac, kThaana, kDevanagari, kBengali,
  kGurmukhi, kGujarati, kOriya, kTamil,
  kTelugu, kKannada, kMalayalam, kSinhala,
  kThai, kLao, kTibetan, kMyanmar,
  kGeorgian, kHangulJamo, kEthiopic, kCherokee,
  kUnifiedCanadianAboriginalSyllabics, kogham, kRunic, kKhmer,
  kMongolian, kLatinExtendedAdditional, kGreekExtended, kGeneralPunctuation,
  kSuperSubScript, kCurrencySymbolScript, kSymbolCombiningMark, kLetterlikeSymbol,
  kNumberForm, kArrow, kMathOperator, kMiscTechnical,
  kControlPicture, kOpticalCharacter, kEnclosedAlphanumeric, kBoxDrawing,
  kBlockElement, kGeometricShape, kMiscSymbol, kDingbat,
  kBraillePatterns, kCJKRadicalsSupplement, kKangxiRadicals, kIdeographicDescriptionCharacters,
  kCJKSymbolPunctuation, kHiragana, kKatakana, kBopomofo,
  kHangulCompatibilityJamo, kKanbun, kBopomofoExtended, kEnclosedCJKLetterMonth,
  kCJKCompatibility, kCJKUnifiedIdeographExtensionA, kCJKUnifiedIdeograph, kYiSyllables,
  kYiRadicals, kHangulSyllable, kHighSurrogate, kHighPrivateUseSurrogate,
  kLowSurrogate, kPrivateUse, kCJKCompatibilityIdeograph, kAlphabeticPresentation,
  kArabicPresentationA, kCombiningHalfMark, kCJKCompatibilityForm, kSmallFormVariant,
  kArabicPresentationB, kNoScript, kHalfwidthFullwidthForm, kScriptCount = UBLOCK_COUNT
}

Static Public Member Functions

static int32_t arraySize (int32_t size)
static EDirectionProperty characterDirection (UChar32 ch)
static int32_t charLength (UChar32 c)
static UChar32 charMirror (UChar32 c)
static int32_t digit (UChar32 ch, int8_t radix)
static int32_t digitValue (UChar32 ch)
static UChar32 foldCase (UChar32 c, uint32_t options)
static UChar32 forDigit (int32_t digit, int8_t radix)
static uint16_t getCellWidth (UChar32 ch)
static int32_t getCharName (uint32_t code, char *buffer, int32_t bufferLength, UCharNameChoice nameChoice=U_UNICODE_CHAR_NAME)
static uint8_t getCombiningClass (UChar32 c)
static EUnicodeScript getScript (UChar32 ch)
static int8_t getType (UChar32 ch)
static void getUnicodeVersion (UVersionInfo info)
static UBool isBaseForm (UChar32 ch)
static UBool isControl (UChar32 ch)
static UBool isDefined (UChar32 ch)
static UBool isDigit (UChar32 ch)
static UBool isError (UChar32 c)
static UBool isIdentifierIgnorable (UChar32 ch)
static UBool isJavaIdentifierPart (UChar32 ch)
static UBool isJavaIdentifierStart (UChar32 ch)
static UBool isLead (UChar c)
static UBool isLetter (UChar32 ch)
static UBool isLowerCase (UChar32 ch)
static UBool isMirrored (UChar32 c)
static UBool isPrintable (UChar32 ch)
static UBool isSingle (UChar c)
static UBool isSpaceChar (UChar32 ch)
static UBool isSurrogate (UChar32 c)
static UBool isTitleCase (UChar32 ch)
static UBool isTrail (UChar c)
static UBool isUnicodeChar (UChar32 c)
static UBool isUnicodeIdentifierPart (UChar32 ch)
static UBool isUnicodeIdentifierStart (UChar32 ch)
static UBool isUpperCase (UChar32 ch)
static UBool isValid (UChar32 c)
static UBool isWhitespace (UChar32 ch)
static UBool needMultipleUChar (UChar32 c)
static UChar32 toLowerCase (UChar32 ch)
static UChar32 toTitleCase (UChar32 ch)
static UChar32 toUpperCase (UChar32 ch)

Protected Member Functions

const Unicode & operator= (const Unicode &other)
 Unicode (const Unicode &other)

The documentation for this class was generated from the following files:

Generated by  Doxygen 1.6.0   Back to index