Logo Search packages:      
Sourcecode: icu version File versions

uchar.h File Reference


Detailed Description

C API: Unicode Char.

Unicode C API

The Unicode C API allows you to query the properties associated with individual Unicode character values.

The Unicode character information, provided implicitly by the Unicode character encoding standard, includes information about the script (for example, symbols or control characters) to which the character belongs, as well as semantic information such as whether a character is a digit, and whether it is uppercase, lowercase, or uncased.

Definition in file uchar.h.

#include "unicode/utypes.h"

Go to the source code of this file.

Defines

#define u_charScript   ublock_getCode
#define U_FOLD_CASE_DEFAULT   0
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I   1
#define U_GC_C_MASK   (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
#define U_GC_CC_MASK   U_MASK(U_CONTROL_CHAR)
#define U_GC_CF_MASK   U_MASK(U_FORMAT_CHAR)
#define U_GC_CN_MASK   U_MASK(U_GENERAL_OTHER_TYPES)
#define U_GC_CO_MASK   U_MASK(U_PRIVATE_USE_CHAR)
#define U_GC_CS_MASK   U_MASK(U_SURROGATE)
#define U_GC_L_MASK   (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
#define U_GC_LL_MASK   U_MASK(U_LOWERCASE_LETTER)
#define U_GC_LM_MASK   U_MASK(U_MODIFIER_LETTER)
#define U_GC_LO_MASK   U_MASK(U_OTHER_LETTER)
#define U_GC_LT_MASK   U_MASK(U_TITLECASE_LETTER)
#define U_GC_LU_MASK   U_MASK(U_UPPERCASE_LETTER)
#define U_GC_M_MASK   (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
#define U_GC_MC_MASK   U_MASK(U_COMBINING_SPACING_MARK)
#define U_GC_ME_MASK   U_MASK(U_ENCLOSING_MARK)
#define U_GC_MN_MASK   U_MASK(U_NON_SPACING_MARK)
#define U_GC_N_MASK   (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
#define U_GC_ND_MASK   U_MASK(U_DECIMAL_DIGIT_NUMBER)
#define U_GC_NL_MASK   U_MASK(U_LETTER_NUMBER)
#define U_GC_NO_MASK   U_MASK(U_OTHER_NUMBER)
#define U_GC_P_MASK   (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK|U_GC_PI_MASK|U_GC_PF_MASK)
#define U_GC_PC_MASK   U_MASK(U_CONNECTOR_PUNCTUATION)
#define U_GC_PD_MASK   U_MASK(U_DASH_PUNCTUATION)
#define U_GC_PE_MASK   U_MASK(U_END_PUNCTUATION)
#define U_GC_PF_MASK   U_MASK(U_FINAL_PUNCTUATION)
#define U_GC_PI_MASK   U_MASK(U_INITIAL_PUNCTUATION)
#define U_GC_PO_MASK   U_MASK(U_OTHER_PUNCTUATION)
#define U_GC_PS_MASK   U_MASK(U_START_PUNCTUATION)
#define U_GC_S_MASK   (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
#define U_GC_SC_MASK   U_MASK(U_CURRENCY_SYMBOL)
#define U_GC_SK_MASK   U_MASK(U_MODIFIER_SYMBOL)
#define U_GC_SM_MASK   U_MASK(U_MATH_SYMBOL)
#define U_GC_SO_MASK   U_MASK(U_OTHER_SYMBOL)
#define U_GC_Z_MASK   (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
#define U_GC_ZL_MASK   U_MASK(U_LINE_SEPARATOR)
#define U_GC_ZP_MASK   U_MASK(U_PARAGRAPH_SEPARATOR)
#define U_GC_ZS_MASK   U_MASK(U_SPACE_SEPARATOR)
#define U_GET_GC_MASK(c)   U_MASK(u_charType(c))
#define U_MASK(x)   ((uint32_t)1<<(x))
#define U_UNICODE_VERSION   "3.1.1"
#define UCHAR_MAX_VALUE   0x10ffff
#define UCHAR_MIN_VALUE   0

Typedefs

typedef enum UBlockCode UBlockCode
typedef enum UCellWidth UCellWidth
typedef enum UCharCategory UCharCategory
typedef enum UCharDirection UCharDirection
typedef UBool U_CALLCONV UCharEnumTypeRange (const void *context, UChar32 start, UChar32 limit, UCharCategory type)
typedef enum UCharNameChoice UCharNameChoice
typedef UBlockCode UCharScript
typedef UBool UEnumCharNamesFn (void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
typedef enum UProperty UProperty

Enumerations

enum  UBlockCode {
  UBLOCK_BASIC_LATIN = 1, U_BASIC_LATIN = 1, UBLOCK_LATIN_1_SUPPLEMENT = 2, U_LATIN_1_SUPPLEMENT = 2,
  UBLOCK_LATIN_EXTENDED_A = 3, U_LATIN_EXTENDED_A = 3, UBLOCK_LATIN_EXTENDED_B = 4, U_LATIN_EXTENDED_B = 4,
  UBLOCK_IPA_EXTENSIONS = 5, U_IPA_EXTENSIONS = 5, UBLOCK_SPACING_MODIFIER_LETTERS = 6, U_SPACING_MODIFIER_LETTERS = 6,
  UBLOCK_COMBINING_DIACRITICAL_MARKS = 7, U_COMBINING_DIACRITICAL_MARKS = 7, UBLOCK_GREEK = 8, U_GREEK = 8,
  UBLOCK_CYRILLIC = 9, U_CYRILLIC = 9, UBLOCK_ARMENIAN = 10, U_ARMENIAN = 10,
  UBLOCK_HEBREW = 11, U_HEBREW = 11, UBLOCK_ARABIC = 12, U_ARABIC = 12,
  UBLOCK_SYRIAC = 13, U_SYRIAC = 13, UBLOCK_THAANA = 14, U_THAANA = 14,
  UBLOCK_DEVANAGARI = 15, U_DEVANAGARI = 15, UBLOCK_BENGALI = 16, U_BENGALI = 16,
  UBLOCK_GURMUKHI = 17, U_GURMUKHI = 17, UBLOCK_GUJARATI = 18, U_GUJARATI = 18,
  UBLOCK_ORIYA = 19, U_ORIYA = 19, UBLOCK_TAMIL = 20, U_TAMIL = 20,
  UBLOCK_TELUGU = 21, U_TELUGU = 21, UBLOCK_KANNADA = 22, U_KANNADA = 22,
  UBLOCK_MALAYALAM = 23, U_MALAYALAM = 23, UBLOCK_SINHALA = 24, U_SINHALA = 24,
  UBLOCK_THAI = 25, U_THAI = 25, UBLOCK_LAO = 26, U_LAO = 26,
  UBLOCK_TIBETAN = 27, U_TIBETAN = 27, UBLOCK_MYANMAR = 28, U_MYANMAR = 28,
  UBLOCK_GEORGIAN = 29, U_GEORGIAN = 29, UBLOCK_HANGUL_JAMO = 30, U_HANGUL_JAMO = 30,
  UBLOCK_ETHIOPIC = 31, U_ETHIOPIC = 31, UBLOCK_CHEROKEE = 32, U_CHEROKEE = 32,
  UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33, U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33, UBLOCK_OGHAM = 34, U_OGHAM = 34,
  UBLOCK_RUNIC = 35, U_RUNIC = 35, UBLOCK_KHMER = 36, U_KHMER = 36,
  UBLOCK_MONGOLIAN = 37, U_MONGOLIAN = 37, UBLOCK_LATIN_EXTENDED_ADDITIONAL = 38, U_LATIN_EXTENDED_ADDITIONAL = 38,
  UBLOCK_GREEK_EXTENDED = 39, U_GREEK_EXTENDED = 39, UBLOCK_GENERAL_PUNCTUATION = 40, U_GENERAL_PUNCTUATION = 40,
  UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS = 41, U_SUPERSCRIPTS_AND_SUBSCRIPTS = 41, UBLOCK_CURRENCY_SYMBOLS = 42, U_CURRENCY_SYMBOLS = 42,
  UBLOCK_COMBINING_MARKS_FOR_SYMBOLS = 43, U_COMBINING_MARKS_FOR_SYMBOLS = 43, UBLOCK_LETTERLIKE_SYMBOLS = 44, U_LETTERLIKE_SYMBOLS = 44,
  UBLOCK_NUMBER_FORMS = 45, U_NUMBER_FORMS = 45, UBLOCK_ARROWS = 46, U_ARROWS = 46,
  UBLOCK_MATHEMATICAL_OPERATORS = 47, U_MATHEMATICAL_OPERATORS = 47, UBLOCK_MISCELLANEOUS_TECHNICAL = 48, U_MISCELLANEOUS_TECHNICAL = 48,
  UBLOCK_CONTROL_PICTURES = 49, U_CONTROL_PICTURES = 49, UBLOCK_OPTICAL_CHARACTER_RECOGNITION = 50, U_OPTICAL_CHARACTER_RECOGNITION = 50,
  UBLOCK_ENCLOSED_ALPHANUMERICS = 51, U_ENCLOSED_ALPHANUMERICS = 51, UBLOCK_BOX_DRAWING = 52, U_BOX_DRAWING = 52,
  UBLOCK_BLOCK_ELEMENTS = 53, U_BLOCK_ELEMENTS = 53, UBLOCK_GEOMETRIC_SHAPES = 54, U_GEOMETRIC_SHAPES = 54,
  UBLOCK_MISCELLANEOUS_SYMBOLS = 55, U_MISCELLANEOUS_SYMBOLS = 55, UBLOCK_DINGBATS = 56, U_DINGBATS = 56,
  UBLOCK_BRAILLE_PATTERNS = 57, U_BRAILLE_PATTERNS = 57, UBLOCK_CJK_RADICALS_SUPPLEMENT = 58, U_CJK_RADICALS_SUPPLEMENT = 58,
  UBLOCK_KANGXI_RADICALS = 59, U_KANGXI_RADICALS = 59, UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60, U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60,
  UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION = 61, U_CJK_SYMBOLS_AND_PUNCTUATION = 61, UBLOCK_HIRAGANA = 62, U_HIRAGANA = 62,
  UBLOCK_KATAKANA = 63, U_KATAKANA = 63, UBLOCK_BOPOMOFO = 64, U_BOPOMOFO = 64,
  UBLOCK_HANGUL_COMPATIBILITY_JAMO = 65, U_HANGUL_COMPATIBILITY_JAMO = 65, UBLOCK_KANBUN = 66, U_KANBUN = 66,
  UBLOCK_BOPOMOFO_EXTENDED = 67, U_BOPOMOFO_EXTENDED = 67, UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS = 68, U_ENCLOSED_CJK_LETTERS_AND_MONTHS = 68,
  UBLOCK_CJK_COMPATIBILITY = 69, U_CJK_COMPATIBILITY = 69, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70, U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS = 71, U_CJK_UNIFIED_IDEOGRAPHS = 71, UBLOCK_YI_SYLLABLES = 72, U_YI_SYLLABLES = 72,
  UBLOCK_YI_RADICALS = 73, U_YI_RADICALS = 73, UBLOCK_HANGUL_SYLLABLES = 74, U_HANGUL_SYLLABLES = 74,
  UBLOCK_HIGH_SURROGATES = 75, U_HIGH_SURROGATES = 75, UBLOCK_HIGH_PRIVATE_USE_SURROGATES = 76, U_HIGH_PRIVATE_USE_SURROGATES = 76,
  UBLOCK_LOW_SURROGATES = 77, U_LOW_SURROGATES = 77, UBLOCK_PRIVATE_USE = 78, UBLOCK_PRIVATE_USE_AREA = UBLOCK_PRIVATE_USE,
  U_PRIVATE_USE_AREA = 78, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS = 79, U_CJK_COMPATIBILITY_IDEOGRAPHS = 79, UBLOCK_ALPHABETIC_PRESENTATION_FORMS = 80,
  U_ALPHABETIC_PRESENTATION_FORMS = 80, UBLOCK_ARABIC_PRESENTATION_FORMS_A = 81, U_ARABIC_PRESENTATION_FORMS_A = 81, UBLOCK_COMBINING_HALF_MARKS = 82,
  U_COMBINING_HALF_MARKS = 82, UBLOCK_CJK_COMPATIBILITY_FORMS = 83, U_CJK_COMPATIBILITY_FORMS = 83, UBLOCK_SMALL_FORM_VARIANTS = 84,
  U_SMALL_FORM_VARIANTS = 84, UBLOCK_ARABIC_PRESENTATION_FORMS_B = 85, U_ARABIC_PRESENTATION_FORMS_B = 85, UBLOCK_SPECIALS = 86,
  U_SPECIALS = 86, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS = 87, U_HALFWIDTH_AND_FULLWIDTH_FORMS = 87, UBLOCK_OLD_ITALIC = 88,
  UBLOCK_GOTHIC = 89, UBLOCK_DESERET = 90, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, UBLOCK_MUSICAL_SYMBOLS = 92,
  UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, UBLOCK_TAGS = 96,
  UBLOCK_COUNT = 97, U_SCRIPT_COUNT = UBLOCK_COUNT, UBLOCK_INVALID_CODE = -1, U_CHAR_SCRIPT_COUNT = UBLOCK_COUNT,
  U_NO_SCRIPT = UBLOCK_COUNT
}
enum  UCellWidth {
  U_ZERO_WIDTH = 0, U_HALF_WIDTH = 1, U_FULL_WIDTH = 2, U_NEUTRAL_WIDTH = 3,
  U_CELL_WIDTH_COUNT
}
enum  UCharCategory {
  U_UNASSIGNED = 0, U_GENERAL_OTHER_TYPES = 0, U_UPPERCASE_LETTER = 1, U_LOWERCASE_LETTER = 2,
  U_TITLECASE_LETTER = 3, U_MODIFIER_LETTER = 4, U_OTHER_LETTER = 5, U_NON_SPACING_MARK = 6,
  U_ENCLOSING_MARK = 7, U_COMBINING_SPACING_MARK = 8, U_DECIMAL_DIGIT_NUMBER = 9, U_LETTER_NUMBER = 10,
  U_OTHER_NUMBER = 11, U_SPACE_SEPARATOR = 12, U_LINE_SEPARATOR = 13, U_PARAGRAPH_SEPARATOR = 14,
  U_CONTROL_CHAR = 15, U_FORMAT_CHAR = 16, U_PRIVATE_USE_CHAR = 17, U_SURROGATE = 18,
  U_DASH_PUNCTUATION = 19, U_START_PUNCTUATION = 20, U_END_PUNCTUATION = 21, U_CONNECTOR_PUNCTUATION = 22,
  U_OTHER_PUNCTUATION = 23, U_MATH_SYMBOL = 24, U_CURRENCY_SYMBOL = 25, U_MODIFIER_SYMBOL = 26,
  U_OTHER_SYMBOL = 27, U_INITIAL_PUNCTUATION = 28, U_FINAL_PUNCTUATION = 29, U_CHAR_CATEGORY_COUNT
}
enum  UCharDirection {
  U_LEFT_TO_RIGHT = 0, U_RIGHT_TO_LEFT = 1, U_EUROPEAN_NUMBER = 2, U_EUROPEAN_NUMBER_SEPARATOR = 3,
  U_EUROPEAN_NUMBER_TERMINATOR = 4, U_ARABIC_NUMBER = 5, U_COMMON_NUMBER_SEPARATOR = 6, U_BLOCK_SEPARATOR = 7,
  U_SEGMENT_SEPARATOR = 8, U_WHITE_SPACE_NEUTRAL = 9, U_OTHER_NEUTRAL = 10, U_LEFT_TO_RIGHT_EMBEDDING = 11,
  U_LEFT_TO_RIGHT_OVERRIDE = 12, U_RIGHT_TO_LEFT_ARABIC = 13, U_RIGHT_TO_LEFT_EMBEDDING = 14, U_RIGHT_TO_LEFT_OVERRIDE = 15,
  U_POP_DIRECTIONAL_FORMAT = 16, U_DIR_NON_SPACING_MARK = 17, U_BOUNDARY_NEUTRAL = 18, U_CHAR_DIRECTION_COUNT
}
enum  UCharNameChoice { U_UNICODE_CHAR_NAME, U_UNICODE_10_CHAR_NAME, U_EXTENDED_CHAR_NAME, U_CHAR_NAME_CHOICE_COUNT }
enum  UProperty {
  UCHAR_ALPHABETIC, UCHAR_BINARY_START = UCHAR_ALPHABETIC, UCHAR_ASCII_HEX_DIGIT, UCHAR_BIDI_CONTROL,
  UCHAR_BIDI_MIRRORED, UCHAR_DASH, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, UCHAR_DEPRECATED,
  UCHAR_DIACRITIC, UCHAR_EXTENDER, UCHAR_FULL_COMPOSITION_EXCLUSION, UCHAR_GRAPHEME_BASE,
  UCHAR_GRAPHEME_EXTEND, UCHAR_GRAPHEME_LINK, UCHAR_HEX_DIGIT, UCHAR_HYPHEN,
  UCHAR_ID_CONTINUE, UCHAR_ID_START, UCHAR_IDEOGRAPHIC, UCHAR_IDS_BINARY_OPERATOR,
  UCHAR_IDS_TRINARY_OPERATOR, UCHAR_JOIN_CONTROL, UCHAR_LOGICAL_ORDER_EXCEPTION, UCHAR_LOWERCASE,
  UCHAR_MATH, UCHAR_NONCHARACTER_CODE_POINT, UCHAR_QUOTATION_MARK, UCHAR_RADICAL,
  UCHAR_SOFT_DOTTED, UCHAR_TERMINAL_PUNCTUATION, UCHAR_UNIFIED_IDEOGRAPH, UCHAR_UPPERCASE,
  UCHAR_WHITE_SPACE, UCHAR_XID_CONTINUE, UCHAR_XID_START, UCHAR_BINARY_LIMIT
}

Functions

U_CAPI void U_EXPORT2 u_charAge (UChar32 c, UVersionInfo versionArray)
U_CAPI uint16_t U_EXPORT2 u_charCellWidth (UChar32 c)
U_CAPI int32_t U_EXPORT2 u_charDigitValue (UChar32 c)
U_CAPI UCharDirection U_EXPORT2 u_charDirection (UChar32 c)
U_CAPI UChar32 U_EXPORT2 u_charFromName (UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
U_CAPI UChar32 U_EXPORT2 u_charMirror (UChar32 c)
U_CAPI int32_t U_EXPORT2 u_charName (UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
U_CAPI int8_t U_EXPORT2 u_charType (UChar32 c)
U_CAPI int32_t U_EXPORT2 u_digit (UChar32 ch, int8_t radix)
U_CAPI void U_EXPORT2 u_enumCharNames (UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
U_CAPI void U_EXPORT2 u_enumCharTypes (UCharEnumTypeRange *enumRange, const void *context)
U_CAPI UChar32 U_EXPORT2 u_foldCase (UChar32 c, uint32_t options)
U_CAPI UChar32 U_EXPORT2 u_forDigit (int32_t digit, int8_t radix)
U_CAPI uint8_t U_EXPORT2 u_getCombiningClass (UChar32 c)
U_CAPI void U_EXPORT2 u_getUnicodeVersion (UVersionInfo info)
U_CAPI UBool U_EXPORT2 u_hasBinaryProperty (UChar32 c, UProperty which)
U_CAPI UBool U_EXPORT2 u_isalnum (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isalpha (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isbase (UChar32 c)
U_CAPI UBool U_EXPORT2 u_iscntrl (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isdefined (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isdigit (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isIDIgnorable (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isIDPart (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isIDStart (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isJavaIDPart (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isJavaIDStart (UChar32 c)
U_CAPI UBool U_EXPORT2 u_islower (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isMirrored (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isprint (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isspace (UChar32 c)
U_CAPI UBool U_EXPORT2 u_istitle (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isUAlphabetic (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isULowercase (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isupper (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isUUppercase (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isUWhiteSpace (UChar32 c)
U_CAPI UBool U_EXPORT2 u_isWhitespace (UChar32 c)
U_CAPI UChar32 U_EXPORT2 u_tolower (UChar32 c)
U_CAPI UChar32 U_EXPORT2 u_totitle (UChar32 c)
U_CAPI UChar32 U_EXPORT2 u_toupper (UChar32 c)
U_CAPI UBlockCode U_EXPORT2 ublock_getCode (UChar32 ch)


Generated by  Doxygen 1.6.0   Back to index