Logo Search packages:      
Sourcecode: icu version File versions

U_CAPI UBool U_EXPORT2 u_hasBinaryProperty ( UChar32  c,
UProperty  which 
)

Check a binary Unicode property for a code point.

Unicode, especially in version 3.2, defines many more properties than the original set in UnicodeData.txt. This API is intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). For details about the properties see http://www.unicode.org/ . For names of Unicode properties see the UCD file PropertyAliases.txt.

The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). For details about the properties see http://www.unicode.org/ . For names of Unicode properties see the UCD file PropertyAliases.txt.

Important: If ICU is built with UCD files from Unicode versions below 3.2, then properties marked with "new in Unicode 3.2" are not or not fully available.

Parameters:
c Code point to test.
which UProperty selector constant, identifies which binary property to check. Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.
Returns:
TRUE or FALSE according to the binary Unicode property value for c. Also FALSE if which is out of bounds or if the Unicode version does not have data for the property at all, or not for this code point.
See also:
UProperty

u_getUnicodeVersion ICU 2.1

Definition at line 104 of file uprops.c.

References U_LOWERCASE_LETTER, U_MATH_SYMBOL, U_UPPERCASE_LETTER, UCHAR_ALPHABETIC, UCHAR_ASCII_HEX_DIGIT, UCHAR_BIDI_CONTROL, UCHAR_BIDI_MIRRORED, UCHAR_DASH, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, UCHAR_DEPRECATED, UCHAR_DIACRITIC, UCHAR_EXTENDER, UCHAR_FULL_COMPOSITION_EXCLUSION, UCHAR_GRAPHEME_BASE, UCHAR_GRAPHEME_EXTEND, UCHAR_GRAPHEME_LINK, UCHAR_HEX_DIGIT, UCHAR_HYPHEN, UCHAR_ID_CONTINUE, UCHAR_ID_START, UCHAR_IDEOGRAPHIC, UCHAR_IDS_BINARY_OPERATOR, UCHAR_IDS_TRINARY_OPERATOR, UCHAR_JOIN_CONTROL, UCHAR_LOGICAL_ORDER_EXCEPTION, UCHAR_LOWERCASE, UCHAR_MATH, UCHAR_NONCHARACTER_CODE_POINT, UCHAR_QUOTATION_MARK, UCHAR_RADICAL, UCHAR_SOFT_DOTTED, UCHAR_TERMINAL_PUNCTUATION, UCHAR_UNIFIED_IDEOGRAPH, UCHAR_UPPERCASE, UCHAR_WHITE_SPACE, UCHAR_XID_CONTINUE, and UCHAR_XID_START.

                                                {
    /* c is range-checked in the functions that are called from here */
    switch(which) {
    case UCHAR_ALPHABETIC:
        /* Lu+Ll+Lt+Lm+Lo+Other_Alphabetic */
        return (FLAG(u_charType(c))&(_Lu|_Ll|_Lt|_Lm|_Lo))!=0 ||
                (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_OTHER_ALPHABETIC))!=0;
    case UCHAR_ASCII_HEX_DIGIT:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_ASCII_HEX_DIGIT))!=0;
    case UCHAR_BIDI_CONTROL:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_BIDI_CONTROL))!=0;
    case UCHAR_BIDI_MIRRORED:
        return u_isMirrored(c);
    case UCHAR_DASH:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_DASH))!=0;
    case UCHAR_DEFAULT_IGNORABLE_CODE_POINT:
        /* Cf+Cc+Cs+Other_Default_Ignorable_Code_Point-White_Space */
        return (FLAG(u_charType(c))&(_Cf|_Cc|_Cs))!=0 ||
                ((u_getUnicodeProperties(c, 1)&
                    (FLAG(UPROPS_OTHER_DEFAULT_IGNORABLE_CODE_POINT)|FLAG(UPROPS_WHITE_SPACE)))==
                FLAG(UPROPS_OTHER_DEFAULT_IGNORABLE_CODE_POINT));
    case UCHAR_DEPRECATED:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_DEPRECATED))!=0;
    case UCHAR_DIACRITIC:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_DIACRITIC))!=0;
    case UCHAR_EXTENDER:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_EXTENDER))!=0;
    case UCHAR_FULL_COMPOSITION_EXCLUSION:
        return unorm_internalIsFullCompositionExclusion(c);
    case UCHAR_GRAPHEME_BASE:
        /*
         * [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend ==
         * [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-(Me+Mn+Mc+Other_Grapheme_Extend) ==
         * [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Me-Mn-Mc-Grapheme_Link-Other_Grapheme_Extend
         *
         * u_charType(c out of range) returns Cn so we need not check for the range
         */
        return (FLAG(u_charType(c))&(_Cc|_Cf|_Cs|_Co|_Cn|_Zl|_Zp|_Me|_Mn|_Mc))==0 &&
                ((u_getUnicodeProperties(c, 1)&
                    (FLAG(UPROPS_GRAPHEME_LINK)|FLAG(UPROPS_OTHER_GRAPHEME_EXTEND)))==0);
    case UCHAR_GRAPHEME_EXTEND:
        /* Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link */
        return (FLAG(u_charType(c))&(_Me|_Mn|_Mc))!=0 ||
                ((u_getUnicodeProperties(c, 1)&
                    (FLAG(UPROPS_OTHER_GRAPHEME_EXTEND)|FLAG(UPROPS_GRAPHEME_LINK)))==
                FLAG(UPROPS_OTHER_GRAPHEME_EXTEND));
    case UCHAR_GRAPHEME_LINK:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_GRAPHEME_LINK))!=0;
    case UCHAR_HEX_DIGIT:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_HEX_DIGIT))!=0;
    case UCHAR_HYPHEN:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_HYPHEN))!=0;
    case UCHAR_ID_CONTINUE:
        /* ID_Start+Mn+Mc+Nd+Pc == Lu+Ll+Lt+Lm+Lo+Nl+Mn+Mc+Nd+Pc */
        return (FLAG(u_charType(c))&(_Lu|_Ll|_Lt|_Lm|_Lo|_Nl|_Mn|_Mc|_Nd|_Pc))!=0;
    case UCHAR_ID_START:
        /* Lu+Ll+Lt+Lm+Lo+Nl */
        return (FLAG(u_charType(c))&(_Lu|_Ll|_Lt|_Lm|_Lo|_Nl))!=0;
    case UCHAR_IDEOGRAPHIC:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_IDEOGRAPHIC))!=0;
    case UCHAR_IDS_BINARY_OPERATOR:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_IDS_BINARY_OPERATOR))!=0;
    case UCHAR_IDS_TRINARY_OPERATOR:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_IDS_TRINARY_OPERATOR))!=0;
    case UCHAR_JOIN_CONTROL:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_JOIN_CONTROL))!=0;
    case UCHAR_LOGICAL_ORDER_EXCEPTION:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_LOGICAL_ORDER_EXCEPTION))!=0;
    case UCHAR_LOWERCASE:
        /* Ll+Other_Lowercase */
        return u_charType(c)==U_LOWERCASE_LETTER ||
                (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_OTHER_LOWERCASE))!=0;
    case UCHAR_MATH:
        /* Sm+Other_Math */
        return u_charType(c)==U_MATH_SYMBOL ||
                (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_OTHER_MATH))!=0;
    case UCHAR_NONCHARACTER_CODE_POINT:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_NONCHARACTER_CODE_POINT))!=0;
    case UCHAR_QUOTATION_MARK:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_QUOTATION_MARK))!=0;
    case UCHAR_RADICAL:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_RADICAL))!=0;
    case UCHAR_SOFT_DOTTED:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_SOFT_DOTTED))!=0;
    case UCHAR_TERMINAL_PUNCTUATION:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_TERMINAL_PUNCTUATION))!=0;
    case UCHAR_UNIFIED_IDEOGRAPH:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_UNIFIED_IDEOGRAPH))!=0;
    case UCHAR_UPPERCASE:
        /* Lu+Other_Uppercase */
        return u_charType(c)==U_UPPERCASE_LETTER ||
                (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_OTHER_UPPERCASE))!=0;
    case UCHAR_WHITE_SPACE:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_WHITE_SPACE))!=0;
    case UCHAR_XID_CONTINUE:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_XID_CONTINUE))!=0;
    case UCHAR_XID_START:
        return (u_getUnicodeProperties(c, 1)&FLAG(UPROPS_XID_START))!=0;
    default:
        /* not a known binary property */
        return FALSE;
    };
}


Generated by  Doxygen 1.6.0   Back to index