Logo Search packages:      
Sourcecode: icu version File versions

ustring.h File Reference


Detailed Description

C API: Unicode string handling functions.

These C API functions provide Unicode string handling.

Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h> functions. (For example, they do not check for bad arguments like NULL string pointers.) In some cases, only the thread-safe variant of such a function is implemented here (see u_strtok_r()).

Other functions provide more Unicode-specific functionality like locale-specific upper/lower-casing and string comparison in code point order.

ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units. UTF-16 encodes each Unicode code point with either one or two UChar code units. Some APIs accept a 32-bit UChar32 value for a single code point. (UTF-16 is the default form of Unicode, and a forward-compatible extension of the original, fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0 in 1996.)

Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings), it is much more efficient than those encodings, even for random access, because the code unit values for single-unit characters vs. lead units vs. trail units are completely disjoint. This means that it is easy to determine character (code point) boundaries from random offsets in the string. (It also means, e.g., that u_strstr() does not need to verify that a match was found on actual character boundaries; with some legacy encodings, strstr() may need to scan back to the start of the text to verify this.)

Unicode (UTF-16) string processing is optimized for the single-unit case. Although it is important to support supplementary characters (which use pairs of lead/trail code units called "surrogates"), their occurrence is rare. Almost all characters in modern use require only a single UChar code unit (i.e., their code point values are <=0xffff).

Definition in file ustring.h.

#include "unicode/utypes.h"

Go to the source code of this file.

Defines

#define U_STRING_DECL(var, cs, length)   static const wchar_t var[(length)+1]={ L ## cs }
#define U_STRING_INIT(var, cs, length)

Typedefs

typedef void * UBreakIterator

Functions

U_CAPI char *U_EXPORT2 u_austrcpy (char *dst, const UChar *src)
U_CAPI char *U_EXPORT2 u_austrncpy (char *dst, const UChar *src, int32_t n)
U_CAPI int32_t U_EXPORT2 u_countChar32 (const UChar *s, int32_t length)
U_CAPI int32_t U_EXPORT2 u_memcasecmp (const UChar *s1, const UChar *s2, int32_t length, uint32_t options)
U_CAPI UChar *U_EXPORT2 u_memchr (const UChar *src, UChar ch, int32_t count)
U_CAPI UChar *U_EXPORT2 u_memchr32 (const UChar *src, UChar32 ch, int32_t count)
U_CAPI int32_t U_EXPORT2 u_memcmp (const UChar *buf1, const UChar *buf2, int32_t count)
U_CAPI int32_t U_EXPORT2 u_memcmpCodePointOrder (const UChar *s1, const UChar *s2, int32_t count)
U_CAPI UChar *U_EXPORT2 u_memcpy (UChar *dest, const UChar *src, int32_t count)
U_CAPI UChar *U_EXPORT2 u_memmove (UChar *dest, const UChar *src, int32_t count)
U_CAPI UChar *U_EXPORT2 u_memset (UChar *dest, UChar c, int32_t count)
U_CAPI int32_t U_EXPORT2 u_strcasecmp (const UChar *s1, const UChar *s2, uint32_t options)
U_CAPI UChar *U_EXPORT2 u_strcat (UChar *dst, const UChar *src)
U_CAPI UChar *U_EXPORT2 u_strchr (const UChar *s, UChar c)
U_CAPI UChar *U_EXPORT2 u_strchr32 (const UChar *s, UChar32 c)
U_CAPI int32_t U_EXPORT2 u_strcmp (const UChar *s1, const UChar *s2)
U_CAPI int32_t U_EXPORT2 u_strcmpCodePointOrder (const UChar *s1, const UChar *s2)
U_CAPI UChar *U_EXPORT2 u_strcpy (UChar *dst, const UChar *src)
U_CAPI int32_t U_EXPORT2 u_strcspn (const UChar *string, const UChar *matchSet)
U_CAPI int32_t U_EXPORT2 u_strFoldCase (UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2 u_strFromUTF32 (UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2 u_strFromUTF8 (UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2 u_strFromWCS (UChar *dest, int32_t destCapacity, int32_t *pDestLength, const wchar_t *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2 u_strlen (const UChar *s)
U_CAPI int32_t U_EXPORT2 u_strncasecmp (const UChar *s1, const UChar *s2, int32_t n, uint32_t options)
U_CAPI UChar *U_EXPORT2 u_strncat (UChar *dst, const UChar *src, int32_t n)
U_CAPI int32_t U_EXPORT2 u_strncmp (const UChar *ucs1, const UChar *ucs2, int32_t n)
U_CAPI int32_t U_EXPORT2 u_strncmpCodePointOrder (const UChar *s1, const UChar *s2, int32_t n)
U_CAPI UChar *U_EXPORT2 u_strncpy (UChar *dst, const UChar *src, int32_t n)
U_CAPI UChar *U_EXPORT2 u_strpbrk (const UChar *string, const UChar *matchSet)
U_CAPI int32_t U_EXPORT2 u_strspn (const UChar *string, const UChar *matchSet)
U_CAPI UChar *U_EXPORT2 u_strstr (const UChar *s, const UChar *substring)
U_CAPI UChar *U_EXPORT2 u_strtok_r (UChar *src, const UChar *delim, UChar **saveState)
U_CAPI int32_t U_EXPORT2 u_strToLower (UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2 u_strToTitle (UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2 u_strToUpper (UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
U_CAPI UChar32 *U_EXPORT2 u_strToUTF32 (UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI char *U_EXPORT2 u_strToUTF8 (char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI wchar_t *U_EXPORT2 u_strToWCS (wchar_t *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2 u_uastrcpy (UChar *dst, const char *src)
U_CAPI UChar *U_EXPORT2 u_uastrncpy (UChar *dst, const char *src, int32_t n)
U_CAPI int32_t U_EXPORT2 u_unescape (const char *src, UChar *dest, int32_t destCapacity)
U_CAPI UChar32 U_EXPORT2 u_unescapeAt (UNESCAPE_CHAR_AT charAt, int32_t *offset, int32_t length, void *context)

Variables

typedef UChar(* UNESCAPE_CHAR_AT )(int32_t offset, void *context)


Generated by  Doxygen 1.6.0   Back to index