Logo Search packages:      
Sourcecode: icu version File versions

ubrk.h File Reference


Detailed Description

C API: BreakIterator.

BreakIterator C API

The BreakIterator C API defines methods for finding the location of boundaries in text. A UBreakIterator, accessed through a pointer, maintains a current position and scans over the text, returning the indices of the characters where boundaries occur.

Line boundary analysis determines where a text string can be broken when line-wrapping. The mechanism correctly handles punctuation and hyphenated words.

Sentence boundary analysis allows selection with correct interpretation of periods within numbers and abbreviations, and trailing punctuation marks such as quotation marks and parentheses.

Word boundary analysis is used by search and replace functions, as well as within text editing applications that allow the user to select words with a double click. Word selection provides correct interpretation of punctuation marks within and following words. Characters that are not part of a word, such as symbols or punctuation marks, have word-breaks on both sides.

Character boundary analysis allows users to interact with characters as they expect to, for example, when moving the cursor through a text string. Character boundary analysis provides correct navigation through character strings, regardless of how the character is stored. For example, an accented character might be stored as a base character and a diacritical mark. What users consider to be a character can differ between languages.

Title boundary analysis locates all positions, typically starts of words, that should be set to Title Case when title casing the text.

This is the interface for all text boundaries.

Examples:

Helper function to output text

 
    /*
     * Print the code units str[start] .. str[end-1] followed by a newline.
     *
     * str   - NUL-terminated UChar string; it is only read.
     * start - index of the first code unit to print.
     * end   - index one past the last code unit to print.
     *
     * Nothing is printed when str is NULL, when the range is invalid
     * (start < 0 or end < start, which covers end == UBRK_DONE), or when
     * memory cannot be allocated. All temporary buffers are released
     * before returning.
     */
    void printTextRange(UChar* str, int32_t start, int32_t end ) {
         int32_t length;
         UChar* result;
         char* res;

         if (str == NULL || start < 0 || end < start) {
             return;
         }
         length = end - start;

         result=(UChar*)malloc(sizeof(UChar) * (length+1));
         if (result == NULL) {
             return;
         }
         /* u_strncpy only terminates the destination when the source is
            shorter than the count, so terminate explicitly. */
         u_strncpy(result, &str[start], length);
         result[length] = 0;

         /* NOTE(review): one byte per UChar is assumed here, as in the
            original example; a multi-byte default codepage may need a
            larger buffer - confirm before using with non-ASCII text. */
         res=(char*)malloc(sizeof(char) * (u_strlen(result)+1));
         if (res != NULL) {
             u_austrcpy(res, result);
             printf("%s\n", res);
             free(res);
         }
         free(result);
    }
Print each element in order:
 
    /*
     * Walk the boundaries of the iterator from first to last and print
     * the text between each pair of consecutive boundaries.
     *
     * boundary - break iterator to walk.
     * str      - string passed on to printTextRange for each range.
     */
    void printEachForward( UBreakIterator* boundary, UChar* str) {
       int32_t end;
       int32_t start = ubrk_first(boundary);
       /* The loop stops once ubrk_next reports UBRK_DONE. */
       for (end = ubrk_next(boundary); end != UBRK_DONE; start = end, end = ubrk_next(boundary)) {
             printTextRange(str, start, end );
         }
    }
Print each element in reverse order:
 
    /*
     * Walk the boundaries of the iterator from last to first and print
     * the text between each pair of consecutive boundaries.
     *
     * boundary - break iterator to walk.
     * str      - string passed on to printTextRange for each range.
     */
    void printEachBackward( UBreakIterator* boundary, UChar* str) {
       int32_t upper = ubrk_last(boundary);
       int32_t lower = ubrk_previous(boundary);

       /* Step backwards until ubrk_previous reports UBRK_DONE. */
       while (lower != UBRK_DONE) {
             printTextRange( str, lower, upper );
             upper = lower;
             lower = ubrk_previous(boundary);
         }
    }
Print first element
 
    /*
     * Print the text between the first boundary and the boundary that
     * follows it.
     *
     * boundary - break iterator to query.
     * str      - string passed on to printTextRange.
     */
    void printFirst(UBreakIterator* boundary, UChar* str) {
        int32_t firstBoundary = ubrk_first(boundary);
        int32_t nextBoundary = ubrk_next(boundary);

        printTextRange( str, firstBoundary, nextBoundary );
    }
Print last element
 
    /*
     * Print the text between the last boundary and the boundary that
     * precedes it.
     *
     * boundary - break iterator to query.
     * str      - string passed on to printTextRange.
     */
    void printLast(UBreakIterator* boundary, UChar* str) {
        int32_t lastBoundary = ubrk_last(boundary);
        int32_t prevBoundary = ubrk_previous(boundary);

        printTextRange(str, prevBoundary, lastBoundary );
    }
Print the element at a specified position
 
    /*
     * Print the element that contains position pos: the text between the
     * boundary following pos and the boundary before that one.
     *
     * boundary - break iterator to query.
     * pos      - offset into the text being iterated.
     * str      - string passed on to printTextRange.
     *
     * Nothing is printed when either lookup yields UBRK_DONE, i.e. when
     * there is no boundary after pos or none before the one found.
     */
    void printAt(UBreakIterator* boundary, int32_t pos , UChar* str) {
        int32_t start;
        int32_t end = ubrk_following(boundary, pos);
        if (end == UBRK_DONE) {
            return;
        }
        start = ubrk_previous(boundary);
        if (start == UBRK_DONE) {
            return;
        }
        printTextRange(str, start, end );
    }
Creating and using text boundaries
 
       void BreakIterator_Example( void ) {
           UBreakIterator* boundary;
           UChar *stringToExamine;
           stringToExamine=(UChar*)malloc(sizeof(UChar) * (strlen("Aaa bbb ccc. Ddd eee fff.")+1) );
           u_uastrcpy(stringToExamine, "Aaa bbb ccc. Ddd eee fff.");
           printf("Examining: "Aaa bbb ccc. Ddd eee fff.");

           //print each sentence in forward and reverse order
           boundary = ubrk_open(UBRK_SENTENCE, "en_us", stringToExamine, u_strlen(stringToExamine), &status);
           printf("----- forward: -----------\n");
           printEachForward(boundary, stringToExamine);
           printf("----- backward: ----------\n");
           printEachBackward(boundary, stringToExamine);
           ubrk_close(boundary);

           //print each word in order
           boundary = ubrk_open(UBRK_WORD, "en_us", stringToExamine, u_strlen(stringToExamine), &status);
           printf("----- forward: -----------\n");
           printEachForward(boundary, stringToExamine);
           printf("----- backward: ----------\n");
           printEachBackward(boundary, stringToExamine);
           //print first element
           printf("----- first: -------------\n");
           printFirst(boundary, stringToExamine);
           //print last element
           printf("----- last: --------------\n");
           printLast(boundary, stringToExamine);
           //print word at charpos 10
           printf("----- at pos 10: ---------\n");
           printAt(boundary, 10 , stringToExamine);

           ubrk_close(boundary);
       }

Definition in file ubrk.h.

#include "unicode/utypes.h"

Go to the source code of this file.

Defines

#define U_BRK_SAFECLONE_BUFFERSIZE   512
#define UBRK_DONE   ((int32_t) -1)

Typedefs

typedef void * UBreakIterator
typedef enum UBreakIteratorType UBreakIteratorType

Enumerations

enum  UBreakIteratorType {
  UBRK_CHARACTER, UBRK_WORD, UBRK_LINE, UBRK_SENTENCE,
  UBRK_TITLE
}

Functions

U_CAPI void U_EXPORT2 ubrk_close (UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2 ubrk_countAvailable (void)
U_CAPI int32_t U_EXPORT2 ubrk_current (const UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2 ubrk_first (UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2 ubrk_following (UBreakIterator *bi, int32_t offset)
U_CAPI const char *U_EXPORT2 ubrk_getAvailable (int32_t index)
U_CAPI UBool U_EXPORT2 ubrk_isBoundary (UBreakIterator *bi, int32_t offset)
U_CAPI int32_t U_EXPORT2 ubrk_last (UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2 ubrk_next (UBreakIterator *bi)
U_CAPI UBreakIterator *U_EXPORT2 ubrk_open (UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
U_CAPI UBreakIterator *U_EXPORT2 ubrk_openRules (const UChar *rules, int32_t rulesLength, const UChar *text, int32_t textLength, UErrorCode *status)
U_CAPI int32_t U_EXPORT2 ubrk_preceding (UBreakIterator *bi, int32_t offset)
U_CAPI int32_t U_EXPORT2 ubrk_previous (UBreakIterator *bi)
U_CAPI UBreakIterator *U_EXPORT2 ubrk_safeClone (const UBreakIterator *bi, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
U_CAPI void U_EXPORT2 ubrk_setText (UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)


Generated by  Doxygen 1.6.0   Back to index