Logo Search packages:      
Sourcecode: icu version File versions  Download package

U_STABLE UChar* U_EXPORT2 u_strFindFirst ( const UChar *  s,
int32_t  length,
const UChar *  substring,
int32_t  subLength 
)

Find the first occurrence of a substring in a string. The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.

Parameters:
s The string to search.
length The length of s (number of UChars), or -1 if it is NUL-terminated.
substring The substring to find (NUL-terminated).
subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
Returns:
A pointer to the first occurrence of substring in s, or s itself if the substring is empty, or NULL if substring is not in s. ICU 2.4
See also:
u_strstr

u_strFindLast

Definition at line 52 of file ustring.c.

References NULL, U16_IS_SURROGATE, and u_strlen().

                                                    {
    const UChar *start, *p, *q, *subLimit;
    UChar c, cs, cq;

    if(sub==NULL || subLength<-1) {
        return (UChar *)s;
    }
    if(s==NULL || length<-1) {
        return NULL;
    }

    start=s;

    if(length<0 && subLength<0) {
        /* both strings are NUL-terminated */
        if((cs=*sub++)==0) {
            return (UChar *)s;
        }
        if(*sub==0 && !U16_IS_SURROGATE(cs)) {
            /* the substring consists of a single, non-surrogate BMP code point */
            return u_strchr(s, cs);
        }

        while((c=*s++)!=0) {
            if(c==cs) {
                /* found first substring UChar, compare rest */
                p=s;
                q=sub;
                for(;;) {
                    if((cq=*q)==0) {
                        if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
                            return (UChar *)(s-1); /* well-formed match */
                        } else {
                            break; /* no match because surrogate pair is split */
                        }
                    }
                    if((c=*p)==0) {
                        return NULL; /* no match, and none possible after s */
                    }
                    if(c!=cq) {
                        break; /* no match */
                    }
                    ++p;
                    ++q;
                }
            }
        }

        /* not found */
        return NULL;
    }

    if(subLength<0) {
        subLength=u_strlen(sub);
    }
    if(subLength==0) {
        return (UChar *)s;
    }

    /* get sub[0] to search for it fast */
    cs=*sub++;
    --subLength;
    subLimit=sub+subLength;

    if(subLength==0 && !U16_IS_SURROGATE(cs)) {
        /* the substring consists of a single, non-surrogate BMP code point */
        return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
    }

    if(length<0) {
        /* s is NUL-terminated */
        while((c=*s++)!=0) {
            if(c==cs) {
                /* found first substring UChar, compare rest */
                p=s;
                q=sub;
                for(;;) {
                    if(q==subLimit) {
                        if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
                            return (UChar *)(s-1); /* well-formed match */
                        } else {
                            break; /* no match because surrogate pair is split */
                        }
                    }
                    if((c=*p)==0) {
                        return NULL; /* no match, and none possible after s */
                    }
                    if(c!=*q) {
                        break; /* no match */
                    }
                    ++p;
                    ++q;
                }
            }
        }
    } else {
        const UChar *limit, *preLimit;

        /* subLength was decremented above */
        if(length<=subLength) {
            return NULL; /* s is shorter than sub */
        }

        limit=s+length;

        /* the substring must start before preLimit */
        preLimit=limit-subLength;

        while(s!=preLimit) {
            c=*s++;
            if(c==cs) {
                /* found first substring UChar, compare rest */
                p=s;
                q=sub;
                for(;;) {
                    if(q==subLimit) {
                        if(isMatchAtCPBoundary(start, s-1, p, limit)) {
                            return (UChar *)(s-1); /* well-formed match */
                        } else {
                            break; /* no match because surrogate pair is split */
                        }
                    }
                    if(*p!=*q) {
                        break; /* no match */
                    }
                    ++p;
                    ++q;
                }
            }
        }
    }

    /* not found */
    return NULL;
}


Generated by  Doxygen 1.6.0   Back to index