Logo Search packages:      
Sourcecode: icu version File versions  Download package

U_CDECL_END U_NAMESPACE_BEGIN CompactTrieHeader * CompactTrieDictionary::compactMutableTrieDictionary ( const MutableTrieDictionary &  dict,
UErrorCode status 
) [static, private]

Convert a MutableTrieDictionary into a compact data blob.

Parameters:
dict The dictionary to convert.
status A status code recording the success of the call.
Returns:
A single data blob starting with a CompactTrieHeader.

Definition at line 1019 of file triedict.cpp.

References UVector::addElement(), UVector32::elementAti(), FALSE, NULL, UVector32::push(), UVector32::setElementAt(), UVector32::setSize(), UVector32::size(), UVector::size(), TRUE, U_FAILURE, U_ILLEGAL_ARGUMENT_ERROR, U_MEMORY_ALLOCATION_ERROR, and void().

Referenced by CompactTrieDictionary().

                                                     {
    if (U_FAILURE(status)) {
        return NULL;
    }
#ifdef DEBUG_TRIE_DICT
    struct tms timing;
    struct tms previous;
    (void) ::times(&previous);
#endif
    UStack nodes(_deleteBuildNode, NULL, status);      // Index of nodes

    // Add node 0, used as the NULL pointer/sentinel.
    nodes.addElement((int32_t)0, status);

    // Start by creating the special empty node we use to indicate that the parent
    // terminates a word. This must be node 1, because the builder assumes
    // that.
    if (U_FAILURE(status)) {
        return NULL;
    }
    BuildCompactTrieNode *terminal = new BuildCompactTrieNode(TRUE, FALSE, nodes, status);
    if (terminal == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }

    // This call does all the work of building the new trie structure. The root
    // will be node 2.
    BuildCompactTrieNode *root = compactOneNode(dict.fTrie, FALSE, nodes, status);
#ifdef DEBUG_TRIE_DICT
    (void) ::times(&timing);
    fprintf(stderr, "Compact trie built, %d nodes, time user %f system %f\n",
        nodes.size(), (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
        (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
    previous = timing;
#endif

    // Now coalesce all duplicate nodes.
    coalesceDuplicates(nodes, status);
#ifdef DEBUG_TRIE_DICT
    (void) ::times(&timing);
    fprintf(stderr, "Duplicates coalesced, time user %f system %f\n",
        (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
        (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
    previous = timing;
#endif

    // Next, build the output trie.
    // First we compute all the sizes and build the node ID translation table.
    uint32_t totalSize = offsetof(CompactTrieHeader,offsets);
    int32_t count = nodes.size();
    int32_t nodeCount = 1;              // The sentinel node we already have
    BuildCompactTrieNode *node;
    int32_t i;
    UVector32 translate(count, status); // Should be no growth needed after this
    translate.push(0, status);          // The sentinel node
    
    if (U_FAILURE(status)) {
        return NULL;
    }

    for (i = 1; i < count; ++i) {
        node = (BuildCompactTrieNode *)nodes[i];
        if (node->fNodeID == i) {
            // Only one node out of each duplicate set is used
            if (i >= translate.size()) {
                // Logically extend the mapping table
                translate.setSize(i+1);
            }
            translate.setElementAt(nodeCount++, i);
            totalSize += node->size();
        }
    }
    
    // Check for overflowing 16 bits worth of nodes.
    if (nodeCount > 0x10000) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    
    // Add enough room for the offsets.
    totalSize += nodeCount*sizeof(uint32_t);
#ifdef DEBUG_TRIE_DICT
    (void) ::times(&timing);
    fprintf(stderr, "Sizes/mapping done, time user %f system %f\n",
        (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
        (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
    previous = timing;
    fprintf(stderr, "%d nodes, %d unique, %d bytes\n", nodes.size(), nodeCount, totalSize);
#endif
    uint8_t *bytes = (uint8_t *)uprv_malloc(totalSize);
    if (bytes == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    CompactTrieHeader *header = (CompactTrieHeader *)bytes;
    header->size = totalSize;
    header->nodeCount = nodeCount;
    header->offsets[0] = 0;                     // Sentinel
    header->root = translate.elementAti(root->fNodeID);
#ifdef DEBUG_TRIE_DICT
    if (header->root == 0) {
        fprintf(stderr, "ERROR: root node %d translate to physical zero\n", root->fNodeID);
    }
#endif
    uint32_t offset = offsetof(CompactTrieHeader,offsets)+(nodeCount*sizeof(uint32_t));
    nodeCount = 1;
    // Now write the data
    for (i = 1; i < count; ++i) {
        node = (BuildCompactTrieNode *)nodes[i];
        if (node->fNodeID == i) {
            header->offsets[nodeCount++] = offset;
            node->write(bytes, offset, translate);
        }
    }
#ifdef DEBUG_TRIE_DICT
    (void) ::times(&timing);
    fprintf(stderr, "Trie built, time user %f system %f\n",
        (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
        (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
    previous = timing;
    fprintf(stderr, "Final offset is %d\n", offset);
    
    // Collect statistics on node types and sizes
    int hCount = 0;
    int vCount = 0;
    size_t hSize = 0;
    size_t vSize = 0;
    size_t hItemCount = 0;
    size_t vItemCount = 0;
    uint32_t previousOff = offset;
    for (uint16_t nodeIdx = nodeCount-1; nodeIdx >= 2; --nodeIdx) {
        const CompactTrieNode *node = getCompactNode(header, nodeIdx);
        if (node->flagscount & kVerticalNode) {
            vCount += 1;
            vItemCount += (node->flagscount & kCountMask);
            vSize += previousOff-header->offsets[nodeIdx];
        }
        else {
            hCount += 1;
            hItemCount += (node->flagscount & kCountMask);
            hSize += previousOff-header->offsets[nodeIdx];
        }
        previousOff = header->offsets[nodeIdx];
    }
    fprintf(stderr, "Horizontal nodes: %d total, average %f bytes with %f items\n", hCount,
                (double)hSize/hCount, (double)hItemCount/hCount);
    fprintf(stderr, "Vertical nodes: %d total, average %f bytes with %f items\n", vCount,
                (double)vSize/vCount, (double)vItemCount/vCount);
#endif

    if (U_FAILURE(status)) {
        uprv_free(bytes);
        header = NULL;
    }
    else {
        header->magic = COMPACT_TRIE_MAGIC_1;
    }
    return header;
}


Generated by  Doxygen 1.6.0   Back to index