Logo Search packages:      
Sourcecode: icu version File versions

ucnvisci.c

/*  
**********************************************************************
*   Copyright (C) 2000-2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnvisci.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2001JUN26
*   created by: Ram Viswanadha
*   
*   Date        Name        Description
*   24/7/2001   Ram         Added support for EXT character handling
*/

#include "unicode/utypes.h"
#include "cmemory.h"
#include "unicode/ucnv_err.h"
#include "ucnv_bld.h"
#include "unicode/ucnv.h"
#include "ucnv_cnv.h"
#include "unicode/ucnv_cb.h"
#include "cstring.h"

#define UCNV_OPTIONS_VERSION_MASK 0xf
#define NUKTA               0x093c
#define HALANT              0x094d    
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero width Joiner */
#define INVALID_CHAR        0xffff   
#define ATR                 0xEF   /* Attribute code */
#define EXT                 0xF0   /* Extension code */
#define DANDA               0x0964
#define DOUBLE_DANDA        0x0965
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F 
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
#define VOCALLIC_RR         0x0931
#define LF                  0x0A
#define ASCII_END           0x9f
#define NO_CHAR_MARKER      0xFFFE
#define TELUGU_DELTA        DELTA * TELUGU
#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE


typedef enum  {
    DEVANAGARI =0,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    DELTA=0x80
}UniLang;


/**
 * Enumeration for switching code pages if <ATX>+<one of below values>
 * is encountered
 */
typedef enum {
    DEF =0x40,
    RMN =0x41,
    DEV =0x42,
    BNG =0x43,
    TML =0x44,
    TLG =0x45,
    ASM =0x46,
    ORI =0x47,
    KND =0x48,
    MLM =0x49,
    GJR =0x4A,
    PNJ =0x4B,
    ARB =0x71,
    PES =0x72,
    URD =0x73,
    SND =0x74,
    KSM =0x75,
    PST =0x76
}ISCIILang;

typedef enum{
    DEV_MASK =0x80,
    PNJ_MASK =0x40,
    GJR_MASK =0x20,
    ORI_MASK =0x10,
    BNG_MASK =0x08,
    KND_MASK =0x04,
    MLM_MASK =0x02,
    TML_MASK =0x01,
    ZERO     =0x00
}MaskEnum;

typedef struct{
    UChar contextCharToUnicode;      /* previous Unicode codepoint for contextual analysis */
    UChar contextCharFromUnicode;    /* previous Unicode codepoint for contextual analysis */
    uint16_t defDeltaToUnicode;      /* delta for switching to default state when DEF is encountered  */ 
    uint16_t currentDeltaFromUnicode;/* current delta in Indic block */
    uint16_t currentDeltaToUnicode;  /* current delta in Indic block */  
    MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */
    MaskEnum currentMaskToUnicode;   /* mask for current state in toUnicode */
    MaskEnum defMaskToUnicode;       /* mask for default state in toUnicode */
    UBool isFirstBuffer;
    char name[30];
}UConverterDataISCII; 

static const uint16_t lookupInitialData[][3]={
    { DEVANAGARI, DEV_MASK,  DEV },
    { BENGALI,    BNG_MASK,  BNG },
    { GURMUKHI,   PNJ_MASK,  PNJ },
    { GUJARATI,   GJR_MASK,  GJR },
    { ORIYA,      ORI_MASK,  ORI },
    { TAMIL,      TML_MASK,  TML },
    { TELUGU,     KND_MASK,  TLG },
    { KANNADA,    KND_MASK,  KND },
    { MALAYALAM,  MLM_MASK,  MLM }
};
    
static void 
_ISCIIOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, UErrorCode *errorCode){
    cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISCII));

    if(cnv->extraInfo != NULL) {
        int len=0;
        UConverterDataISCII *converterData=(UConverterDataISCII *) cnv->extraInfo;
        converterData->contextCharToUnicode=NO_CHAR_MARKER;
        cnv->toUnicodeStatus = missingCharMarker;
        converterData->contextCharFromUnicode=0x0000;
        /* check if the version requested is supported */
        if((options & UCNV_OPTIONS_VERSION_MASK) < 9){
            /* initialize state variables */
            converterData->currentDeltaFromUnicode=converterData->currentDeltaToUnicode=
            converterData->defDeltaToUnicode=
                    (uint16_t)(lookupInitialData[options & UCNV_OPTIONS_VERSION_MASK][0] * DELTA);

            converterData->currentMaskFromUnicode = converterData->currentMaskToUnicode = 
            converterData->defMaskToUnicode=lookupInitialData[options & UCNV_OPTIONS_VERSION_MASK][1];
            
            converterData->isFirstBuffer=TRUE;
            uprv_strcpy(converterData->name,"ISCII,version=");
            len = uprv_strlen(converterData->name);
            converterData->name[len]= (char)((options & UCNV_OPTIONS_VERSION_MASK) + '0');
            converterData->name[len+1]=0;
        }else{
            uprv_free(cnv->extraInfo);
            *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        }

    }else{
        *errorCode =U_MEMORY_ALLOCATION_ERROR;
    }
}
static void 
_ISCIIClose(UConverter *cnv){
    if(cnv->extraInfo && !cnv->isCopyLocal){
        uprv_free(cnv->extraInfo);
    }
}

static const char* 
_ISCIIgetName(const UConverter* cnv){
    if(cnv->extraInfo){
        UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
        return myData->name;
    }
    return NULL;
}

static void 
_ISCIIReset(UConverter *cnv, UConverterResetChoice choice){
    UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
    if(choice<=UCNV_RESET_TO_UNICODE) {
        cnv->toUnicodeStatus = missingCharMarker;
        cnv->mode=0;
        data->currentDeltaToUnicode=data->defDeltaToUnicode;
        data->currentMaskToUnicode = data->defMaskToUnicode;
        data->contextCharToUnicode=NO_CHAR_MARKER;
    }
    if(choice!=UCNV_RESET_TO_UNICODE) {
        cnv->fromUSurrogateLead=0x0000; 
        data->contextCharFromUnicode=0x00;
        data->currentMaskFromUnicode=data->defDeltaToUnicode;
        data->currentDeltaFromUnicode=data->defDeltaToUnicode;
    }
    data->isFirstBuffer=TRUE;

}

/** 
 * The values in validity table are indexed by the lower bits of Unicode 
 * range 0x0900 - 0x09ff. The values have a structure like: 
 *       ---------------------------------------------------------------
 *      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
 *      |       |       |       |       | ASM   | KND   |       |       |  
 *       ---------------------------------------------------------------
 * If a code point is valid in a particular script 
 * then that bit is turned on
 * 
 * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for
 * to represent these languages
 * 
 * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
 * and combine and use 1 bit to represent these languages.
 *
 * TODO: It is probably easier to understand and maintain to change this
 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
 */

static const uint8_t validityTable[128] = {
/* This state table is tool generated please donot edit unless you know exactly what you are doing */
/*ISCII:Valid:Unicode */
/*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xa1 : 0xb8: 0x901  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xa3 : 0xbf: 0x903  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x00: 0x904  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xaa : 0xfe: 0x90b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
/*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
/*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd2 : 0xb7: 0x933  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
/*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + ZERO     + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xea : 0xf8: 0x964  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xeaea : 0x00: 0x965*/ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x80: 0x970  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,

/*
 * The length of the array is 128 to provide values for 0x900..0x97f.
 * The last 15 entries for 0x971..0x97f of the validity table are all zero
 * because no Indic script uses such Unicode code points.
 */
/*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
};

static const uint16_t fromUnicodeTable[128]={   
    0x00a0 ,/* 0x0900 */ 
    0x00a1 ,/* 0x0901 */ 
    0x00a2 ,/* 0x0902 */ 
    0x00a3 ,/* 0x0903 */ 
    0xFFFF ,/* 0x0904 */ 
    0x00a4 ,/* 0x0905 */ 
    0x00a5 ,/* 0x0906 */ 
    0x00a6 ,/* 0x0907 */ 
    0x00a7 ,/* 0x0908 */ 
    0x00a8 ,/* 0x0909 */ 
    0x00a9 ,/* 0x090a */ 
    0x00aa ,/* 0x090b */ 
    0xA6E9 ,/* 0x090c */ 
    0x00ae ,/* 0x090d */ 
    0x00ab ,/* 0x090e */ 
    0x00ac ,/* 0x090f */ 
    0x00ad ,/* 0x0910 */ 
    0x00b2 ,/* 0x0911 */ 
    0x00af ,/* 0x0912 */ 
    0x00b0 ,/* 0x0913 */ 
    0x00b1 ,/* 0x0914 */ 
    0x00b3 ,/* 0x0915 */ 
    0x00b4 ,/* 0x0916 */ 
    0x00b5 ,/* 0x0917 */ 
    0x00b6 ,/* 0x0918 */ 
    0x00b7 ,/* 0x0919 */ 
    0x00b8 ,/* 0x091a */ 
    0x00b9 ,/* 0x091b */ 
    0x00ba ,/* 0x091c */ 
    0x00bb ,/* 0x091d */ 
    0x00bc ,/* 0x091e */ 
    0x00bd ,/* 0x091f */ 
    0x00be ,/* 0x0920 */ 
    0x00bf ,/* 0x0921 */ 
    0x00c0 ,/* 0x0922 */ 
    0x00c1 ,/* 0x0923 */ 
    0x00c2 ,/* 0x0924 */ 
    0x00c3 ,/* 0x0925 */ 
    0x00c4 ,/* 0x0926 */ 
    0x00c5 ,/* 0x0927 */ 
    0x00c6 ,/* 0x0928 */ 
    0x00c7 ,/* 0x0929 */ 
    0x00c8 ,/* 0x092a */ 
    0x00c9 ,/* 0x092b */ 
    0x00ca ,/* 0x092c */ 
    0x00cb ,/* 0x092d */ 
    0x00cc ,/* 0x092e */ 
    0x00cd ,/* 0x092f */ 
    0x00cf ,/* 0x0930 */ 
    0x00d0 ,/* 0x0931 */ 
    0x00d1 ,/* 0x0932 */ 
    0x00d2 ,/* 0x0933 */ 
    0x00d3 ,/* 0x0934 */ 
    0x00d4 ,/* 0x0935 */ 
    0x00d5 ,/* 0x0936 */ 
    0x00d6 ,/* 0x0937 */ 
    0x00d7 ,/* 0x0938 */ 
    0x00d8 ,/* 0x0939 */ 
    0xFFFF ,/* 0x093A */ 
    0xFFFF ,/* 0x093B */ 
    0x00e9 ,/* 0x093c */ 
    0xEAE9 ,/* 0x093d */ 
    0x00da ,/* 0x093e */ 
    0x00db ,/* 0x093f */ 
    0x00dc ,/* 0x0940 */ 
    0x00dd ,/* 0x0941 */ 
    0x00de ,/* 0x0942 */ 
    0x00df ,/* 0x0943 */ 
    0xDFE9 ,/* 0x0944 */ 
    0x00e3 ,/* 0x0945 */ 
    0x00e0 ,/* 0x0946 */ 
    0x00e1 ,/* 0x0947 */ 
    0x00e2 ,/* 0x0948 */ 
    0x00e7 ,/* 0x0949 */ 
    0x00e4 ,/* 0x094a */ 
    0x00e5 ,/* 0x094b */ 
    0x00e6 ,/* 0x094c */ 
    0x00e8 ,/* 0x094d */ 
    0x00ec ,/* 0x094e */ 
    0x00ed ,/* 0x094f */ 
    0xA1E9 ,/* 0x0950 */ /* OM Symbol */
    0xFFFF ,/* 0x0951 */ 
    0xF0B8 ,/* 0x0952 */ 
    0xFFFF ,/* 0x0953 */ 
    0xFFFF ,/* 0x0954 */ 
    0xFFFF ,/* 0x0955 */ 
    0xFFFF ,/* 0x0956 */ 
    0xFFFF ,/* 0x0957 */ 
    0xb3e9 ,/* 0x0958 */ 
    0xb4e9 ,/* 0x0959 */ 
    0xb5e9 ,/* 0x095a */ 
    0xbae9 ,/* 0x095b */ 
    0xbfe9 ,/* 0x095c */ 
    0xC0E9 ,/* 0x095d */ 
    0xc9e9 ,/* 0x095e */ 
    0x00ce ,/* 0x095f */ 
    0xAAe9 ,/* 0x0960 */ 
    0xA7E9 ,/* 0x0961 */ 
    0xDBE9 ,/* 0x0962 */ 
    0xDCE9 ,/* 0x0963 */ 
    0x00ea ,/* 0x0964 */ 
    0xeaea ,/* 0x0965 */ 
    0x00f1 ,/* 0x0966 */ 
    0x00f2 ,/* 0x0967 */ 
    0x00f3 ,/* 0x0968 */ 
    0x00f4 ,/* 0x0969 */ 
    0x00f5 ,/* 0x096a */ 
    0x00f6 ,/* 0x096b */ 
    0x00f7 ,/* 0x096c */ 
    0x00f8 ,/* 0x096d */ 
    0x00f9 ,/* 0x096e */ 
    0x00fa ,/* 0x096f */ 
    0xF0BF ,/* 0x0970 */
    0xFFFF ,/* 0x0971 */
    0xFFFF ,/* 0x0972 */
    0xFFFF ,/* 0x0973 */
    0xFFFF ,/* 0x0974 */
    0xFFFF ,/* 0x0975 */
    0xFFFF ,/* 0x0976 */
    0xFFFF ,/* 0x0977 */
    0xFFFF ,/* 0x0978 */
    0xFFFF ,/* 0x0979 */
    0xFFFF ,/* 0x097a */
    0xFFFF ,/* 0x097b */
    0xFFFF ,/* 0x097c */
    0xFFFF ,/* 0x097d */
    0xFFFF ,/* 0x097e */
    0xFFFF ,/* 0x097f */
};
static const uint16_t toUnicodeTable[256]={
    0x0000,/* 0x00 */
    0x0001,/* 0x01 */
    0x0002,/* 0x02 */
    0x0003,/* 0x03 */
    0x0004,/* 0x04 */
    0x0005,/* 0x05 */
    0x0006,/* 0x06 */
    0x0007,/* 0x07 */
    0x0008,/* 0x08 */
    0x0009,/* 0x09 */
    0x000a,/* 0x0a */
    0x000b,/* 0x0b */
    0x000c,/* 0x0c */
    0x000d,/* 0x0d */
    0x000e,/* 0x0e */
    0x000f,/* 0x0f */
    0x0010,/* 0x10 */
    0x0011,/* 0x11 */
    0x0012,/* 0x12 */
    0x0013,/* 0x13 */
    0x0014,/* 0x14 */
    0x0015,/* 0x15 */
    0x0016,/* 0x16 */
    0x0017,/* 0x17 */
    0x0018,/* 0x18 */
    0x0019,/* 0x19 */
    0x001a,/* 0x1a */
    0x001b,/* 0x1b */
    0x001c,/* 0x1c */
    0x001d,/* 0x1d */
    0x001e,/* 0x1e */
    0x001f,/* 0x1f */
    0x0020,/* 0x20 */
    0x0021,/* 0x21 */
    0x0022,/* 0x22 */
    0x0023,/* 0x23 */
    0x0024,/* 0x24 */
    0x0025,/* 0x25 */
    0x0026,/* 0x26 */
    0x0027,/* 0x27 */
    0x0028,/* 0x28 */
    0x0029,/* 0x29 */
    0x002a,/* 0x2a */
    0x002b,/* 0x2b */
    0x002c,/* 0x2c */
    0x002d,/* 0x2d */
    0x002e,/* 0x2e */
    0x002f,/* 0x2f */
    0x0030,/* 0x30 */
    0x0031,/* 0x31 */
    0x0032,/* 0x32 */
    0x0033,/* 0x33 */
    0x0034,/* 0x34 */
    0x0035,/* 0x35 */
    0x0036,/* 0x36 */
    0x0037,/* 0x37 */
    0x0038,/* 0x38 */
    0x0039,/* 0x39 */
    0x003A,/* 0x3A */
    0x003B,/* 0x3B */
    0x003c,/* 0x3c */
    0x003d,/* 0x3d */
    0x003e,/* 0x3e */
    0x003f,/* 0x3f */
    0x0040,/* 0x40 */
    0x0041,/* 0x41 */
    0x0042,/* 0x42 */
    0x0043,/* 0x43 */
    0x0044,/* 0x44 */
    0x0045,/* 0x45 */
    0x0046,/* 0x46 */
    0x0047,/* 0x47 */
    0x0048,/* 0x48 */
    0x0049,/* 0x49 */
    0x004a,/* 0x4a */
    0x004b,/* 0x4b */
    0x004c,/* 0x4c */
    0x004d,/* 0x4d */
    0x004e,/* 0x4e */
    0x004f,/* 0x4f */
    0x0050,/* 0x50 */
    0x0051,/* 0x51 */
    0x0052,/* 0x52 */
    0x0053,/* 0x53 */
    0x0054,/* 0x54 */
    0x0055,/* 0x55 */
    0x0056,/* 0x56 */
    0x0057,/* 0x57 */
    0x0058,/* 0x58 */
    0x0059,/* 0x59 */
    0x005a,/* 0x5a */
    0x005b,/* 0x5b */
    0x005c,/* 0x5c */
    0x005d,/* 0x5d */
    0x005e,/* 0x5e */
    0x005f,/* 0x5f */
    0x0060,/* 0x60 */
    0x0061,/* 0x61 */
    0x0062,/* 0x62 */
    0x0063,/* 0x63 */
    0x0064,/* 0x64 */
    0x0065,/* 0x65 */
    0x0066,/* 0x66 */
    0x0067,/* 0x67 */
    0x0068,/* 0x68 */
    0x0069,/* 0x69 */
    0x006a,/* 0x6a */
    0x006b,/* 0x6b */
    0x006c,/* 0x6c */
    0x006d,/* 0x6d */
    0x006e,/* 0x6e */
    0x006f,/* 0x6f */
    0x0070,/* 0x70 */
    0x0071,/* 0x71 */
    0x0072,/* 0x72 */
    0x0073,/* 0x73 */
    0x0074,/* 0x74 */
    0x0075,/* 0x75 */
    0x0076,/* 0x76 */
    0x0077,/* 0x77 */
    0x0078,/* 0x78 */
    0x0079,/* 0x79 */
    0x007a,/* 0x7a */
    0x007b,/* 0x7b */
    0x007c,/* 0x7c */
    0x007d,/* 0x7d */
    0x007e,/* 0x7e */
    0x007f,/* 0x7f */
    0x0080,/* 0x80 */
    0x0081,/* 0x81 */
    0x0082,/* 0x82 */
    0x0083,/* 0x83 */
    0x0084,/* 0x84 */
    0x0085,/* 0x85 */
    0x0086,/* 0x86 */
    0x0087,/* 0x87 */
    0x0088,/* 0x88 */
    0x0089,/* 0x89 */
    0x008a,/* 0x8a */
    0x008b,/* 0x8b */
    0x008c,/* 0x8c */
    0x008d,/* 0x8d */
    0x008e,/* 0x8e */
    0x008f,/* 0x8f */
    0x0090,/* 0x90 */
    0x0091,/* 0x91 */
    0x0092,/* 0x92 */
    0x0093,/* 0x93 */
    0x0094,/* 0x94 */
    0x0095,/* 0x95 */
    0x0096,/* 0x96 */
    0x0097,/* 0x97 */
    0x0098,/* 0x98 */
    0x0099,/* 0x99 */
    0x009a,/* 0x9a */
    0x009b,/* 0x9b */
    0x009c,/* 0x9c */
    0x009d,/* 0x9d */
    0x009e,/* 0x9e */
    0x009f,/* 0x9f */
    0x0900,/* 0xa0 */
    0x0901,/* 0xa1 */
    0x0902,/* 0xa2 */
    0x0903,/* 0xa3 */
    0x0905,/* 0xa4 */
    0x0906,/* 0xa5 */
    0x0907,/* 0xa6 */
    0x0908,/* 0xa7 */
    0x0909,/* 0xa8 */
    0x090a,/* 0xa9 */
    0x090b,/* 0xaa */
    0x090e,/* 0xab */
    0x090f,/* 0xac */
    0x0910,/* 0xad */
    0x090d,/* 0xae */
    0x0912,/* 0xaf */
    0x0913,/* 0xb0 */
    0x0914,/* 0xb1 */
    0x0911,/* 0xb2 */
    0x0915,/* 0xb3 */
    0x0916,/* 0xb4 */
    0x0917,/* 0xb5 */
    0x0918,/* 0xb6 */
    0x0919,/* 0xb7 */
    0x091a,/* 0xb8 */
    0x091b,/* 0xb9 */
    0x091c,/* 0xba */
    0x091d,/* 0xbb */
    0x091e,/* 0xbc */
    0x091f,/* 0xbd */
    0x0920,/* 0xbe */
    0x0921,/* 0xbf */
    0x0922,/* 0xc0 */
    0x0923,/* 0xc1 */
    0x0924,/* 0xc2 */
    0x0925,/* 0xc3 */
    0x0926,/* 0xc4 */
    0x0927,/* 0xc5 */
    0x0928,/* 0xc6 */
    0x0929,/* 0xc7 */
    0x092a,/* 0xc8 */
    0x092b,/* 0xc9 */
    0x092c,/* 0xca */
    0x092d,/* 0xcb */
    0x092e,/* 0xcc */
    0x092f,/* 0xcd */
    0x095f,/* 0xce */
    0x0930,/* 0xcf */
    0x0931,/* 0xd0 */
    0x0932,/* 0xd1 */
    0x0933,/* 0xd2 */
    0x0934,/* 0xd3 */
    0x0935,/* 0xd4 */
    0x0936,/* 0xd5 */
    0x0937,/* 0xd6 */
    0x0938,/* 0xd7 */
    0x0939,/* 0xd8 */
    0x200D,/* 0xd9 */
    0x093e,/* 0xda */
    0x093f,/* 0xdb */
    0x0940,/* 0xdc */
    0x0941,/* 0xdd */
    0x0942,/* 0xde */
    0x0943,/* 0xdf */
    0x0946,/* 0xe0 */
    0x0947,/* 0xe1 */
    0x0948,/* 0xe2 */
    0x0945,/* 0xe3 */
    0x094a,/* 0xe4 */
    0x094b,/* 0xe5 */
    0x094c,/* 0xe6 */
    0x0949,/* 0xe7 */
    0x094d,/* 0xe8 */
    0x093c,/* 0xe9 */
    0x0964,/* 0xea */
    0xFFFF,/* 0xeb */
    0xFFFF,/* 0xec */
    0xFFFF,/* 0xed */
    0xFFFF,/* 0xee */
    0xFFFF,/* 0xef */
    0xFFFF,/* 0xf0 */
    0x0966,/* 0xf1 */
    0x0967,/* 0xf2 */
    0x0968,/* 0xf3 */
    0x0969,/* 0xf4 */
    0x096a,/* 0xf5 */
    0x096b,/* 0xf6 */
    0x096c,/* 0xf7 */
    0x096d,/* 0xf8 */
    0x096e,/* 0xf9 */
    0x096f,/* 0xfa */
    0xFFFF,/* 0xfb */
    0xFFFF,/* 0xfc */
    0xFFFF,/* 0xfd */
    0xFFFF,/* 0xfe */
    0xFFFF /* 0xff */
};

static const uint16_t nuktaSpecialCases[][2]={
    { 16 /*length of array*/   , 0      },
    { 0xA6 , 0x090c },
    { 0xEA , 0x093D },
    { 0xDF , 0x0944 },
    { 0xA1 , 0x0950 },
    { 0xb3 , 0x0958 },
    { 0xb4 , 0x0959 },
    { 0xb5 , 0x095a },
    { 0xba , 0x095b },
    { 0xbf , 0x095c },
    { 0xC0 , 0x095d },
    { 0xc9 , 0x095e },
    { 0xAA , 0x0960 },
    { 0xA7 , 0x0961 },
    { 0xDB , 0x0962 },
    { 0xDC , 0x0963 }, 
};

#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){       \
      /* write the targetUniChar  to target */                                                  \
    if(target <targetLimit){                                                                    \
        if(targetByteUnit <= 0xFF){                                                             \
            *(target)++ = (uint8_t)(targetByteUnit);                                            \
            if(offsets){                                                                        \
                *(offsets++) = source - args->source-1;                                         \
            }                                                                                   \
        }else{                                                                                  \
            *(target)++ = (uint8_t)(targetByteUnit>>8);                                         \
            if(offsets){                                                                        \
                *(offsets++) = source - args->source-1;                                         \
            }                                                                                   \
            if(target < targetLimit){                                                           \
                *(target)++ = (uint8_t)  targetByteUnit;                                        \
                if(offsets){                                                                    \
                    *(offsets++) = source - args->source-1;                                     \
                }                                                                               \
            }else{                                                                              \
                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
                            (uint8_t) (targetByteUnit);                                         \
                *err = U_BUFFER_OVERFLOW_ERROR;                                                 \
            }                                                                                   \
        }                                                                                       \
    }else{                                                                                      \
        if(targetByteUnit & 0xFF00){                                                            \
            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
                        (uint8_t) (targetByteUnit >>8);                                         \
        }                                                                                       \
        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =            \
                        (uint8_t) (targetByteUnit);                                             \
        *err = U_BUFFER_OVERFLOW_ERROR;                                                         \
    }                                                                                           \
}          

/* Rules:
 *    Explicit Halant :  
 *                      <HALANT> + <ZWNJ>
 *    Soft Halant :
 *                      <HALANT> + <ZWJ>                    
 */

static void 
UConverter_fromUnicode_ISCII_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
                                                      UErrorCode * err){
    const UChar *source = args->source;
    const UChar *sourceLimit = args->sourceLimit;
    unsigned char *target = (unsigned char *) args->target;
    unsigned char *targetLimit = (unsigned char *) args->targetLimit;
    int32_t* offsets = args->offsets;
    uint32_t targetByteUnit = 0x0000;
    UChar32 sourceChar = 0x0000;
    UConverterCallbackReason reason;
    UBool useFallback;
    UConverterDataISCII *converterData;
    uint16_t newDelta=0;
    uint16_t range = 0;
    UBool deltaChanged = FALSE;

    if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    /* initialize data */
    converterData=(UConverterDataISCII*)args->converter->extraInfo;
    useFallback = args->converter->useFallback;
    newDelta=converterData->currentDeltaFromUnicode;
    range = (uint16_t)(newDelta/DELTA);
    
    if(args->converter->fromUSurrogateLead!=0 && target <targetLimit) {
        goto getTrail;
    }

    /*writing the char to the output stream */
    while(source < sourceLimit){    

        targetByteUnit = missingCharMarker;

        sourceChar = *source++;

        /*check if input is in ASCII and C0 control codes range*/
        if (sourceChar <= ASCII_END) {
            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
            if(U_FAILURE(*err)){
                break;
            }
            if(sourceChar == LF){                         
                targetByteUnit = ATR<<8;
                targetByteUnit += (uint8_t) lookupInitialData[range][2];
                args->converter->fromUnicodeStatus=sourceChar;
                /* now append ATR and language code */
                WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
                if(U_FAILURE(*err)){
                    break;
                }
            }
            continue;
        }
        switch(sourceChar){
        case ZWNJ:
            /* contextChar has HALANT */
            if(converterData->contextCharFromUnicode){
                converterData->contextCharFromUnicode = 0x00;
                targetByteUnit = ISCII_HALANT;
            }else{
                /* consume ZWNJ and continue */
                converterData->contextCharFromUnicode = 0x00;
                continue;
            }
            break;
        case ZWJ:
            /* contextChar has HALANT */
            if(converterData->contextCharFromUnicode){
                targetByteUnit = ISCII_NUKTA;               
            }else{
                targetByteUnit =ISCII_INV;
            }
            converterData->contextCharFromUnicode = 0x00;
            break;
       default:
            /* is the sourceChar in the INDIC_RANGE? */
            if((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE){
                /* Danda and Double Danda are valid in Northern scripts.. since Unicode 
                 * does not include these codepoints in all Northern scrips we need to 
                 * filter them out
                 */
                if(sourceChar!= DANDA && sourceChar != DOUBLE_DANDA){
                    /* find out to which block the souceChar belongs*/ 
                    range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
                    newDelta =(uint16_t)(range*DELTA);

                    /* Now are we in the same block as the previous? */
                    if(newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer){
                        converterData->currentDeltaFromUnicode = newDelta;
                        converterData->currentMaskFromUnicode = lookupInitialData[range][1];
                        deltaChanged =TRUE;
                        converterData->isFirstBuffer=FALSE;
                    }
                    /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
                    /* now subtract the new delta from sourceChar*/
                    sourceChar -= converterData->currentDeltaFromUnicode ;
                }

                /* get the target byte unit */                  
                targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
                            
                /* is the code point valid in current script? */
                if((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0){
                    /* Vocallic RR is assigne in ISCII Telugu and Unicode */
                    if(converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) && sourceChar!=VOCALLIC_RR){
                        targetByteUnit=missingCharMarker;
                    }
                }
           
                if(deltaChanged){
                    /* we are in a script block which is different than 
                     * previous sourceChar's script block write ATR and language codes 
                     */
                    uint16_t temp=0;              
                    temp =(uint16_t)(ATR<<8);
                    temp += (uint16_t)((uint8_t) lookupInitialData[range][2]);
                    /* reset */
                    deltaChanged=FALSE;
                    /* now append ATR and language code */
                    WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
                    if(U_FAILURE(*err)){
                        break;
                    }
                }
            }
            /* reset context char */
            converterData->contextCharFromUnicode = 0x00;
            break;
        }


        if(targetByteUnit != missingCharMarker){
            if(targetByteUnit==ISCII_HALANT){
                converterData->contextCharFromUnicode = (UChar)targetByteUnit;
            }
             /* write targetByteUnit to target*/
             WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
             if(U_FAILURE(*err)){
                  break;
             }
        }
        else{
            /* oops.. the code point is unassingned
             * set the error and reason
             */
            reason =UCNV_UNASSIGNED;
            *err =U_INVALID_CHAR_FOUND;

            /*check if the char is a First surrogate*/
            if(UTF_IS_SURROGATE(sourceChar)) {
                if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
                    args->converter->fromUSurrogateLead=(UChar)sourceChar;
getTrail:
                    /*look ahead to find the trail surrogate*/
                    if(source <  sourceLimit) {
                        /* test the following code unit */
                        UChar trail= (*source);
                        if(UTF_IS_SECOND_SURROGATE(trail)) {
                            source++;
                            sourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
                            args->converter->fromUSurrogateLead=0x00;
                            reason =UCNV_UNASSIGNED;
                            *err =U_INVALID_CHAR_FOUND;
                            /* convert this surrogate code point */
                            /* exit this condition tree */
                        } else {
                            /* this is an unmatched lead code unit (1st surrogate) */
                            /* callback(illegal) */
                            sourceChar =  args->converter->fromUSurrogateLead;
                            reason=UCNV_ILLEGAL;
                            *err=U_ILLEGAL_CHAR_FOUND;
                        }
                    } else {
                        /* no more input */
                        *err = U_ZERO_ERROR;
                        break;
                    }
                } else {
                    /* this is an unmatched trail code unit (2nd surrogate) */
                    /* callback(illegal) */
                    reason=UCNV_ILLEGAL;
                    *err=U_ILLEGAL_CHAR_FOUND;
                }
            }
            {
                /*variables for callback */
                const UChar* saveSource =NULL;
                char* saveTarget =NULL;
                int32_t* saveOffsets =NULL;
                int currentOffset =0;
                int saveIndex =0;

                args->converter->invalidUCharLength = 0;

                if(sourceChar>0xffff){
                    /* we have got a surrogate pair... dissable and populate the invalidUCharBuffer */
                    args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] 
                        =(uint16_t)(((sourceChar)>>10)+0xd7c0);
                    args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] 
                        =(uint16_t)(((sourceChar)&0x3ff)|0xdc00);
                }
                else{
                    args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] 
                        =(UChar)sourceChar;
                }
                
                if(offsets){
                    currentOffset = *(offsets-1)+1;
                }
                saveSource = args->source;
                saveTarget = args->target;
                saveOffsets = args->offsets;
                args->target = (char*)target;
                args->source = source;
                args->offsets = offsets;

                /*copies current values for the ErrorFunctor to update */
                /*Calls the ErrorFunctor */
                args->converter->fromUCharErrorBehaviour ( args->converter->fromUContext, 
                              args, 
                              args->converter->invalidUCharBuffer, 
                              args->converter->invalidUCharLength, 
                             (UChar32) (sourceChar), 
                              reason, 
                              err);

                saveIndex = args->target - (char*)target;
                if(args->offsets){
                    args->offsets = saveOffsets;
                    while(saveIndex-->0){
                         *offsets = currentOffset;
                          offsets++;
                    }
                }
                target = (unsigned char*)args->target;
                args->source=saveSource;
                args->target=saveTarget;
                args->offsets=saveOffsets;
                args->converter->fromUSurrogateLead=0x00;

                if (U_FAILURE (*err)){
                    break;
                }
            }
        }


    }/* end while(mySourceIndex<mySourceLength) */


    /*If at the end of conversion we are still carrying state information
     *flush is TRUE, we can deduce that the input stream is truncated
     */
    if (args->converter->fromUSurrogateLead !=0 && (source == sourceLimit) && args->flush){
        *err = U_TRUNCATED_CHAR_FOUND;
    }
    /* Reset the state of converter if we consumed 
     * the source and flush is true
     */
    if( (source == sourceLimit) && args->flush){
       /*reset converter*/
        _ISCIIReset(args->converter,UCNV_RESET_FROM_UNICODE);
    }

    /*save the state and return */
    args->source = source;
    args->target = (char*)target;
}

static const int32_t lookupTable[][2]={
    { ZERO,       ZERO     },     /*DEFALT*/
    { ZERO,       ZERO     },     /*ROMAN*/
    { DEVANAGARI, DEV_MASK },
    { BENGALI,    BNG_MASK },
    { TAMIL,      TML_MASK },
    { TELUGU,     KND_MASK },
    { BENGALI,    BNG_MASK },
    { ORIYA,      ORI_MASK },
    { KANNADA,    KND_MASK },
    { GUJARATI,   GJR_MASK },
    { GURMUKHI,   PNJ_MASK },
};

#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\
    /* add offset to current Indic Block */                                              \
    if(targetUniChar>ASCII_END &&                                                        \
           targetUniChar != ZWJ &&                                                       \
           targetUniChar != ZWNJ &&                                                      \
           targetUniChar != DANDA &&                                                     \
           targetUniChar != DOUBLE_DANDA){                                               \
                                                                                         \
           targetUniChar+=(uint16_t)(delta);                                             \
    }                                                                                    \
    /* now write the targetUniChar */                                                    \
    if(target<args->targetLimit){                                                        \
        *(target)++ = (UChar)targetUniChar;                                              \
        if(offsets){                                                                     \
            *(offsets)++ = offset;                                                       \
        }                                                                                \
    }else{                                                                               \
        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] =   \
            (UChar)targetUniChar;                                                        \
        *err = U_BUFFER_OVERFLOW_ERROR;                                                  \
    }                                                                                    \
}                                                                                        
                                                                                         
#define GET_MAPPING(sourceChar,targetUniChar,data){                                      \
    targetUniChar = toUnicodeTable[(sourceChar)] ;                                       \
    /* is the code point valid in current script? */                                     \
    if(sourceChar> ASCII_END &&                                                          \
            (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode)==0){    \
        /* Vocallic RR is assigne in ISCII Telugu and Unicode */                         \
        if(data->currentDeltaToUnicode!=(TELUGU_DELTA) &&                                \
                    targetUniChar!=VOCALLIC_RR){                                         \
            targetUniChar=missingCharMarker;                                             \
        }                                                                                \
    }                                                                                    \
}

/***********
 *  Rules for ISCII to Unicode converter
 *  ISCII is stateful encoding. To convert ISCII bytes to Unicode,
 *  which has both precomposed and decomposed forms characters 
 *  pre-context and post-context need to be considered.
 *  
 *  Post context
 *  i)  ATR : Attribute code is used to declare the font and script switching. 
 *      Currently we only switch scripts and font codes consumed without generating an error
 *  ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure, 
 *      obsolete characters
 *  Pre context  
 *  i)  Halant: if preceeded by a halant then it is a explicit halant
 *  ii) Nukta : 
 *       a) if preceeded by a halant then it is a soft halant
 *       b) if preceeded by specific consonants and the ligatures have pre-composed
 *          characters in Unicode then convert to pre-composed characters
 *  iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda
 * 
 */

static void 
UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                                                            UErrorCode* err){
    const char *source = ( char *) args->source;
    UChar *target = args->target;
    const char *sourceLimit = args->sourceLimit;
    uint32_t targetUniChar = 0x0000;
    uint8_t sourceChar = 0x0000;
    UConverterDataISCII* data;
    UConverterCallbackReason reason;
    UChar32* toUnicodeStatus=NULL;
    UChar* contextCharToUnicode = NULL;

    if ((args->converter == NULL) || (target < args->target) || (source < args->source)){
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    
    data = (UConverterDataISCII*)(args->converter->extraInfo);
    contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
    toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/

    while(source< args->sourceLimit){

        targetUniChar = missingCharMarker;
        
        if(target < args->targetLimit){
            sourceChar = (unsigned char)*(source)++;

            /* look at the post-context preform special processing */
            if(*contextCharToUnicode==ATR){
             
                /* If we have ATR in *contextCharToUnicode then we need to change our
                 * state to the Indic Script specified by sourceChar
                 */

                /* check if the sourceChar is supported script range*/
                if((uint8_t)(PNJ-sourceChar)<=PNJ-DEV){
                    data->currentDeltaToUnicode = 
                        (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
                    data->currentMaskToUnicode = 
                        lookupTable[sourceChar & 0x0F][1] ;
                }
                else if(sourceChar==DEF){
                    /* switch back to default */
                    data->currentDeltaToUnicode = data->defDeltaToUnicode;
                    data->currentMaskToUnicode = data->defMaskToUnicode;
                }else{
                    /* these are display codes consume and continue */
                }
                /* reset */
                *contextCharToUnicode=NO_CHAR_MARKER;              
                
                continue;

            }else if(*contextCharToUnicode==EXT){
                /* check if sourceChar is in 0xA1-0xEE range */
                if((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)){
                    /* We currently support only Anudatta and Devanagari abbreviation sign */
                    if(sourceChar==0xBF || sourceChar == 0xB8){
                        targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
                        
                        /* find out if the mapping is valid in this state */                                            
                        if(validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode){
                            
                            *contextCharToUnicode= NO_CHAR_MARKER;

                            /* write to target */
                            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),
                                                 targetUniChar,data->currentDeltaToUnicode,err);

                            continue;
                        }
                    }
                    /* byte unit is unassigned */
                    targetUniChar = missingCharMarker;
                    *err= U_INVALID_CHAR_FOUND;
                    reason = UCNV_UNASSIGNED;
                }else{
                    /* only 0xA1 - 0xEE are legal after EXT char */
                    reason= UCNV_ILLEGAL;
                    *err = U_ILLEGAL_CHAR_FOUND;
                }
                goto CALLBACK;
            }

            /* look at the pre-context and perform special processing */
            switch(sourceChar){
            case EXT: /*falls through*/
            case ATR:
                *contextCharToUnicode = (UChar)sourceChar;
                
                if(*toUnicodeStatus != missingCharMarker){

                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),
                                    *toUnicodeStatus,data->currentDeltaToUnicode,err);
                    *toUnicodeStatus = missingCharMarker;
                }
                continue;
            case ISCII_DANDA:
                /* handle double danda*/
                if(*contextCharToUnicode== ISCII_DANDA){
                    targetUniChar = DOUBLE_DANDA;
                    /* clear the context */
                    *contextCharToUnicode = NO_CHAR_MARKER;
                    *toUnicodeStatus = missingCharMarker;
                }else{
                    GET_MAPPING(sourceChar,targetUniChar,data);
                    *contextCharToUnicode = sourceChar;
                }
                break;
            case ISCII_HALANT:
                /* handle explicit halant */
                if(*contextCharToUnicode == ISCII_HALANT){
                    targetUniChar = ZWNJ;
                    /* clear the context */
                    *contextCharToUnicode = NO_CHAR_MARKER;
                }else{
                    GET_MAPPING(sourceChar,targetUniChar,data);
                    *contextCharToUnicode = sourceChar;
                }
                break;
            case ISCII_NUKTA:
                /* handle soft halant */
                if(*contextCharToUnicode == ISCII_HALANT){
                    targetUniChar = ZWJ;
                    /* clear the context */
                    *contextCharToUnicode = NO_CHAR_MARKER;
                    break;
                }else{
                    /* try to handle <CHAR> + ISCII_NUKTA special mappings */
                    int i=1;
                    UBool found =FALSE;
                    for( ;i<nuktaSpecialCases[0][0];i++){
                        if(nuktaSpecialCases[i][0]==(uint8_t)*contextCharToUnicode){
                            targetUniChar=nuktaSpecialCases[i][1];
                            found =TRUE;
                            break;
                        }
                    }
                    if(found){
                        /* find out if the mapping is valid in this state */                                            
                        if(validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode){       
                            targetUniChar += data->currentDeltaToUnicode ;
                            *contextCharToUnicode= NO_CHAR_MARKER;
                            *toUnicodeStatus = missingCharMarker;
                            break;
                        }
                        /* else fall through to default */
                    }
                    /* else fall through to default */
                }                
            default:
                GET_MAPPING(sourceChar,targetUniChar,data);
                *contextCharToUnicode = sourceChar;
                break;
            }
            

            if(*toUnicodeStatus != missingCharMarker){
                /* write the previously mapped codepoint */
                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),
                                *toUnicodeStatus,data->currentDeltaToUnicode,err);
                *toUnicodeStatus = missingCharMarker;
            }


            if(targetUniChar != missingCharMarker ){
                /* now save the targetUniChar for delayed write */
                *toUnicodeStatus = (UChar) targetUniChar;
            }else{
            
                /* we reach here only if targetUniChar == missingCharMarker 
                 * so assign codes to reason and err
                 */
                reason = UCNV_UNASSIGNED;
                *err = U_INVALID_CHAR_FOUND;
CALLBACK:
                 {
                    const char *saveSource = args->source;
                    UChar *saveTarget = args->target;
                    int32_t *saveOffsets = NULL;
                    int32_t currentOffset;
                    int32_t saveIndex = target - args->target;

                    args->converter->invalidCharLength=0;

                    currentOffset= source - args->source -1;
                                       
                    args->converter->invalidCharBuffer[args->converter->invalidCharLength++] =
                        (char) sourceChar;

                    if(args->offsets){
                        saveOffsets=args->offsets;
                        args->offsets = args->offsets+(target - args->target);
                    }

                    args->target =target;
                    target =saveTarget;
                    args->source = source;

                    args->converter->fromCharErrorBehaviour ( 
                         args->converter->toUContext, 
                         args, 
                         args->converter->invalidCharBuffer, 
                         args->converter->invalidCharLength, 
                         reason, 
                         err);

                    if(args->offsets){
                        args->offsets = saveOffsets;

                        for (;saveIndex < (args->target - target);saveIndex++) {
                          *(args->offsets)++ = currentOffset;
                        }
                    }
                    target=args->target;
                    args->source  = saveSource;
                    args->target  = saveTarget;
                }
            }

        }
        else{
            *err =U_BUFFER_OVERFLOW_ERROR;
            break;
        }
    }
    if((args->flush==TRUE)
            && (source == sourceLimit) 
            && data->contextCharToUnicode !=0){
        /* if we have ATR in context it is an error */
        if(data->contextCharToUnicode==ATR || data->contextCharToUnicode==EXT){
            *err = U_TRUNCATED_CHAR_FOUND;
        }else{
            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),
                            *toUnicodeStatus,data->currentDeltaToUnicode,err);
           *toUnicodeStatus = missingCharMarker;
        }

    }
    /* Reset the state of converter if we consumed 
     * the source and flush is true
     */
    if( (source == sourceLimit) && args->flush){
        /*reset converter*/
        _ISCIIReset(args->converter,UCNV_RESET_TO_UNICODE);
    }
    args->target = target;
    args->source = source;
}

/* structure for SafeClone calculations */
struct cloneStruct
{
    UConverter cnv;
    UConverterDataISCII mydata;
};


static UConverter * 
_ISCII_SafeClone(const UConverter *cnv, 
              void *stackBuffer, 
              int32_t *pBufferSize, 
              UErrorCode *status)
{
    struct cloneStruct * localClone;
    int32_t bufferSizeNeeded = sizeof(struct cloneStruct);

    if (U_FAILURE(*status)){
        return 0;
    }

    if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
        *pBufferSize = bufferSizeNeeded;
        return 0;
    }

    localClone = (struct cloneStruct *)stackBuffer;
    uprv_memcpy(&localClone->cnv, cnv, sizeof(UConverter));
    localClone->cnv.isCopyLocal = TRUE;

    uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
    localClone->cnv.extraInfo = &localClone->mydata;

    return &localClone->cnv;
}

static const UConverterImpl _ISCIIImpl={

    UCNV_ISCII,
    
    NULL,
    NULL,
    
    _ISCIIOpen,
    _ISCIIClose,
    _ISCIIReset,
    
    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
    NULL,
    
    NULL,
    _ISCIIgetName,
    NULL,
    _ISCII_SafeClone
};

static const UConverterStaticData _ISCIIStaticData={
    sizeof(UConverterStaticData),
        "ISCII",
         0, 
         UCNV_IBM, 
         UCNV_ISCII, 
         1, 
         4,
        { 0x1a, 0, 0, 0 },
        0x1,
        FALSE, 
        FALSE,
        0x0,
        0x0,
        { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

};
   
const UConverterSharedData _ISCIIData={
    sizeof(UConverterSharedData),
        ~((uint32_t) 0),
        NULL, 
        NULL, 
        &_ISCIIStaticData, 
        FALSE, 
        &_ISCIIImpl, 
        0
};


Generated by  Doxygen 1.6.0   Back to index