/*
 *******************************************************************************
 *
 *   Copyright (C) 1999-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  store.c
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2003-02-06
 *   created by: Ram Viswanadha
 *
 */

/*
 * NOTE(review): the next two #include directives carry no header name in this
 * copy of the file; the names were presumably lost in transit. The code in
 * this file calls fprintf() and exit(), so <stdio.h> and <stdlib.h> are the
 * likely candidates -- confirm against the original source.
 */
#include
#include
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "unicode/udata.h"
#include "utrie.h"
#include "unicode/uset.h"
#include "unewdata.h"
#include "genidna.h"

#ifdef WIN32
#   pragma warning(disable: 4100)
#endif

#define DO_DEBUG_OUT 0

/**
This is a simple Trie with the following structure

16-bit IDN sets:

Each 16-bit IDN word contains:

 0..2  Category flags
       Contains the enum values IDNStates

 3..4  Contains the length of the mapping
       If the length of the mapping is <= 2, the length itself is stored.
       If the length of the mapping is > 2, the _IDNA_LENGTH_IN_MAPPING_TABLE
       enum is stored instead, and the real length is stored in the data array
       at the index given by bits 5..15, immediately before the mapping.

 5..15 Contains the index into the data array that contains the mapping
       If it contains _IDNA_MAP_TO_NOTHING, then the codepoint is stripped
       from the input
*/

/* file data ---------------------------------------------------------------- */
/* indexes[] value names */

#if UCONFIG_NO_IDNA

/* dummy UDataInfo cf. udata.h */
static UDataInfo dataInfo = {
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    U_SIZEOF_UCHAR,
    0,

    { 0, 0, 0, 0 },                 /* dummy dataFormat */
    { 0, 0, 0, 0 },                 /* dummy formatVersion */
    { 0, 0, 0, 0 }                  /* dummy dataVersion */
};

#else

/* Index values for the data file, zero-initialized here.
 * NOTE(review): where these are filled in is not visible in this part of the
 * file -- confirm before relying on any particular slot. */
static int32_t indexes[_IDNA_INDEX_TOP]={ 0 };

/* Backing store for mapping strings; store() writes into it at currentIndex. */
static uint16_t mappingData[_IDNA_MAPPING_DATA_SIZE]={0};

/* UDataInfo cf. 
udata.h */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    U_SIZEOF_UCHAR,
    0,

    { 0x49, 0x44, 0x4e, 0x41 },   /* dataFormat="IDNA" */
    { 2, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },  /* formatVersion */
    { 3, 2, 0, 0 }                /* dataVersion (Unicode version) */
};

/**
 * Overrides the dataVersion field of dataInfo.
 *
 * The string v is parsed with u_versionFromString() and the first 4 bytes of
 * the resulting UVersionInfo are copied over dataInfo.dataVersion, replacing
 * the { 3, 2, 0, 0 } default set in the initializer above.
 *
 * @param v version string handed to u_versionFromString()
 */
void
setUnicodeVersion(const char *v) {
    UVersionInfo version;
    u_versionFromString(version, v);
    uprv_memcpy(dataInfo.dataVersion, version, 4);
}

/* the trie under construction; opened by init() */
static UNewTrie idnTrie={ {0},0,0,0,0,0,0,0,0,{0} };

/* next free slot in mappingData[]; store() shifts this value into bits 5.. of
 * the trie word. Starts at 1.
 * NOTE(review): presumably slot 0 is reserved so that index 0 never denotes a
 * real mapping -- confirm. */
static int32_t currentIndex = 1;

static int32_t maxLength = 0;  /* maximum length (in 16-bit units) of any mapping string seen by store() */

/* capacity passed to utrie_open() in init() */
#define MAX_DATA_LENGTH 11500

/**
 * Opens idnTrie with room for MAX_DATA_LENGTH data entries.
 *
 * If utrie_open() returns NULL, an error message is printed to stderr and the
 * process exits with U_MEMORY_ALLOCATION_ERROR as its exit status.
 */
extern void
init() {

    /* initialize the trie (only idnTrie is opened here) */
    if(NULL==utrie_open(&idnTrie, NULL, MAX_DATA_LENGTH, 0, FALSE)) {
        fprintf(stderr, "error: failed to initialize tries\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }
}

static void
store(uint32_t codepoint, uint32_t* mapping, int32_t length,
      uint32_t flags, UErrorCode* status){

    uint32_t trieWord = 0;
    int32_t i =0;
    if(flags == _IDNA_MAP_TO_NOTHING){
        /* the marker goes into the index field (bits 5..), not the flag bits */
        trieWord = flags << 5;
    }else{
        if(length==0){
            trieWord = flags;
        }else{
            int32_t adjustedLen = 0;
            /* NOTE(review): this declaration shadows the outer i declared above */
            int32_t i=0;
            /*
            int32_t delta;

            if(length==1 && (delta=(int32_t)codepoint-(int32_t)mapping[0])>=-4096 && delta<=4095) {
                printf("mapping of U+%04lx to U+%04lx could fit into a 13-bit delta (0x%lx)\n", codepoint, mapping[0], delta);
            }
            */

            /* set the 0..2 bits the flags */
            trieWord = flags;

            /* figure out the real length */
            /* NOTE(review): the loop header below is damaged ("i 0xFFFF"); text
             * between "i" and "0xFFFF" appears to have been lost. Presumably it
             * walked mapping[0..length) and counted 2 units for each value above
             * 0xFFFF and 1 otherwise -- restore from the original source. */
            for(i=0; i 0xFFFF){
                    adjustedLen +=2;
                }else{
                    adjustedLen++;
                }
            }

            length = adjustedLen;

            /* set the 3..4 bits the length */
            if(length > 2){
                trieWord += _IDNA_LENGTH_IN_MAPPING_TABLE << 3;
            }else{
                trieWord += (uint32_t)((length)<<3);
            }

            if(length > maxLength)
                maxLength = length;

            /* get the current index in the data array
             * and store in 5..15 bits
             */
            trieWord += currentIndex << 5;

            /* load mapping into the data array */
            i = 0;

            /* only a diagnostic: execution continues after the message */
            if(trieWord > 0xFFFF){
                fprintf(stderr,"size of trie word is greater than 0xFFFF.\n");
            }
            /* set the length in 
mapping table */ if(length > 2){ mappingData[currentIndex++] = (uint16_t)length; } while(i