/************************************************* * Character Set Handling Source File * * (C) 1999-2007 Jack Lloyd * *************************************************/ #include #include #include #include #include namespace Botan { namespace Charset { namespace { /************************************************* * Convert from UCS-2 to ISO 8859-1 * *************************************************/ std::string ucs2_to_latin1(const std::string& ucs2) { if(ucs2.size() % 2 == 1) throw Decoding_Error("UCS-2 string has an odd number of bytes"); std::string latin1; for(u32bit j = 0; j != ucs2.size(); j += 2) { const byte c1 = ucs2[j]; const byte c2 = ucs2[j+1]; if(c1 != 0) throw Decoding_Error("UCS-2 has non-Latin1 characters"); latin1 += static_cast(c2); } return latin1; } /************************************************* * Convert from UTF-8 to ISO 8859-1 * *************************************************/ std::string utf8_to_latin1(const std::string& utf8) { std::string iso8859; u32bit position = 0; while(position != utf8.size()) { const byte c1 = static_cast(utf8[position++]); if(c1 <= 0x7F) iso8859 += static_cast(c1); else if(c1 >= 0xC0 && c1 <= 0xC7) { if(position == utf8.size()) throw Decoding_Error("UTF-8: sequence truncated"); const byte c2 = static_cast(utf8[position++]); const byte iso_char = ((c1 & 0x07) << 6) | (c2 & 0x3F); if(iso_char <= 0x7F) throw Decoding_Error("UTF-8: sequence longer than needed"); iso8859 += static_cast(iso_char); } else throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used"); } return iso8859; } /************************************************* * Convert from ISO 8859-1 to UTF-8 * *************************************************/ std::string latin1_to_utf8(const std::string& iso8859) { std::string utf8; for(u32bit j = 0; j != iso8859.size(); ++j) { const byte c = static_cast(iso8859[j]); if(c <= 0x7F) utf8 += static_cast(c); else { utf8 += static_cast((0xC0 | (c >> 6))); utf8 += static_cast((0x80 | (c & 0x3F))); } } return utf8; } } /************************************************* * Perform character set transcoding * *************************************************/ std::string transcode(const std::string& str, Character_Set to, Character_Set from) { if(to == LOCAL_CHARSET) to = LATIN1_CHARSET; if(from == LOCAL_CHARSET) from = LATIN1_CHARSET; if(to == from) return str; if(from == LATIN1_CHARSET && to == UTF8_CHARSET) return latin1_to_utf8(str); if(from == UTF8_CHARSET && to == LATIN1_CHARSET) return utf8_to_latin1(str); if(from == UCS2_CHARSET && to == LATIN1_CHARSET) return ucs2_to_latin1(str); throw Invalid_Argument("Unknown transcoding operation from " + to_string(from) + " to " + to_string(to)); } /************************************************* * Check if a character represents a digit * *************************************************/ bool is_digit(char c) { if(c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || c == '7' || c == '8' || c == '9') return true; return false; } /************************************************* * Check if a character represents whitespace * *************************************************/ bool is_space(char c) { if(c == ' ' || c == '\t' || c == '\n' || c == '\r') return true; return false; } /************************************************* * Convert a character to a digit * *************************************************/ byte char2digit(char c) { switch(c) { case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; } throw Invalid_Argument("char2digit: Input is not a digit character"); } /************************************************* * Convert a digit to a character * *************************************************/ char digit2char(byte b) { switch(b) { case 0: return '0'; case 1: return '1'; case 2: return '2'; case 3: return '3'; case 4: return '4'; case 5: return '5'; case 6: return '6'; case 7: return '7'; case 8: return '8'; case 9: return '9'; } throw Invalid_Argument("digit2char: Input is not a digit"); } /************************************************* * Case-insensitive character comparison * *************************************************/ bool caseless_cmp(char a, char b) { return (std::tolower(static_cast(a)) == std::tolower(static_cast(b))); } } /************************************************* * Hex Encoder Lookup Tables * *************************************************/ const byte Hex_Encoder::BIN_TO_HEX_UPPER[16] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; const byte Hex_Encoder::BIN_TO_HEX_LOWER[16] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 }; /************************************************* * Base64 Encoder Lookup Table * *************************************************/ const byte Base64_Encoder::BIN_TO_BASE64[64] = { 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F }; /************************************************* * Hex Decoder Lookup Table * *************************************************/ const byte Hex_Decoder::HEX_TO_BIN[256] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; /************************************************* * Base64 Decoder Lookup Table * *************************************************/ const byte Base64_Decoder::BASE64_TO_BIN[256] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x3E, 0x80, 0x80, 0x80, 0x3F, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; }