diff options
Diffstat (limited to 'src/utils/charset.cpp')
-rw-r--r-- | src/utils/charset.cpp | 201 |
1 files changed, 0 insertions, 201 deletions
diff --git a/src/utils/charset.cpp b/src/utils/charset.cpp deleted file mode 100644 index 7ee637f80..000000000 --- a/src/utils/charset.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -* Character Set Handling -* (C) 1999-2007 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#include <botan/charset.h> -#include <botan/parsing.h> -#include <botan/exceptn.h> -#include <cctype> - -namespace Botan { - -namespace Charset { - -namespace { - -/* -* Convert from UCS-2 to ISO 8859-1 -*/ -std::string ucs2_to_latin1(const std::string& ucs2) - { - if(ucs2.size() % 2 == 1) - throw Decoding_Error("UCS-2 string has an odd number of bytes"); - - std::string latin1; - - for(size_t i = 0; i != ucs2.size(); i += 2) - { - const byte c1 = ucs2[i]; - const byte c2 = ucs2[i+1]; - - if(c1 != 0) - throw Decoding_Error("UCS-2 has non-Latin1 characters"); - - latin1 += static_cast<char>(c2); - } - - return latin1; - } - -/* -* Convert from UTF-8 to ISO 8859-1 -*/ -std::string utf8_to_latin1(const std::string& utf8) - { - std::string iso8859; - - size_t position = 0; - while(position != utf8.size()) - { - const byte c1 = static_cast<byte>(utf8[position++]); - - if(c1 <= 0x7F) - iso8859 += static_cast<char>(c1); - else if(c1 >= 0xC0 && c1 <= 0xC7) - { - if(position == utf8.size()) - throw Decoding_Error("UTF-8: sequence truncated"); - - const byte c2 = static_cast<byte>(utf8[position++]); - const byte iso_char = ((c1 & 0x07) << 6) | (c2 & 0x3F); - - if(iso_char <= 0x7F) - throw Decoding_Error("UTF-8: sequence longer than needed"); - - iso8859 += static_cast<char>(iso_char); - } - else - throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used"); - } - - return iso8859; - } - -/* -* Convert from ISO 8859-1 to UTF-8 -*/ -std::string latin1_to_utf8(const std::string& iso8859) - { - std::string utf8; - for(size_t i = 0; i != iso8859.size(); ++i) - { - const byte c = static_cast<byte>(iso8859[i]); - - if(c <= 0x7F) - utf8 += static_cast<char>(c); - else - { - utf8 += static_cast<char>((0xC0 | (c >> 6))); - utf8 += static_cast<char>((0x80 | (c & 0x3F))); - } - } - return utf8; - } - -} - -/* -* Perform character set transcoding -*/ -std::string transcode(const std::string& str, - Character_Set to, Character_Set from) - { - if(to == LOCAL_CHARSET) - to = LATIN1_CHARSET; - if(from == LOCAL_CHARSET) - from = LATIN1_CHARSET; - - if(to == from) - return str; - - if(from == LATIN1_CHARSET && to == UTF8_CHARSET) - return latin1_to_utf8(str); - if(from == UTF8_CHARSET && to == LATIN1_CHARSET) - return utf8_to_latin1(str); - if(from == UCS2_CHARSET && to == LATIN1_CHARSET) - return ucs2_to_latin1(str); - - throw Invalid_Argument("Unknown transcoding operation from " + - std::to_string(from) + " to " + std::to_string(to)); - } - -/* -* Check if a character represents a digit -*/ -bool is_digit(char c) - { - if(c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || - c == '5' || c == '6' || c == '7' || c == '8' || c == '9') - return true; - return false; - } - -/* -* Check if a character represents whitespace -*/ -bool is_space(char c) - { - if(c == ' ' || c == '\t' || c == '\n' || c == '\r') - return true; - return false; - } - -/* -* Convert a character to a digit -*/ -byte char2digit(char c) - { - switch(c) - { - case '0': return 0; - case '1': return 1; - case '2': return 2; - case '3': return 3; - case '4': return 4; - case '5': return 5; - case '6': return 6; - case '7': return 7; - case '8': return 8; - case '9': return 9; - } - - throw Invalid_Argument("char2digit: Input is not a digit character"); - } - -/* -* Convert a digit to a character -*/ -char digit2char(byte b) - { - switch(b) - { - case 0: return '0'; - case 1: return '1'; - case 2: return '2'; - case 3: return '3'; - case 4: return '4'; - case 5: return '5'; - case 6: return '6'; - case 7: return '7'; - case 8: return '8'; - case 9: return '9'; - } - - throw Invalid_Argument("digit2char: Input is not a digit"); - } - -/* -* Case-insensitive character comparison -*/ -bool caseless_cmp(char a, char b) - { - return (std::tolower(static_cast<unsigned char>(a)) == - std::tolower(static_cast<unsigned char>(b))); - } - -} - -} |