about summary refs log tree commit diff stats
path: root/src/utils/charset.cpp
diff options
context:
space:
mode:
author lloyd <[email protected]> 2008-09-29 00:00:47 +0000
committer lloyd <[email protected]> 2008-09-29 00:00:47 +0000
commit ab81a9d95264914ab34cb4e7134f827e3128c326 (patch)
tree 708c6681dd9fc7e6c1e1638db1ba67baea4134dc /src/utils/charset.cpp
parent 45c4fcfce66d894dda6b1d29a92fa47515b944af (diff)
Move charset.cpp to utils with header
Diffstat (limited to 'src/utils/charset.cpp')
-rw-r--r-- src/utils/charset.cpp 199
1 files changed, 199 insertions, 0 deletions
diff --git a/src/utils/charset.cpp b/src/utils/charset.cpp
new file mode 100644
index 000000000..73c520be1
--- /dev/null
+++ b/src/utils/charset.cpp
@@ -0,0 +1,199 @@
+/*************************************************
+* Character Set Handling Source File *
+* (C) 1999-2007 Jack Lloyd *
+*************************************************/
+
+#include <botan/charset.h>
+#include <botan/parsing.h>
+#include <botan/exceptn.h>
+#include <cctype>
+
+namespace Botan {
+
+namespace Charset {
+
+namespace {
+
+/*************************************************
+* Decode a UCS-2 byte string into ISO 8859-1.
+*
+* The input is consumed two bytes at a time. The
+* first byte of each pair must be zero; the second
+* byte is copied to the output unchanged.
+*
+* Throws Decoding_Error if the input has an odd
+* number of bytes, or if the first byte of any
+* pair is non-zero.
+*************************************************/
+std::string ucs2_to_latin1(const std::string& ucs2)
+   {
+   if((ucs2.size() % 2) != 0)
+      throw Decoding_Error("UCS-2 string has an odd number of bytes");
+
+   std::string decoded;
+
+   u32bit offset = 0;
+   while(offset != ucs2.size())
+      {
+      const byte first = ucs2[offset];
+      const byte second = ucs2[offset+1];
+      offset += 2;
+
+      // A non-zero first byte means the code unit is above 0xFF
+      if(first != 0)
+         throw Decoding_Error("UCS-2 has non-Latin1 characters");
+
+      decoded += static_cast<char>(second);
+      }
+
+   return decoded;
+   }
+
+/*************************************************
+* Convert from UTF-8 to ISO 8859-1.
+*
+* Bytes 0x00-0x7F are copied through unchanged.
+* Latin-1 characters 0x80-0xFF are encoded in
+* UTF-8 as exactly two bytes: a lead byte of 0xC2
+* or 0xC3 followed by one continuation byte of the
+* form 10xxxxxx.
+*
+* Throws Decoding_Error if:
+*  - a two-byte sequence is cut off by end of input
+*  - the second byte is not a continuation byte
+*  - the sequence is an overlong encoding of an
+*    ASCII character (lead byte 0xC0 or 0xC1)
+*  - any other byte above 0x7F appears as a lead
+*    byte (it is either outside Latin-1 or not a
+*    valid lead byte)
+*************************************************/
+std::string utf8_to_latin1(const std::string& utf8)
+   {
+   std::string iso8859;
+
+   u32bit position = 0;
+   while(position != utf8.size())
+      {
+      const byte c1 = static_cast<byte>(utf8[position++]);
+
+      if(c1 <= 0x7F)
+         {
+         iso8859 += static_cast<char>(c1);
+         continue;
+         }
+
+      /*
+      * Only lead bytes 0xC0-0xC3 can describe a code point that fits
+      * in eight bits. Lead bytes 0xC4 and above encode U+0100 or
+      * higher and must be rejected rather than truncated to a byte.
+      */
+      if(c1 < 0xC0 || c1 > 0xC3)
+         throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used");
+
+      if(position == utf8.size())
+         throw Decoding_Error("UTF-8: sequence truncated");
+
+      const byte c2 = static_cast<byte>(utf8[position++]);
+
+      // The trailing byte must have the form 10xxxxxx
+      if((c2 & 0xC0) != 0x80)
+         throw Decoding_Error("UTF-8: invalid continuation byte");
+
+      const byte iso_char =
+         static_cast<byte>(((c1 & 0x03) << 6) | (c2 & 0x3F));
+
+      // Lead bytes 0xC0/0xC1 re-encode ASCII in two bytes (overlong)
+      if(iso_char <= 0x7F)
+         throw Decoding_Error("UTF-8: sequence longer than needed");
+
+      iso8859 += static_cast<char>(iso_char);
+      }
+
+   return iso8859;
+   }
+
+/*************************************************
+* Encode an ISO 8859-1 string as UTF-8.
+*
+* Bytes up to 0x7F are copied as-is. Every other
+* byte becomes a two-byte sequence: a lead byte
+* carrying the top two bits, then a continuation
+* byte carrying the low six bits.
+*************************************************/
+std::string latin1_to_utf8(const std::string& iso8859)
+   {
+   std::string encoded;
+
+   u32bit index = 0;
+   while(index != iso8859.size())
+      {
+      const byte ch = static_cast<byte>(iso8859[index]);
+      ++index;
+
+      if(ch > 0x7F)
+         {
+         const byte lead = 0xC0 | (ch >> 6);
+         const byte trail = 0x80 | (ch & 0x3F);
+         encoded += static_cast<char>(lead);
+         encoded += static_cast<char>(trail);
+         }
+      else
+         encoded += static_cast<char>(ch);
+      }
+
+   return encoded;
+   }
+
+}
+
+/*************************************************
+* Transcode a string between character sets.
+*
+* LOCAL_CHARSET is treated as LATIN1_CHARSET for
+* both the source and the target. If the source
+* and target are then equal, the input is returned
+* unchanged. Supported conversions:
+*   Latin-1 -> UTF-8
+*   UTF-8   -> Latin-1
+*   UCS-2   -> Latin-1
+*
+* Throws Invalid_Argument for any other pairing.
+*************************************************/
+std::string transcode(const std::string& str,
+                      Character_Set to, Character_Set from)
+   {
+   if(to == LOCAL_CHARSET)
+      to = LATIN1_CHARSET;
+   if(from == LOCAL_CHARSET)
+      from = LATIN1_CHARSET;
+
+   // Nothing to do when both sides use the same encoding
+   if(from == to)
+      return str;
+
+   // Dispatch on the target first, then on the source
+   if(to == LATIN1_CHARSET)
+      {
+      if(from == UTF8_CHARSET)
+         return utf8_to_latin1(str);
+      if(from == UCS2_CHARSET)
+         return ucs2_to_latin1(str);
+      }
+   else if(to == UTF8_CHARSET)
+      {
+      if(from == LATIN1_CHARSET)
+         return latin1_to_utf8(str);
+      }
+
+   throw Invalid_Argument("Unknown transcoding operation from " +
+                          to_string(from) + " to " + to_string(to));
+   }
+
+/*************************************************
+* Report whether a character is one of the ten
+* decimal digit characters '0' through '9'.
+*************************************************/
+bool is_digit(char c)
+   {
+   switch(c)
+      {
+      case '0': case '1': case '2': case '3': case '4':
+      case '5': case '6': case '7': case '8': case '9':
+         return true;
+      default:
+         return false;
+      }
+   }
+
+/*************************************************
+* Report whether a character is whitespace.
+*
+* Only space, horizontal tab, newline and carriage
+* return are treated as whitespace here.
+*************************************************/
+bool is_space(char c)
+   {
+   return (c == ' ' || c == '\t' || c == '\n' || c == '\r');
+   }
+
+/*************************************************
+* Map a digit character '0'..'9' to its numeric
+* value 0..9.
+*
+* Throws Invalid_Argument for any other character.
+*************************************************/
+byte char2digit(char c)
+   {
+   // C++ guarantees that '0'..'9' have consecutive values
+   if(c >= '0' && c <= '9')
+      return static_cast<byte>(c - '0');
+
+   throw Invalid_Argument("char2digit: Input is not a digit character");
+   }
+
+/*************************************************
+* Map a numeric value 0..9 to the corresponding
+* digit character '0'..'9'.
+*
+* Throws Invalid_Argument if the value is above 9.
+*************************************************/
+char digit2char(byte b)
+   {
+   if(b > 9)
+      throw Invalid_Argument("digit2char: Input is not a digit");
+
+   // C++ guarantees that '0'..'9' have consecutive values
+   return static_cast<char>('0' + b);
+   }
+
+/*************************************************
+* Compare two characters ignoring case.
+*
+* Both characters are lower-cased with std::tolower
+* and the results compared for equality. Each is
+* converted to unsigned char first, since passing
+* a negative value to std::tolower is undefined.
+*************************************************/
+bool caseless_cmp(char a, char b)
+   {
+   const int lowered_a = std::tolower(static_cast<unsigned char>(a));
+   const int lowered_b = std::tolower(static_cast<unsigned char>(b));
+
+   return (lowered_a == lowered_b);
+   }
+
+}
+
+}