Move lib into src

author: lloyd <[email protected]> 2014-01-10 03:41:59 +0000
committer: lloyd <[email protected]> 2014-01-10 03:41:59 +0000
commit: 6894dca64c04936d07048c0e8cbf7e25858548c3 (patch)
tree: 5d572bfde9fe667dab14e3f04b5285a85d8acd95 /src/lib/utils/charset.cpp
parent: 9efa3be92442afb3d0b69890a36c7f122df18eda (diff)
1 files changed, 201 insertions, 0 deletions
diff --git a/src/lib/utils/charset.cpp b/src/lib/utils/charset.cpp
new file mode 100644
index 000000000..7ee637f80
--- /dev/null
+++ b/src/lib/utils/charset.cpp
@@ -0,0 +1,201 @@
+/*
+* Character Set Handling
+* (C) 1999-2007 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/charset.h>
+#include <botan/parsing.h>
+#include <botan/exceptn.h>
+#include <cctype>
+
+namespace Botan {
+
+namespace Charset {
+
+namespace {
+
+/*
+* Narrow a UCS-2 byte string to ISO 8859-1
+*
+* The input is consumed two bytes at a time. The first byte of every
+* pair must be zero; the second byte is copied to the output as-is.
+*
+* Throws Decoding_Error if the input has an odd number of bytes or if
+* the first byte of any pair is non-zero.
+*/
+std::string ucs2_to_latin1(const std::string& ucs2)
+   {
+   const size_t length = ucs2.size();
+
+   if(length % 2 != 0)
+      throw Decoding_Error("UCS-2 string has an odd number of bytes");
+
+   std::string result;
+
+   // length is even here, so stepping by two lands exactly on the end
+   for(size_t pair = 0; pair < length; pair += 2)
+      {
+      const byte high = ucs2[pair];
+      const byte low = ucs2[pair + 1];
+
+      // Anything with a non-zero leading byte cannot be held in one octet
+      if(high != 0)
+         throw Decoding_Error("UCS-2 has non-Latin1 characters");
+
+      result.push_back(static_cast<char>(low));
+      }
+
+   return result;
+   }
+
+/*
+* Convert from UTF-8 to ISO 8859-1
+*
+* Single-byte sequences (0x00-0x7F) are copied through unchanged.
+* Two-byte sequences are decoded only when the result fits in one
+* octet, i.e. when the lead byte is 0xC2 or 0xC3. Everything else is
+* rejected.
+*
+* Throws Decoding_Error if:
+*  - the input ends in the middle of a two-byte sequence
+*  - the second byte of a sequence is not a continuation byte (10xxxxxx)
+*  - a two-byte sequence encodes a value that fits in one byte (overlong)
+*  - the input contains any other byte, which either starts a sequence
+*    for a character outside Latin-1 or is a stray continuation byte
+*/
+std::string utf8_to_latin1(const std::string& utf8)
+   {
+   std::string iso8859;
+
+   size_t position = 0;
+   while(position != utf8.size())
+      {
+      const byte c1 = static_cast<byte>(utf8[position++]);
+
+      if(c1 <= 0x7F)
+         iso8859 += static_cast<char>(c1);
+      else if(c1 >= 0xC0 && c1 <= 0xC3)
+         {
+         /*
+         * Only lead bytes 0xC0-0xC3 can decode to a value <= 0xFF.
+         * Lead bytes 0xC4 and above carry payload bits that would be
+         * shifted out of an 8-bit result, so they are left to the
+         * final else branch and rejected as non-Latin1.
+         */
+         if(position == utf8.size())
+            throw Decoding_Error("UTF-8: sequence truncated");
+
+         const byte c2 = static_cast<byte>(utf8[position++]);
+
+         // The trailing byte of a sequence must have the form 10xxxxxx
+         if((c2 & 0xC0) != 0x80)
+            throw Decoding_Error("UTF-8: invalid continuation byte");
+
+         const byte iso_char =
+            static_cast<byte>(((c1 & 0x03) << 6) | (c2 & 0x3F));
+
+         // Lead bytes 0xC0 and 0xC1 always land here (overlong encoding)
+         if(iso_char <= 0x7F)
+            throw Decoding_Error("UTF-8: sequence longer than needed");
+
+         iso8859 += static_cast<char>(iso_char);
+         }
+      else
+         throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used");
+      }
+
+   return iso8859;
+   }
+
+/*
+* Widen an ISO 8859-1 string to UTF-8
+*
+* Bytes up to 0x7F are copied unchanged. Every other byte becomes a
+* two-byte sequence: the top two bits go into the lead byte (0xC0 | bits)
+* and the low six bits go into the trailing byte (0x80 | bits).
+*/
+std::string latin1_to_utf8(const std::string& iso8859)
+   {
+   std::string encoded;
+
+   for(const char ch : iso8859)
+      {
+      const byte value = static_cast<byte>(ch);
+
+      if(value > 0x7F)
+         {
+         encoded.push_back(static_cast<char>(0xC0 | (value >> 6)));
+         encoded.push_back(static_cast<char>(0x80 | (value & 0x3F)));
+         }
+      else
+         encoded.push_back(static_cast<char>(value));
+      }
+
+   return encoded;
+   }
+
+}
+
+/*
+* Transcode a string between character sets
+*
+* LOCAL_CHARSET is treated as LATIN1_CHARSET on both sides. If the
+* source and target are then the same, the input is returned unchanged.
+* The supported conversions are Latin-1 to UTF-8, UTF-8 to Latin-1 and
+* UCS-2 to Latin-1; any other pair throws Invalid_Argument.
+*/
+std::string transcode(const std::string& str,
+                      Character_Set to, Character_Set from)
+   {
+   const Character_Set target = (to == LOCAL_CHARSET) ? LATIN1_CHARSET : to;
+   const Character_Set source = (from == LOCAL_CHARSET) ? LATIN1_CHARSET : from;
+
+   if(source == target)
+      return str;
+
+   if(target == LATIN1_CHARSET)
+      {
+      if(source == UTF8_CHARSET)
+         return utf8_to_latin1(str);
+      if(source == UCS2_CHARSET)
+         return ucs2_to_latin1(str);
+      }
+   else if(target == UTF8_CHARSET && source == LATIN1_CHARSET)
+      return latin1_to_utf8(str);
+
+   // The message reports the charset values after the LOCAL substitution
+   throw Invalid_Argument("Unknown transcoding operation from " +
+                          std::to_string(source) + " to " + std::to_string(target));
+   }
+
+/*
+* Report whether a character is one of the decimal digits '0' to '9'
+*/
+bool is_digit(char c)
+   {
+   // The ten digit characters have consecutive values in C++
+   return (c >= '0' && c <= '9');
+   }
+
+/*
+* Report whether a character is a space, tab, newline or carriage return
+*/
+bool is_space(char c)
+   {
+   switch(c)
+      {
+      case ' ':
+      case '\t':
+      case '\n':
+      case '\r':
+         return true;
+      default:
+         return false;
+      }
+   }
+
+/*
+* Map a decimal digit character to its numeric value
+*
+* Returns 0 through 9 for '0' through '9'. Any other character
+* throws Invalid_Argument.
+*/
+byte char2digit(char c)
+   {
+   if(c < '0' || c > '9')
+      throw Invalid_Argument("char2digit: Input is not a digit character");
+
+   // Digit characters are consecutive, so the offset from '0' is the value
+   return static_cast<byte>(c - '0');
+   }
+
+/*
+* Map a numeric value in the range 0-9 to its digit character
+*
+* Returns '0' through '9' for 0 through 9. Any other value throws
+* Invalid_Argument.
+*/
+char digit2char(byte b)
+   {
+   if(b > 9)
+      throw Invalid_Argument("digit2char: Input is not a digit");
+
+   // Digit characters are consecutive, so '0' plus the value is the digit
+   return static_cast<char>('0' + b);
+   }
+
+/*
+* Compare two characters, ignoring case
+*
+* Both characters are lowercased with std::tolower and the results
+* are compared for equality.
+*/
+bool caseless_cmp(char a, char b)
+   {
+   // Go through unsigned char so tolower never receives a negative value
+   const int folded_a = std::tolower(static_cast<unsigned char>(a));
+   const int folded_b = std::tolower(static_cast<unsigned char>(b));
+
+   return folded_a == folded_b;
+   }
+
+}
+
+}
author	lloyd <[email protected]>	2014-01-10 03:41:59 +0000
committer	lloyd <[email protected]>	2014-01-10 03:41:59 +0000
commit	6894dca64c04936d07048c0e8cbf7e25858548c3 (patch)
tree	5d572bfde9fe667dab14e3f04b5285a85d8acd95 /src/lib/utils/charset.cpp
parent	9efa3be92442afb3d0b69890a36c7f122df18eda (diff)