Add default character set conversion classes

author: lloyd <[email protected]> 2006-06-19 09:18:22 +0000
committer: lloyd <[email protected]> 2006-06-19 09:18:22 +0000
commit: 6413b5d29a781a231d2f331e4191b68fb88a27d9 (patch)
tree: 507ebe91b21a573ab05b348c92c886b6347b0e2a
parent: 4bbacf4aae4b8b98d02ea887593492c1b9dacdc7 (diff)
2 files changed, 120 insertions, 0 deletions
diff --git a/include/def_char.h b/include/def_char.h
new file mode 100644
index 000000000..c4abfb36b
--- /dev/null
+++ b/include/def_char.h
@@ -0,0 +1,25 @@
+/*************************************************
+* Default Character Set Handling Header File     *
+* (C) 1999-2006 The Botan Project                *
+*************************************************/
+
+#ifndef BOTAN_DEFAULT_CHARSET_H__
+#define BOTAN_DEFAULT_CHARSET_H__
+
+#include <botan/charset.h>
+
+namespace Botan {
+
+/*************************************************
+* Default Character Set Transcoder Object        *
+*************************************************/
+class Default_Charset_Transcoder : public Charset_Transcoder
+   {
+   public:
+      std::string transcode(const std::string&,
+                            Character_Set, Character_Set) const;
+   };
+
+}
+
+#endif
diff --git a/src/def_char.cpp b/src/def_char.cpp
new file mode 100644
index 000000000..a5aca6d86
--- /dev/null
+++ b/src/def_char.cpp
@@ -0,0 +1,95 @@
+/*************************************************
+* Default Character Set Handling Source File     *
+* (C) 1999-2006 The Botan Project                *
+*************************************************/
+
+#include <botan/def_char.h>
+#include <botan/exceptn.h>
+#include <botan/parsing.h>
+
+namespace Botan {
+
+namespace {
+
+/*************************************************
+* Convert from UTF-8 to ISO 8859-1               *
+*************************************************/
+std::string utf8_to_latin1(const std::string& utf8)
+   {
+   std::string iso8859;
+
+   u32bit position = 0;
+   while(position != utf8.size())
+      {
+      const byte c1 = (byte)utf8[position++];
+
+      if(c1 <= 0x7F)
+         iso8859 += (char)c1;
+      else if(c1 >= 0xC0 && c1 <= 0xC7)
+         {
+         if(position == utf8.size())
+            throw Decoding_Error("UTF-8: sequence truncated");
+
+         const byte c2 = (byte)utf8[position++];
+         const byte iso_char = ((c1 & 0x07) << 6) | (c2 & 0x3F);
+
+         if(iso_char <= 0x7F)
+            throw Decoding_Error("UTF-8: sequence longer than needed");
+
+         iso8859 += (char)iso_char;
+         }
+      else
+         throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used");
+      }
+
+   return iso8859;
+   }
+
+/*************************************************
+* Convert from ISO 8859-1 to UTF-8               *
+*************************************************/
+std::string latin1_to_utf8(const std::string& iso8859)
+   {
+   std::string utf8;
+   for(u32bit j = 0; j != iso8859.size(); ++j)
+      {
+      const byte c = (byte)iso8859[j];
+
+      if(c <= 0x7F)
+         utf8 += (char)c;
+      else
+         {
+         utf8 += (char)(0xC0 | (c >> 6));
+         utf8 += (char)(0x80 | (c & 0x3F));
+         }
+      }
+   return utf8;
+   }
+
+}
+
+/*************************************************
+* Transcode between character sets               *
+*************************************************/
+std::string Default_Charset_Transcoder::transcode(const std::string& str,
+                                                  Character_Set to,
+                                                  Character_Set from) const
+   {
+   if(to == LOCAL_CHARSET)
+      to = LATIN1_CHARSET;
+   if(from == LOCAL_CHARSET)
+      from = LATIN1_CHARSET;
+
+   if(to == from)
+      return str;
+
+   if(from == LATIN1_CHARSET && to == UTF8_CHARSET)
+      return latin1_to_utf8(str);
+   if(from == UTF8_CHARSET && to == LATIN1_CHARSET)
+      return utf8_to_latin1(str);
+
+   throw Invalid_Argument("Unknown transcoding operation from " +
+                          to_string(from) + " to " + to_string(to));
+   }
+
+}
author	lloyd <[email protected]>	2006-06-19 09:18:22 +0000
committer	lloyd <[email protected]>	2006-06-19 09:18:22 +0000
commit	6413b5d29a781a231d2f331e4191b68fb88a27d9 (patch)
tree	507ebe91b21a573ab05b348c92c886b6347b0e2a
parent	4bbacf4aae4b8b98d02ea887593492c1b9dacdc7 (diff)