15 files changed, 86 insertions, 123 deletions
diff --git a/doc/examples/asn1.cpp b/doc/examples/asn1.cpp
index d3b4a65b9..81d3b4b5d 100644
--- a/doc/examples/asn1.cpp
+++ b/doc/examples/asn1.cpp
@@ -226,7 +226,9 @@ void decode(BER_Decoder& decoder, u32bit level)
          ASN1_String str;
          data.decode(str);
          if(UTF8_TERMINAL)
-            emit(type_name(type_tag), level, length, iso2utf(str.iso_8859()));
+            emit(type_name(type_tag), level, length,
+                 Charset::transcode(str.iso_8859(),
+                                    LATIN1_CHARSET, UTF8_CHARSET));
          else
             emit(type_name(type_tag), level, length, str.iso_8859());
          }
diff --git a/include/charset.h b/include/charset.h
index a79530f05..59ac730c1 100644
--- a/include/charset.h
+++ b/include/charset.h
@@ -13,7 +13,7 @@
 namespace Botan {
 
 /*************************************************
-* Character Set Transcoder Object                *
+* Character Set Transcoder Interface             *
 *************************************************/
 class Charset_Transcoder
    {
@@ -24,21 +24,21 @@ class Charset_Transcoder
       virtual ~Charset_Transcoder() {}
    };
 
+namespace Charset {
+
 /*************************************************
 * Character Set Handling                         *
 *************************************************/
+std::string transcode(const std::string&, Character_Set, Character_Set);
+
 bool is_digit(char);
 bool is_space(char);
-char to_lower(char);
+bool caseless_cmp(char, char);
 
 byte char2digit(char);
 char digit2char(byte);
 
-std::string local2iso(const std::string&);
-std::string iso2local(const std::string&);
-
-std::string utf2iso(const std::string&);
-std::string iso2utf(const std::string&);
+}
 
 }
 
diff --git a/src/asn1_alt.cpp b/src/asn1_alt.cpp
index 2bc6f01db..6f5ce6c11 100644
--- a/src/asn1_alt.cpp
+++ b/src/asn1_alt.cpp
@@ -187,7 +187,9 @@ void AlternativeName::decode_from(BER_Decoder& source)
          }
       else if(tag == 1 || tag == 2 || tag == 6)
          {
-         const std::string value = iso2local(ASN1::to_string(obj));
+         const std::string value = Charset::transcode(ASN1::to_string(obj),
+                                                      LATIN1_CHARSET,
+                                                      LOCAL_CHARSET);
 
          if(tag == 1) add_attribute("RFC822", value);
          if(tag == 2) add_attribute("DNS", value);
diff --git a/src/asn1_str.cpp b/src/asn1_str.cpp
index f3c8584ed..c67480267 100644
--- a/src/asn1_str.cpp
+++ b/src/asn1_str.cpp
@@ -73,7 +73,8 @@ bool is_string_type(ASN1_Tag tag)
 *************************************************/
 ASN1_String::ASN1_String(const std::string& str, ASN1_Tag t) : tag(t)
    {
-   iso_8859_str = local2iso(str);
+   iso_8859_str = Charset::transcode(str, LOCAL_CHARSET, LATIN1_CHARSET);
+
    if(tag == DIRECTORY_STRING)
       tag = choose_encoding(iso_8859_str);
 
@@ -93,7 +94,7 @@ ASN1_String::ASN1_String(const std::string& str, ASN1_Tag t) : tag(t)
 *************************************************/
 ASN1_String::ASN1_String(const std::string& str)
    {
-   iso_8859_str = local2iso(str);
+   iso_8859_str = Charset::transcode(str, LOCAL_CHARSET, LATIN1_CHARSET);
    tag = choose_encoding(iso_8859_str);
    }
 
@@ -110,7 +111,7 @@ std::string ASN1_String::iso_8859() const
 *************************************************/
 std::string ASN1_String::value() const
    {
-   return iso2local(iso_8859_str);
+   return Charset::transcode(iso_8859_str, LATIN1_CHARSET, LOCAL_CHARSET);
    }
 
 /*************************************************
@@ -128,7 +129,7 @@ void ASN1_String::encode_into(DER_Encoder& encoder) const
    {
    std::string value = iso_8859();
    if(tagging() == UTF8_STRING)
-      value = iso2utf(value);
+      value = Charset::transcode(value, LATIN1_CHARSET, UTF8_CHARSET);
    encoder.add_object(tagging(), UNIVERSAL, value);
    }
 
@@ -140,6 +141,7 @@ namespace {
 // FIXME: inline this
 std::string convert_string(BER_Object obj, ASN1_Tag type)
    {
+   // FIXME: add a UNC16_CHARSET transcoder op
    if(type == BMP_STRING)
       {
       if(obj.value.size() % 2 == 1)
@@ -156,12 +158,18 @@ std::string convert_string(BER_Object obj, ASN1_Tag type)
 
          value += (char)c2;
          }
-      return iso2local(value);
+      return Charset::transcode(value, LATIN1_CHARSET, LOCAL_CHARSET);
       }
    else if(type == UTF8_STRING)
-      return iso2local(utf2iso(ASN1::to_string(obj)));
+      {
+      return Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET,
+                                LOCAL_CHARSET);
+      }
    else
-      return iso2local(ASN1::to_string(obj));
+      {
+      return Charset::transcode(ASN1::to_string(obj),
+                                LATIN1_CHARSET, LOCAL_CHARSET);
+      }
    }
 
 }
diff --git a/src/asn1_tm.cpp b/src/asn1_tm.cpp
index 5428ac424..25b434609 100644
--- a/src/asn1_tm.cpp
+++ b/src/asn1_tm.cpp
@@ -87,7 +87,7 @@ void X509_Time::set_to(const std::string& time_str)
 
    for(u32bit j = 0; j != time_str.size(); ++j)
       {
-      if(is_digit(time_str[j]))
+      if(Charset::is_digit(time_str[j]))
          current += time_str[j];
       else
          {
@@ -176,7 +176,9 @@ void X509_Time::encode_into(DER_Encoder& der) const
    {
    if(tag != GENERALIZED_TIME && tag != UTC_TIME)
       throw Invalid_Argument("X509_Time: Bad encoding tag");
-   der.add_object(tag, UNIVERSAL, local2iso(as_string()));
+   der.add_object(tag, UNIVERSAL,
+                  Charset::transcode(as_string(),
+                                     LOCAL_CHARSET, LATIN1_CHARSET));
    }
 
 /*************************************************
@@ -306,7 +308,9 @@ s32bit validity_check(const X509_Time& start, const X509_Time& end,
 void X509_Time::decode_from(BER_Decoder& source)
    {
    BER_Object ber_time = source.get_next_object();
-   set_to(iso2local(ASN1::to_string(ber_time)), ber_time.type_tag);
+   set_to(Charset::transcode(ASN1::to_string(ber_time),
+                             LATIN1_CHARSET, LOCAL_CHARSET),
+          ber_time.type_tag);
    }
 
 }
diff --git a/src/base64.cpp b/src/base64.cpp
index 22c6d78c5..9511525a6 100644
--- a/src/base64.cpp
+++ b/src/base64.cpp
@@ -169,8 +169,12 @@ void Base64_Decoder::decode_and_send(const byte block[], u32bit length)
 *************************************************/
 void Base64_Decoder::handle_bad_char(byte c)
    {
-   if(checking == NONE) return;
-   if((checking == IGNORE_WS) && is_space(c)) return;
-   throw Decoding_Error("Base64_Decoder: Invalid base64 character: " + c);
+   if(checking == NONE)
+      return;
+   if((checking == IGNORE_WS) && Charset::is_space(c))
+      return;
+   // literal + byte is pointer arithmetic; build a std::string to append c
+   throw Decoding_Error(
+      std::string("Base64_Decoder: Invalid base64 character: ") + (char)c);
    }
 
diff --git a/src/big_code.cpp b/src/big_code.cpp
index 235665f7f..1a9502ee8 100644
--- a/src/big_code.cpp
+++ b/src/big_code.cpp
@@ -30,7 +30,7 @@ void BigInt::encode(byte output[], const BigInt& n, Base base)
       const u32bit output_size = n.encoded_size(Octal);
       for(u32bit j = 0; j != output_size; ++j)
          {
-         output[output_size - 1 - j] = digit2char(copy % 8);
+         output[output_size - 1 - j] = Charset::digit2char(copy % 8);
          copy /= 8;
          }
       }
@@ -43,7 +43,8 @@ void BigInt::encode(byte output[], const BigInt& n, Base base)
       for(u32bit j = 0; j != output_size; ++j)
          {
          divide(copy, 10, copy, remainder);
-         output[output_size - 1 - j] = digit2char(remainder.word_at(0));
+         output[output_size - 1 - j] =
+            Charset::digit2char(remainder.word_at(0));
          if(copy.is_zero())
             break;
          }
@@ -123,7 +124,7 @@ BigInt BigInt::decode(const byte buf[], u32bit length, Base base)
       const u32bit RADIX = ((base == Decimal) ? 10 : 8);
       for(u32bit j = 0; j != length; ++j)
          {
-         byte x = char2digit(buf[j]);
+         byte x = Charset::char2digit(buf[j]);
          if(x >= RADIX)
             {
             if(RADIX == 10)
diff --git a/src/charset.cpp b/src/charset.cpp
index 7aeccaf98..3a28cce92 100644
--- a/src/charset.cpp
+++ b/src/charset.cpp
@@ -6,10 +6,22 @@
 #include <botan/charset.h>
 #include <botan/hex.h>
 #include <botan/base64.h>
+#include <botan/libstate.h>
 #include <ctype.h>
 
 namespace Botan {
 
+namespace Charset {
+
+/*************************************************
+* Transcode str from charset `from` into `to`    *
+*************************************************/
+std::string transcode(const std::string& str,
+                      Character_Set from, Character_Set to)
+   {
+   return global_state().transcode(str, from, to); // TODO: verify callee order
+   }
+
 /*************************************************
 * Check if a character represents a digit        *
 *************************************************/
@@ -70,28 +82,14 @@ char digit2char(byte b)
    }
 
 /*************************************************
-* Return the lower-case representation           *
-*************************************************/
-char to_lower(char c)
-   {
-   return tolower((unsigned char)c);
-   }
-
-/*************************************************
-* Convert from local charset to ISO 8859-1       *
+* Case-insensitive character comparison          *
 *************************************************/
-std::string local2iso(const std::string& str)
+bool caseless_cmp(char a, char b)
    {
-   return str;
+   return (tolower((unsigned char)a) == tolower((unsigned char)b));
    }
 
-/*************************************************
-* Convert from ISO 8859-1 to local charset       *
-*************************************************/
-std::string iso2local(const std::string& str)
-   {
-   return str;
-   }
+}
 
 /*************************************************
 * Hex Encoder Lookup Tables                      *
diff --git a/src/conf.cpp b/src/conf.cpp
index 6fa616b3c..0266444fe 100644
--- a/src/conf.cpp
+++ b/src/conf.cpp
@@ -60,7 +60,7 @@ u32bit get_time(const std::string& name)
 
    u32bit scale = 1;
 
-   if(is_digit(suffix))
+   if(Charset::is_digit(suffix))
       value += suffix;
    else if(suffix == 's')
       scale = 1;
diff --git a/src/hex.cpp b/src/hex.cpp
index d6b710656..42825b334 100644
--- a/src/hex.cpp
+++ b/src/hex.cpp
@@ -130,8 +130,12 @@ bool Hex_Decoder::is_valid(byte in)
 *************************************************/
 void Hex_Decoder::handle_bad_char(byte c)
    {
-   if(checking == NONE) return;
-   if((checking == IGNORE_WS) && is_space(c)) return;
+   // Not an error when checking is NONE, or for whitespace under IGNORE_WS
+   const bool tolerated = (checking == NONE) ||
+      ((checking == IGNORE_WS) && Charset::is_space(c));
+   if(tolerated)
+      return;
+
    throw Decoding_Error("Hex_Decoder: Invalid hex character: " +
                         to_string(c));
    }
diff --git a/src/inifile.cpp b/src/inifile.cpp
index 44830ef22..22b8aa1a3 100644
--- a/src/inifile.cpp
+++ b/src/inifile.cpp
@@ -34,7 +34,7 @@ std::string strip_whitespace(const std::string& line)
          return new_line;
       if(c == '\\' && !is_escaped) { is_escaped = true; continue; }
 
-      if(is_space(c) && !in_quote && !in_string && !is_escaped)
+      if(Charset::is_space(c) && !in_quote && !in_string && !is_escaped)
          continue;
 
       new_line += c;
diff --git a/src/init_def.cpp b/src/init_def.cpp
index 3b071d7eb..9d07d730b 100644
--- a/src/init_def.cpp
+++ b/src/init_def.cpp
@@ -12,6 +12,7 @@
 #include <botan/eng_def.h>
 #include <botan/fips140.h>
 #include <botan/x931_rng.h>
+#include <botan/def_char.h>
 
 namespace Botan {
 
@@ -107,6 +108,8 @@ void initialize(const std::string& arg_string)
       }
    global_state().add_engine(new Default_Engine);
 
+   global_state().set_transcoder(new Default_Charset_Transcoder);
+
    global_state().set_prng(new ANSI_X931_RNG);
    std::vector<EntropySource*> sources = Modules::get_entropy_sources();
    for(u32bit j = 0; j != sources.size(); ++j)
diff --git a/src/openpgp.cpp b/src/openpgp.cpp
index fec292d1e..abb10a5cb 100644
--- a/src/openpgp.cpp
+++ b/src/openpgp.cpp
@@ -116,7 +116,7 @@ SecureVector<byte> decode(DataSource& source, std::string& label,
 
       end_of_headers = true;
       for(u32bit j = 0; j != this_header.length(); ++j)
-         if(!is_space(this_header[j]))
+         if(!Charset::is_space(this_header[j]))
             end_of_headers = false;
 
       if(!end_of_headers)
diff --git a/src/parsing.cpp b/src/parsing.cpp
index 574b7eaec..4939cd082 100644
--- a/src/parsing.cpp
+++ b/src/parsing.cpp
@@ -20,7 +20,7 @@ u32bit to_u32bit(const std::string& number)
       {
       const u32bit OVERFLOW_MARK = 0xFFFFFFFF / 10;
 
-      byte digit = char2digit(*j);
+      byte digit = Charset::char2digit(*j);
 
       if((n > OVERFLOW_MARK) || (n == OVERFLOW_MARK && digit > 5))
          throw Decoding_Error("to_u32bit: Integer overflow");
@@ -41,7 +41,7 @@ std::string to_string(u64bit n, u32bit min_len)
       {
       while(n > 0)
          {
-         lenstr = digit2char(n % 10) + lenstr;
+         lenstr = Charset::digit2char(n % 10) + lenstr;
          n /= 10;
          }
       }
@@ -179,31 +179,31 @@ bool x500_name_cmp(const std::string& name1, const std::string& name2)
    std::string::const_iterator p1 = name1.begin();
    std::string::const_iterator p2 = name2.begin();
 
-   while((p1 != name1.end()) && is_space(*p1)) ++p1;
-   while((p2 != name2.end()) && is_space(*p2)) ++p2;
+   while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
+   while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
 
    while(p1 != name1.end() && p2 != name2.end())
       {
-      if(is_space(*p1))
+      if(Charset::is_space(*p1))
          {
-         if(!is_space(*p2))
+         if(!Charset::is_space(*p2))
             return false;
 
-         while((p1 != name1.end()) && is_space(*p1)) ++p1;
-         while((p2 != name2.end()) && is_space(*p2)) ++p2;
+         while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
+         while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
 
          if(p1 == name1.end() && p2 == name2.end())
             return true;
          }
 
-      if(to_lower(*p1) != to_lower(*p2))
+      if(!Charset::caseless_cmp(*p1, *p2))
          return false;
       ++p1;
       ++p2;
       }
 
-   while((p1 != name1.end()) && is_space(*p1)) ++p1;
-   while((p2 != name2.end()) && is_space(*p2)) ++p2;
+   while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
+   while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
 
    if((p1 != name1.end()) || (p2 != name2.end()))
       return false;
@@ -211,61 +211,6 @@ bool x500_name_cmp(const std::string& name1, const std::string& name2)
    }
 
 /*************************************************
-* Convert from UTF-8 to ISO 8859-1               *
-*************************************************/
-std::string utf2iso(const std::string& utf8)
-   {
-   std::string iso8859;
-
-   u32bit position = 0;
-   while(position != utf8.size())
-      {
-      const byte c1 = (byte)utf8[position++];
-
-      if(c1 <= 0x7F)
-         iso8859 += (char)c1;
-      else if(c1 >= 0xC0 && c1 <= 0xC7)
-         {
-         if(position == utf8.size())
-            throw Decoding_Error("UTF-8: sequence truncated");
-
-         const byte c2 = (byte)utf8[position++];
-         const byte iso_char = ((c1 & 0x07) << 6) | (c2 & 0x3F);
-
-         if(iso_char <= 0x7F)
-            throw Decoding_Error("UTF-8: sequence longer than needed");
-
-         iso8859 += (char)iso_char;
-         }
-      else
-         throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used");
-      }
-
-   return iso8859;
-   }
-
-/*************************************************
-* Convert from ISO 8859-1 to UTF-8               *
-*************************************************/
-std::string iso2utf(const std::string& iso8859)
-   {
-   std::string utf8;
-   for(u32bit j = 0; j != iso8859.size(); ++j)
-      {
-      const byte c = (byte)iso8859[j];
-
-      if(c <= 0x7F)
-         utf8 += (char)c;
-      else
-         {
-         utf8 += (char)(0xC0 | (c >> 6));
-         utf8 += (char)(0x80 | (c & 0x3F));
-         }
-      }
-   return utf8;
-   }
-
-/*************************************************
 * Parse and compute an arithmetic expression     *
 *************************************************/
 u32bit parse_expr(const std::string& expr)
diff --git a/src/x509find.cpp b/src/x509find.cpp
index 2ba430caf..f78c037b5 100644
--- a/src/x509find.cpp
+++ b/src/x509find.cpp
@@ -20,21 +20,13 @@ namespace {
 typedef bool (*compare_fn)(const std::string&, const std::string&);
 
 /*************************************************
-* Predicate for caseless searching               *
-*************************************************/
-bool caseless_cmp(char a, char b)
-   {
-   return (to_lower(a) == to_lower(b));
-   }
-
-/*************************************************
-* Compare based on case-insensive substrings     *
+* Compare based on case-insensitive substrings   *
 *************************************************/
 bool substring_match(const std::string& searching_for,
                      const std::string& found)
    {
    if(std::search(found.begin(), found.end(), searching_for.begin(),
-                  searching_for.end(), caseless_cmp) != found.end())
+                  searching_for.end(), Charset::caseless_cmp) != found.end())
       return true;
    return false;
    }
@@ -48,7 +40,7 @@ bool ignore_case(const std::string& searching_for, const std::string& found)
       return false;
 
    return std::equal(found.begin(), found.end(),
-                     searching_for.begin(), caseless_cmp);
+                     searching_for.begin(), Charset::caseless_cmp);
    }
 
 /*************************************************