diff options
author | lloyd <[email protected]> | 2006-06-19 09:18:22 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2006-06-19 09:18:22 +0000 |
commit | 6413b5d29a781a231d2f331e4191b68fb88a27d9 (patch) | |
tree | 507ebe91b21a573ab05b348c92c886b6347b0e2a | |
parent | 4bbacf4aae4b8b98d02ea887593492c1b9dacdc7 (diff) |
Add default character set conversion classes
-rw-r--r-- | include/def_char.h | 25 | ||||
-rw-r--r-- | src/def_char.cpp | 95 |
2 files changed, 120 insertions, 0 deletions
diff --git a/include/def_char.h b/include/def_char.h new file mode 100644 index 000000000..c4abfb36b --- /dev/null +++ b/include/def_char.h @@ -0,0 +1,25 @@ +/************************************************* +* Default Character Set Handling Header File * +* (C) 1999-2006 The Botan Project * +*************************************************/ + +#ifndef BOTAN_DEFAULT_CHARSET_H__ +#define BOTAN_DEFAULT_CHARSET_H__ + +#include <botan/charset.h> + +namespace Botan { + +/************************************************* +* Default Character Set Transcoder Object * +*************************************************/ +class Default_Charset_Transcoder : public Charset_Transcoder + { + public: + std::string transcode(const std::string&, + Character_Set, Character_Set) const; + }; + +} + +#endif diff --git a/src/def_char.cpp b/src/def_char.cpp new file mode 100644 index 000000000..a5aca6d86 --- /dev/null +++ b/src/def_char.cpp @@ -0,0 +1,95 @@ +/************************************************* +* Default Character Set Handling Source File * +* (C) 1999-2006 The Botan Project * +*************************************************/ + +#include <botan/def_char.h> +#include <botan/exceptn.h> +#include <botan/parsing.h> + +namespace Botan { + +namespace { + +/************************************************* +* Convert from UTF-8 to ISO 8859-1 * +*************************************************/ +std::string utf8_to_latin1(const std::string& utf8) + { + std::string iso8859; + + u32bit position = 0; + while(position != utf8.size()) + { + const byte c1 = (byte)utf8[position++]; + + if(c1 <= 0x7F) + iso8859 += (char)c1; + else if(c1 >= 0xC0 && c1 <= 0xC7) + { + if(position == utf8.size()) + throw Decoding_Error("UTF-8: sequence truncated"); + + const byte c2 = (byte)utf8[position++]; + const byte iso_char = ((c1 & 0x07) << 6) | (c2 & 0x3F); + + if(iso_char <= 0x7F) + throw Decoding_Error("UTF-8: sequence longer than needed"); + + iso8859 += (char)iso_char; + } + else + throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used"); + } + + return iso8859; + } + +/************************************************* +* Convert from ISO 8859-1 to UTF-8 * +*************************************************/ +std::string latin1_to_utf8(const std::string& iso8859) + { + std::string utf8; + for(u32bit j = 0; j != iso8859.size(); ++j) + { + const byte c = (byte)iso8859[j]; + + if(c <= 0x7F) + utf8 += (char)c; + else + { + utf8 += (char)(0xC0 | (c >> 6)); + utf8 += (char)(0x80 | (c & 0x3F)); + } + } + return utf8; + } + +} + +/************************************************* +* Transcode between character sets * +*************************************************/ +std::string Default_Charset_Transcoder::transcode(const std::string& str, + Character_Set to, + Character_Set from) const + { + if(to == LOCAL_CHARSET) + to = LATIN1_CHARSET; + if(from == LOCAL_CHARSET) + from = LATIN1_CHARSET; + + if(to == from) + return str; + + if(from == LATIN1_CHARSET && to == UTF8_CHARSET) + return latin1_to_utf8(str); + if(from == UTF8_CHARSET && to == LATIN1_CHARSET) + return utf8_to_latin1(str); + + throw Invalid_Argument("Unknown transcoding operation from " + + to_string(from) + " to " + to_string(to)); + } + +} |