diff options
-rw-r--r-- | doc/examples/asn1.cpp | 4 | ||||
-rw-r--r-- | include/charset.h | 14 | ||||
-rw-r--r-- | src/asn1_alt.cpp | 4 | ||||
-rw-r--r-- | src/asn1_str.cpp | 22 | ||||
-rw-r--r-- | src/asn1_tm.cpp | 10 | ||||
-rw-r--r-- | src/base64.cpp | 8 | ||||
-rw-r--r-- | src/big_code.cpp | 7 | ||||
-rw-r--r-- | src/charset.cpp | 34 | ||||
-rw-r--r-- | src/conf.cpp | 2 | ||||
-rw-r--r-- | src/hex.cpp | 8 | ||||
-rw-r--r-- | src/inifile.cpp | 2 | ||||
-rw-r--r-- | src/init_def.cpp | 3 | ||||
-rw-r--r-- | src/openpgp.cpp | 2 | ||||
-rw-r--r-- | src/parsing.cpp | 77 | ||||
-rw-r--r-- | src/x509find.cpp | 12 |
15 files changed, 86 insertions, 123 deletions
diff --git a/doc/examples/asn1.cpp b/doc/examples/asn1.cpp index d3b4a65b9..81d3b4b5d 100644 --- a/doc/examples/asn1.cpp +++ b/doc/examples/asn1.cpp @@ -226,7 +226,9 @@ void decode(BER_Decoder& decoder, u32bit level) ASN1_String str; data.decode(str); if(UTF8_TERMINAL) - emit(type_name(type_tag), level, length, iso2utf(str.iso_8859())); + emit(type_name(type_tag), level, length, + Charset::transcode(str.iso_8859(), + LATIN1_CHARSET, UTF8_CHARSET)); else emit(type_name(type_tag), level, length, str.iso_8859()); } diff --git a/include/charset.h b/include/charset.h index a79530f05..59ac730c1 100644 --- a/include/charset.h +++ b/include/charset.h @@ -13,7 +13,7 @@ namespace Botan { /************************************************* -* Character Set Transcoder Object * +* Character Set Transcoder Interface * *************************************************/ class Charset_Transcoder { @@ -24,21 +24,21 @@ class Charset_Transcoder virtual ~Charset_Transcoder() {} }; +namespace Charset { + /************************************************* * Character Set Handling * *************************************************/ +std::string transcode(const std::string&, Character_Set, Character_Set); + bool is_digit(char); bool is_space(char); -char to_lower(char); +bool caseless_cmp(char, char); byte char2digit(char); char digit2char(byte); -std::string local2iso(const std::string&); -std::string iso2local(const std::string&); - -std::string utf2iso(const std::string&); -std::string iso2utf(const std::string&); +} } diff --git a/src/asn1_alt.cpp b/src/asn1_alt.cpp index 2bc6f01db..6f5ce6c11 100644 --- a/src/asn1_alt.cpp +++ b/src/asn1_alt.cpp @@ -187,7 +187,9 @@ void AlternativeName::decode_from(BER_Decoder& source) } else if(tag == 1 || tag == 2 || tag == 6) { - const std::string value = iso2local(ASN1::to_string(obj)); + const std::string value = Charset::transcode(ASN1::to_string(obj), + LATIN1_CHARSET, + LOCAL_CHARSET); if(tag == 1) add_attribute("RFC822", value); if(tag == 2) add_attribute("DNS", value); diff --git a/src/asn1_str.cpp b/src/asn1_str.cpp index f3c8584ed..c67480267 100644 --- a/src/asn1_str.cpp +++ b/src/asn1_str.cpp @@ -73,7 +73,8 @@ bool is_string_type(ASN1_Tag tag) *************************************************/ ASN1_String::ASN1_String(const std::string& str, ASN1_Tag t) : tag(t) { - iso_8859_str = local2iso(str); + iso_8859_str = Charset::transcode(str, LOCAL_CHARSET, LATIN1_CHARSET); + if(tag == DIRECTORY_STRING) tag = choose_encoding(iso_8859_str); @@ -93,7 +94,7 @@ ASN1_String::ASN1_String(const std::string& str, ASN1_Tag t) : tag(t) *************************************************/ ASN1_String::ASN1_String(const std::string& str) { - iso_8859_str = local2iso(str); + iso_8859_str = Charset::transcode(str, LOCAL_CHARSET, LATIN1_CHARSET); tag = choose_encoding(iso_8859_str); } @@ -110,7 +111,7 @@ std::string ASN1_String::iso_8859() const *************************************************/ std::string ASN1_String::value() const { - return iso2local(iso_8859_str); + return Charset::transcode(iso_8859_str, LATIN1_CHARSET, LOCAL_CHARSET); } /************************************************* @@ -128,7 +129,7 @@ void ASN1_String::encode_into(DER_Encoder& encoder) const { std::string value = iso_8859(); if(tagging() == UTF8_STRING) - value = iso2utf(value); + value = Charset::transcode(value, LATIN1_CHARSET, UTF8_CHARSET); encoder.add_object(tagging(), UNIVERSAL, value); } @@ -140,6 +141,7 @@ namespace { // FIXME: inline this std::string convert_string(BER_Object obj, ASN1_Tag type) { + // FIMXE: add a UNC16_CHARSET transcoder op if(type == BMP_STRING) { if(obj.value.size() % 2 == 1) @@ -156,12 +158,18 @@ std::string convert_string(BER_Object obj, ASN1_Tag type) value += (char)c2; } - return iso2local(value); + return Charset::transcode(value, LATIN1_CHARSET, LOCAL_CHARSET); } else if(type == UTF8_STRING) - return iso2local(utf2iso(ASN1::to_string(obj))); + { + return Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET, + LOCAL_CHARSET); + } else - return iso2local(ASN1::to_string(obj)); + { + return Charset::transcode(ASN1::to_string(obj), + LATIN1_CHARSET, LOCAL_CHARSET); + } } } diff --git a/src/asn1_tm.cpp b/src/asn1_tm.cpp index 5428ac424..25b434609 100644 --- a/src/asn1_tm.cpp +++ b/src/asn1_tm.cpp @@ -87,7 +87,7 @@ void X509_Time::set_to(const std::string& time_str) for(u32bit j = 0; j != time_str.size(); ++j) { - if(is_digit(time_str[j])) + if(Charset::is_digit(time_str[j])) current += time_str[j]; else { @@ -176,7 +176,9 @@ void X509_Time::encode_into(DER_Encoder& der) const { if(tag != GENERALIZED_TIME && tag != UTC_TIME) throw Invalid_Argument("X509_Time: Bad encoding tag"); - der.add_object(tag, UNIVERSAL, local2iso(as_string())); + der.add_object(tag, UNIVERSAL, + Charset::transcode(as_string(), + LOCAL_CHARSET, LATIN1_CHARSET)); } /************************************************* @@ -306,7 +308,9 @@ s32bit validity_check(const X509_Time& start, const X509_Time& end, void X509_Time::decode_from(BER_Decoder& source) { BER_Object ber_time = source.get_next_object(); - set_to(iso2local(ASN1::to_string(ber_time)), ber_time.type_tag); + set_to(Charset::transcode(ASN1::to_string(ber_time), + LATIN1_CHARSET, LOCAL_CHARSET), + ber_time.type_tag); } } diff --git a/src/base64.cpp b/src/base64.cpp index 22c6d78c5..9511525a6 100644 --- a/src/base64.cpp +++ b/src/base64.cpp @@ -169,8 +169,12 @@ void Base64_Decoder::decode_and_send(const byte block[], u32bit length) *************************************************/ void Base64_Decoder::handle_bad_char(byte c) { - if(checking == NONE) return; - if((checking == IGNORE_WS) && is_space(c)) return; + if(checking == NONE) + return; + + if((checking == IGNORE_WS) && Charset::is_space(c)) + return; + throw Decoding_Error("Base64_Decoder: Invalid base64 character: " + c); } diff --git a/src/big_code.cpp b/src/big_code.cpp index 235665f7f..1a9502ee8 100644 --- a/src/big_code.cpp +++ b/src/big_code.cpp @@ -30,7 +30,7 @@ void BigInt::encode(byte output[], const BigInt& n, Base base) const u32bit output_size = n.encoded_size(Octal); for(u32bit j = 0; j != output_size; ++j) { - output[output_size - 1 - j] = digit2char(copy % 8); + output[output_size - 1 - j] = Charset::digit2char(copy % 8); copy /= 8; } } @@ -43,7 +43,8 @@ void BigInt::encode(byte output[], const BigInt& n, Base base) for(u32bit j = 0; j != output_size; ++j) { divide(copy, 10, copy, remainder); - output[output_size - 1 - j] = digit2char(remainder.word_at(0)); + output[output_size - 1 - j] = + Charset::digit2char(remainder.word_at(0)); if(copy.is_zero()) break; } @@ -123,7 +124,7 @@ BigInt BigInt::decode(const byte buf[], u32bit length, Base base) const u32bit RADIX = ((base == Decimal) ? 10 : 8); for(u32bit j = 0; j != length; ++j) { - byte x = char2digit(buf[j]); + byte x = Charset::char2digit(buf[j]); if(x >= RADIX) { if(RADIX == 10) diff --git a/src/charset.cpp b/src/charset.cpp index 7aeccaf98..3a28cce92 100644 --- a/src/charset.cpp +++ b/src/charset.cpp @@ -6,10 +6,22 @@ #include <botan/charset.h> #include <botan/hex.h> #include <botan/base64.h> +#include <botan/libstate.h> #include <ctype.h> namespace Botan { +namespace Charset { + +/************************************************* +* Perform character set transcoding * +*************************************************/ +std::string transcode(const std::string& str, + Character_Set to, Character_Set from) + { + return global_state().transcode(str, to, from); + } + /************************************************* * Check if a character represents a digit * *************************************************/ @@ -70,28 +82,14 @@ char digit2char(byte b) } /************************************************* -* Return the lower-case representation * -*************************************************/ -char to_lower(char c) - { - return tolower((unsigned char)c); - } - -/************************************************* -* Convert from local charset to ISO 8859-1 * +* Case-insensitive character comparison * *************************************************/ -std::string local2iso(const std::string& str) +bool caseless_cmp(char a, char b) { - return str; + return (tolower((unsigned char)a) == tolower((unsigned char)b)); } -/************************************************* -* Convert from ISO 8859-1 to local charset * -*************************************************/ -std::string iso2local(const std::string& str) - { - return str; - } +} /************************************************* * Hex Encoder Lookup Tables * diff --git a/src/conf.cpp b/src/conf.cpp index 6fa616b3c..0266444fe 100644 --- a/src/conf.cpp +++ b/src/conf.cpp @@ -60,7 +60,7 @@ u32bit get_time(const std::string& name) u32bit scale = 1; - if(is_digit(suffix)) + if(Charset::is_digit(suffix)) value += suffix; else if(suffix == 's') scale = 1; diff --git a/src/hex.cpp b/src/hex.cpp index d6b710656..42825b334 100644 --- a/src/hex.cpp +++ b/src/hex.cpp @@ -130,8 +130,12 @@ bool Hex_Decoder::is_valid(byte in) *************************************************/ void Hex_Decoder::handle_bad_char(byte c) { - if(checking == NONE) return; - if((checking == IGNORE_WS) && is_space(c)) return; + if(checking == NONE) + return; + + if((checking == IGNORE_WS) && Charset::is_space(c)) + return; + throw Decoding_Error("Hex_Decoder: Invalid hex character: " + to_string(c)); } diff --git a/src/inifile.cpp b/src/inifile.cpp index 44830ef22..22b8aa1a3 100644 --- a/src/inifile.cpp +++ b/src/inifile.cpp @@ -34,7 +34,7 @@ std::string strip_whitespace(const std::string& line) return new_line; if(c == '\\' && !is_escaped) { is_escaped = true; continue; } - if(is_space(c) && !in_quote && !in_string && !is_escaped) + if(Charset::is_space(c) && !in_quote && !in_string && !is_escaped) continue; new_line += c; diff --git a/src/init_def.cpp b/src/init_def.cpp index 3b071d7eb..9d07d730b 100644 --- a/src/init_def.cpp +++ b/src/init_def.cpp @@ -12,6 +12,7 @@ #include <botan/eng_def.h> #include <botan/fips140.h> #include <botan/x931_rng.h> +#include <botan/def_char.h> namespace Botan { @@ -107,6 +108,8 @@ void initialize(const std::string& arg_string) } global_state().add_engine(new Default_Engine); + global_state().set_transcoder(new Default_Charset_Transcoder); + global_state().set_prng(new ANSI_X931_RNG); std::vector<EntropySource*> sources = Modules::get_entropy_sources(); for(u32bit j = 0; j != sources.size(); ++j) diff --git a/src/openpgp.cpp b/src/openpgp.cpp index fec292d1e..abb10a5cb 100644 --- a/src/openpgp.cpp +++ b/src/openpgp.cpp @@ -116,7 +116,7 @@ SecureVector<byte> decode(DataSource& source, std::string& label, end_of_headers = true; for(u32bit j = 0; j != this_header.length(); ++j) - if(!is_space(this_header[j])) + if(!Charset::is_space(this_header[j])) end_of_headers = false; if(!end_of_headers) diff --git a/src/parsing.cpp b/src/parsing.cpp index 574b7eaec..4939cd082 100644 --- a/src/parsing.cpp +++ b/src/parsing.cpp @@ -20,7 +20,7 @@ u32bit to_u32bit(const std::string& number) { const u32bit OVERFLOW_MARK = 0xFFFFFFFF / 10; - byte digit = char2digit(*j); + byte digit = Charset::char2digit(*j); if((n > OVERFLOW_MARK) || (n == OVERFLOW_MARK && digit > 5)) throw Decoding_Error("to_u32bit: Integer overflow"); @@ -41,7 +41,7 @@ std::string to_string(u64bit n, u32bit min_len) { while(n > 0) { - lenstr = digit2char(n % 10) + lenstr; + lenstr = Charset::digit2char(n % 10) + lenstr; n /= 10; } } @@ -179,31 +179,31 @@ bool x500_name_cmp(const std::string& name1, const std::string& name2) std::string::const_iterator p1 = name1.begin(); std::string::const_iterator p2 = name2.begin(); - while((p1 != name1.end()) && is_space(*p1)) ++p1; - while((p2 != name2.end()) && is_space(*p2)) ++p2; + while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1; + while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2; while(p1 != name1.end() && p2 != name2.end()) { - if(is_space(*p1)) + if(Charset::is_space(*p1)) { - if(!is_space(*p2)) + if(!Charset::is_space(*p2)) return false; - while((p1 != name1.end()) && is_space(*p1)) ++p1; - while((p2 != name2.end()) && is_space(*p2)) ++p2; + while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1; + while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2; if(p1 == name1.end() && p2 == name2.end()) return true; } - if(to_lower(*p1) != to_lower(*p2)) + if(!Charset::caseless_cmp(*p1, *p2)) return false; ++p1; ++p2; } - while((p1 != name1.end()) && is_space(*p1)) ++p1; - while((p2 != name2.end()) && is_space(*p2)) ++p2; + while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1; + while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2; if((p1 != name1.end()) || (p2 != name2.end())) return false; @@ -211,61 +211,6 @@ bool x500_name_cmp(const std::string& name1, const std::string& name2) } /************************************************* -* Convert from UTF-8 to ISO 8859-1 * -*************************************************/ -std::string utf2iso(const std::string& utf8) - { - std::string iso8859; - - u32bit position = 0; - while(position != utf8.size()) - { - const byte c1 = (byte)utf8[position++]; - - if(c1 <= 0x7F) - iso8859 += (char)c1; - else if(c1 >= 0xC0 && c1 <= 0xC7) - { - if(position == utf8.size()) - throw Decoding_Error("UTF-8: sequence truncated"); - - const byte c2 = (byte)utf8[position++]; - const byte iso_char = ((c1 & 0x07) << 6) | (c2 & 0x3F); - - if(iso_char <= 0x7F) - throw Decoding_Error("UTF-8: sequence longer than needed"); - - iso8859 += (char)iso_char; - } - else - throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used"); - } - - return iso8859; - } - -/************************************************* -* Convert from ISO 8859-1 to UTF-8 * -*************************************************/ -std::string iso2utf(const std::string& iso8859) - { - std::string utf8; - for(u32bit j = 0; j != iso8859.size(); ++j) - { - const byte c = (byte)iso8859[j]; - - if(c <= 0x7F) - utf8 += (char)c; - else - { - utf8 += (char)(0xC0 | (c >> 6)); - utf8 += (char)(0x80 | (c & 0x3F)); - } - } - return utf8; - } - -/************************************************* * Parse and compute an arithmetic expression * *************************************************/ u32bit parse_expr(const std::string& expr) diff --git a/src/x509find.cpp b/src/x509find.cpp index 2ba430caf..f78c037b5 100644 --- a/src/x509find.cpp +++ b/src/x509find.cpp @@ -20,21 +20,13 @@ namespace { typedef bool (*compare_fn)(const std::string&, const std::string&); /************************************************* -* Predicate for caseless searching * -*************************************************/ -bool caseless_cmp(char a, char b) - { - return (to_lower(a) == to_lower(b)); - } - -/************************************************* * Compare based on case-insensive substrings * *************************************************/ bool substring_match(const std::string& searching_for, const std::string& found) { if(std::search(found.begin(), found.end(), searching_for.begin(), - searching_for.end(), caseless_cmp) != found.end()) + searching_for.end(), Charset::caseless_cmp) != found.end()) return true; return false; } @@ -48,7 +40,7 @@ bool ignore_case(const std::string& searching_for, const std::string& found) return false; return std::equal(found.begin(), found.end(), - searching_for.begin(), caseless_cmp); + searching_for.begin(), Charset::caseless_cmp); } /************************************************* |