blob: 69b00e6208a85b2abb09958fa1a7341fc595a533 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
/*************************************************
* Default Character Set Handling Source File *
* (C) 1999-2007 The Botan Project *
*************************************************/
#include <botan/def_char.h>
#include <botan/exceptn.h>
#include <botan/parsing.h>
namespace Botan {
namespace {
/*************************************************
* Convert from UCS-2 to ISO 8859-1 *
*************************************************/
std::string ucs2_to_latin1(const std::string& ucs2)
{
if(ucs2.size() % 2 == 1)
throw Decoding_Error("UCS-2 string has an odd number of bytes");
std::string latin1;
for(u32bit j = 0; j != ucs2.size(); j += 2)
{
const byte c1 = ucs2[j];
const byte c2 = ucs2[j+1];
if(c1 != 0)
throw Decoding_Error("UCS-2 has non-Latin1 characters");
latin1 += (char)c2;
}
return latin1;
}
/*************************************************
* Convert from UTF-8 to ISO 8859-1 *
*************************************************/
std::string utf8_to_latin1(const std::string& utf8)
{
std::string iso8859;
u32bit position = 0;
while(position != utf8.size())
{
const byte c1 = (byte)utf8[position++];
if(c1 <= 0x7F)
iso8859 += (char)c1;
else if(c1 >= 0xC0 && c1 <= 0xC7)
{
if(position == utf8.size())
throw Decoding_Error("UTF-8: sequence truncated");
const byte c2 = (byte)utf8[position++];
const byte iso_char = ((c1 & 0x07) << 6) | (c2 & 0x3F);
if(iso_char <= 0x7F)
throw Decoding_Error("UTF-8: sequence longer than needed");
iso8859 += (char)iso_char;
}
else
throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used");
}
return iso8859;
}
/*************************************************
* Convert from ISO 8859-1 to UTF-8 *
*************************************************/
std::string latin1_to_utf8(const std::string& iso8859)
{
std::string utf8;
for(u32bit j = 0; j != iso8859.size(); ++j)
{
const byte c = (byte)iso8859[j];
if(c <= 0x7F)
utf8 += (char)c;
else
{
utf8 += (char)(0xC0 | (c >> 6));
utf8 += (char)(0x80 | (c & 0x3F));
}
}
return utf8;
}
}
/*************************************************
* Transcode between character sets *
*************************************************/
std::string Default_Charset_Transcoder::transcode(const std::string& str,
Character_Set to,
Character_Set from) const
{
if(to == LOCAL_CHARSET)
to = LATIN1_CHARSET;
if(from == LOCAL_CHARSET)
from = LATIN1_CHARSET;
if(to == from)
return str;
if(from == LATIN1_CHARSET && to == UTF8_CHARSET)
return latin1_to_utf8(str);
if(from == UTF8_CHARSET && to == LATIN1_CHARSET)
return utf8_to_latin1(str);
if(from == UCS2_CHARSET && to == LATIN1_CHARSET)
return ucs2_to_latin1(str);
throw Invalid_Argument("Unknown transcoding operation from " +
to_string(from) + " to " + to_string(to));
}
}
|