1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
|
/*
* Runtime CPU detection for ARM
* (C) 2009,2010,2013,2017 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
#include <botan/cpuid.h>
#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
#include <sys/auxv.h>
#elif defined(BOTAN_TARGET_OS_IS_IOS)
#include <sys/types.h>
#include <sys/sysctl.h>
#else
#include <botan/internal/os_utils.h>
#endif
#endif
namespace Botan {
#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
#if defined(BOTAN_TARGET_OS_IS_IOS)
namespace {

/*
* Translate an iOS "hw.machine" identifier into a set of CPUID feature bits.
*
* The identifier is expected to have the form <product><major>,<minor>,
* for example "iPhone6,1". Only the product name and the major version
* in front of the comma are examined.
*
* @param machine the machine identifier string
* @return the CPUID_ARM_*_BIT flags assumed for that device, or 0 if the
*         string has no comma, has no (non-zero) major version, or names a
*         product that is not in the table below
*/
uint64_t flags_by_ios_machine_type(const std::string& machine)
   {
   /*
   * This relies on a map of known machine names to features. This
   * will quickly grow out of date as new products are introduced, but
   * is apparently the best we can do for iOS.
   */
   struct version_info {
      // Plain literal rather than std::string, so the table needs no
      // dynamic initialization
      const char* name;
      size_t min_version_neon;
      size_t min_version_armv8;
   };

   static const version_info min_versions[] = {
      { "iPhone", 2, 6 },
      { "iPad", 1, 4 },
      { "iPod", 4, 7 },
      { "AppleTV", 2, 5 },
   };

   if(machine.size() < 3)
      return 0;

   const auto comma = machine.find(',');

   // Simulator, or something we don't know about
   if(comma == std::string::npos)
      return 0;

   std::string product = machine.substr(0, comma);

   /*
   * Explicit range check instead of ::isdigit: passing a plain char with a
   * negative value to isdigit is undefined behavior, and the result should
   * not depend on the current locale anyway.
   */
   const auto is_ascii_digit = [](char c) -> bool { return c >= '0' && c <= '9'; };

   // Strip the trailing decimal digits off the product name, accumulating
   // them (least significant digit first) into the major version
   size_t version = 0;
   size_t place = 1;

   while(product.size() > 1 && is_ascii_digit(product.back()))
      {
      const size_t digit = static_cast<size_t>(product.back() - '0');
      version += digit * place;
      place *= 10;
      product.pop_back();
      }

   // No digits before the comma (or a major version of zero)
   if(version == 0)
      return 0;

   for(const version_info& info : min_versions)
      {
      if(info.name != product)
         continue;

      if(version >= info.min_version_armv8)
         {
         return CPUID::CPUID_ARM_AES_BIT |
                CPUID::CPUID_ARM_PMULL_BIT |
                CPUID::CPUID_ARM_SHA1_BIT |
                CPUID::CPUID_ARM_SHA2_BIT |
                CPUID::CPUID_ARM_NEON_BIT;
         }

      if(version >= info.min_version_neon)
         return CPUID::CPUID_ARM_NEON_BIT;

      // Known product, but older than the first model listed with NEON.
      // Product names in the table are unique, so there is nothing
      // further to search for.
      return 0;
      }

   // Some other product we don't know about
   return 0;
   }

}
#endif
/*
* Detect the ARM CPU features available at runtime.
*
* Which detection strategy is compiled in depends on the build
* configuration macros:
*  - BOTAN_TARGET_OS_HAS_GETAUXVAL: query the hardware capability words
*    through getauxval()
*  - BOTAN_TARGET_OS_IS_IOS: map the "hw.machine" sysctl string to a
*    feature set
*  - GCC-style inline asm on AArch64: execute probe instructions
* In any other configuration no features are reported.
*
* @param cache_line_size written only by the getauxval() path, and only
*        if AT_DCACHEBSIZE is defined and reports 32, 64 or 128; left
*        untouched otherwise. May be null.
* @return bitwise OR of the detected CPUID::CPUID_ARM_*_BIT flags
*/
uint64_t CPUID::detect_cpu_features(size_t* cache_line_size)
   {
   // Several of the configurations below never look at the out-parameter
   static_cast<void>(cache_line_size);

   uint64_t detected_features = 0;

#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
   /*
   * On systems with getauxval these bits should normally be defined
   * in bits/auxv.h but some buggy? glibc installs seem to miss them.
   * The following values are all fixed for the Linux ELF format,
   * so we just hardcode them in the ARM_hwcap_bit enum.
   */

   enum ARM_hwcap_bit {
#if defined(BOTAN_TARGET_ARCH_IS_ARM32)
      NEON_bit  = (1 << 12),
      AES_bit   = (1 << 0),
      PMULL_bit = (1 << 1),
      SHA1_bit  = (1 << 2),
      SHA2_bit  = (1 << 3),

      ARCH_hwcap_neon   = 16, // AT_HWCAP
      ARCH_hwcap_crypto = 26, // AT_HWCAP2
#elif defined(BOTAN_TARGET_ARCH_IS_ARM64)
      NEON_bit  = (1 << 1),
      AES_bit   = (1 << 3),
      PMULL_bit = (1 << 4),
      SHA1_bit  = (1 << 5),
      SHA2_bit  = (1 << 6),

      ARCH_hwcap_neon   = 16, // AT_HWCAP
      ARCH_hwcap_crypto = 16, // AT_HWCAP
#endif
   };

#if defined(AT_DCACHEBSIZE)
   const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE);

   // plausibility check; also tolerate callers that pass no output location
   if(cache_line_size != nullptr &&
      (dcache_line == 32 || dcache_line == 64 || dcache_line == 128))
      {
      *cache_line_size = static_cast<size_t>(dcache_line);
      }
#endif

   const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon);
   if(hwcap_neon & ARM_hwcap_bit::NEON_bit)
      detected_features |= CPUID::CPUID_ARM_NEON_BIT;

   /*
   On aarch64 this ends up calling getauxval twice with AT_HWCAP.
   It doesn't seem worth optimizing this out, since getauxval just
   looks up an entry in the auxiliary vector the kernel handed to
   the process.
   */
   const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto);
   if(hwcap_crypto & ARM_hwcap_bit::AES_bit)
      detected_features |= CPUID::CPUID_ARM_AES_BIT;
   if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit)
      detected_features |= CPUID::CPUID_ARM_PMULL_BIT;
   if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit)
      detected_features |= CPUID::CPUID_ARM_SHA1_BIT;
   if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit)
      detected_features |= CPUID::CPUID_ARM_SHA2_BIT;

#elif defined(BOTAN_TARGET_OS_IS_IOS)

   // Zero-filled, and one byte is held back from sysctl, so the buffer
   // is always NUL terminated
   char machine[64] = { 0 };
   size_t size = sizeof(machine) - 1;

   // Only interpret the buffer if the query succeeded; on failure no
   // features are reported
   if(::sysctlbyname("hw.machine", machine, &size, nullptr, 0) == 0)
      detected_features = flags_by_ios_machine_type(machine);

#elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_ARM64)

   /*
   No getauxval API available, fall back on probe functions. We only
   bother with Aarch64 here to simplify the code and because going to
   extreme contortions to detect NEON on devices that probably don't
   support it doesn't seem worthwhile.

   NEON registers v0-v7 are caller saved in Aarch64
   */

   // The crypto probes are given as raw instruction words - presumably so
   // that the assembler need not have the crypto extensions enabled
   auto neon_probe  = []() -> int { asm("and v0.16b, v0.16b, v0.16b"); return 1; };
   auto aes_probe   = []() -> int { asm(".word 0x4e284800"); return 1; };
   auto pmull_probe = []() -> int { asm(".word 0x0ee0e000"); return 1; };
   auto sha1_probe  = []() -> int { asm(".word 0x5e280800"); return 1; };
   auto sha2_probe  = []() -> int { asm(".word 0x5e282800"); return 1; };

   // Only bother running the crypto detection if we found NEON.
   // A probe counts as successful when the value its lambda returns (1)
   // is what OS::run_cpu_instruction_probe hands back.
   if(OS::run_cpu_instruction_probe(neon_probe) == 1)
      {
      detected_features |= CPUID::CPUID_ARM_NEON_BIT;

      if(OS::run_cpu_instruction_probe(aes_probe) == 1)
         detected_features |= CPUID::CPUID_ARM_AES_BIT;
      if(OS::run_cpu_instruction_probe(pmull_probe) == 1)
         detected_features |= CPUID::CPUID_ARM_PMULL_BIT;
      if(OS::run_cpu_instruction_probe(sha1_probe) == 1)
         detected_features |= CPUID::CPUID_ARM_SHA1_BIT;
      if(OS::run_cpu_instruction_probe(sha2_probe) == 1)
         detected_features |= CPUID::CPUID_ARM_SHA2_BIT;
      }

#endif

   return detected_features;
   }
#endif
}
|