Annotation of /trunk/kernel-alx/patches-4.9/0194-4.9.95-all-fixes.patch
Parent Directory | Revision Log
Revision 3171 -
(hide annotations)
(download)
Wed Aug 8 14:17:26 2018 UTC (6 years, 1 month ago) by niro
File size: 186555 byte(s)
-linux-4.9.95
1 | niro | 3171 | diff --git a/Makefile b/Makefile |
2 | index 02188cf8e9af..1aeec9df709d 100644 | ||
3 | --- a/Makefile | ||
4 | +++ b/Makefile | ||
5 | @@ -1,6 +1,6 @@ | ||
6 | VERSION = 4 | ||
7 | PATCHLEVEL = 9 | ||
8 | -SUBLEVEL = 94 | ||
9 | +SUBLEVEL = 95 | ||
10 | EXTRAVERSION = | ||
11 | NAME = Roaring Lionus | ||
12 | |||
13 | diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h | ||
14 | index d5423ab15ed5..9fe1043e72d2 100644 | ||
15 | --- a/arch/arm/include/asm/kvm_host.h | ||
16 | +++ b/arch/arm/include/asm/kvm_host.h | ||
17 | @@ -318,4 +318,10 @@ static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, | ||
18 | return -ENXIO; | ||
19 | } | ||
20 | |||
21 | +static inline bool kvm_arm_harden_branch_predictor(void) | ||
22 | +{ | ||
23 | + /* No way to detect it yet, pretend it is not there. */ | ||
24 | + return false; | ||
25 | +} | ||
26 | + | ||
27 | #endif /* __ARM_KVM_HOST_H__ */ | ||
28 | diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h | ||
29 | index a58bbaa3ec60..d10e36235438 100644 | ||
30 | --- a/arch/arm/include/asm/kvm_mmu.h | ||
31 | +++ b/arch/arm/include/asm/kvm_mmu.h | ||
32 | @@ -223,6 +223,16 @@ static inline unsigned int kvm_get_vmid_bits(void) | ||
33 | return 8; | ||
34 | } | ||
35 | |||
36 | +static inline void *kvm_get_hyp_vector(void) | ||
37 | +{ | ||
38 | + return kvm_ksym_ref(__kvm_hyp_vector); | ||
39 | +} | ||
40 | + | ||
41 | +static inline int kvm_map_vectors(void) | ||
42 | +{ | ||
43 | + return 0; | ||
44 | +} | ||
45 | + | ||
46 | #endif /* !__ASSEMBLY__ */ | ||
47 | |||
48 | #endif /* __ARM_KVM_MMU_H__ */ | ||
49 | diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h | ||
50 | deleted file mode 100644 | ||
51 | index 6bda945d31fa..000000000000 | ||
52 | --- a/arch/arm/include/asm/kvm_psci.h | ||
53 | +++ /dev/null | ||
54 | @@ -1,27 +0,0 @@ | ||
55 | -/* | ||
56 | - * Copyright (C) 2012 - ARM Ltd | ||
57 | - * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
58 | - * | ||
59 | - * This program is free software; you can redistribute it and/or modify | ||
60 | - * it under the terms of the GNU General Public License version 2 as | ||
61 | - * published by the Free Software Foundation. | ||
62 | - * | ||
63 | - * This program is distributed in the hope that it will be useful, | ||
64 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
65 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
66 | - * GNU General Public License for more details. | ||
67 | - * | ||
68 | - * You should have received a copy of the GNU General Public License | ||
69 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
70 | - */ | ||
71 | - | ||
72 | -#ifndef __ARM_KVM_PSCI_H__ | ||
73 | -#define __ARM_KVM_PSCI_H__ | ||
74 | - | ||
75 | -#define KVM_ARM_PSCI_0_1 1 | ||
76 | -#define KVM_ARM_PSCI_0_2 2 | ||
77 | - | ||
78 | -int kvm_psci_version(struct kvm_vcpu *vcpu); | ||
79 | -int kvm_psci_call(struct kvm_vcpu *vcpu); | ||
80 | - | ||
81 | -#endif /* __ARM_KVM_PSCI_H__ */ | ||
82 | diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c | ||
83 | index c38bfbeec306..ef6595c7d697 100644 | ||
84 | --- a/arch/arm/kvm/arm.c | ||
85 | +++ b/arch/arm/kvm/arm.c | ||
86 | @@ -29,6 +29,7 @@ | ||
87 | #include <linux/kvm.h> | ||
88 | #include <trace/events/kvm.h> | ||
89 | #include <kvm/arm_pmu.h> | ||
90 | +#include <kvm/arm_psci.h> | ||
91 | |||
92 | #define CREATE_TRACE_POINTS | ||
93 | #include "trace.h" | ||
94 | @@ -44,7 +45,6 @@ | ||
95 | #include <asm/kvm_mmu.h> | ||
96 | #include <asm/kvm_emulate.h> | ||
97 | #include <asm/kvm_coproc.h> | ||
98 | -#include <asm/kvm_psci.h> | ||
99 | #include <asm/sections.h> | ||
100 | |||
101 | #ifdef REQUIRES_VIRT | ||
102 | @@ -1088,7 +1088,7 @@ static void cpu_init_hyp_mode(void *dummy) | ||
103 | pgd_ptr = kvm_mmu_get_httbr(); | ||
104 | stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); | ||
105 | hyp_stack_ptr = stack_page + PAGE_SIZE; | ||
106 | - vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector); | ||
107 | + vector_ptr = (unsigned long)kvm_get_hyp_vector(); | ||
108 | |||
109 | __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); | ||
110 | __cpu_init_stage2(); | ||
111 | @@ -1345,6 +1345,13 @@ static int init_hyp_mode(void) | ||
112 | goto out_err; | ||
113 | } | ||
114 | |||
115 | + | ||
116 | + err = kvm_map_vectors(); | ||
117 | + if (err) { | ||
118 | + kvm_err("Cannot map vectors\n"); | ||
119 | + goto out_err; | ||
120 | + } | ||
121 | + | ||
122 | /* | ||
123 | * Map the Hyp stack pages | ||
124 | */ | ||
125 | diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c | ||
126 | index 4e57ebca6e69..de1aedce2a8b 100644 | ||
127 | --- a/arch/arm/kvm/handle_exit.c | ||
128 | +++ b/arch/arm/kvm/handle_exit.c | ||
129 | @@ -21,7 +21,7 @@ | ||
130 | #include <asm/kvm_emulate.h> | ||
131 | #include <asm/kvm_coproc.h> | ||
132 | #include <asm/kvm_mmu.h> | ||
133 | -#include <asm/kvm_psci.h> | ||
134 | +#include <kvm/arm_psci.h> | ||
135 | #include <trace/events/kvm.h> | ||
136 | |||
137 | #include "trace.h" | ||
138 | @@ -36,7 +36,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) | ||
139 | kvm_vcpu_hvc_get_imm(vcpu)); | ||
140 | vcpu->stat.hvc_exit_stat++; | ||
141 | |||
142 | - ret = kvm_psci_call(vcpu); | ||
143 | + ret = kvm_hvc_call_handler(vcpu); | ||
144 | if (ret < 0) { | ||
145 | vcpu_set_reg(vcpu, 0, ~0UL); | ||
146 | return 1; | ||
147 | diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c | ||
148 | index a08d7a93aebb..3d962257c166 100644 | ||
149 | --- a/arch/arm/kvm/psci.c | ||
150 | +++ b/arch/arm/kvm/psci.c | ||
151 | @@ -15,16 +15,16 @@ | ||
152 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
153 | */ | ||
154 | |||
155 | +#include <linux/arm-smccc.h> | ||
156 | #include <linux/preempt.h> | ||
157 | #include <linux/kvm_host.h> | ||
158 | #include <linux/wait.h> | ||
159 | |||
160 | #include <asm/cputype.h> | ||
161 | #include <asm/kvm_emulate.h> | ||
162 | -#include <asm/kvm_psci.h> | ||
163 | #include <asm/kvm_host.h> | ||
164 | |||
165 | -#include <uapi/linux/psci.h> | ||
166 | +#include <kvm/arm_psci.h> | ||
167 | |||
168 | /* | ||
169 | * This is an implementation of the Power State Coordination Interface | ||
170 | @@ -33,6 +33,38 @@ | ||
171 | |||
172 | #define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) | ||
173 | |||
174 | +static u32 smccc_get_function(struct kvm_vcpu *vcpu) | ||
175 | +{ | ||
176 | + return vcpu_get_reg(vcpu, 0); | ||
177 | +} | ||
178 | + | ||
179 | +static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu) | ||
180 | +{ | ||
181 | + return vcpu_get_reg(vcpu, 1); | ||
182 | +} | ||
183 | + | ||
184 | +static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu) | ||
185 | +{ | ||
186 | + return vcpu_get_reg(vcpu, 2); | ||
187 | +} | ||
188 | + | ||
189 | +static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu) | ||
190 | +{ | ||
191 | + return vcpu_get_reg(vcpu, 3); | ||
192 | +} | ||
193 | + | ||
194 | +static void smccc_set_retval(struct kvm_vcpu *vcpu, | ||
195 | + unsigned long a0, | ||
196 | + unsigned long a1, | ||
197 | + unsigned long a2, | ||
198 | + unsigned long a3) | ||
199 | +{ | ||
200 | + vcpu_set_reg(vcpu, 0, a0); | ||
201 | + vcpu_set_reg(vcpu, 1, a1); | ||
202 | + vcpu_set_reg(vcpu, 2, a2); | ||
203 | + vcpu_set_reg(vcpu, 3, a3); | ||
204 | +} | ||
205 | + | ||
206 | static unsigned long psci_affinity_mask(unsigned long affinity_level) | ||
207 | { | ||
208 | if (affinity_level <= 3) | ||
209 | @@ -75,7 +107,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) | ||
210 | unsigned long context_id; | ||
211 | phys_addr_t target_pc; | ||
212 | |||
213 | - cpu_id = vcpu_get_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK; | ||
214 | + cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; | ||
215 | if (vcpu_mode_is_32bit(source_vcpu)) | ||
216 | cpu_id &= ~((u32) 0); | ||
217 | |||
218 | @@ -88,14 +120,14 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) | ||
219 | if (!vcpu) | ||
220 | return PSCI_RET_INVALID_PARAMS; | ||
221 | if (!vcpu->arch.power_off) { | ||
222 | - if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) | ||
223 | + if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) | ||
224 | return PSCI_RET_ALREADY_ON; | ||
225 | else | ||
226 | return PSCI_RET_INVALID_PARAMS; | ||
227 | } | ||
228 | |||
229 | - target_pc = vcpu_get_reg(source_vcpu, 2); | ||
230 | - context_id = vcpu_get_reg(source_vcpu, 3); | ||
231 | + target_pc = smccc_get_arg2(source_vcpu); | ||
232 | + context_id = smccc_get_arg3(source_vcpu); | ||
233 | |||
234 | kvm_reset_vcpu(vcpu); | ||
235 | |||
236 | @@ -114,7 +146,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) | ||
237 | * NOTE: We always update r0 (or x0) because for PSCI v0.1 | ||
238 | * the general puspose registers are undefined upon CPU_ON. | ||
239 | */ | ||
240 | - vcpu_set_reg(vcpu, 0, context_id); | ||
241 | + smccc_set_retval(vcpu, context_id, 0, 0, 0); | ||
242 | vcpu->arch.power_off = false; | ||
243 | smp_mb(); /* Make sure the above is visible */ | ||
244 | |||
245 | @@ -134,8 +166,8 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) | ||
246 | struct kvm *kvm = vcpu->kvm; | ||
247 | struct kvm_vcpu *tmp; | ||
248 | |||
249 | - target_affinity = vcpu_get_reg(vcpu, 1); | ||
250 | - lowest_affinity_level = vcpu_get_reg(vcpu, 2); | ||
251 | + target_affinity = smccc_get_arg1(vcpu); | ||
252 | + lowest_affinity_level = smccc_get_arg2(vcpu); | ||
253 | |||
254 | /* Determine target affinity mask */ | ||
255 | target_affinity_mask = psci_affinity_mask(lowest_affinity_level); | ||
256 | @@ -198,18 +230,10 @@ static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) | ||
257 | kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); | ||
258 | } | ||
259 | |||
260 | -int kvm_psci_version(struct kvm_vcpu *vcpu) | ||
261 | -{ | ||
262 | - if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) | ||
263 | - return KVM_ARM_PSCI_0_2; | ||
264 | - | ||
265 | - return KVM_ARM_PSCI_0_1; | ||
266 | -} | ||
267 | - | ||
268 | static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) | ||
269 | { | ||
270 | struct kvm *kvm = vcpu->kvm; | ||
271 | - unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); | ||
272 | + unsigned long psci_fn = smccc_get_function(vcpu); | ||
273 | unsigned long val; | ||
274 | int ret = 1; | ||
275 | |||
276 | @@ -219,7 +243,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) | ||
277 | * Bits[31:16] = Major Version = 0 | ||
278 | * Bits[15:0] = Minor Version = 2 | ||
279 | */ | ||
280 | - val = 2; | ||
281 | + val = KVM_ARM_PSCI_0_2; | ||
282 | break; | ||
283 | case PSCI_0_2_FN_CPU_SUSPEND: | ||
284 | case PSCI_0_2_FN64_CPU_SUSPEND: | ||
285 | @@ -276,14 +300,56 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) | ||
286 | break; | ||
287 | } | ||
288 | |||
289 | - vcpu_set_reg(vcpu, 0, val); | ||
290 | + smccc_set_retval(vcpu, val, 0, 0, 0); | ||
291 | + return ret; | ||
292 | +} | ||
293 | + | ||
294 | +static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) | ||
295 | +{ | ||
296 | + u32 psci_fn = smccc_get_function(vcpu); | ||
297 | + u32 feature; | ||
298 | + unsigned long val; | ||
299 | + int ret = 1; | ||
300 | + | ||
301 | + switch(psci_fn) { | ||
302 | + case PSCI_0_2_FN_PSCI_VERSION: | ||
303 | + val = KVM_ARM_PSCI_1_0; | ||
304 | + break; | ||
305 | + case PSCI_1_0_FN_PSCI_FEATURES: | ||
306 | + feature = smccc_get_arg1(vcpu); | ||
307 | + switch(feature) { | ||
308 | + case PSCI_0_2_FN_PSCI_VERSION: | ||
309 | + case PSCI_0_2_FN_CPU_SUSPEND: | ||
310 | + case PSCI_0_2_FN64_CPU_SUSPEND: | ||
311 | + case PSCI_0_2_FN_CPU_OFF: | ||
312 | + case PSCI_0_2_FN_CPU_ON: | ||
313 | + case PSCI_0_2_FN64_CPU_ON: | ||
314 | + case PSCI_0_2_FN_AFFINITY_INFO: | ||
315 | + case PSCI_0_2_FN64_AFFINITY_INFO: | ||
316 | + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: | ||
317 | + case PSCI_0_2_FN_SYSTEM_OFF: | ||
318 | + case PSCI_0_2_FN_SYSTEM_RESET: | ||
319 | + case PSCI_1_0_FN_PSCI_FEATURES: | ||
320 | + case ARM_SMCCC_VERSION_FUNC_ID: | ||
321 | + val = 0; | ||
322 | + break; | ||
323 | + default: | ||
324 | + val = PSCI_RET_NOT_SUPPORTED; | ||
325 | + break; | ||
326 | + } | ||
327 | + break; | ||
328 | + default: | ||
329 | + return kvm_psci_0_2_call(vcpu); | ||
330 | + } | ||
331 | + | ||
332 | + smccc_set_retval(vcpu, val, 0, 0, 0); | ||
333 | return ret; | ||
334 | } | ||
335 | |||
336 | static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) | ||
337 | { | ||
338 | struct kvm *kvm = vcpu->kvm; | ||
339 | - unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); | ||
340 | + unsigned long psci_fn = smccc_get_function(vcpu); | ||
341 | unsigned long val; | ||
342 | |||
343 | switch (psci_fn) { | ||
344 | @@ -301,7 +367,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) | ||
345 | break; | ||
346 | } | ||
347 | |||
348 | - vcpu_set_reg(vcpu, 0, val); | ||
349 | + smccc_set_retval(vcpu, val, 0, 0, 0); | ||
350 | return 1; | ||
351 | } | ||
352 | |||
353 | @@ -319,9 +385,11 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) | ||
354 | * Errors: | ||
355 | * -EINVAL: Unrecognized PSCI function | ||
356 | */ | ||
357 | -int kvm_psci_call(struct kvm_vcpu *vcpu) | ||
358 | +static int kvm_psci_call(struct kvm_vcpu *vcpu) | ||
359 | { | ||
360 | - switch (kvm_psci_version(vcpu)) { | ||
361 | + switch (kvm_psci_version(vcpu, vcpu->kvm)) { | ||
362 | + case KVM_ARM_PSCI_1_0: | ||
363 | + return kvm_psci_1_0_call(vcpu); | ||
364 | case KVM_ARM_PSCI_0_2: | ||
365 | return kvm_psci_0_2_call(vcpu); | ||
366 | case KVM_ARM_PSCI_0_1: | ||
367 | @@ -330,3 +398,30 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) | ||
368 | return -EINVAL; | ||
369 | }; | ||
370 | } | ||
371 | + | ||
372 | +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) | ||
373 | +{ | ||
374 | + u32 func_id = smccc_get_function(vcpu); | ||
375 | + u32 val = PSCI_RET_NOT_SUPPORTED; | ||
376 | + u32 feature; | ||
377 | + | ||
378 | + switch (func_id) { | ||
379 | + case ARM_SMCCC_VERSION_FUNC_ID: | ||
380 | + val = ARM_SMCCC_VERSION_1_1; | ||
381 | + break; | ||
382 | + case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: | ||
383 | + feature = smccc_get_arg1(vcpu); | ||
384 | + switch(feature) { | ||
385 | + case ARM_SMCCC_ARCH_WORKAROUND_1: | ||
386 | + if (kvm_arm_harden_branch_predictor()) | ||
387 | + val = 0; | ||
388 | + break; | ||
389 | + } | ||
390 | + break; | ||
391 | + default: | ||
392 | + return kvm_psci_call(vcpu); | ||
393 | + } | ||
394 | + | ||
395 | + smccc_set_retval(vcpu, val, 0, 0, 0); | ||
396 | + return 1; | ||
397 | +} | ||
398 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig | ||
399 | index c8471cf46cbb..90e58bbbd858 100644 | ||
400 | --- a/arch/arm64/Kconfig | ||
401 | +++ b/arch/arm64/Kconfig | ||
402 | @@ -745,6 +745,23 @@ config UNMAP_KERNEL_AT_EL0 | ||
403 | |||
404 | If unsure, say Y. | ||
405 | |||
406 | +config HARDEN_BRANCH_PREDICTOR | ||
407 | + bool "Harden the branch predictor against aliasing attacks" if EXPERT | ||
408 | + default y | ||
409 | + help | ||
410 | + Speculation attacks against some high-performance processors rely on | ||
411 | + being able to manipulate the branch predictor for a victim context by | ||
412 | + executing aliasing branches in the attacker context. Such attacks | ||
413 | + can be partially mitigated against by clearing internal branch | ||
414 | + predictor state and limiting the prediction logic in some situations. | ||
415 | + | ||
416 | + This config option will take CPU-specific actions to harden the | ||
417 | + branch predictor against aliasing attacks and may rely on specific | ||
418 | + instruction sequences or control bits being set by the system | ||
419 | + firmware. | ||
420 | + | ||
421 | + If unsure, say Y. | ||
422 | + | ||
423 | menuconfig ARMV8_DEPRECATED | ||
424 | bool "Emulate deprecated/obsolete ARMv8 instructions" | ||
425 | depends on COMPAT | ||
426 | diff --git a/arch/arm64/crypto/sha256-core.S b/arch/arm64/crypto/sha256-core.S | ||
427 | new file mode 100644 | ||
428 | index 000000000000..3ce82cc860bc | ||
429 | --- /dev/null | ||
430 | +++ b/arch/arm64/crypto/sha256-core.S | ||
431 | @@ -0,0 +1,2061 @@ | ||
432 | +// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. | ||
433 | +// | ||
434 | +// Licensed under the OpenSSL license (the "License"). You may not use | ||
435 | +// this file except in compliance with the License. You can obtain a copy | ||
436 | +// in the file LICENSE in the source distribution or at | ||
437 | +// https://www.openssl.org/source/license.html | ||
438 | + | ||
439 | +// ==================================================================== | ||
440 | +// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
441 | +// project. The module is, however, dual licensed under OpenSSL and | ||
442 | +// CRYPTOGAMS licenses depending on where you obtain it. For further | ||
443 | +// details see http://www.openssl.org/~appro/cryptogams/. | ||
444 | +// | ||
445 | +// Permission to use under GPLv2 terms is granted. | ||
446 | +// ==================================================================== | ||
447 | +// | ||
448 | +// SHA256/512 for ARMv8. | ||
449 | +// | ||
450 | +// Performance in cycles per processed byte and improvement coefficient | ||
451 | +// over code generated with "default" compiler: | ||
452 | +// | ||
453 | +// SHA256-hw SHA256(*) SHA512 | ||
454 | +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) | ||
455 | +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) | ||
456 | +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) | ||
457 | +// Denver 2.01 10.5 (+26%) 6.70 (+8%) | ||
458 | +// X-Gene 20.0 (+100%) 12.8 (+300%(***)) | ||
459 | +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) | ||
460 | +// | ||
461 | +// (*) Software SHA256 results are of lesser relevance, presented | ||
462 | +// mostly for informational purposes. | ||
463 | +// (**) The result is a trade-off: it's possible to improve it by | ||
464 | +// 10% (or by 1 cycle per round), but at the cost of 20% loss | ||
465 | +// on Cortex-A53 (or by 4 cycles per round). | ||
466 | +// (***) Super-impressive coefficients over gcc-generated code are | ||
467 | +// indication of some compiler "pathology", most notably code | ||
468 | +// generated with -mgeneral-regs-only is significantly faster | ||
469 | +// and the gap is only 40-90%. | ||
470 | +// | ||
471 | +// October 2016. | ||
472 | +// | ||
473 | +// Originally it was reckoned that it makes no sense to implement NEON | ||
474 | +// version of SHA256 for 64-bit processors. This is because performance | ||
475 | +// improvement on most wide-spread Cortex-A5x processors was observed | ||
476 | +// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was | ||
477 | +// observed that 32-bit NEON SHA256 performs significantly better than | ||
478 | +// 64-bit scalar version on *some* of the more recent processors. As a | ||
479 | +// result 64-bit NEON version of SHA256 was added to provide best | ||
480 | +// all-round performance. For example it executes ~30% faster on X-Gene | ||
481 | +// and Mongoose. [For reference, NEON version of SHA512 is bound to | ||
482 | +// deliver much less improvement, likely *negative* on Cortex-A5x. | ||
483 | +// Which is why NEON support is limited to SHA256.] | ||
484 | + | ||
485 | +#ifndef __KERNEL__ | ||
486 | +# include "arm_arch.h" | ||
487 | +#endif | ||
488 | + | ||
489 | +.text | ||
490 | + | ||
491 | +.extern OPENSSL_armcap_P | ||
492 | +.globl sha256_block_data_order | ||
493 | +.type sha256_block_data_order,%function | ||
494 | +.align 6 | ||
495 | +sha256_block_data_order: | ||
496 | +#ifndef __KERNEL__ | ||
497 | +# ifdef __ILP32__ | ||
498 | + ldrsw x16,.LOPENSSL_armcap_P | ||
499 | +# else | ||
500 | + ldr x16,.LOPENSSL_armcap_P | ||
501 | +# endif | ||
502 | + adr x17,.LOPENSSL_armcap_P | ||
503 | + add x16,x16,x17 | ||
504 | + ldr w16,[x16] | ||
505 | + tst w16,#ARMV8_SHA256 | ||
506 | + b.ne .Lv8_entry | ||
507 | + tst w16,#ARMV7_NEON | ||
508 | + b.ne .Lneon_entry | ||
509 | +#endif | ||
510 | + stp x29,x30,[sp,#-128]! | ||
511 | + add x29,sp,#0 | ||
512 | + | ||
513 | + stp x19,x20,[sp,#16] | ||
514 | + stp x21,x22,[sp,#32] | ||
515 | + stp x23,x24,[sp,#48] | ||
516 | + stp x25,x26,[sp,#64] | ||
517 | + stp x27,x28,[sp,#80] | ||
518 | + sub sp,sp,#4*4 | ||
519 | + | ||
520 | + ldp w20,w21,[x0] // load context | ||
521 | + ldp w22,w23,[x0,#2*4] | ||
522 | + ldp w24,w25,[x0,#4*4] | ||
523 | + add x2,x1,x2,lsl#6 // end of input | ||
524 | + ldp w26,w27,[x0,#6*4] | ||
525 | + adr x30,.LK256 | ||
526 | + stp x0,x2,[x29,#96] | ||
527 | + | ||
528 | +.Loop: | ||
529 | + ldp w3,w4,[x1],#2*4 | ||
530 | + ldr w19,[x30],#4 // *K++ | ||
531 | + eor w28,w21,w22 // magic seed | ||
532 | + str x1,[x29,#112] | ||
533 | +#ifndef __AARCH64EB__ | ||
534 | + rev w3,w3 // 0 | ||
535 | +#endif | ||
536 | + ror w16,w24,#6 | ||
537 | + add w27,w27,w19 // h+=K[i] | ||
538 | + eor w6,w24,w24,ror#14 | ||
539 | + and w17,w25,w24 | ||
540 | + bic w19,w26,w24 | ||
541 | + add w27,w27,w3 // h+=X[i] | ||
542 | + orr w17,w17,w19 // Ch(e,f,g) | ||
543 | + eor w19,w20,w21 // a^b, b^c in next round | ||
544 | + eor w16,w16,w6,ror#11 // Sigma1(e) | ||
545 | + ror w6,w20,#2 | ||
546 | + add w27,w27,w17 // h+=Ch(e,f,g) | ||
547 | + eor w17,w20,w20,ror#9 | ||
548 | + add w27,w27,w16 // h+=Sigma1(e) | ||
549 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
550 | + add w23,w23,w27 // d+=h | ||
551 | + eor w28,w28,w21 // Maj(a,b,c) | ||
552 | + eor w17,w6,w17,ror#13 // Sigma0(a) | ||
553 | + add w27,w27,w28 // h+=Maj(a,b,c) | ||
554 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
555 | + //add w27,w27,w17 // h+=Sigma0(a) | ||
556 | +#ifndef __AARCH64EB__ | ||
557 | + rev w4,w4 // 1 | ||
558 | +#endif | ||
559 | + ldp w5,w6,[x1],#2*4 | ||
560 | + add w27,w27,w17 // h+=Sigma0(a) | ||
561 | + ror w16,w23,#6 | ||
562 | + add w26,w26,w28 // h+=K[i] | ||
563 | + eor w7,w23,w23,ror#14 | ||
564 | + and w17,w24,w23 | ||
565 | + bic w28,w25,w23 | ||
566 | + add w26,w26,w4 // h+=X[i] | ||
567 | + orr w17,w17,w28 // Ch(e,f,g) | ||
568 | + eor w28,w27,w20 // a^b, b^c in next round | ||
569 | + eor w16,w16,w7,ror#11 // Sigma1(e) | ||
570 | + ror w7,w27,#2 | ||
571 | + add w26,w26,w17 // h+=Ch(e,f,g) | ||
572 | + eor w17,w27,w27,ror#9 | ||
573 | + add w26,w26,w16 // h+=Sigma1(e) | ||
574 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
575 | + add w22,w22,w26 // d+=h | ||
576 | + eor w19,w19,w20 // Maj(a,b,c) | ||
577 | + eor w17,w7,w17,ror#13 // Sigma0(a) | ||
578 | + add w26,w26,w19 // h+=Maj(a,b,c) | ||
579 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
580 | + //add w26,w26,w17 // h+=Sigma0(a) | ||
581 | +#ifndef __AARCH64EB__ | ||
582 | + rev w5,w5 // 2 | ||
583 | +#endif | ||
584 | + add w26,w26,w17 // h+=Sigma0(a) | ||
585 | + ror w16,w22,#6 | ||
586 | + add w25,w25,w19 // h+=K[i] | ||
587 | + eor w8,w22,w22,ror#14 | ||
588 | + and w17,w23,w22 | ||
589 | + bic w19,w24,w22 | ||
590 | + add w25,w25,w5 // h+=X[i] | ||
591 | + orr w17,w17,w19 // Ch(e,f,g) | ||
592 | + eor w19,w26,w27 // a^b, b^c in next round | ||
593 | + eor w16,w16,w8,ror#11 // Sigma1(e) | ||
594 | + ror w8,w26,#2 | ||
595 | + add w25,w25,w17 // h+=Ch(e,f,g) | ||
596 | + eor w17,w26,w26,ror#9 | ||
597 | + add w25,w25,w16 // h+=Sigma1(e) | ||
598 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
599 | + add w21,w21,w25 // d+=h | ||
600 | + eor w28,w28,w27 // Maj(a,b,c) | ||
601 | + eor w17,w8,w17,ror#13 // Sigma0(a) | ||
602 | + add w25,w25,w28 // h+=Maj(a,b,c) | ||
603 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
604 | + //add w25,w25,w17 // h+=Sigma0(a) | ||
605 | +#ifndef __AARCH64EB__ | ||
606 | + rev w6,w6 // 3 | ||
607 | +#endif | ||
608 | + ldp w7,w8,[x1],#2*4 | ||
609 | + add w25,w25,w17 // h+=Sigma0(a) | ||
610 | + ror w16,w21,#6 | ||
611 | + add w24,w24,w28 // h+=K[i] | ||
612 | + eor w9,w21,w21,ror#14 | ||
613 | + and w17,w22,w21 | ||
614 | + bic w28,w23,w21 | ||
615 | + add w24,w24,w6 // h+=X[i] | ||
616 | + orr w17,w17,w28 // Ch(e,f,g) | ||
617 | + eor w28,w25,w26 // a^b, b^c in next round | ||
618 | + eor w16,w16,w9,ror#11 // Sigma1(e) | ||
619 | + ror w9,w25,#2 | ||
620 | + add w24,w24,w17 // h+=Ch(e,f,g) | ||
621 | + eor w17,w25,w25,ror#9 | ||
622 | + add w24,w24,w16 // h+=Sigma1(e) | ||
623 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
624 | + add w20,w20,w24 // d+=h | ||
625 | + eor w19,w19,w26 // Maj(a,b,c) | ||
626 | + eor w17,w9,w17,ror#13 // Sigma0(a) | ||
627 | + add w24,w24,w19 // h+=Maj(a,b,c) | ||
628 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
629 | + //add w24,w24,w17 // h+=Sigma0(a) | ||
630 | +#ifndef __AARCH64EB__ | ||
631 | + rev w7,w7 // 4 | ||
632 | +#endif | ||
633 | + add w24,w24,w17 // h+=Sigma0(a) | ||
634 | + ror w16,w20,#6 | ||
635 | + add w23,w23,w19 // h+=K[i] | ||
636 | + eor w10,w20,w20,ror#14 | ||
637 | + and w17,w21,w20 | ||
638 | + bic w19,w22,w20 | ||
639 | + add w23,w23,w7 // h+=X[i] | ||
640 | + orr w17,w17,w19 // Ch(e,f,g) | ||
641 | + eor w19,w24,w25 // a^b, b^c in next round | ||
642 | + eor w16,w16,w10,ror#11 // Sigma1(e) | ||
643 | + ror w10,w24,#2 | ||
644 | + add w23,w23,w17 // h+=Ch(e,f,g) | ||
645 | + eor w17,w24,w24,ror#9 | ||
646 | + add w23,w23,w16 // h+=Sigma1(e) | ||
647 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
648 | + add w27,w27,w23 // d+=h | ||
649 | + eor w28,w28,w25 // Maj(a,b,c) | ||
650 | + eor w17,w10,w17,ror#13 // Sigma0(a) | ||
651 | + add w23,w23,w28 // h+=Maj(a,b,c) | ||
652 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
653 | + //add w23,w23,w17 // h+=Sigma0(a) | ||
654 | +#ifndef __AARCH64EB__ | ||
655 | + rev w8,w8 // 5 | ||
656 | +#endif | ||
657 | + ldp w9,w10,[x1],#2*4 | ||
658 | + add w23,w23,w17 // h+=Sigma0(a) | ||
659 | + ror w16,w27,#6 | ||
660 | + add w22,w22,w28 // h+=K[i] | ||
661 | + eor w11,w27,w27,ror#14 | ||
662 | + and w17,w20,w27 | ||
663 | + bic w28,w21,w27 | ||
664 | + add w22,w22,w8 // h+=X[i] | ||
665 | + orr w17,w17,w28 // Ch(e,f,g) | ||
666 | + eor w28,w23,w24 // a^b, b^c in next round | ||
667 | + eor w16,w16,w11,ror#11 // Sigma1(e) | ||
668 | + ror w11,w23,#2 | ||
669 | + add w22,w22,w17 // h+=Ch(e,f,g) | ||
670 | + eor w17,w23,w23,ror#9 | ||
671 | + add w22,w22,w16 // h+=Sigma1(e) | ||
672 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
673 | + add w26,w26,w22 // d+=h | ||
674 | + eor w19,w19,w24 // Maj(a,b,c) | ||
675 | + eor w17,w11,w17,ror#13 // Sigma0(a) | ||
676 | + add w22,w22,w19 // h+=Maj(a,b,c) | ||
677 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
678 | + //add w22,w22,w17 // h+=Sigma0(a) | ||
679 | +#ifndef __AARCH64EB__ | ||
680 | + rev w9,w9 // 6 | ||
681 | +#endif | ||
682 | + add w22,w22,w17 // h+=Sigma0(a) | ||
683 | + ror w16,w26,#6 | ||
684 | + add w21,w21,w19 // h+=K[i] | ||
685 | + eor w12,w26,w26,ror#14 | ||
686 | + and w17,w27,w26 | ||
687 | + bic w19,w20,w26 | ||
688 | + add w21,w21,w9 // h+=X[i] | ||
689 | + orr w17,w17,w19 // Ch(e,f,g) | ||
690 | + eor w19,w22,w23 // a^b, b^c in next round | ||
691 | + eor w16,w16,w12,ror#11 // Sigma1(e) | ||
692 | + ror w12,w22,#2 | ||
693 | + add w21,w21,w17 // h+=Ch(e,f,g) | ||
694 | + eor w17,w22,w22,ror#9 | ||
695 | + add w21,w21,w16 // h+=Sigma1(e) | ||
696 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
697 | + add w25,w25,w21 // d+=h | ||
698 | + eor w28,w28,w23 // Maj(a,b,c) | ||
699 | + eor w17,w12,w17,ror#13 // Sigma0(a) | ||
700 | + add w21,w21,w28 // h+=Maj(a,b,c) | ||
701 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
702 | + //add w21,w21,w17 // h+=Sigma0(a) | ||
703 | +#ifndef __AARCH64EB__ | ||
704 | + rev w10,w10 // 7 | ||
705 | +#endif | ||
706 | + ldp w11,w12,[x1],#2*4 | ||
707 | + add w21,w21,w17 // h+=Sigma0(a) | ||
708 | + ror w16,w25,#6 | ||
709 | + add w20,w20,w28 // h+=K[i] | ||
710 | + eor w13,w25,w25,ror#14 | ||
711 | + and w17,w26,w25 | ||
712 | + bic w28,w27,w25 | ||
713 | + add w20,w20,w10 // h+=X[i] | ||
714 | + orr w17,w17,w28 // Ch(e,f,g) | ||
715 | + eor w28,w21,w22 // a^b, b^c in next round | ||
716 | + eor w16,w16,w13,ror#11 // Sigma1(e) | ||
717 | + ror w13,w21,#2 | ||
718 | + add w20,w20,w17 // h+=Ch(e,f,g) | ||
719 | + eor w17,w21,w21,ror#9 | ||
720 | + add w20,w20,w16 // h+=Sigma1(e) | ||
721 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
722 | + add w24,w24,w20 // d+=h | ||
723 | + eor w19,w19,w22 // Maj(a,b,c) | ||
724 | + eor w17,w13,w17,ror#13 // Sigma0(a) | ||
725 | + add w20,w20,w19 // h+=Maj(a,b,c) | ||
726 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
727 | + //add w20,w20,w17 // h+=Sigma0(a) | ||
728 | +#ifndef __AARCH64EB__ | ||
729 | + rev w11,w11 // 8 | ||
730 | +#endif | ||
731 | + add w20,w20,w17 // h+=Sigma0(a) | ||
732 | + ror w16,w24,#6 | ||
733 | + add w27,w27,w19 // h+=K[i] | ||
734 | + eor w14,w24,w24,ror#14 | ||
735 | + and w17,w25,w24 | ||
736 | + bic w19,w26,w24 | ||
737 | + add w27,w27,w11 // h+=X[i] | ||
738 | + orr w17,w17,w19 // Ch(e,f,g) | ||
739 | + eor w19,w20,w21 // a^b, b^c in next round | ||
740 | + eor w16,w16,w14,ror#11 // Sigma1(e) | ||
741 | + ror w14,w20,#2 | ||
742 | + add w27,w27,w17 // h+=Ch(e,f,g) | ||
743 | + eor w17,w20,w20,ror#9 | ||
744 | + add w27,w27,w16 // h+=Sigma1(e) | ||
745 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
746 | + add w23,w23,w27 // d+=h | ||
747 | + eor w28,w28,w21 // Maj(a,b,c) | ||
748 | + eor w17,w14,w17,ror#13 // Sigma0(a) | ||
749 | + add w27,w27,w28 // h+=Maj(a,b,c) | ||
750 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
751 | + //add w27,w27,w17 // h+=Sigma0(a) | ||
752 | +#ifndef __AARCH64EB__ | ||
753 | + rev w12,w12 // 9 | ||
754 | +#endif | ||
755 | + ldp w13,w14,[x1],#2*4 | ||
756 | + add w27,w27,w17 // h+=Sigma0(a) | ||
757 | + ror w16,w23,#6 | ||
758 | + add w26,w26,w28 // h+=K[i] | ||
759 | + eor w15,w23,w23,ror#14 | ||
760 | + and w17,w24,w23 | ||
761 | + bic w28,w25,w23 | ||
762 | + add w26,w26,w12 // h+=X[i] | ||
763 | + orr w17,w17,w28 // Ch(e,f,g) | ||
764 | + eor w28,w27,w20 // a^b, b^c in next round | ||
765 | + eor w16,w16,w15,ror#11 // Sigma1(e) | ||
766 | + ror w15,w27,#2 | ||
767 | + add w26,w26,w17 // h+=Ch(e,f,g) | ||
768 | + eor w17,w27,w27,ror#9 | ||
769 | + add w26,w26,w16 // h+=Sigma1(e) | ||
770 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
771 | + add w22,w22,w26 // d+=h | ||
772 | + eor w19,w19,w20 // Maj(a,b,c) | ||
773 | + eor w17,w15,w17,ror#13 // Sigma0(a) | ||
774 | + add w26,w26,w19 // h+=Maj(a,b,c) | ||
775 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
776 | + //add w26,w26,w17 // h+=Sigma0(a) | ||
777 | +#ifndef __AARCH64EB__ | ||
778 | + rev w13,w13 // 10 | ||
779 | +#endif | ||
780 | + add w26,w26,w17 // h+=Sigma0(a) | ||
781 | + ror w16,w22,#6 | ||
782 | + add w25,w25,w19 // h+=K[i] | ||
783 | + eor w0,w22,w22,ror#14 | ||
784 | + and w17,w23,w22 | ||
785 | + bic w19,w24,w22 | ||
786 | + add w25,w25,w13 // h+=X[i] | ||
787 | + orr w17,w17,w19 // Ch(e,f,g) | ||
788 | + eor w19,w26,w27 // a^b, b^c in next round | ||
789 | + eor w16,w16,w0,ror#11 // Sigma1(e) | ||
790 | + ror w0,w26,#2 | ||
791 | + add w25,w25,w17 // h+=Ch(e,f,g) | ||
792 | + eor w17,w26,w26,ror#9 | ||
793 | + add w25,w25,w16 // h+=Sigma1(e) | ||
794 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
795 | + add w21,w21,w25 // d+=h | ||
796 | + eor w28,w28,w27 // Maj(a,b,c) | ||
797 | + eor w17,w0,w17,ror#13 // Sigma0(a) | ||
798 | + add w25,w25,w28 // h+=Maj(a,b,c) | ||
799 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
800 | + //add w25,w25,w17 // h+=Sigma0(a) | ||
801 | +#ifndef __AARCH64EB__ | ||
802 | + rev w14,w14 // 11 | ||
803 | +#endif | ||
804 | + ldp w15,w0,[x1],#2*4 | ||
805 | + add w25,w25,w17 // h+=Sigma0(a) | ||
806 | + str w6,[sp,#12] | ||
807 | + ror w16,w21,#6 | ||
808 | + add w24,w24,w28 // h+=K[i] | ||
809 | + eor w6,w21,w21,ror#14 | ||
810 | + and w17,w22,w21 | ||
811 | + bic w28,w23,w21 | ||
812 | + add w24,w24,w14 // h+=X[i] | ||
813 | + orr w17,w17,w28 // Ch(e,f,g) | ||
814 | + eor w28,w25,w26 // a^b, b^c in next round | ||
815 | + eor w16,w16,w6,ror#11 // Sigma1(e) | ||
816 | + ror w6,w25,#2 | ||
817 | + add w24,w24,w17 // h+=Ch(e,f,g) | ||
818 | + eor w17,w25,w25,ror#9 | ||
819 | + add w24,w24,w16 // h+=Sigma1(e) | ||
820 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
821 | + add w20,w20,w24 // d+=h | ||
822 | + eor w19,w19,w26 // Maj(a,b,c) | ||
823 | + eor w17,w6,w17,ror#13 // Sigma0(a) | ||
824 | + add w24,w24,w19 // h+=Maj(a,b,c) | ||
825 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
826 | + //add w24,w24,w17 // h+=Sigma0(a) | ||
827 | +#ifndef __AARCH64EB__ | ||
828 | + rev w15,w15 // 12 | ||
829 | +#endif | ||
830 | + add w24,w24,w17 // h+=Sigma0(a) | ||
831 | + str w7,[sp,#0] | ||
832 | + ror w16,w20,#6 | ||
833 | + add w23,w23,w19 // h+=K[i] | ||
834 | + eor w7,w20,w20,ror#14 | ||
835 | + and w17,w21,w20 | ||
836 | + bic w19,w22,w20 | ||
837 | + add w23,w23,w15 // h+=X[i] | ||
838 | + orr w17,w17,w19 // Ch(e,f,g) | ||
839 | + eor w19,w24,w25 // a^b, b^c in next round | ||
840 | + eor w16,w16,w7,ror#11 // Sigma1(e) | ||
841 | + ror w7,w24,#2 | ||
842 | + add w23,w23,w17 // h+=Ch(e,f,g) | ||
843 | + eor w17,w24,w24,ror#9 | ||
844 | + add w23,w23,w16 // h+=Sigma1(e) | ||
845 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
846 | + add w27,w27,w23 // d+=h | ||
847 | + eor w28,w28,w25 // Maj(a,b,c) | ||
848 | + eor w17,w7,w17,ror#13 // Sigma0(a) | ||
849 | + add w23,w23,w28 // h+=Maj(a,b,c) | ||
850 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
851 | + //add w23,w23,w17 // h+=Sigma0(a) | ||
852 | +#ifndef __AARCH64EB__ | ||
853 | + rev w0,w0 // 13 | ||
854 | +#endif | ||
855 | + ldp w1,w2,[x1] | ||
856 | + add w23,w23,w17 // h+=Sigma0(a) | ||
857 | + str w8,[sp,#4] | ||
858 | + ror w16,w27,#6 | ||
859 | + add w22,w22,w28 // h+=K[i] | ||
860 | + eor w8,w27,w27,ror#14 | ||
861 | + and w17,w20,w27 | ||
862 | + bic w28,w21,w27 | ||
863 | + add w22,w22,w0 // h+=X[i] | ||
864 | + orr w17,w17,w28 // Ch(e,f,g) | ||
865 | + eor w28,w23,w24 // a^b, b^c in next round | ||
866 | + eor w16,w16,w8,ror#11 // Sigma1(e) | ||
867 | + ror w8,w23,#2 | ||
868 | + add w22,w22,w17 // h+=Ch(e,f,g) | ||
869 | + eor w17,w23,w23,ror#9 | ||
870 | + add w22,w22,w16 // h+=Sigma1(e) | ||
871 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
872 | + add w26,w26,w22 // d+=h | ||
873 | + eor w19,w19,w24 // Maj(a,b,c) | ||
874 | + eor w17,w8,w17,ror#13 // Sigma0(a) | ||
875 | + add w22,w22,w19 // h+=Maj(a,b,c) | ||
876 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
877 | + //add w22,w22,w17 // h+=Sigma0(a) | ||
878 | +#ifndef __AARCH64EB__ | ||
879 | + rev w1,w1 // 14 | ||
880 | +#endif | ||
881 | + ldr w6,[sp,#12] | ||
882 | + add w22,w22,w17 // h+=Sigma0(a) | ||
883 | + str w9,[sp,#8] | ||
884 | + ror w16,w26,#6 | ||
885 | + add w21,w21,w19 // h+=K[i] | ||
886 | + eor w9,w26,w26,ror#14 | ||
887 | + and w17,w27,w26 | ||
888 | + bic w19,w20,w26 | ||
889 | + add w21,w21,w1 // h+=X[i] | ||
890 | + orr w17,w17,w19 // Ch(e,f,g) | ||
891 | + eor w19,w22,w23 // a^b, b^c in next round | ||
892 | + eor w16,w16,w9,ror#11 // Sigma1(e) | ||
893 | + ror w9,w22,#2 | ||
894 | + add w21,w21,w17 // h+=Ch(e,f,g) | ||
895 | + eor w17,w22,w22,ror#9 | ||
896 | + add w21,w21,w16 // h+=Sigma1(e) | ||
897 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
898 | + add w25,w25,w21 // d+=h | ||
899 | + eor w28,w28,w23 // Maj(a,b,c) | ||
900 | + eor w17,w9,w17,ror#13 // Sigma0(a) | ||
901 | + add w21,w21,w28 // h+=Maj(a,b,c) | ||
902 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
903 | + //add w21,w21,w17 // h+=Sigma0(a) | ||
904 | +#ifndef __AARCH64EB__ | ||
905 | + rev w2,w2 // 15 | ||
906 | +#endif | ||
907 | + ldr w7,[sp,#0] | ||
908 | + add w21,w21,w17 // h+=Sigma0(a) | ||
909 | + str w10,[sp,#12] | ||
910 | + ror w16,w25,#6 | ||
911 | + add w20,w20,w28 // h+=K[i] | ||
912 | + ror w9,w4,#7 | ||
913 | + and w17,w26,w25 | ||
914 | + ror w8,w1,#17 | ||
915 | + bic w28,w27,w25 | ||
916 | + ror w10,w21,#2 | ||
917 | + add w20,w20,w2 // h+=X[i] | ||
918 | + eor w16,w16,w25,ror#11 | ||
919 | + eor w9,w9,w4,ror#18 | ||
920 | + orr w17,w17,w28 // Ch(e,f,g) | ||
921 | + eor w28,w21,w22 // a^b, b^c in next round | ||
922 | + eor w16,w16,w25,ror#25 // Sigma1(e) | ||
923 | + eor w10,w10,w21,ror#13 | ||
924 | + add w20,w20,w17 // h+=Ch(e,f,g) | ||
925 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
926 | + eor w8,w8,w1,ror#19 | ||
927 | + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) | ||
928 | + add w20,w20,w16 // h+=Sigma1(e) | ||
929 | + eor w19,w19,w22 // Maj(a,b,c) | ||
930 | + eor w17,w10,w21,ror#22 // Sigma0(a) | ||
931 | + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) | ||
932 | + add w3,w3,w12 | ||
933 | + add w24,w24,w20 // d+=h | ||
934 | + add w20,w20,w19 // h+=Maj(a,b,c) | ||
935 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
936 | + add w3,w3,w9 | ||
937 | + add w20,w20,w17 // h+=Sigma0(a) | ||
938 | + add w3,w3,w8 | ||
939 | +.Loop_16_xx: | ||
940 | + ldr w8,[sp,#4] | ||
941 | + str w11,[sp,#0] | ||
942 | + ror w16,w24,#6 | ||
943 | + add w27,w27,w19 // h+=K[i] | ||
944 | + ror w10,w5,#7 | ||
945 | + and w17,w25,w24 | ||
946 | + ror w9,w2,#17 | ||
947 | + bic w19,w26,w24 | ||
948 | + ror w11,w20,#2 | ||
949 | + add w27,w27,w3 // h+=X[i] | ||
950 | + eor w16,w16,w24,ror#11 | ||
951 | + eor w10,w10,w5,ror#18 | ||
952 | + orr w17,w17,w19 // Ch(e,f,g) | ||
953 | + eor w19,w20,w21 // a^b, b^c in next round | ||
954 | + eor w16,w16,w24,ror#25 // Sigma1(e) | ||
955 | + eor w11,w11,w20,ror#13 | ||
956 | + add w27,w27,w17 // h+=Ch(e,f,g) | ||
957 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
958 | + eor w9,w9,w2,ror#19 | ||
959 | + eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) | ||
960 | + add w27,w27,w16 // h+=Sigma1(e) | ||
961 | + eor w28,w28,w21 // Maj(a,b,c) | ||
962 | + eor w17,w11,w20,ror#22 // Sigma0(a) | ||
963 | + eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) | ||
964 | + add w4,w4,w13 | ||
965 | + add w23,w23,w27 // d+=h | ||
966 | + add w27,w27,w28 // h+=Maj(a,b,c) | ||
967 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
968 | + add w4,w4,w10 | ||
969 | + add w27,w27,w17 // h+=Sigma0(a) | ||
970 | + add w4,w4,w9 | ||
971 | + ldr w9,[sp,#8] | ||
972 | + str w12,[sp,#4] | ||
973 | + ror w16,w23,#6 | ||
974 | + add w26,w26,w28 // h+=K[i] | ||
975 | + ror w11,w6,#7 | ||
976 | + and w17,w24,w23 | ||
977 | + ror w10,w3,#17 | ||
978 | + bic w28,w25,w23 | ||
979 | + ror w12,w27,#2 | ||
980 | + add w26,w26,w4 // h+=X[i] | ||
981 | + eor w16,w16,w23,ror#11 | ||
982 | + eor w11,w11,w6,ror#18 | ||
983 | + orr w17,w17,w28 // Ch(e,f,g) | ||
984 | + eor w28,w27,w20 // a^b, b^c in next round | ||
985 | + eor w16,w16,w23,ror#25 // Sigma1(e) | ||
986 | + eor w12,w12,w27,ror#13 | ||
987 | + add w26,w26,w17 // h+=Ch(e,f,g) | ||
988 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
989 | + eor w10,w10,w3,ror#19 | ||
990 | + eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) | ||
991 | + add w26,w26,w16 // h+=Sigma1(e) | ||
992 | + eor w19,w19,w20 // Maj(a,b,c) | ||
993 | + eor w17,w12,w27,ror#22 // Sigma0(a) | ||
994 | + eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) | ||
995 | + add w5,w5,w14 | ||
996 | + add w22,w22,w26 // d+=h | ||
997 | + add w26,w26,w19 // h+=Maj(a,b,c) | ||
998 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
999 | + add w5,w5,w11 | ||
1000 | + add w26,w26,w17 // h+=Sigma0(a) | ||
1001 | + add w5,w5,w10 | ||
1002 | + ldr w10,[sp,#12] | ||
1003 | + str w13,[sp,#8] | ||
1004 | + ror w16,w22,#6 | ||
1005 | + add w25,w25,w19 // h+=K[i] | ||
1006 | + ror w12,w7,#7 | ||
1007 | + and w17,w23,w22 | ||
1008 | + ror w11,w4,#17 | ||
1009 | + bic w19,w24,w22 | ||
1010 | + ror w13,w26,#2 | ||
1011 | + add w25,w25,w5 // h+=X[i] | ||
1012 | + eor w16,w16,w22,ror#11 | ||
1013 | + eor w12,w12,w7,ror#18 | ||
1014 | + orr w17,w17,w19 // Ch(e,f,g) | ||
1015 | + eor w19,w26,w27 // a^b, b^c in next round | ||
1016 | + eor w16,w16,w22,ror#25 // Sigma1(e) | ||
1017 | + eor w13,w13,w26,ror#13 | ||
1018 | + add w25,w25,w17 // h+=Ch(e,f,g) | ||
1019 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
1020 | + eor w11,w11,w4,ror#19 | ||
1021 | + eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) | ||
1022 | + add w25,w25,w16 // h+=Sigma1(e) | ||
1023 | + eor w28,w28,w27 // Maj(a,b,c) | ||
1024 | + eor w17,w13,w26,ror#22 // Sigma0(a) | ||
1025 | + eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) | ||
1026 | + add w6,w6,w15 | ||
1027 | + add w21,w21,w25 // d+=h | ||
1028 | + add w25,w25,w28 // h+=Maj(a,b,c) | ||
1029 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
1030 | + add w6,w6,w12 | ||
1031 | + add w25,w25,w17 // h+=Sigma0(a) | ||
1032 | + add w6,w6,w11 | ||
1033 | + ldr w11,[sp,#0] | ||
1034 | + str w14,[sp,#12] | ||
1035 | + ror w16,w21,#6 | ||
1036 | + add w24,w24,w28 // h+=K[i] | ||
1037 | + ror w13,w8,#7 | ||
1038 | + and w17,w22,w21 | ||
1039 | + ror w12,w5,#17 | ||
1040 | + bic w28,w23,w21 | ||
1041 | + ror w14,w25,#2 | ||
1042 | + add w24,w24,w6 // h+=X[i] | ||
1043 | + eor w16,w16,w21,ror#11 | ||
1044 | + eor w13,w13,w8,ror#18 | ||
1045 | + orr w17,w17,w28 // Ch(e,f,g) | ||
1046 | + eor w28,w25,w26 // a^b, b^c in next round | ||
1047 | + eor w16,w16,w21,ror#25 // Sigma1(e) | ||
1048 | + eor w14,w14,w25,ror#13 | ||
1049 | + add w24,w24,w17 // h+=Ch(e,f,g) | ||
1050 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
1051 | + eor w12,w12,w5,ror#19 | ||
1052 | + eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) | ||
1053 | + add w24,w24,w16 // h+=Sigma1(e) | ||
1054 | + eor w19,w19,w26 // Maj(a,b,c) | ||
1055 | + eor w17,w14,w25,ror#22 // Sigma0(a) | ||
1056 | + eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) | ||
1057 | + add w7,w7,w0 | ||
1058 | + add w20,w20,w24 // d+=h | ||
1059 | + add w24,w24,w19 // h+=Maj(a,b,c) | ||
1060 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
1061 | + add w7,w7,w13 | ||
1062 | + add w24,w24,w17 // h+=Sigma0(a) | ||
1063 | + add w7,w7,w12 | ||
1064 | + ldr w12,[sp,#4] | ||
1065 | + str w15,[sp,#0] | ||
1066 | + ror w16,w20,#6 | ||
1067 | + add w23,w23,w19 // h+=K[i] | ||
1068 | + ror w14,w9,#7 | ||
1069 | + and w17,w21,w20 | ||
1070 | + ror w13,w6,#17 | ||
1071 | + bic w19,w22,w20 | ||
1072 | + ror w15,w24,#2 | ||
1073 | + add w23,w23,w7 // h+=X[i] | ||
1074 | + eor w16,w16,w20,ror#11 | ||
1075 | + eor w14,w14,w9,ror#18 | ||
1076 | + orr w17,w17,w19 // Ch(e,f,g) | ||
1077 | + eor w19,w24,w25 // a^b, b^c in next round | ||
1078 | + eor w16,w16,w20,ror#25 // Sigma1(e) | ||
1079 | + eor w15,w15,w24,ror#13 | ||
1080 | + add w23,w23,w17 // h+=Ch(e,f,g) | ||
1081 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
1082 | + eor w13,w13,w6,ror#19 | ||
1083 | + eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) | ||
1084 | + add w23,w23,w16 // h+=Sigma1(e) | ||
1085 | + eor w28,w28,w25 // Maj(a,b,c) | ||
1086 | + eor w17,w15,w24,ror#22 // Sigma0(a) | ||
1087 | + eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) | ||
1088 | + add w8,w8,w1 | ||
1089 | + add w27,w27,w23 // d+=h | ||
1090 | + add w23,w23,w28 // h+=Maj(a,b,c) | ||
1091 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
1092 | + add w8,w8,w14 | ||
1093 | + add w23,w23,w17 // h+=Sigma0(a) | ||
1094 | + add w8,w8,w13 | ||
1095 | + ldr w13,[sp,#8] | ||
1096 | + str w0,[sp,#4] | ||
1097 | + ror w16,w27,#6 | ||
1098 | + add w22,w22,w28 // h+=K[i] | ||
1099 | + ror w15,w10,#7 | ||
1100 | + and w17,w20,w27 | ||
1101 | + ror w14,w7,#17 | ||
1102 | + bic w28,w21,w27 | ||
1103 | + ror w0,w23,#2 | ||
1104 | + add w22,w22,w8 // h+=X[i] | ||
1105 | + eor w16,w16,w27,ror#11 | ||
1106 | + eor w15,w15,w10,ror#18 | ||
1107 | + orr w17,w17,w28 // Ch(e,f,g) | ||
1108 | + eor w28,w23,w24 // a^b, b^c in next round | ||
1109 | + eor w16,w16,w27,ror#25 // Sigma1(e) | ||
1110 | + eor w0,w0,w23,ror#13 | ||
1111 | + add w22,w22,w17 // h+=Ch(e,f,g) | ||
1112 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
1113 | + eor w14,w14,w7,ror#19 | ||
1114 | + eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) | ||
1115 | + add w22,w22,w16 // h+=Sigma1(e) | ||
1116 | + eor w19,w19,w24 // Maj(a,b,c) | ||
1117 | + eor w17,w0,w23,ror#22 // Sigma0(a) | ||
1118 | + eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) | ||
1119 | + add w9,w9,w2 | ||
1120 | + add w26,w26,w22 // d+=h | ||
1121 | + add w22,w22,w19 // h+=Maj(a,b,c) | ||
1122 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
1123 | + add w9,w9,w15 | ||
1124 | + add w22,w22,w17 // h+=Sigma0(a) | ||
1125 | + add w9,w9,w14 | ||
1126 | + ldr w14,[sp,#12] | ||
1127 | + str w1,[sp,#8] | ||
1128 | + ror w16,w26,#6 | ||
1129 | + add w21,w21,w19 // h+=K[i] | ||
1130 | + ror w0,w11,#7 | ||
1131 | + and w17,w27,w26 | ||
1132 | + ror w15,w8,#17 | ||
1133 | + bic w19,w20,w26 | ||
1134 | + ror w1,w22,#2 | ||
1135 | + add w21,w21,w9 // h+=X[i] | ||
1136 | + eor w16,w16,w26,ror#11 | ||
1137 | + eor w0,w0,w11,ror#18 | ||
1138 | + orr w17,w17,w19 // Ch(e,f,g) | ||
1139 | + eor w19,w22,w23 // a^b, b^c in next round | ||
1140 | + eor w16,w16,w26,ror#25 // Sigma1(e) | ||
1141 | + eor w1,w1,w22,ror#13 | ||
1142 | + add w21,w21,w17 // h+=Ch(e,f,g) | ||
1143 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
1144 | + eor w15,w15,w8,ror#19 | ||
1145 | + eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) | ||
1146 | + add w21,w21,w16 // h+=Sigma1(e) | ||
1147 | + eor w28,w28,w23 // Maj(a,b,c) | ||
1148 | + eor w17,w1,w22,ror#22 // Sigma0(a) | ||
1149 | + eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) | ||
1150 | + add w10,w10,w3 | ||
1151 | + add w25,w25,w21 // d+=h | ||
1152 | + add w21,w21,w28 // h+=Maj(a,b,c) | ||
1153 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
1154 | + add w10,w10,w0 | ||
1155 | + add w21,w21,w17 // h+=Sigma0(a) | ||
1156 | + add w10,w10,w15 | ||
1157 | + ldr w15,[sp,#0] | ||
1158 | + str w2,[sp,#12] | ||
1159 | + ror w16,w25,#6 | ||
1160 | + add w20,w20,w28 // h+=K[i] | ||
1161 | + ror w1,w12,#7 | ||
1162 | + and w17,w26,w25 | ||
1163 | + ror w0,w9,#17 | ||
1164 | + bic w28,w27,w25 | ||
1165 | + ror w2,w21,#2 | ||
1166 | + add w20,w20,w10 // h+=X[i] | ||
1167 | + eor w16,w16,w25,ror#11 | ||
1168 | + eor w1,w1,w12,ror#18 | ||
1169 | + orr w17,w17,w28 // Ch(e,f,g) | ||
1170 | + eor w28,w21,w22 // a^b, b^c in next round | ||
1171 | + eor w16,w16,w25,ror#25 // Sigma1(e) | ||
1172 | + eor w2,w2,w21,ror#13 | ||
1173 | + add w20,w20,w17 // h+=Ch(e,f,g) | ||
1174 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
1175 | + eor w0,w0,w9,ror#19 | ||
1176 | + eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) | ||
1177 | + add w20,w20,w16 // h+=Sigma1(e) | ||
1178 | + eor w19,w19,w22 // Maj(a,b,c) | ||
1179 | + eor w17,w2,w21,ror#22 // Sigma0(a) | ||
1180 | + eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) | ||
1181 | + add w11,w11,w4 | ||
1182 | + add w24,w24,w20 // d+=h | ||
1183 | + add w20,w20,w19 // h+=Maj(a,b,c) | ||
1184 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
1185 | + add w11,w11,w1 | ||
1186 | + add w20,w20,w17 // h+=Sigma0(a) | ||
1187 | + add w11,w11,w0 | ||
1188 | + ldr w0,[sp,#4] | ||
1189 | + str w3,[sp,#0] | ||
1190 | + ror w16,w24,#6 | ||
1191 | + add w27,w27,w19 // h+=K[i] | ||
1192 | + ror w2,w13,#7 | ||
1193 | + and w17,w25,w24 | ||
1194 | + ror w1,w10,#17 | ||
1195 | + bic w19,w26,w24 | ||
1196 | + ror w3,w20,#2 | ||
1197 | + add w27,w27,w11 // h+=X[i] | ||
1198 | + eor w16,w16,w24,ror#11 | ||
1199 | + eor w2,w2,w13,ror#18 | ||
1200 | + orr w17,w17,w19 // Ch(e,f,g) | ||
1201 | + eor w19,w20,w21 // a^b, b^c in next round | ||
1202 | + eor w16,w16,w24,ror#25 // Sigma1(e) | ||
1203 | + eor w3,w3,w20,ror#13 | ||
1204 | + add w27,w27,w17 // h+=Ch(e,f,g) | ||
1205 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
1206 | + eor w1,w1,w10,ror#19 | ||
1207 | + eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) | ||
1208 | + add w27,w27,w16 // h+=Sigma1(e) | ||
1209 | + eor w28,w28,w21 // Maj(a,b,c) | ||
1210 | + eor w17,w3,w20,ror#22 // Sigma0(a) | ||
1211 | + eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) | ||
1212 | + add w12,w12,w5 | ||
1213 | + add w23,w23,w27 // d+=h | ||
1214 | + add w27,w27,w28 // h+=Maj(a,b,c) | ||
1215 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
1216 | + add w12,w12,w2 | ||
1217 | + add w27,w27,w17 // h+=Sigma0(a) | ||
1218 | + add w12,w12,w1 | ||
1219 | + ldr w1,[sp,#8] | ||
1220 | + str w4,[sp,#4] | ||
1221 | + ror w16,w23,#6 | ||
1222 | + add w26,w26,w28 // h+=K[i] | ||
1223 | + ror w3,w14,#7 | ||
1224 | + and w17,w24,w23 | ||
1225 | + ror w2,w11,#17 | ||
1226 | + bic w28,w25,w23 | ||
1227 | + ror w4,w27,#2 | ||
1228 | + add w26,w26,w12 // h+=X[i] | ||
1229 | + eor w16,w16,w23,ror#11 | ||
1230 | + eor w3,w3,w14,ror#18 | ||
1231 | + orr w17,w17,w28 // Ch(e,f,g) | ||
1232 | + eor w28,w27,w20 // a^b, b^c in next round | ||
1233 | + eor w16,w16,w23,ror#25 // Sigma1(e) | ||
1234 | + eor w4,w4,w27,ror#13 | ||
1235 | + add w26,w26,w17 // h+=Ch(e,f,g) | ||
1236 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
1237 | + eor w2,w2,w11,ror#19 | ||
1238 | + eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) | ||
1239 | + add w26,w26,w16 // h+=Sigma1(e) | ||
1240 | + eor w19,w19,w20 // Maj(a,b,c) | ||
1241 | + eor w17,w4,w27,ror#22 // Sigma0(a) | ||
1242 | + eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) | ||
1243 | + add w13,w13,w6 | ||
1244 | + add w22,w22,w26 // d+=h | ||
1245 | + add w26,w26,w19 // h+=Maj(a,b,c) | ||
1246 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
1247 | + add w13,w13,w3 | ||
1248 | + add w26,w26,w17 // h+=Sigma0(a) | ||
1249 | + add w13,w13,w2 | ||
1250 | + ldr w2,[sp,#12] | ||
1251 | + str w5,[sp,#8] | ||
1252 | + ror w16,w22,#6 | ||
1253 | + add w25,w25,w19 // h+=K[i] | ||
1254 | + ror w4,w15,#7 | ||
1255 | + and w17,w23,w22 | ||
1256 | + ror w3,w12,#17 | ||
1257 | + bic w19,w24,w22 | ||
1258 | + ror w5,w26,#2 | ||
1259 | + add w25,w25,w13 // h+=X[i] | ||
1260 | + eor w16,w16,w22,ror#11 | ||
1261 | + eor w4,w4,w15,ror#18 | ||
1262 | + orr w17,w17,w19 // Ch(e,f,g) | ||
1263 | + eor w19,w26,w27 // a^b, b^c in next round | ||
1264 | + eor w16,w16,w22,ror#25 // Sigma1(e) | ||
1265 | + eor w5,w5,w26,ror#13 | ||
1266 | + add w25,w25,w17 // h+=Ch(e,f,g) | ||
1267 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
1268 | + eor w3,w3,w12,ror#19 | ||
1269 | + eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) | ||
1270 | + add w25,w25,w16 // h+=Sigma1(e) | ||
1271 | + eor w28,w28,w27 // Maj(a,b,c) | ||
1272 | + eor w17,w5,w26,ror#22 // Sigma0(a) | ||
1273 | + eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) | ||
1274 | + add w14,w14,w7 | ||
1275 | + add w21,w21,w25 // d+=h | ||
1276 | + add w25,w25,w28 // h+=Maj(a,b,c) | ||
1277 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
1278 | + add w14,w14,w4 | ||
1279 | + add w25,w25,w17 // h+=Sigma0(a) | ||
1280 | + add w14,w14,w3 | ||
1281 | + ldr w3,[sp,#0] | ||
1282 | + str w6,[sp,#12] | ||
1283 | + ror w16,w21,#6 | ||
1284 | + add w24,w24,w28 // h+=K[i] | ||
1285 | + ror w5,w0,#7 | ||
1286 | + and w17,w22,w21 | ||
1287 | + ror w4,w13,#17 | ||
1288 | + bic w28,w23,w21 | ||
1289 | + ror w6,w25,#2 | ||
1290 | + add w24,w24,w14 // h+=X[i] | ||
1291 | + eor w16,w16,w21,ror#11 | ||
1292 | + eor w5,w5,w0,ror#18 | ||
1293 | + orr w17,w17,w28 // Ch(e,f,g) | ||
1294 | + eor w28,w25,w26 // a^b, b^c in next round | ||
1295 | + eor w16,w16,w21,ror#25 // Sigma1(e) | ||
1296 | + eor w6,w6,w25,ror#13 | ||
1297 | + add w24,w24,w17 // h+=Ch(e,f,g) | ||
1298 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
1299 | + eor w4,w4,w13,ror#19 | ||
1300 | + eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) | ||
1301 | + add w24,w24,w16 // h+=Sigma1(e) | ||
1302 | + eor w19,w19,w26 // Maj(a,b,c) | ||
1303 | + eor w17,w6,w25,ror#22 // Sigma0(a) | ||
1304 | + eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) | ||
1305 | + add w15,w15,w8 | ||
1306 | + add w20,w20,w24 // d+=h | ||
1307 | + add w24,w24,w19 // h+=Maj(a,b,c) | ||
1308 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
1309 | + add w15,w15,w5 | ||
1310 | + add w24,w24,w17 // h+=Sigma0(a) | ||
1311 | + add w15,w15,w4 | ||
1312 | + ldr w4,[sp,#4] | ||
1313 | + str w7,[sp,#0] | ||
1314 | + ror w16,w20,#6 | ||
1315 | + add w23,w23,w19 // h+=K[i] | ||
1316 | + ror w6,w1,#7 | ||
1317 | + and w17,w21,w20 | ||
1318 | + ror w5,w14,#17 | ||
1319 | + bic w19,w22,w20 | ||
1320 | + ror w7,w24,#2 | ||
1321 | + add w23,w23,w15 // h+=X[i] | ||
1322 | + eor w16,w16,w20,ror#11 | ||
1323 | + eor w6,w6,w1,ror#18 | ||
1324 | + orr w17,w17,w19 // Ch(e,f,g) | ||
1325 | + eor w19,w24,w25 // a^b, b^c in next round | ||
1326 | + eor w16,w16,w20,ror#25 // Sigma1(e) | ||
1327 | + eor w7,w7,w24,ror#13 | ||
1328 | + add w23,w23,w17 // h+=Ch(e,f,g) | ||
1329 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
1330 | + eor w5,w5,w14,ror#19 | ||
1331 | + eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) | ||
1332 | + add w23,w23,w16 // h+=Sigma1(e) | ||
1333 | + eor w28,w28,w25 // Maj(a,b,c) | ||
1334 | + eor w17,w7,w24,ror#22 // Sigma0(a) | ||
1335 | + eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) | ||
1336 | + add w0,w0,w9 | ||
1337 | + add w27,w27,w23 // d+=h | ||
1338 | + add w23,w23,w28 // h+=Maj(a,b,c) | ||
1339 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
1340 | + add w0,w0,w6 | ||
1341 | + add w23,w23,w17 // h+=Sigma0(a) | ||
1342 | + add w0,w0,w5 | ||
1343 | + ldr w5,[sp,#8] | ||
1344 | + str w8,[sp,#4] | ||
1345 | + ror w16,w27,#6 | ||
1346 | + add w22,w22,w28 // h+=K[i] | ||
1347 | + ror w7,w2,#7 | ||
1348 | + and w17,w20,w27 | ||
1349 | + ror w6,w15,#17 | ||
1350 | + bic w28,w21,w27 | ||
1351 | + ror w8,w23,#2 | ||
1352 | + add w22,w22,w0 // h+=X[i] | ||
1353 | + eor w16,w16,w27,ror#11 | ||
1354 | + eor w7,w7,w2,ror#18 | ||
1355 | + orr w17,w17,w28 // Ch(e,f,g) | ||
1356 | + eor w28,w23,w24 // a^b, b^c in next round | ||
1357 | + eor w16,w16,w27,ror#25 // Sigma1(e) | ||
1358 | + eor w8,w8,w23,ror#13 | ||
1359 | + add w22,w22,w17 // h+=Ch(e,f,g) | ||
1360 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
1361 | + eor w6,w6,w15,ror#19 | ||
1362 | + eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) | ||
1363 | + add w22,w22,w16 // h+=Sigma1(e) | ||
1364 | + eor w19,w19,w24 // Maj(a,b,c) | ||
1365 | + eor w17,w8,w23,ror#22 // Sigma0(a) | ||
1366 | + eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) | ||
1367 | + add w1,w1,w10 | ||
1368 | + add w26,w26,w22 // d+=h | ||
1369 | + add w22,w22,w19 // h+=Maj(a,b,c) | ||
1370 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
1371 | + add w1,w1,w7 | ||
1372 | + add w22,w22,w17 // h+=Sigma0(a) | ||
1373 | + add w1,w1,w6 | ||
1374 | + ldr w6,[sp,#12] | ||
1375 | + str w9,[sp,#8] | ||
1376 | + ror w16,w26,#6 | ||
1377 | + add w21,w21,w19 // h+=K[i] | ||
1378 | + ror w8,w3,#7 | ||
1379 | + and w17,w27,w26 | ||
1380 | + ror w7,w0,#17 | ||
1381 | + bic w19,w20,w26 | ||
1382 | + ror w9,w22,#2 | ||
1383 | + add w21,w21,w1 // h+=X[i] | ||
1384 | + eor w16,w16,w26,ror#11 | ||
1385 | + eor w8,w8,w3,ror#18 | ||
1386 | + orr w17,w17,w19 // Ch(e,f,g) | ||
1387 | + eor w19,w22,w23 // a^b, b^c in next round | ||
1388 | + eor w16,w16,w26,ror#25 // Sigma1(e) | ||
1389 | + eor w9,w9,w22,ror#13 | ||
1390 | + add w21,w21,w17 // h+=Ch(e,f,g) | ||
1391 | + and w28,w28,w19 // (b^c)&=(a^b) | ||
1392 | + eor w7,w7,w0,ror#19 | ||
1393 | + eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) | ||
1394 | + add w21,w21,w16 // h+=Sigma1(e) | ||
1395 | + eor w28,w28,w23 // Maj(a,b,c) | ||
1396 | + eor w17,w9,w22,ror#22 // Sigma0(a) | ||
1397 | + eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) | ||
1398 | + add w2,w2,w11 | ||
1399 | + add w25,w25,w21 // d+=h | ||
1400 | + add w21,w21,w28 // h+=Maj(a,b,c) | ||
1401 | + ldr w28,[x30],#4 // *K++, w19 in next round | ||
1402 | + add w2,w2,w8 | ||
1403 | + add w21,w21,w17 // h+=Sigma0(a) | ||
1404 | + add w2,w2,w7 | ||
1405 | + ldr w7,[sp,#0] | ||
1406 | + str w10,[sp,#12] | ||
1407 | + ror w16,w25,#6 | ||
1408 | + add w20,w20,w28 // h+=K[i] | ||
1409 | + ror w9,w4,#7 | ||
1410 | + and w17,w26,w25 | ||
1411 | + ror w8,w1,#17 | ||
1412 | + bic w28,w27,w25 | ||
1413 | + ror w10,w21,#2 | ||
1414 | + add w20,w20,w2 // h+=X[i] | ||
1415 | + eor w16,w16,w25,ror#11 | ||
1416 | + eor w9,w9,w4,ror#18 | ||
1417 | + orr w17,w17,w28 // Ch(e,f,g) | ||
1418 | + eor w28,w21,w22 // a^b, b^c in next round | ||
1419 | + eor w16,w16,w25,ror#25 // Sigma1(e) | ||
1420 | + eor w10,w10,w21,ror#13 | ||
1421 | + add w20,w20,w17 // h+=Ch(e,f,g) | ||
1422 | + and w19,w19,w28 // (b^c)&=(a^b) | ||
1423 | + eor w8,w8,w1,ror#19 | ||
1424 | + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) | ||
1425 | + add w20,w20,w16 // h+=Sigma1(e) | ||
1426 | + eor w19,w19,w22 // Maj(a,b,c) | ||
1427 | + eor w17,w10,w21,ror#22 // Sigma0(a) | ||
1428 | + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) | ||
1429 | + add w3,w3,w12 | ||
1430 | + add w24,w24,w20 // d+=h | ||
1431 | + add w20,w20,w19 // h+=Maj(a,b,c) | ||
1432 | + ldr w19,[x30],#4 // *K++, w28 in next round | ||
1433 | + add w3,w3,w9 | ||
1434 | + add w20,w20,w17 // h+=Sigma0(a) | ||
1435 | + add w3,w3,w8 | ||
1436 | + cbnz w19,.Loop_16_xx | ||
1437 | + | ||
1438 | + ldp x0,x2,[x29,#96] | ||
1439 | + ldr x1,[x29,#112] | ||
1440 | + sub x30,x30,#260 // rewind | ||
1441 | + | ||
1442 | + ldp w3,w4,[x0] | ||
1443 | + ldp w5,w6,[x0,#2*4] | ||
1444 | + add x1,x1,#14*4 // advance input pointer | ||
1445 | + ldp w7,w8,[x0,#4*4] | ||
1446 | + add w20,w20,w3 | ||
1447 | + ldp w9,w10,[x0,#6*4] | ||
1448 | + add w21,w21,w4 | ||
1449 | + add w22,w22,w5 | ||
1450 | + add w23,w23,w6 | ||
1451 | + stp w20,w21,[x0] | ||
1452 | + add w24,w24,w7 | ||
1453 | + add w25,w25,w8 | ||
1454 | + stp w22,w23,[x0,#2*4] | ||
1455 | + add w26,w26,w9 | ||
1456 | + add w27,w27,w10 | ||
1457 | + cmp x1,x2 | ||
1458 | + stp w24,w25,[x0,#4*4] | ||
1459 | + stp w26,w27,[x0,#6*4] | ||
1460 | + b.ne .Loop | ||
1461 | + | ||
1462 | + ldp x19,x20,[x29,#16] | ||
1463 | + add sp,sp,#4*4 | ||
1464 | + ldp x21,x22,[x29,#32] | ||
1465 | + ldp x23,x24,[x29,#48] | ||
1466 | + ldp x25,x26,[x29,#64] | ||
1467 | + ldp x27,x28,[x29,#80] | ||
1468 | + ldp x29,x30,[sp],#128 | ||
1469 | + ret | ||
1470 | +.size sha256_block_data_order,.-sha256_block_data_order | ||
1471 | + | ||
1472 | +.align 6 | ||
1473 | +.type .LK256,%object | ||
1474 | +.LK256: | ||
1475 | + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | ||
1476 | + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | ||
1477 | + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | ||
1478 | + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | ||
1479 | + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | ||
1480 | + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | ||
1481 | + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | ||
1482 | + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | ||
1483 | + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | ||
1484 | + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | ||
1485 | + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | ||
1486 | + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | ||
1487 | + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | ||
1488 | + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | ||
1489 | + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | ||
1490 | + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | ||
1491 | + .long 0 // zero word after the 64 constants; presumably the value that ends .Loop_16_xx (cbnz on the loaded K word) -- confirm | ||
1492 | +.size .LK256,.-.LK256 | ||
1493 | +#ifndef __KERNEL__ | ||
1494 | +.align 3 | ||
1495 | +.LOPENSSL_armcap_P: | ||
1496 | +# ifdef __ILP32__ | ||
1497 | + .long OPENSSL_armcap_P-. | ||
1498 | +# else | ||
1499 | + .quad OPENSSL_armcap_P-. | ||
1500 | +# endif | ||
1501 | +#endif | ||
1502 | +.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>" | ||
1503 | +.align 2 | ||
1504 | +#ifndef __KERNEL__ | ||
1505 | +.type sha256_block_armv8,%function | ||
1506 | +.align 6 | ||
1507 | +sha256_block_armv8: | ||
1508 | +.Lv8_entry: | ||
1509 | + stp x29,x30,[sp,#-16]! | ||
1510 | + add x29,sp,#0 | ||
1511 | + | ||
1512 | + ld1 {v0.4s,v1.4s},[x0] | ||
1513 | + adr x3,.LK256 | ||
1514 | + | ||
1515 | +.Loop_hw: | ||
1516 | + ld1 {v4.16b-v7.16b},[x1],#64 | ||
1517 | + sub x2,x2,#1 | ||
1518 | + ld1 {v16.4s},[x3],#16 | ||
1519 | + rev32 v4.16b,v4.16b | ||
1520 | + rev32 v5.16b,v5.16b | ||
1521 | + rev32 v6.16b,v6.16b | ||
1522 | + rev32 v7.16b,v7.16b | ||
1523 | + orr v18.16b,v0.16b,v0.16b // offload: copy v0/v1 into v18/v19; they are added back after the rounds | ||
1524 | + orr v19.16b,v1.16b,v1.16b | ||
1525 | + ld1 {v17.4s},[x3],#16 | ||
1526 | + add v16.4s,v16.4s,v4.4s | ||
1527 | + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b (emitted as a raw .inst word; mnemonic shown for reference) | ||
1528 | + orr v2.16b,v0.16b,v0.16b | ||
1529 | + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s | ||
1530 | + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s | ||
1531 | + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b | ||
1532 | + ld1 {v16.4s},[x3],#16 | ||
1533 | + add v17.4s,v17.4s,v5.4s | ||
1534 | + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b | ||
1535 | + orr v2.16b,v0.16b,v0.16b | ||
1536 | + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s | ||
1537 | + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s | ||
1538 | + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b | ||
1539 | + ld1 {v17.4s},[x3],#16 | ||
1540 | + add v16.4s,v16.4s,v6.4s | ||
1541 | + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b | ||
1542 | + orr v2.16b,v0.16b,v0.16b | ||
1543 | + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s | ||
1544 | + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s | ||
1545 | + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b | ||
1546 | + ld1 {v16.4s},[x3],#16 | ||
1547 | + add v17.4s,v17.4s,v7.4s | ||
1548 | + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b | ||
1549 | + orr v2.16b,v0.16b,v0.16b | ||
1550 | + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s | ||
1551 | + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s | ||
1552 | + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b | ||
1553 | + ld1 {v17.4s},[x3],#16 | ||
1554 | + add v16.4s,v16.4s,v4.4s | ||
1555 | + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b | ||
1556 | + orr v2.16b,v0.16b,v0.16b | ||
1557 | + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s | ||
1558 | + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s | ||
1559 | + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b | ||
1560 | + ld1 {v16.4s},[x3],#16 | ||
1561 | + add v17.4s,v17.4s,v5.4s | ||
1562 | + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b | ||
1563 | + orr v2.16b,v0.16b,v0.16b | ||
1564 | + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s | ||
1565 | + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s | ||
1566 | + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b | ||
1567 | + ld1 {v17.4s},[x3],#16 | ||
1568 | + add v16.4s,v16.4s,v6.4s | ||
1569 | + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b | ||
1570 | + orr v2.16b,v0.16b,v0.16b | ||
1571 | + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s | ||
1572 | + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s | ||
1573 | + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b | ||
1574 | + ld1 {v16.4s},[x3],#16 | ||
1575 | + add v17.4s,v17.4s,v7.4s | ||
1576 | + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b | ||
1577 | + orr v2.16b,v0.16b,v0.16b | ||
1578 | + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s | ||
1579 | + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s | ||
1580 | + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b | ||
1581 | + ld1 {v17.4s},[x3],#16 | ||
1582 | + add v16.4s,v16.4s,v4.4s | ||
1583 | + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b | ||
1584 | + orr v2.16b,v0.16b,v0.16b | ||
1585 | + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s | ||
1586 | + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s | ||
1587 | + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b | ||
1588 | + ld1 {v16.4s},[x3],#16 | ||
1589 | + add v17.4s,v17.4s,v5.4s | ||
1590 | + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b | ||
1591 | + orr v2.16b,v0.16b,v0.16b | ||
1592 | + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s | ||
1593 | + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s | ||
1594 | + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b | ||
1595 | + ld1 {v17.4s},[x3],#16 | ||
1596 | + add v16.4s,v16.4s,v6.4s | ||
1597 | + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b | ||
1598 | + orr v2.16b,v0.16b,v0.16b | ||
1599 | + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s | ||
1600 | + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s | ||
1601 | + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b | ||
1602 | + ld1 {v16.4s},[x3],#16 | ||
1603 | + add v17.4s,v17.4s,v7.4s | ||
1604 | + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b | ||
1605 | + orr v2.16b,v0.16b,v0.16b | ||
1606 | + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s | ||
1607 | + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s | ||
1608 | + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b | ||
1609 | + ld1 {v17.4s},[x3],#16 | ||
1610 | + add v16.4s,v16.4s,v4.4s | ||
1611 | + orr v2.16b,v0.16b,v0.16b | ||
1612 | + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s | ||
1613 | + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s | ||
1614 | + | ||
1615 | + ld1 {v16.4s},[x3],#16 | ||
1616 | + add v17.4s,v17.4s,v5.4s | ||
1617 | + orr v2.16b,v0.16b,v0.16b | ||
1618 | + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s | ||
1619 | + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s | ||
1620 | + | ||
1621 | + ld1 {v17.4s},[x3] | ||
1622 | + add v16.4s,v16.4s,v6.4s | ||
1623 | + sub x3,x3,#64*4-16 // rewind x3 to .LK256: undoes the 15 post-incremented 16-byte loads above | ||
1624 | + orr v2.16b,v0.16b,v0.16b | ||
1625 | + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s | ||
1626 | + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s | ||
1627 | + | ||
1628 | + add v17.4s,v17.4s,v7.4s | ||
1629 | + orr v2.16b,v0.16b,v0.16b | ||
1630 | + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s | ||
1631 | + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s | ||
1632 | + | ||
1633 | + add v0.4s,v0.4s,v18.4s | ||
1634 | + add v1.4s,v1.4s,v19.4s | ||
1635 | + | ||
1636 | + cbnz x2,.Loop_hw | ||
1637 | + | ||
1638 | + st1 {v0.4s,v1.4s},[x0] | ||
1639 | + | ||
1640 | + ldr x29,[sp],#16 | ||
1641 | + ret | ||
1642 | +.size sha256_block_armv8,.-sha256_block_armv8 | ||
1643 | +#endif | ||
1644 | +#ifdef __KERNEL__ | ||
1645 | +.globl sha256_block_neon /* exported only when built with __KERNEL__ defined */ | ||
1646 | +#endif | ||
1647 | +.type sha256_block_neon,%function | ||
1648 | +.align 4 | ||
1649 | +sha256_block_neon: /* x0 = eight 32-bit state words, x1 = input, x2 = count of 64-byte blocks */ | ||
1650 | +.Lneon_entry: | ||
1651 | + stp x29, x30, [sp, #-16]! /* push frame pointer and link register */ | ||
1652 | + mov x29, sp | ||
1653 | + sub sp,sp,#16*4 /* 64 bytes of stack scratch, i.e. 16 words */ | ||
1654 | + | ||
1655 | + adr x16,.LK256 /* x16 walks the .LK256 table */ | ||
1656 | + add x2,x1,x2,lsl#6 // x2 = x1 + 64*x2, i.e. it now points at the end of inp | ||
1657 | + | ||
1658 | + ld1 {v0.16b},[x1], #16 /* one 64-byte input block into v0-v3 */ | ||
1659 | + ld1 {v1.16b},[x1], #16 | ||
1660 | + ld1 {v2.16b},[x1], #16 | ||
1661 | + ld1 {v3.16b},[x1], #16 | ||
1662 | + ld1 {v4.4s},[x16], #16 /* first 16 table words into v4-v7 */ | ||
1663 | + ld1 {v5.4s},[x16], #16 | ||
1664 | + ld1 {v6.4s},[x16], #16 | ||
1665 | + ld1 {v7.4s},[x16], #16 | ||
1666 | + rev32 v0.16b,v0.16b // yes, even on | ||
1667 | + rev32 v1.16b,v1.16b // big-endian | ||
1668 | + rev32 v2.16b,v2.16b | ||
1669 | + rev32 v3.16b,v3.16b | ||
1670 | + mov x17,sp /* x17 = write pointer into the scratch area */ | ||
1671 | + add v4.4s,v4.4s,v0.4s /* table word + input word, lane by lane */ | ||
1672 | + add v5.4s,v5.4s,v1.4s | ||
1673 | + add v6.4s,v6.4s,v2.4s | ||
1674 | + st1 {v4.4s-v5.4s},[x17], #32 | ||
1675 | + add v7.4s,v7.4s,v3.4s | ||
1676 | + st1 {v6.4s-v7.4s},[x17] | ||
1677 | + sub x17,x17,#32 /* back to sp: only the first st1 post-incremented */ | ||
1678 | + | ||
1679 | + ldp w3,w4,[x0] /* state words 0..7 into w3..w10 */ | ||
1680 | + ldp w5,w6,[x0,#8] | ||
1681 | + ldp w7,w8,[x0,#16] | ||
1682 | + ldp w9,w10,[x0,#24] | ||
1683 | + ldr w12,[sp,#0] /* first scratch word, consumed by the first round */ | ||
1684 | + mov w13,wzr /* w13 and w15 carry additions deferred from the previous round; none yet */ | ||
1685 | + eor w14,w4,w5 | ||
1686 | + mov w15,wzr | ||
1687 | + b .L_00_48 | ||
1688 | + | ||
1689 | +.align 4 | ||
1690 | +.L_00_48: /* one pass = 16 scalar rounds reading the stack scratch, interleaved with vector code that updates v0-v3 and refills the scratch */ | ||
1691 | + ext v4.16b,v0.16b,v1.16b,#4 /* v4 = v0 words 1..3 followed by v1 word 0 */ | ||
1692 | + add w10,w10,w12 /* add scratch word (table word + message word) */ | ||
1693 | + add w3,w3,w15 /* addition deferred from the previous round */ | ||
1694 | + and w12,w8,w7 | ||
1695 | + bic w15,w9,w7 | ||
1696 | + ext v7.16b,v2.16b,v3.16b,#4 | ||
1697 | + eor w11,w7,w7,ror#5 | ||
1698 | + add w3,w3,w13 /* second deferred addition */ | ||
1699 | + mov d19,v3.d[1] /* upper two words of v3 */ | ||
1700 | + orr w12,w12,w15 /* (w8 & w7) | (w9 & ~w7), i.e. Ch */ | ||
1701 | + eor w11,w11,w7,ror#19 | ||
1702 | + ushr v6.4s,v4.4s,#7 | ||
1703 | + eor w15,w3,w3,ror#11 | ||
1704 | + ushr v5.4s,v4.4s,#3 | ||
1705 | + add w10,w10,w12 | ||
1706 | + add v0.4s,v0.4s,v7.4s | ||
1707 | + ror w11,w11,#6 /* = ror(w7,6)^ror(w7,11)^ror(w7,25), i.e. Sigma1 */ | ||
1708 | + sli v6.4s,v4.4s,#25 /* with the ushr #7 above: rotate right by 7 */ | ||
1709 | + eor w13,w3,w4 | ||
1710 | + eor w15,w15,w3,ror#20 | ||
1711 | + ushr v7.4s,v4.4s,#18 | ||
1712 | + add w10,w10,w11 | ||
1713 | + ldr w12,[sp,#4] /* scratch word for the next round */ | ||
1714 | + and w14,w14,w13 | ||
1715 | + eor v5.16b,v5.16b,v6.16b | ||
1716 | + ror w15,w15,#2 /* = ror(w3,2)^ror(w3,13)^ror(w3,22), i.e. Sigma0; added to w10 in the next round */ | ||
1717 | + add w6,w6,w10 | ||
1718 | + sli v7.4s,v4.4s,#14 /* with the ushr #18 above: rotate right by 18 */ | ||
1719 | + eor w14,w14,w4 /* Maj term; added to w10 in the next round */ | ||
1720 | + ushr v16.4s,v19.4s,#17 | ||
1721 | + add w9,w9,w12 | ||
1722 | + add w10,w10,w15 | ||
1723 | + and w12,w7,w6 | ||
1724 | + eor v5.16b,v5.16b,v7.16b /* v5 = ror7 ^ ror18 ^ shr3 of v4 */ | ||
1725 | + bic w15,w8,w6 | ||
1726 | + eor w11,w6,w6,ror#5 | ||
1727 | + sli v16.4s,v19.4s,#15 | ||
1728 | + add w10,w10,w14 | ||
1729 | + orr w12,w12,w15 | ||
1730 | + ushr v17.4s,v19.4s,#10 | ||
1731 | + eor w11,w11,w6,ror#19 | ||
1732 | + eor w15,w10,w10,ror#11 | ||
1733 | + ushr v7.4s,v19.4s,#19 | ||
1734 | + add w9,w9,w12 | ||
1735 | + ror w11,w11,#6 | ||
1736 | + add v0.4s,v0.4s,v5.4s | ||
1737 | + eor w14,w10,w3 | ||
1738 | + eor w15,w15,w10,ror#20 | ||
1739 | + sli v7.4s,v19.4s,#13 | ||
1740 | + add w9,w9,w11 | ||
1741 | + ldr w12,[sp,#8] | ||
1742 | + and w13,w13,w14 | ||
1743 | + eor v17.16b,v17.16b,v16.16b | ||
1744 | + ror w15,w15,#2 | ||
1745 | + add w5,w5,w9 | ||
1746 | + eor w13,w13,w3 | ||
1747 | + eor v17.16b,v17.16b,v7.16b /* v17 = ror17 ^ ror19 ^ shr10 of v19 */ | ||
1748 | + add w8,w8,w12 | ||
1749 | + add w9,w9,w15 | ||
1750 | + and w12,w6,w5 | ||
1751 | + add v0.4s,v0.4s,v17.4s | ||
1752 | + bic w15,w7,w5 | ||
1753 | + eor w11,w5,w5,ror#5 | ||
1754 | + add w9,w9,w13 | ||
1755 | + ushr v18.4s,v0.4s,#17 | ||
1756 | + orr w12,w12,w15 | ||
1757 | + ushr v19.4s,v0.4s,#10 | ||
1758 | + eor w11,w11,w5,ror#19 | ||
1759 | + eor w15,w9,w9,ror#11 | ||
1760 | + sli v18.4s,v0.4s,#15 | ||
1761 | + add w8,w8,w12 | ||
1762 | + ushr v17.4s,v0.4s,#19 | ||
1763 | + ror w11,w11,#6 | ||
1764 | + eor w13,w9,w10 | ||
1765 | + eor v19.16b,v19.16b,v18.16b | ||
1766 | + eor w15,w15,w9,ror#20 | ||
1767 | + add w8,w8,w11 | ||
1768 | + sli v17.4s,v0.4s,#13 | ||
1769 | + ldr w12,[sp,#12] | ||
1770 | + and w14,w14,w13 | ||
1771 | + ror w15,w15,#2 | ||
1772 | + ld1 {v4.4s},[x16], #16 /* next four table words */ | ||
1773 | + add w4,w4,w8 | ||
1774 | + eor v19.16b,v19.16b,v17.16b | ||
1775 | + eor w14,w14,w10 | ||
1776 | + eor v17.16b,v17.16b,v17.16b /* v17 = 0 */ | ||
1777 | + add w7,w7,w12 | ||
1778 | + add w8,w8,w15 | ||
1779 | + and w12,w5,w4 | ||
1780 | + mov v17.d[1],v19.d[0] /* only the upper two lanes receive this second correction */ | ||
1781 | + bic w15,w6,w4 | ||
1782 | + eor w11,w4,w4,ror#5 | ||
1783 | + add w8,w8,w14 | ||
1784 | + add v0.4s,v0.4s,v17.4s | ||
1785 | + orr w12,w12,w15 | ||
1786 | + eor w11,w11,w4,ror#19 | ||
1787 | + eor w15,w8,w8,ror#11 | ||
1788 | + add v4.4s,v4.4s,v0.4s /* table words + updated v0 */ | ||
1789 | + add w7,w7,w12 | ||
1790 | + ror w11,w11,#6 | ||
1791 | + eor w14,w8,w9 | ||
1792 | + eor w15,w15,w8,ror#20 | ||
1793 | + add w7,w7,w11 | ||
1794 | + ldr w12,[sp,#16] | ||
1795 | + and w13,w13,w14 | ||
1796 | + ror w15,w15,#2 | ||
1797 | + add w3,w3,w7 | ||
1798 | + eor w13,w13,w9 | ||
1799 | + st1 {v4.4s},[x17], #16 /* overwrite the four scratch words already consumed */ | ||
1800 | + ext v4.16b,v1.16b,v2.16b,#4 /* same sequence again, now updating v1 */ | ||
1801 | + add w6,w6,w12 | ||
1802 | + add w7,w7,w15 | ||
1803 | + and w12,w4,w3 | ||
1804 | + bic w15,w5,w3 | ||
1805 | + ext v7.16b,v3.16b,v0.16b,#4 | ||
1806 | + eor w11,w3,w3,ror#5 | ||
1807 | + add w7,w7,w13 | ||
1808 | + mov d19,v0.d[1] | ||
1809 | + orr w12,w12,w15 | ||
1810 | + eor w11,w11,w3,ror#19 | ||
1811 | + ushr v6.4s,v4.4s,#7 | ||
1812 | + eor w15,w7,w7,ror#11 | ||
1813 | + ushr v5.4s,v4.4s,#3 | ||
1814 | + add w6,w6,w12 | ||
1815 | + add v1.4s,v1.4s,v7.4s | ||
1816 | + ror w11,w11,#6 | ||
1817 | + sli v6.4s,v4.4s,#25 | ||
1818 | + eor w13,w7,w8 | ||
1819 | + eor w15,w15,w7,ror#20 | ||
1820 | + ushr v7.4s,v4.4s,#18 | ||
1821 | + add w6,w6,w11 | ||
1822 | + ldr w12,[sp,#20] | ||
1823 | + and w14,w14,w13 | ||
1824 | + eor v5.16b,v5.16b,v6.16b | ||
1825 | + ror w15,w15,#2 | ||
1826 | + add w10,w10,w6 | ||
1827 | + sli v7.4s,v4.4s,#14 | ||
1828 | + eor w14,w14,w8 | ||
1829 | + ushr v16.4s,v19.4s,#17 | ||
1830 | + add w5,w5,w12 | ||
1831 | + add w6,w6,w15 | ||
1832 | + and w12,w3,w10 | ||
1833 | + eor v5.16b,v5.16b,v7.16b | ||
1834 | + bic w15,w4,w10 | ||
1835 | + eor w11,w10,w10,ror#5 | ||
1836 | + sli v16.4s,v19.4s,#15 | ||
1837 | + add w6,w6,w14 | ||
1838 | + orr w12,w12,w15 | ||
1839 | + ushr v17.4s,v19.4s,#10 | ||
1840 | + eor w11,w11,w10,ror#19 | ||
1841 | + eor w15,w6,w6,ror#11 | ||
1842 | + ushr v7.4s,v19.4s,#19 | ||
1843 | + add w5,w5,w12 | ||
1844 | + ror w11,w11,#6 | ||
1845 | + add v1.4s,v1.4s,v5.4s | ||
1846 | + eor w14,w6,w7 | ||
1847 | + eor w15,w15,w6,ror#20 | ||
1848 | + sli v7.4s,v19.4s,#13 | ||
1849 | + add w5,w5,w11 | ||
1850 | + ldr w12,[sp,#24] | ||
1851 | + and w13,w13,w14 | ||
1852 | + eor v17.16b,v17.16b,v16.16b | ||
1853 | + ror w15,w15,#2 | ||
1854 | + add w9,w9,w5 | ||
1855 | + eor w13,w13,w7 | ||
1856 | + eor v17.16b,v17.16b,v7.16b | ||
1857 | + add w4,w4,w12 | ||
1858 | + add w5,w5,w15 | ||
1859 | + and w12,w10,w9 | ||
1860 | + add v1.4s,v1.4s,v17.4s | ||
1861 | + bic w15,w3,w9 | ||
1862 | + eor w11,w9,w9,ror#5 | ||
1863 | + add w5,w5,w13 | ||
1864 | + ushr v18.4s,v1.4s,#17 | ||
1865 | + orr w12,w12,w15 | ||
1866 | + ushr v19.4s,v1.4s,#10 | ||
1867 | + eor w11,w11,w9,ror#19 | ||
1868 | + eor w15,w5,w5,ror#11 | ||
1869 | + sli v18.4s,v1.4s,#15 | ||
1870 | + add w4,w4,w12 | ||
1871 | + ushr v17.4s,v1.4s,#19 | ||
1872 | + ror w11,w11,#6 | ||
1873 | + eor w13,w5,w6 | ||
1874 | + eor v19.16b,v19.16b,v18.16b | ||
1875 | + eor w15,w15,w5,ror#20 | ||
1876 | + add w4,w4,w11 | ||
1877 | + sli v17.4s,v1.4s,#13 | ||
1878 | + ldr w12,[sp,#28] | ||
1879 | + and w14,w14,w13 | ||
1880 | + ror w15,w15,#2 | ||
1881 | + ld1 {v4.4s},[x16], #16 | ||
1882 | + add w8,w8,w4 | ||
1883 | + eor v19.16b,v19.16b,v17.16b | ||
1884 | + eor w14,w14,w6 | ||
1885 | + eor v17.16b,v17.16b,v17.16b | ||
1886 | + add w3,w3,w12 | ||
1887 | + add w4,w4,w15 | ||
1888 | + and w12,w9,w8 | ||
1889 | + mov v17.d[1],v19.d[0] | ||
1890 | + bic w15,w10,w8 | ||
1891 | + eor w11,w8,w8,ror#5 | ||
1892 | + add w4,w4,w14 | ||
1893 | + add v1.4s,v1.4s,v17.4s | ||
1894 | + orr w12,w12,w15 | ||
1895 | + eor w11,w11,w8,ror#19 | ||
1896 | + eor w15,w4,w4,ror#11 | ||
1897 | + add v4.4s,v4.4s,v1.4s | ||
1898 | + add w3,w3,w12 | ||
1899 | + ror w11,w11,#6 | ||
1900 | + eor w14,w4,w5 | ||
1901 | + eor w15,w15,w4,ror#20 | ||
1902 | + add w3,w3,w11 | ||
1903 | + ldr w12,[sp,#32] | ||
1904 | + and w13,w13,w14 | ||
1905 | + ror w15,w15,#2 | ||
1906 | + add w7,w7,w3 | ||
1907 | + eor w13,w13,w5 | ||
1908 | + st1 {v4.4s},[x17], #16 | ||
1909 | + ext v4.16b,v2.16b,v3.16b,#4 /* same sequence again, now updating v2 */ | ||
1910 | + add w10,w10,w12 | ||
1911 | + add w3,w3,w15 | ||
1912 | + and w12,w8,w7 | ||
1913 | + bic w15,w9,w7 | ||
1914 | + ext v7.16b,v0.16b,v1.16b,#4 | ||
1915 | + eor w11,w7,w7,ror#5 | ||
1916 | + add w3,w3,w13 | ||
1917 | + mov d19,v1.d[1] | ||
1918 | + orr w12,w12,w15 | ||
1919 | + eor w11,w11,w7,ror#19 | ||
1920 | + ushr v6.4s,v4.4s,#7 | ||
1921 | + eor w15,w3,w3,ror#11 | ||
1922 | + ushr v5.4s,v4.4s,#3 | ||
1923 | + add w10,w10,w12 | ||
1924 | + add v2.4s,v2.4s,v7.4s | ||
1925 | + ror w11,w11,#6 | ||
1926 | + sli v6.4s,v4.4s,#25 | ||
1927 | + eor w13,w3,w4 | ||
1928 | + eor w15,w15,w3,ror#20 | ||
1929 | + ushr v7.4s,v4.4s,#18 | ||
1930 | + add w10,w10,w11 | ||
1931 | + ldr w12,[sp,#36] | ||
1932 | + and w14,w14,w13 | ||
1933 | + eor v5.16b,v5.16b,v6.16b | ||
1934 | + ror w15,w15,#2 | ||
1935 | + add w6,w6,w10 | ||
1936 | + sli v7.4s,v4.4s,#14 | ||
1937 | + eor w14,w14,w4 | ||
1938 | + ushr v16.4s,v19.4s,#17 | ||
1939 | + add w9,w9,w12 | ||
1940 | + add w10,w10,w15 | ||
1941 | + and w12,w7,w6 | ||
1942 | + eor v5.16b,v5.16b,v7.16b | ||
1943 | + bic w15,w8,w6 | ||
1944 | + eor w11,w6,w6,ror#5 | ||
1945 | + sli v16.4s,v19.4s,#15 | ||
1946 | + add w10,w10,w14 | ||
1947 | + orr w12,w12,w15 | ||
1948 | + ushr v17.4s,v19.4s,#10 | ||
1949 | + eor w11,w11,w6,ror#19 | ||
1950 | + eor w15,w10,w10,ror#11 | ||
1951 | + ushr v7.4s,v19.4s,#19 | ||
1952 | + add w9,w9,w12 | ||
1953 | + ror w11,w11,#6 | ||
1954 | + add v2.4s,v2.4s,v5.4s | ||
1955 | + eor w14,w10,w3 | ||
1956 | + eor w15,w15,w10,ror#20 | ||
1957 | + sli v7.4s,v19.4s,#13 | ||
1958 | + add w9,w9,w11 | ||
1959 | + ldr w12,[sp,#40] | ||
1960 | + and w13,w13,w14 | ||
1961 | + eor v17.16b,v17.16b,v16.16b | ||
1962 | + ror w15,w15,#2 | ||
1963 | + add w5,w5,w9 | ||
1964 | + eor w13,w13,w3 | ||
1965 | + eor v17.16b,v17.16b,v7.16b | ||
1966 | + add w8,w8,w12 | ||
1967 | + add w9,w9,w15 | ||
1968 | + and w12,w6,w5 | ||
1969 | + add v2.4s,v2.4s,v17.4s | ||
1970 | + bic w15,w7,w5 | ||
1971 | + eor w11,w5,w5,ror#5 | ||
1972 | + add w9,w9,w13 | ||
1973 | + ushr v18.4s,v2.4s,#17 | ||
1974 | + orr w12,w12,w15 | ||
1975 | + ushr v19.4s,v2.4s,#10 | ||
1976 | + eor w11,w11,w5,ror#19 | ||
1977 | + eor w15,w9,w9,ror#11 | ||
1978 | + sli v18.4s,v2.4s,#15 | ||
1979 | + add w8,w8,w12 | ||
1980 | + ushr v17.4s,v2.4s,#19 | ||
1981 | + ror w11,w11,#6 | ||
1982 | + eor w13,w9,w10 | ||
1983 | + eor v19.16b,v19.16b,v18.16b | ||
1984 | + eor w15,w15,w9,ror#20 | ||
1985 | + add w8,w8,w11 | ||
1986 | + sli v17.4s,v2.4s,#13 | ||
1987 | + ldr w12,[sp,#44] | ||
1988 | + and w14,w14,w13 | ||
1989 | + ror w15,w15,#2 | ||
1990 | + ld1 {v4.4s},[x16], #16 | ||
1991 | + add w4,w4,w8 | ||
1992 | + eor v19.16b,v19.16b,v17.16b | ||
1993 | + eor w14,w14,w10 | ||
1994 | + eor v17.16b,v17.16b,v17.16b | ||
1995 | + add w7,w7,w12 | ||
1996 | + add w8,w8,w15 | ||
1997 | + and w12,w5,w4 | ||
1998 | + mov v17.d[1],v19.d[0] | ||
1999 | + bic w15,w6,w4 | ||
2000 | + eor w11,w4,w4,ror#5 | ||
2001 | + add w8,w8,w14 | ||
2002 | + add v2.4s,v2.4s,v17.4s | ||
2003 | + orr w12,w12,w15 | ||
2004 | + eor w11,w11,w4,ror#19 | ||
2005 | + eor w15,w8,w8,ror#11 | ||
2006 | + add v4.4s,v4.4s,v2.4s | ||
2007 | + add w7,w7,w12 | ||
2008 | + ror w11,w11,#6 | ||
2009 | + eor w14,w8,w9 | ||
2010 | + eor w15,w15,w8,ror#20 | ||
2011 | + add w7,w7,w11 | ||
2012 | + ldr w12,[sp,#48] | ||
2013 | + and w13,w13,w14 | ||
2014 | + ror w15,w15,#2 | ||
2015 | + add w3,w3,w7 | ||
2016 | + eor w13,w13,w9 | ||
2017 | + st1 {v4.4s},[x17], #16 | ||
2018 | + ext v4.16b,v3.16b,v0.16b,#4 /* same sequence again, now updating v3 */ | ||
2019 | + add w6,w6,w12 | ||
2020 | + add w7,w7,w15 | ||
2021 | + and w12,w4,w3 | ||
2022 | + bic w15,w5,w3 | ||
2023 | + ext v7.16b,v1.16b,v2.16b,#4 | ||
2024 | + eor w11,w3,w3,ror#5 | ||
2025 | + add w7,w7,w13 | ||
2026 | + mov d19,v2.d[1] | ||
2027 | + orr w12,w12,w15 | ||
2028 | + eor w11,w11,w3,ror#19 | ||
2029 | + ushr v6.4s,v4.4s,#7 | ||
2030 | + eor w15,w7,w7,ror#11 | ||
2031 | + ushr v5.4s,v4.4s,#3 | ||
2032 | + add w6,w6,w12 | ||
2033 | + add v3.4s,v3.4s,v7.4s | ||
2034 | + ror w11,w11,#6 | ||
2035 | + sli v6.4s,v4.4s,#25 | ||
2036 | + eor w13,w7,w8 | ||
2037 | + eor w15,w15,w7,ror#20 | ||
2038 | + ushr v7.4s,v4.4s,#18 | ||
2039 | + add w6,w6,w11 | ||
2040 | + ldr w12,[sp,#52] | ||
2041 | + and w14,w14,w13 | ||
2042 | + eor v5.16b,v5.16b,v6.16b | ||
2043 | + ror w15,w15,#2 | ||
2044 | + add w10,w10,w6 | ||
2045 | + sli v7.4s,v4.4s,#14 | ||
2046 | + eor w14,w14,w8 | ||
2047 | + ushr v16.4s,v19.4s,#17 | ||
2048 | + add w5,w5,w12 | ||
2049 | + add w6,w6,w15 | ||
2050 | + and w12,w3,w10 | ||
2051 | + eor v5.16b,v5.16b,v7.16b | ||
2052 | + bic w15,w4,w10 | ||
2053 | + eor w11,w10,w10,ror#5 | ||
2054 | + sli v16.4s,v19.4s,#15 | ||
2055 | + add w6,w6,w14 | ||
2056 | + orr w12,w12,w15 | ||
2057 | + ushr v17.4s,v19.4s,#10 | ||
2058 | + eor w11,w11,w10,ror#19 | ||
2059 | + eor w15,w6,w6,ror#11 | ||
2060 | + ushr v7.4s,v19.4s,#19 | ||
2061 | + add w5,w5,w12 | ||
2062 | + ror w11,w11,#6 | ||
2063 | + add v3.4s,v3.4s,v5.4s | ||
2064 | + eor w14,w6,w7 | ||
2065 | + eor w15,w15,w6,ror#20 | ||
2066 | + sli v7.4s,v19.4s,#13 | ||
2067 | + add w5,w5,w11 | ||
2068 | + ldr w12,[sp,#56] | ||
2069 | + and w13,w13,w14 | ||
2070 | + eor v17.16b,v17.16b,v16.16b | ||
2071 | + ror w15,w15,#2 | ||
2072 | + add w9,w9,w5 | ||
2073 | + eor w13,w13,w7 | ||
2074 | + eor v17.16b,v17.16b,v7.16b | ||
2075 | + add w4,w4,w12 | ||
2076 | + add w5,w5,w15 | ||
2077 | + and w12,w10,w9 | ||
2078 | + add v3.4s,v3.4s,v17.4s | ||
2079 | + bic w15,w3,w9 | ||
2080 | + eor w11,w9,w9,ror#5 | ||
2081 | + add w5,w5,w13 | ||
2082 | + ushr v18.4s,v3.4s,#17 | ||
2083 | + orr w12,w12,w15 | ||
2084 | + ushr v19.4s,v3.4s,#10 | ||
2085 | + eor w11,w11,w9,ror#19 | ||
2086 | + eor w15,w5,w5,ror#11 | ||
2087 | + sli v18.4s,v3.4s,#15 | ||
2088 | + add w4,w4,w12 | ||
2089 | + ushr v17.4s,v3.4s,#19 | ||
2090 | + ror w11,w11,#6 | ||
2091 | + eor w13,w5,w6 | ||
2092 | + eor v19.16b,v19.16b,v18.16b | ||
2093 | + eor w15,w15,w5,ror#20 | ||
2094 | + add w4,w4,w11 | ||
2095 | + sli v17.4s,v3.4s,#13 | ||
2096 | + ldr w12,[sp,#60] | ||
2097 | + and w14,w14,w13 | ||
2098 | + ror w15,w15,#2 | ||
2099 | + ld1 {v4.4s},[x16], #16 | ||
2100 | + add w8,w8,w4 | ||
2101 | + eor v19.16b,v19.16b,v17.16b | ||
2102 | + eor w14,w14,w6 | ||
2103 | + eor v17.16b,v17.16b,v17.16b | ||
2104 | + add w3,w3,w12 | ||
2105 | + add w4,w4,w15 | ||
2106 | + and w12,w9,w8 | ||
2107 | + mov v17.d[1],v19.d[0] | ||
2108 | + bic w15,w10,w8 | ||
2109 | + eor w11,w8,w8,ror#5 | ||
2110 | + add w4,w4,w14 | ||
2111 | + add v3.4s,v3.4s,v17.4s | ||
2112 | + orr w12,w12,w15 | ||
2113 | + eor w11,w11,w8,ror#19 | ||
2114 | + eor w15,w4,w4,ror#11 | ||
2115 | + add v4.4s,v4.4s,v3.4s | ||
2116 | + add w3,w3,w12 | ||
2117 | + ror w11,w11,#6 | ||
2118 | + eor w14,w4,w5 | ||
2119 | + eor w15,w15,w4,ror#20 | ||
2120 | + add w3,w3,w11 | ||
2121 | + ldr w12,[x16] /* peek at the table word that follows the ones just used */ | ||
2122 | + and w13,w13,w14 | ||
2123 | + ror w15,w15,#2 | ||
2124 | + add w7,w7,w3 | ||
2125 | + eor w13,w13,w5 | ||
2126 | + st1 {v4.4s},[x17], #16 | ||
2127 | + cmp w12,#0 // check for K256 terminator | ||
2128 | + ldr w12,[sp,#0] /* first scratch word for the next 16 rounds */ | ||
2129 | + sub x17,x17,#64 /* rewind the scratch write pointer by the 4 x 16 bytes stored */ | ||
2130 | + bne .L_00_48 /* another pass unless the peeked table word was zero */ | ||
2131 | + | ||
2132 | + sub x16,x16,#256 // rewind x16 | ||
2133 | + cmp x1,x2 /* input exhausted? these flags stay live until the b.ne at the end */ | ||
2134 | + mov x17, #64 | ||
2135 | + csel x17, x17, xzr, eq /* 64 if x1 == x2, otherwise 0 */ | ||
2136 | + sub x1,x1,x17 // avoid SEGV: when x1 == x2, step back 64 bytes so the loads below stay inside the input | ||
2137 | + mov x17,sp /* x17 = scratch write pointer again */ | ||
2138 | + add w10,w10,w12 /* 16 more scalar rounds; no vector schedule update in this part */ | ||
2139 | + add w3,w3,w15 | ||
2140 | + and w12,w8,w7 | ||
2141 | + ld1 {v0.16b},[x1],#16 /* meanwhile load 16 input bytes ... */ | ||
2142 | + bic w15,w9,w7 | ||
2143 | + eor w11,w7,w7,ror#5 | ||
2144 | + ld1 {v4.4s},[x16],#16 /* ... and four table words */ | ||
2145 | + add w3,w3,w13 | ||
2146 | + orr w12,w12,w15 | ||
2147 | + eor w11,w11,w7,ror#19 | ||
2148 | + eor w15,w3,w3,ror#11 | ||
2149 | + rev32 v0.16b,v0.16b | ||
2150 | + add w10,w10,w12 | ||
2151 | + ror w11,w11,#6 | ||
2152 | + eor w13,w3,w4 | ||
2153 | + eor w15,w15,w3,ror#20 | ||
2154 | + add v4.4s,v4.4s,v0.4s /* table word + input word, as in the prologue */ | ||
2155 | + add w10,w10,w11 | ||
2156 | + ldr w12,[sp,#4] | ||
2157 | + and w14,w14,w13 | ||
2158 | + ror w15,w15,#2 | ||
2159 | + add w6,w6,w10 | ||
2160 | + eor w14,w14,w4 | ||
2161 | + add w9,w9,w12 | ||
2162 | + add w10,w10,w15 | ||
2163 | + and w12,w7,w6 | ||
2164 | + bic w15,w8,w6 | ||
2165 | + eor w11,w6,w6,ror#5 | ||
2166 | + add w10,w10,w14 | ||
2167 | + orr w12,w12,w15 | ||
2168 | + eor w11,w11,w6,ror#19 | ||
2169 | + eor w15,w10,w10,ror#11 | ||
2170 | + add w9,w9,w12 | ||
2171 | + ror w11,w11,#6 | ||
2172 | + eor w14,w10,w3 | ||
2173 | + eor w15,w15,w10,ror#20 | ||
2174 | + add w9,w9,w11 | ||
2175 | + ldr w12,[sp,#8] | ||
2176 | + and w13,w13,w14 | ||
2177 | + ror w15,w15,#2 | ||
2178 | + add w5,w5,w9 | ||
2179 | + eor w13,w13,w3 | ||
2180 | + add w8,w8,w12 | ||
2181 | + add w9,w9,w15 | ||
2182 | + and w12,w6,w5 | ||
2183 | + bic w15,w7,w5 | ||
2184 | + eor w11,w5,w5,ror#5 | ||
2185 | + add w9,w9,w13 | ||
2186 | + orr w12,w12,w15 | ||
2187 | + eor w11,w11,w5,ror#19 | ||
2188 | + eor w15,w9,w9,ror#11 | ||
2189 | + add w8,w8,w12 | ||
2190 | + ror w11,w11,#6 | ||
2191 | + eor w13,w9,w10 | ||
2192 | + eor w15,w15,w9,ror#20 | ||
2193 | + add w8,w8,w11 | ||
2194 | + ldr w12,[sp,#12] | ||
2195 | + and w14,w14,w13 | ||
2196 | + ror w15,w15,#2 | ||
2197 | + add w4,w4,w8 | ||
2198 | + eor w14,w14,w10 | ||
2199 | + add w7,w7,w12 | ||
2200 | + add w8,w8,w15 | ||
2201 | + and w12,w5,w4 | ||
2202 | + bic w15,w6,w4 | ||
2203 | + eor w11,w4,w4,ror#5 | ||
2204 | + add w8,w8,w14 | ||
2205 | + orr w12,w12,w15 | ||
2206 | + eor w11,w11,w4,ror#19 | ||
2207 | + eor w15,w8,w8,ror#11 | ||
2208 | + add w7,w7,w12 | ||
2209 | + ror w11,w11,#6 | ||
2210 | + eor w14,w8,w9 | ||
2211 | + eor w15,w15,w8,ror#20 | ||
2212 | + add w7,w7,w11 | ||
2213 | + ldr w12,[sp,#16] | ||
2214 | + and w13,w13,w14 | ||
2215 | + ror w15,w15,#2 | ||
2216 | + add w3,w3,w7 | ||
2217 | + eor w13,w13,w9 | ||
2218 | + st1 {v4.4s},[x17], #16 /* scratch words 0..3 were already consumed, so they can be overwritten */ | ||
2219 | + add w6,w6,w12 | ||
2220 | + add w7,w7,w15 | ||
2221 | + and w12,w4,w3 | ||
2222 | + ld1 {v1.16b},[x1],#16 | ||
2223 | + bic w15,w5,w3 | ||
2224 | + eor w11,w3,w3,ror#5 | ||
2225 | + ld1 {v4.4s},[x16],#16 | ||
2226 | + add w7,w7,w13 | ||
2227 | + orr w12,w12,w15 | ||
2228 | + eor w11,w11,w3,ror#19 | ||
2229 | + eor w15,w7,w7,ror#11 | ||
2230 | + rev32 v1.16b,v1.16b | ||
2231 | + add w6,w6,w12 | ||
2232 | + ror w11,w11,#6 | ||
2233 | + eor w13,w7,w8 | ||
2234 | + eor w15,w15,w7,ror#20 | ||
2235 | + add v4.4s,v4.4s,v1.4s | ||
2236 | + add w6,w6,w11 | ||
2237 | + ldr w12,[sp,#20] | ||
2238 | + and w14,w14,w13 | ||
2239 | + ror w15,w15,#2 | ||
2240 | + add w10,w10,w6 | ||
2241 | + eor w14,w14,w8 | ||
2242 | + add w5,w5,w12 | ||
2243 | + add w6,w6,w15 | ||
2244 | + and w12,w3,w10 | ||
2245 | + bic w15,w4,w10 | ||
2246 | + eor w11,w10,w10,ror#5 | ||
2247 | + add w6,w6,w14 | ||
2248 | + orr w12,w12,w15 | ||
2249 | + eor w11,w11,w10,ror#19 | ||
2250 | + eor w15,w6,w6,ror#11 | ||
2251 | + add w5,w5,w12 | ||
2252 | + ror w11,w11,#6 | ||
2253 | + eor w14,w6,w7 | ||
2254 | + eor w15,w15,w6,ror#20 | ||
2255 | + add w5,w5,w11 | ||
2256 | + ldr w12,[sp,#24] | ||
2257 | + and w13,w13,w14 | ||
2258 | + ror w15,w15,#2 | ||
2259 | + add w9,w9,w5 | ||
2260 | + eor w13,w13,w7 | ||
2261 | + add w4,w4,w12 | ||
2262 | + add w5,w5,w15 | ||
2263 | + and w12,w10,w9 | ||
2264 | + bic w15,w3,w9 | ||
2265 | + eor w11,w9,w9,ror#5 | ||
2266 | + add w5,w5,w13 | ||
2267 | + orr w12,w12,w15 | ||
2268 | + eor w11,w11,w9,ror#19 | ||
2269 | + eor w15,w5,w5,ror#11 | ||
2270 | + add w4,w4,w12 | ||
2271 | + ror w11,w11,#6 | ||
2272 | + eor w13,w5,w6 | ||
2273 | + eor w15,w15,w5,ror#20 | ||
2274 | + add w4,w4,w11 | ||
2275 | + ldr w12,[sp,#28] | ||
2276 | + and w14,w14,w13 | ||
2277 | + ror w15,w15,#2 | ||
2278 | + add w8,w8,w4 | ||
2279 | + eor w14,w14,w6 | ||
2280 | + add w3,w3,w12 | ||
2281 | + add w4,w4,w15 | ||
2282 | + and w12,w9,w8 | ||
2283 | + bic w15,w10,w8 | ||
2284 | + eor w11,w8,w8,ror#5 | ||
2285 | + add w4,w4,w14 | ||
2286 | + orr w12,w12,w15 | ||
2287 | + eor w11,w11,w8,ror#19 | ||
2288 | + eor w15,w4,w4,ror#11 | ||
2289 | + add w3,w3,w12 | ||
2290 | + ror w11,w11,#6 | ||
2291 | + eor w14,w4,w5 | ||
2292 | + eor w15,w15,w4,ror#20 | ||
2293 | + add w3,w3,w11 | ||
2294 | + ldr w12,[sp,#32] | ||
2295 | + and w13,w13,w14 | ||
2296 | + ror w15,w15,#2 | ||
2297 | + add w7,w7,w3 | ||
2298 | + eor w13,w13,w5 | ||
2299 | + st1 {v4.4s},[x17], #16 | ||
2300 | + add w10,w10,w12 | ||
2301 | + add w3,w3,w15 | ||
2302 | + and w12,w8,w7 | ||
2303 | + ld1 {v2.16b},[x1],#16 | ||
2304 | + bic w15,w9,w7 | ||
2305 | + eor w11,w7,w7,ror#5 | ||
2306 | + ld1 {v4.4s},[x16],#16 | ||
2307 | + add w3,w3,w13 | ||
2308 | + orr w12,w12,w15 | ||
2309 | + eor w11,w11,w7,ror#19 | ||
2310 | + eor w15,w3,w3,ror#11 | ||
2311 | + rev32 v2.16b,v2.16b | ||
2312 | + add w10,w10,w12 | ||
2313 | + ror w11,w11,#6 | ||
2314 | + eor w13,w3,w4 | ||
2315 | + eor w15,w15,w3,ror#20 | ||
2316 | + add v4.4s,v4.4s,v2.4s | ||
2317 | + add w10,w10,w11 | ||
2318 | + ldr w12,[sp,#36] | ||
2319 | + and w14,w14,w13 | ||
2320 | + ror w15,w15,#2 | ||
2321 | + add w6,w6,w10 | ||
2322 | + eor w14,w14,w4 | ||
2323 | + add w9,w9,w12 | ||
2324 | + add w10,w10,w15 | ||
2325 | + and w12,w7,w6 | ||
2326 | + bic w15,w8,w6 | ||
2327 | + eor w11,w6,w6,ror#5 | ||
2328 | + add w10,w10,w14 | ||
2329 | + orr w12,w12,w15 | ||
2330 | + eor w11,w11,w6,ror#19 | ||
2331 | + eor w15,w10,w10,ror#11 | ||
2332 | + add w9,w9,w12 | ||
2333 | + ror w11,w11,#6 | ||
2334 | + eor w14,w10,w3 | ||
2335 | + eor w15,w15,w10,ror#20 | ||
2336 | + add w9,w9,w11 | ||
2337 | + ldr w12,[sp,#40] | ||
2338 | + and w13,w13,w14 | ||
2339 | + ror w15,w15,#2 | ||
2340 | + add w5,w5,w9 | ||
2341 | + eor w13,w13,w3 | ||
2342 | + add w8,w8,w12 | ||
2343 | + add w9,w9,w15 | ||
2344 | + and w12,w6,w5 | ||
2345 | + bic w15,w7,w5 | ||
2346 | + eor w11,w5,w5,ror#5 | ||
2347 | + add w9,w9,w13 | ||
2348 | + orr w12,w12,w15 | ||
2349 | + eor w11,w11,w5,ror#19 | ||
2350 | + eor w15,w9,w9,ror#11 | ||
2351 | + add w8,w8,w12 | ||
2352 | + ror w11,w11,#6 | ||
2353 | + eor w13,w9,w10 | ||
2354 | + eor w15,w15,w9,ror#20 | ||
2355 | + add w8,w8,w11 | ||
2356 | + ldr w12,[sp,#44] | ||
2357 | + and w14,w14,w13 | ||
2358 | + ror w15,w15,#2 | ||
2359 | + add w4,w4,w8 | ||
2360 | + eor w14,w14,w10 | ||
2361 | + add w7,w7,w12 | ||
2362 | + add w8,w8,w15 | ||
2363 | + and w12,w5,w4 | ||
2364 | + bic w15,w6,w4 | ||
2365 | + eor w11,w4,w4,ror#5 | ||
2366 | + add w8,w8,w14 | ||
2367 | + orr w12,w12,w15 | ||
2368 | + eor w11,w11,w4,ror#19 | ||
2369 | + eor w15,w8,w8,ror#11 | ||
2370 | + add w7,w7,w12 | ||
2371 | + ror w11,w11,#6 | ||
2372 | + eor w14,w8,w9 | ||
2373 | + eor w15,w15,w8,ror#20 | ||
2374 | + add w7,w7,w11 | ||
2375 | + ldr w12,[sp,#48] | ||
2376 | + and w13,w13,w14 | ||
2377 | + ror w15,w15,#2 | ||
2378 | + add w3,w3,w7 | ||
2379 | + eor w13,w13,w9 | ||
2380 | + st1 {v4.4s},[x17], #16 | ||
2381 | + add w6,w6,w12 | ||
2382 | + add w7,w7,w15 | ||
2383 | + and w12,w4,w3 | ||
2384 | + ld1 {v3.16b},[x1],#16 | ||
2385 | + bic w15,w5,w3 | ||
2386 | + eor w11,w3,w3,ror#5 | ||
2387 | + ld1 {v4.4s},[x16],#16 | ||
2388 | + add w7,w7,w13 | ||
2389 | + orr w12,w12,w15 | ||
2390 | + eor w11,w11,w3,ror#19 | ||
2391 | + eor w15,w7,w7,ror#11 | ||
2392 | + rev32 v3.16b,v3.16b | ||
2393 | + add w6,w6,w12 | ||
2394 | + ror w11,w11,#6 | ||
2395 | + eor w13,w7,w8 | ||
2396 | + eor w15,w15,w7,ror#20 | ||
2397 | + add v4.4s,v4.4s,v3.4s | ||
2398 | + add w6,w6,w11 | ||
2399 | + ldr w12,[sp,#52] | ||
2400 | + and w14,w14,w13 | ||
2401 | + ror w15,w15,#2 | ||
2402 | + add w10,w10,w6 | ||
2403 | + eor w14,w14,w8 | ||
2404 | + add w5,w5,w12 | ||
2405 | + add w6,w6,w15 | ||
2406 | + and w12,w3,w10 | ||
2407 | + bic w15,w4,w10 | ||
2408 | + eor w11,w10,w10,ror#5 | ||
2409 | + add w6,w6,w14 | ||
2410 | + orr w12,w12,w15 | ||
2411 | + eor w11,w11,w10,ror#19 | ||
2412 | + eor w15,w6,w6,ror#11 | ||
2413 | + add w5,w5,w12 | ||
2414 | + ror w11,w11,#6 | ||
2415 | + eor w14,w6,w7 | ||
2416 | + eor w15,w15,w6,ror#20 | ||
2417 | + add w5,w5,w11 | ||
2418 | + ldr w12,[sp,#56] | ||
2419 | + and w13,w13,w14 | ||
2420 | + ror w15,w15,#2 | ||
2421 | + add w9,w9,w5 | ||
2422 | + eor w13,w13,w7 | ||
2423 | + add w4,w4,w12 | ||
2424 | + add w5,w5,w15 | ||
2425 | + and w12,w10,w9 | ||
2426 | + bic w15,w3,w9 | ||
2427 | + eor w11,w9,w9,ror#5 | ||
2428 | + add w5,w5,w13 | ||
2429 | + orr w12,w12,w15 | ||
2430 | + eor w11,w11,w9,ror#19 | ||
2431 | + eor w15,w5,w5,ror#11 | ||
2432 | + add w4,w4,w12 | ||
2433 | + ror w11,w11,#6 | ||
2434 | + eor w13,w5,w6 | ||
2435 | + eor w15,w15,w5,ror#20 | ||
2436 | + add w4,w4,w11 | ||
2437 | + ldr w12,[sp,#60] | ||
2438 | + and w14,w14,w13 | ||
2439 | + ror w15,w15,#2 | ||
2440 | + add w8,w8,w4 | ||
2441 | + eor w14,w14,w6 | ||
2442 | + add w3,w3,w12 | ||
2443 | + add w4,w4,w15 | ||
2444 | + and w12,w9,w8 | ||
2445 | + bic w15,w10,w8 | ||
2446 | + eor w11,w8,w8,ror#5 | ||
2447 | + add w4,w4,w14 | ||
2448 | + orr w12,w12,w15 | ||
2449 | + eor w11,w11,w8,ror#19 | ||
2450 | + eor w15,w4,w4,ror#11 | ||
2451 | + add w3,w3,w12 | ||
2452 | + ror w11,w11,#6 | ||
2453 | + eor w14,w4,w5 | ||
2454 | + eor w15,w15,w4,ror#20 | ||
2455 | + add w3,w3,w11 | ||
2456 | + and w13,w13,w14 | ||
2457 | + ror w15,w15,#2 | ||
2458 | + add w7,w7,w3 | ||
2459 | + eor w13,w13,w5 | ||
2460 | + st1 {v4.4s},[x17], #16 | ||
2461 | + add w3,w3,w15 // h+=Sigma0(a) from the past | ||
2462 | + ldp w11,w12,[x0,#0] /* reload the state saved in memory, pair by pair */ | ||
2463 | + add w3,w3,w13 // h+=Maj(a,b,c) from the past | ||
2464 | + ldp w13,w14,[x0,#8] | ||
2465 | + add w3,w3,w11 // accumulate | ||
2466 | + add w4,w4,w12 | ||
2467 | + ldp w11,w12,[x0,#16] | ||
2468 | + add w5,w5,w13 | ||
2469 | + add w6,w6,w14 | ||
2470 | + ldp w13,w14,[x0,#24] | ||
2471 | + add w7,w7,w11 | ||
2472 | + add w8,w8,w12 | ||
2473 | + ldr w12,[sp,#0] /* first scratch word for the next block */ | ||
2474 | + stp w3,w4,[x0,#0] /* write the updated state back */ | ||
2475 | + add w9,w9,w13 | ||
2476 | + mov w13,wzr /* reset the deferred terms, as at function entry */ | ||
2477 | + stp w5,w6,[x0,#8] | ||
2478 | + add w10,w10,w14 | ||
2479 | + stp w7,w8,[x0,#16] | ||
2480 | + eor w14,w4,w5 | ||
2481 | + stp w9,w10,[x0,#24] | ||
2482 | + mov w15,wzr | ||
2483 | + mov x17,sp | ||
2484 | + b.ne .L_00_48 /* still the flags of cmp x1,x2 above: loop while input remained */ | ||
2485 | + | ||
2486 | + ldr x29,[x29] /* restore the caller's frame pointer; x30 is never written in this function */ | ||
2487 | + add sp,sp,#16*4+16 /* release the 64-byte scratch and the 16-byte frame record */ | ||
2488 | + ret | ||
2489 | +.size sha256_block_neon,.-sha256_block_neon | ||
2490 | +#ifndef __KERNEL__ | ||
2491 | +.comm OPENSSL_armcap_P,4,4 | ||
2492 | +#endif | ||
2493 | diff --git a/arch/arm64/crypto/sha512-core.S b/arch/arm64/crypto/sha512-core.S | ||
2494 | new file mode 100644 | ||
2495 | index 000000000000..bd0f59f06c9d | ||
2496 | --- /dev/null | ||
2497 | +++ b/arch/arm64/crypto/sha512-core.S | ||
2498 | @@ -0,0 +1,1085 @@ | ||
2499 | +// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. | ||
2500 | +// | ||
2501 | +// Licensed under the OpenSSL license (the "License"). You may not use | ||
2502 | +// this file except in compliance with the License. You can obtain a copy | ||
2503 | +// in the file LICENSE in the source distribution or at | ||
2504 | +// https://www.openssl.org/source/license.html | ||
2505 | + | ||
2506 | +// ==================================================================== | ||
2507 | +// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
2508 | +// project. The module is, however, dual licensed under OpenSSL and | ||
2509 | +// CRYPTOGAMS licenses depending on where you obtain it. For further | ||
2510 | +// details see http://www.openssl.org/~appro/cryptogams/. | ||
2511 | +// | ||
2512 | +// Permission to use under GPLv2 terms is granted. | ||
2513 | +// ==================================================================== | ||
2514 | +// | ||
2515 | +// SHA256/512 for ARMv8. | ||
2516 | +// | ||
2517 | +// Performance in cycles per processed byte and improvement coefficient | ||
2518 | +// over code generated with "default" compiler: | ||
2519 | +// | ||
2520 | +// SHA256-hw SHA256(*) SHA512 | ||
2521 | +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) | ||
2522 | +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) | ||
2523 | +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) | ||
2524 | +// Denver 2.01 10.5 (+26%) 6.70 (+8%) | ||
2525 | + // X-Gene - 20.0 (+100%) 12.8 (+300%(***)) | ||
2526 | +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) | ||
2527 | +// | ||
2528 | +// (*) Software SHA256 results are of lesser relevance, presented | ||
2529 | +// mostly for informational purposes. | ||
2530 | +// (**) The result is a trade-off: it's possible to improve it by | ||
2531 | +// 10% (or by 1 cycle per round), but at the cost of 20% loss | ||
2532 | +// on Cortex-A53 (or by 4 cycles per round). | ||
2533 | +// (***) Super-impressive coefficients over gcc-generated code are | ||
2534 | +// indication of some compiler "pathology", most notably code | ||
2535 | + // generated with -mgeneral-regs-only is significantly faster | ||
2536 | +// and the gap is only 40-90%. | ||
2537 | +// | ||
2538 | +// October 2016. | ||
2539 | +// | ||
2540 | +// Originally it was reckoned that it makes no sense to implement NEON | ||
2541 | +// version of SHA256 for 64-bit processors. This is because performance | ||
2542 | +// improvement on most wide-spread Cortex-A5x processors was observed | ||
2543 | +// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was | ||
2544 | +// observed that 32-bit NEON SHA256 performs significantly better than | ||
2545 | + // 64-bit scalar version on *some* of the more recent processors. As a | ||
2546 | +// result 64-bit NEON version of SHA256 was added to provide best | ||
2547 | +// all-round performance. For example it executes ~30% faster on X-Gene | ||
2548 | +// and Mongoose. [For reference, NEON version of SHA512 is bound to | ||
2549 | +// deliver much less improvement, likely *negative* on Cortex-A5x. | ||
2550 | +// Which is why NEON support is limited to SHA256.] | ||
2551 | + | ||
2552 | +#ifndef __KERNEL__ | ||
2553 | +# include "arm_arch.h" | ||
2554 | +#endif | ||
2555 | + | ||
2556 | +.text | ||
2557 | + | ||
2558 | +.extern OPENSSL_armcap_P | ||
2559 | +.globl sha512_block_data_order | ||
2560 | +.type sha512_block_data_order,%function | ||
2561 | +.align 6 | ||
2562 | +sha512_block_data_order: | ||
2563 | + stp x29,x30,[sp,#-128]! | ||
2564 | + add x29,sp,#0 | ||
2565 | + | ||
2566 | + stp x19,x20,[sp,#16] | ||
2567 | + stp x21,x22,[sp,#32] | ||
2568 | + stp x23,x24,[sp,#48] | ||
2569 | + stp x25,x26,[sp,#64] | ||
2570 | + stp x27,x28,[sp,#80] | ||
2571 | + sub sp,sp,#4*8 | ||
2572 | + | ||
2573 | + ldp x20,x21,[x0] // load context | ||
2574 | + ldp x22,x23,[x0,#2*8] | ||
2575 | + ldp x24,x25,[x0,#4*8] | ||
2576 | + add x2,x1,x2,lsl#7 // end of input | ||
2577 | + ldp x26,x27,[x0,#6*8] | ||
2578 | + adr x30,.LK512 | ||
2579 | + stp x0,x2,[x29,#96] | ||
2580 | + | ||
2581 | +.Loop: | ||
2582 | + ldp x3,x4,[x1],#2*8 | ||
2583 | + ldr x19,[x30],#8 // *K++ | ||
2584 | + eor x28,x21,x22 // magic seed | ||
2585 | + str x1,[x29,#112] | ||
2586 | +#ifndef __AARCH64EB__ | ||
2587 | + rev x3,x3 // 0 | ||
2588 | +#endif | ||
2589 | + ror x16,x24,#14 | ||
2590 | + add x27,x27,x19 // h+=K[i] | ||
2591 | + eor x6,x24,x24,ror#23 | ||
2592 | + and x17,x25,x24 | ||
2593 | + bic x19,x26,x24 | ||
2594 | + add x27,x27,x3 // h+=X[i] | ||
2595 | + orr x17,x17,x19 // Ch(e,f,g) | ||
2596 | + eor x19,x20,x21 // a^b, b^c in next round | ||
2597 | + eor x16,x16,x6,ror#18 // Sigma1(e) | ||
2598 | + ror x6,x20,#28 | ||
2599 | + add x27,x27,x17 // h+=Ch(e,f,g) | ||
2600 | + eor x17,x20,x20,ror#5 | ||
2601 | + add x27,x27,x16 // h+=Sigma1(e) | ||
2602 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
2603 | + add x23,x23,x27 // d+=h | ||
2604 | + eor x28,x28,x21 // Maj(a,b,c) | ||
2605 | + eor x17,x6,x17,ror#34 // Sigma0(a) | ||
2606 | + add x27,x27,x28 // h+=Maj(a,b,c) | ||
2607 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
2608 | + //add x27,x27,x17 // h+=Sigma0(a) | ||
2609 | +#ifndef __AARCH64EB__ | ||
2610 | + rev x4,x4 // 1 | ||
2611 | +#endif | ||
2612 | + ldp x5,x6,[x1],#2*8 | ||
2613 | + add x27,x27,x17 // h+=Sigma0(a) | ||
2614 | + ror x16,x23,#14 | ||
2615 | + add x26,x26,x28 // h+=K[i] | ||
2616 | + eor x7,x23,x23,ror#23 | ||
2617 | + and x17,x24,x23 | ||
2618 | + bic x28,x25,x23 | ||
2619 | + add x26,x26,x4 // h+=X[i] | ||
2620 | + orr x17,x17,x28 // Ch(e,f,g) | ||
2621 | + eor x28,x27,x20 // a^b, b^c in next round | ||
2622 | + eor x16,x16,x7,ror#18 // Sigma1(e) | ||
2623 | + ror x7,x27,#28 | ||
2624 | + add x26,x26,x17 // h+=Ch(e,f,g) | ||
2625 | + eor x17,x27,x27,ror#5 | ||
2626 | + add x26,x26,x16 // h+=Sigma1(e) | ||
2627 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
2628 | + add x22,x22,x26 // d+=h | ||
2629 | + eor x19,x19,x20 // Maj(a,b,c) | ||
2630 | + eor x17,x7,x17,ror#34 // Sigma0(a) | ||
2631 | + add x26,x26,x19 // h+=Maj(a,b,c) | ||
2632 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
2633 | + //add x26,x26,x17 // h+=Sigma0(a) | ||
2634 | +#ifndef __AARCH64EB__ | ||
2635 | + rev x5,x5 // 2 | ||
2636 | +#endif | ||
2637 | + add x26,x26,x17 // h+=Sigma0(a) | ||
2638 | + ror x16,x22,#14 | ||
2639 | + add x25,x25,x19 // h+=K[i] | ||
2640 | + eor x8,x22,x22,ror#23 | ||
2641 | + and x17,x23,x22 | ||
2642 | + bic x19,x24,x22 | ||
2643 | + add x25,x25,x5 // h+=X[i] | ||
2644 | + orr x17,x17,x19 // Ch(e,f,g) | ||
2645 | + eor x19,x26,x27 // a^b, b^c in next round | ||
2646 | + eor x16,x16,x8,ror#18 // Sigma1(e) | ||
2647 | + ror x8,x26,#28 | ||
2648 | + add x25,x25,x17 // h+=Ch(e,f,g) | ||
2649 | + eor x17,x26,x26,ror#5 | ||
2650 | + add x25,x25,x16 // h+=Sigma1(e) | ||
2651 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
2652 | + add x21,x21,x25 // d+=h | ||
2653 | + eor x28,x28,x27 // Maj(a,b,c) | ||
2654 | + eor x17,x8,x17,ror#34 // Sigma0(a) | ||
2655 | + add x25,x25,x28 // h+=Maj(a,b,c) | ||
2656 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
2657 | + //add x25,x25,x17 // h+=Sigma0(a) | ||
2658 | +#ifndef __AARCH64EB__ | ||
2659 | + rev x6,x6 // 3 | ||
2660 | +#endif | ||
2661 | + ldp x7,x8,[x1],#2*8 | ||
2662 | + add x25,x25,x17 // h+=Sigma0(a) | ||
2663 | + ror x16,x21,#14 | ||
2664 | + add x24,x24,x28 // h+=K[i] | ||
2665 | + eor x9,x21,x21,ror#23 | ||
2666 | + and x17,x22,x21 | ||
2667 | + bic x28,x23,x21 | ||
2668 | + add x24,x24,x6 // h+=X[i] | ||
2669 | + orr x17,x17,x28 // Ch(e,f,g) | ||
2670 | + eor x28,x25,x26 // a^b, b^c in next round | ||
2671 | + eor x16,x16,x9,ror#18 // Sigma1(e) | ||
2672 | + ror x9,x25,#28 | ||
2673 | + add x24,x24,x17 // h+=Ch(e,f,g) | ||
2674 | + eor x17,x25,x25,ror#5 | ||
2675 | + add x24,x24,x16 // h+=Sigma1(e) | ||
2676 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
2677 | + add x20,x20,x24 // d+=h | ||
2678 | + eor x19,x19,x26 // Maj(a,b,c) | ||
2679 | + eor x17,x9,x17,ror#34 // Sigma0(a) | ||
2680 | + add x24,x24,x19 // h+=Maj(a,b,c) | ||
2681 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
2682 | + //add x24,x24,x17 // h+=Sigma0(a) | ||
2683 | +#ifndef __AARCH64EB__ | ||
2684 | + rev x7,x7 // 4 | ||
2685 | +#endif | ||
2686 | + add x24,x24,x17 // h+=Sigma0(a) | ||
2687 | + ror x16,x20,#14 | ||
2688 | + add x23,x23,x19 // h+=K[i] | ||
2689 | + eor x10,x20,x20,ror#23 | ||
2690 | + and x17,x21,x20 | ||
2691 | + bic x19,x22,x20 | ||
2692 | + add x23,x23,x7 // h+=X[i] | ||
2693 | + orr x17,x17,x19 // Ch(e,f,g) | ||
2694 | + eor x19,x24,x25 // a^b, b^c in next round | ||
2695 | + eor x16,x16,x10,ror#18 // Sigma1(e) | ||
2696 | + ror x10,x24,#28 | ||
2697 | + add x23,x23,x17 // h+=Ch(e,f,g) | ||
2698 | + eor x17,x24,x24,ror#5 | ||
2699 | + add x23,x23,x16 // h+=Sigma1(e) | ||
2700 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
2701 | + add x27,x27,x23 // d+=h | ||
2702 | + eor x28,x28,x25 // Maj(a,b,c) | ||
2703 | + eor x17,x10,x17,ror#34 // Sigma0(a) | ||
2704 | + add x23,x23,x28 // h+=Maj(a,b,c) | ||
2705 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
2706 | + //add x23,x23,x17 // h+=Sigma0(a) | ||
2707 | +#ifndef __AARCH64EB__ | ||
2708 | + rev x8,x8 // 5 | ||
2709 | +#endif | ||
2710 | + ldp x9,x10,[x1],#2*8 | ||
2711 | + add x23,x23,x17 // h+=Sigma0(a) | ||
2712 | + ror x16,x27,#14 | ||
2713 | + add x22,x22,x28 // h+=K[i] | ||
2714 | + eor x11,x27,x27,ror#23 | ||
2715 | + and x17,x20,x27 | ||
2716 | + bic x28,x21,x27 | ||
2717 | + add x22,x22,x8 // h+=X[i] | ||
2718 | + orr x17,x17,x28 // Ch(e,f,g) | ||
2719 | + eor x28,x23,x24 // a^b, b^c in next round | ||
2720 | + eor x16,x16,x11,ror#18 // Sigma1(e) | ||
2721 | + ror x11,x23,#28 | ||
2722 | + add x22,x22,x17 // h+=Ch(e,f,g) | ||
2723 | + eor x17,x23,x23,ror#5 | ||
2724 | + add x22,x22,x16 // h+=Sigma1(e) | ||
2725 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
2726 | + add x26,x26,x22 // d+=h | ||
2727 | + eor x19,x19,x24 // Maj(a,b,c) | ||
2728 | + eor x17,x11,x17,ror#34 // Sigma0(a) | ||
2729 | + add x22,x22,x19 // h+=Maj(a,b,c) | ||
2730 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
2731 | + //add x22,x22,x17 // h+=Sigma0(a) | ||
2732 | +#ifndef __AARCH64EB__ | ||
2733 | + rev x9,x9 // 6 | ||
2734 | +#endif | ||
2735 | + add x22,x22,x17 // h+=Sigma0(a) | ||
2736 | + ror x16,x26,#14 | ||
2737 | + add x21,x21,x19 // h+=K[i] | ||
2738 | + eor x12,x26,x26,ror#23 | ||
2739 | + and x17,x27,x26 | ||
2740 | + bic x19,x20,x26 | ||
2741 | + add x21,x21,x9 // h+=X[i] | ||
2742 | + orr x17,x17,x19 // Ch(e,f,g) | ||
2743 | + eor x19,x22,x23 // a^b, b^c in next round | ||
2744 | + eor x16,x16,x12,ror#18 // Sigma1(e) | ||
2745 | + ror x12,x22,#28 | ||
2746 | + add x21,x21,x17 // h+=Ch(e,f,g) | ||
2747 | + eor x17,x22,x22,ror#5 | ||
2748 | + add x21,x21,x16 // h+=Sigma1(e) | ||
2749 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
2750 | + add x25,x25,x21 // d+=h | ||
2751 | + eor x28,x28,x23 // Maj(a,b,c) | ||
2752 | + eor x17,x12,x17,ror#34 // Sigma0(a) | ||
2753 | + add x21,x21,x28 // h+=Maj(a,b,c) | ||
2754 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
2755 | + //add x21,x21,x17 // h+=Sigma0(a) | ||
2756 | +#ifndef __AARCH64EB__ | ||
2757 | + rev x10,x10 // 7 | ||
2758 | +#endif | ||
2759 | + ldp x11,x12,[x1],#2*8 | ||
2760 | + add x21,x21,x17 // h+=Sigma0(a) | ||
2761 | + ror x16,x25,#14 | ||
2762 | + add x20,x20,x28 // h+=K[i] | ||
2763 | + eor x13,x25,x25,ror#23 | ||
2764 | + and x17,x26,x25 | ||
2765 | + bic x28,x27,x25 | ||
2766 | + add x20,x20,x10 // h+=X[i] | ||
2767 | + orr x17,x17,x28 // Ch(e,f,g) | ||
2768 | + eor x28,x21,x22 // a^b, b^c in next round | ||
2769 | + eor x16,x16,x13,ror#18 // Sigma1(e) | ||
2770 | + ror x13,x21,#28 | ||
2771 | + add x20,x20,x17 // h+=Ch(e,f,g) | ||
2772 | + eor x17,x21,x21,ror#5 | ||
2773 | + add x20,x20,x16 // h+=Sigma1(e) | ||
2774 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
2775 | + add x24,x24,x20 // d+=h | ||
2776 | + eor x19,x19,x22 // Maj(a,b,c) | ||
2777 | + eor x17,x13,x17,ror#34 // Sigma0(a) | ||
2778 | + add x20,x20,x19 // h+=Maj(a,b,c) | ||
2779 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
2780 | + //add x20,x20,x17 // h+=Sigma0(a) | ||
2781 | +#ifndef __AARCH64EB__ | ||
2782 | + rev x11,x11 // 8 | ||
2783 | +#endif | ||
2784 | + add x20,x20,x17 // h+=Sigma0(a) | ||
2785 | + ror x16,x24,#14 | ||
2786 | + add x27,x27,x19 // h+=K[i] | ||
2787 | + eor x14,x24,x24,ror#23 | ||
2788 | + and x17,x25,x24 | ||
2789 | + bic x19,x26,x24 | ||
2790 | + add x27,x27,x11 // h+=X[i] | ||
2791 | + orr x17,x17,x19 // Ch(e,f,g) | ||
2792 | + eor x19,x20,x21 // a^b, b^c in next round | ||
2793 | + eor x16,x16,x14,ror#18 // Sigma1(e) | ||
2794 | + ror x14,x20,#28 | ||
2795 | + add x27,x27,x17 // h+=Ch(e,f,g) | ||
2796 | + eor x17,x20,x20,ror#5 | ||
2797 | + add x27,x27,x16 // h+=Sigma1(e) | ||
2798 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
2799 | + add x23,x23,x27 // d+=h | ||
2800 | + eor x28,x28,x21 // Maj(a,b,c) | ||
2801 | + eor x17,x14,x17,ror#34 // Sigma0(a) | ||
2802 | + add x27,x27,x28 // h+=Maj(a,b,c) | ||
2803 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
2804 | + //add x27,x27,x17 // h+=Sigma0(a) | ||
2805 | +#ifndef __AARCH64EB__ | ||
2806 | + rev x12,x12 // 9 | ||
2807 | +#endif | ||
2808 | + ldp x13,x14,[x1],#2*8 | ||
2809 | + add x27,x27,x17 // h+=Sigma0(a) | ||
2810 | + ror x16,x23,#14 | ||
2811 | + add x26,x26,x28 // h+=K[i] | ||
2812 | + eor x15,x23,x23,ror#23 | ||
2813 | + and x17,x24,x23 | ||
2814 | + bic x28,x25,x23 | ||
2815 | + add x26,x26,x12 // h+=X[i] | ||
2816 | + orr x17,x17,x28 // Ch(e,f,g) | ||
2817 | + eor x28,x27,x20 // a^b, b^c in next round | ||
2818 | + eor x16,x16,x15,ror#18 // Sigma1(e) | ||
2819 | + ror x15,x27,#28 | ||
2820 | + add x26,x26,x17 // h+=Ch(e,f,g) | ||
2821 | + eor x17,x27,x27,ror#5 | ||
2822 | + add x26,x26,x16 // h+=Sigma1(e) | ||
2823 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
2824 | + add x22,x22,x26 // d+=h | ||
2825 | + eor x19,x19,x20 // Maj(a,b,c) | ||
2826 | + eor x17,x15,x17,ror#34 // Sigma0(a) | ||
2827 | + add x26,x26,x19 // h+=Maj(a,b,c) | ||
2828 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
2829 | + //add x26,x26,x17 // h+=Sigma0(a) | ||
2830 | +#ifndef __AARCH64EB__ | ||
2831 | + rev x13,x13 // 10 | ||
2832 | +#endif | ||
2833 | + add x26,x26,x17 // h+=Sigma0(a) | ||
2834 | + ror x16,x22,#14 | ||
2835 | + add x25,x25,x19 // h+=K[i] | ||
2836 | + eor x0,x22,x22,ror#23 | ||
2837 | + and x17,x23,x22 | ||
2838 | + bic x19,x24,x22 | ||
2839 | + add x25,x25,x13 // h+=X[i] | ||
2840 | + orr x17,x17,x19 // Ch(e,f,g) | ||
2841 | + eor x19,x26,x27 // a^b, b^c in next round | ||
2842 | + eor x16,x16,x0,ror#18 // Sigma1(e) | ||
2843 | + ror x0,x26,#28 | ||
2844 | + add x25,x25,x17 // h+=Ch(e,f,g) | ||
2845 | + eor x17,x26,x26,ror#5 | ||
2846 | + add x25,x25,x16 // h+=Sigma1(e) | ||
2847 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
2848 | + add x21,x21,x25 // d+=h | ||
2849 | + eor x28,x28,x27 // Maj(a,b,c) | ||
2850 | + eor x17,x0,x17,ror#34 // Sigma0(a) | ||
2851 | + add x25,x25,x28 // h+=Maj(a,b,c) | ||
2852 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
2853 | + //add x25,x25,x17 // h+=Sigma0(a) | ||
2854 | +#ifndef __AARCH64EB__ | ||
2855 | + rev x14,x14 // 11 | ||
2856 | +#endif | ||
2857 | + ldp x15,x0,[x1],#2*8 | ||
2858 | + add x25,x25,x17 // h+=Sigma0(a) | ||
2859 | + str x6,[sp,#24] | ||
2860 | + ror x16,x21,#14 | ||
2861 | + add x24,x24,x28 // h+=K[i] | ||
2862 | + eor x6,x21,x21,ror#23 | ||
2863 | + and x17,x22,x21 | ||
2864 | + bic x28,x23,x21 | ||
2865 | + add x24,x24,x14 // h+=X[i] | ||
2866 | + orr x17,x17,x28 // Ch(e,f,g) | ||
2867 | + eor x28,x25,x26 // a^b, b^c in next round | ||
2868 | + eor x16,x16,x6,ror#18 // Sigma1(e) | ||
2869 | + ror x6,x25,#28 | ||
2870 | + add x24,x24,x17 // h+=Ch(e,f,g) | ||
2871 | + eor x17,x25,x25,ror#5 | ||
2872 | + add x24,x24,x16 // h+=Sigma1(e) | ||
2873 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
2874 | + add x20,x20,x24 // d+=h | ||
2875 | + eor x19,x19,x26 // Maj(a,b,c) | ||
2876 | + eor x17,x6,x17,ror#34 // Sigma0(a) | ||
2877 | + add x24,x24,x19 // h+=Maj(a,b,c) | ||
2878 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
2879 | + //add x24,x24,x17 // h+=Sigma0(a) | ||
2880 | +#ifndef __AARCH64EB__ | ||
2881 | + rev x15,x15 // 12 | ||
2882 | +#endif | ||
2883 | + add x24,x24,x17 // h+=Sigma0(a) | ||
2884 | + str x7,[sp,#0] | ||
2885 | + ror x16,x20,#14 | ||
2886 | + add x23,x23,x19 // h+=K[i] | ||
2887 | + eor x7,x20,x20,ror#23 | ||
2888 | + and x17,x21,x20 | ||
2889 | + bic x19,x22,x20 | ||
2890 | + add x23,x23,x15 // h+=X[i] | ||
2891 | + orr x17,x17,x19 // Ch(e,f,g) | ||
2892 | + eor x19,x24,x25 // a^b, b^c in next round | ||
2893 | + eor x16,x16,x7,ror#18 // Sigma1(e) | ||
2894 | + ror x7,x24,#28 | ||
2895 | + add x23,x23,x17 // h+=Ch(e,f,g) | ||
2896 | + eor x17,x24,x24,ror#5 | ||
2897 | + add x23,x23,x16 // h+=Sigma1(e) | ||
2898 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
2899 | + add x27,x27,x23 // d+=h | ||
2900 | + eor x28,x28,x25 // Maj(a,b,c) | ||
2901 | + eor x17,x7,x17,ror#34 // Sigma0(a) | ||
2902 | + add x23,x23,x28 // h+=Maj(a,b,c) | ||
2903 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
2904 | + //add x23,x23,x17 // h+=Sigma0(a) | ||
2905 | +#ifndef __AARCH64EB__ | ||
2906 | + rev x0,x0 // 13 | ||
2907 | +#endif | ||
2908 | + ldp x1,x2,[x1] | ||
2909 | + add x23,x23,x17 // h+=Sigma0(a) | ||
2910 | + str x8,[sp,#8] | ||
2911 | + ror x16,x27,#14 | ||
2912 | + add x22,x22,x28 // h+=K[i] | ||
2913 | + eor x8,x27,x27,ror#23 | ||
2914 | + and x17,x20,x27 | ||
2915 | + bic x28,x21,x27 | ||
2916 | + add x22,x22,x0 // h+=X[i] | ||
2917 | + orr x17,x17,x28 // Ch(e,f,g) | ||
2918 | + eor x28,x23,x24 // a^b, b^c in next round | ||
2919 | + eor x16,x16,x8,ror#18 // Sigma1(e) | ||
2920 | + ror x8,x23,#28 | ||
2921 | + add x22,x22,x17 // h+=Ch(e,f,g) | ||
2922 | + eor x17,x23,x23,ror#5 | ||
2923 | + add x22,x22,x16 // h+=Sigma1(e) | ||
2924 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
2925 | + add x26,x26,x22 // d+=h | ||
2926 | + eor x19,x19,x24 // Maj(a,b,c) | ||
2927 | + eor x17,x8,x17,ror#34 // Sigma0(a) | ||
2928 | + add x22,x22,x19 // h+=Maj(a,b,c) | ||
2929 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
2930 | + //add x22,x22,x17 // h+=Sigma0(a) | ||
2931 | +#ifndef __AARCH64EB__ | ||
2932 | + rev x1,x1 // 14 | ||
2933 | +#endif | ||
2934 | + ldr x6,[sp,#24] | ||
2935 | + add x22,x22,x17 // h+=Sigma0(a) | ||
2936 | + str x9,[sp,#16] | ||
2937 | + ror x16,x26,#14 | ||
2938 | + add x21,x21,x19 // h+=K[i] | ||
2939 | + eor x9,x26,x26,ror#23 | ||
2940 | + and x17,x27,x26 | ||
2941 | + bic x19,x20,x26 | ||
2942 | + add x21,x21,x1 // h+=X[i] | ||
2943 | + orr x17,x17,x19 // Ch(e,f,g) | ||
2944 | + eor x19,x22,x23 // a^b, b^c in next round | ||
2945 | + eor x16,x16,x9,ror#18 // Sigma1(e) | ||
2946 | + ror x9,x22,#28 | ||
2947 | + add x21,x21,x17 // h+=Ch(e,f,g) | ||
2948 | + eor x17,x22,x22,ror#5 | ||
2949 | + add x21,x21,x16 // h+=Sigma1(e) | ||
2950 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
2951 | + add x25,x25,x21 // d+=h | ||
2952 | + eor x28,x28,x23 // Maj(a,b,c) | ||
2953 | + eor x17,x9,x17,ror#34 // Sigma0(a) | ||
2954 | + add x21,x21,x28 // h+=Maj(a,b,c) | ||
2955 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
2956 | + //add x21,x21,x17 // h+=Sigma0(a) | ||
2957 | +#ifndef __AARCH64EB__ | ||
2958 | + rev x2,x2 // 15 | ||
2959 | +#endif | ||
2960 | + ldr x7,[sp,#0] | ||
2961 | + add x21,x21,x17 // h+=Sigma0(a) | ||
2962 | + str x10,[sp,#24] | ||
2963 | + ror x16,x25,#14 | ||
2964 | + add x20,x20,x28 // h+=K[i] | ||
2965 | + ror x9,x4,#1 | ||
2966 | + and x17,x26,x25 | ||
2967 | + ror x8,x1,#19 | ||
2968 | + bic x28,x27,x25 | ||
2969 | + ror x10,x21,#28 | ||
2970 | + add x20,x20,x2 // h+=X[i] | ||
2971 | + eor x16,x16,x25,ror#18 | ||
2972 | + eor x9,x9,x4,ror#8 | ||
2973 | + orr x17,x17,x28 // Ch(e,f,g) | ||
2974 | + eor x28,x21,x22 // a^b, b^c in next round | ||
2975 | + eor x16,x16,x25,ror#41 // Sigma1(e) | ||
2976 | + eor x10,x10,x21,ror#34 | ||
2977 | + add x20,x20,x17 // h+=Ch(e,f,g) | ||
2978 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
2979 | + eor x8,x8,x1,ror#61 | ||
2980 | + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) | ||
2981 | + add x20,x20,x16 // h+=Sigma1(e) | ||
2982 | + eor x19,x19,x22 // Maj(a,b,c) | ||
2983 | + eor x17,x10,x21,ror#39 // Sigma0(a) | ||
2984 | + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) | ||
2985 | + add x3,x3,x12 | ||
2986 | + add x24,x24,x20 // d+=h | ||
2987 | + add x20,x20,x19 // h+=Maj(a,b,c) | ||
2988 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
2989 | + add x3,x3,x9 | ||
2990 | + add x20,x20,x17 // h+=Sigma0(a) | ||
2991 | + add x3,x3,x8 | ||
2992 | +.Loop_16_xx: | ||
2993 | + ldr x8,[sp,#8] | ||
2994 | + str x11,[sp,#0] | ||
2995 | + ror x16,x24,#14 | ||
2996 | + add x27,x27,x19 // h+=K[i] | ||
2997 | + ror x10,x5,#1 | ||
2998 | + and x17,x25,x24 | ||
2999 | + ror x9,x2,#19 | ||
3000 | + bic x19,x26,x24 | ||
3001 | + ror x11,x20,#28 | ||
3002 | + add x27,x27,x3 // h+=X[i] | ||
3003 | + eor x16,x16,x24,ror#18 | ||
3004 | + eor x10,x10,x5,ror#8 | ||
3005 | + orr x17,x17,x19 // Ch(e,f,g) | ||
3006 | + eor x19,x20,x21 // a^b, b^c in next round | ||
3007 | + eor x16,x16,x24,ror#41 // Sigma1(e) | ||
3008 | + eor x11,x11,x20,ror#34 | ||
3009 | + add x27,x27,x17 // h+=Ch(e,f,g) | ||
3010 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
3011 | + eor x9,x9,x2,ror#61 | ||
3012 | + eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) | ||
3013 | + add x27,x27,x16 // h+=Sigma1(e) | ||
3014 | + eor x28,x28,x21 // Maj(a,b,c) | ||
3015 | + eor x17,x11,x20,ror#39 // Sigma0(a) | ||
3016 | + eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) | ||
3017 | + add x4,x4,x13 | ||
3018 | + add x23,x23,x27 // d+=h | ||
3019 | + add x27,x27,x28 // h+=Maj(a,b,c) | ||
3020 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
3021 | + add x4,x4,x10 | ||
3022 | + add x27,x27,x17 // h+=Sigma0(a) | ||
3023 | + add x4,x4,x9 | ||
3024 | + ldr x9,[sp,#16] | ||
3025 | + str x12,[sp,#8] | ||
3026 | + ror x16,x23,#14 | ||
3027 | + add x26,x26,x28 // h+=K[i] | ||
3028 | + ror x11,x6,#1 | ||
3029 | + and x17,x24,x23 | ||
3030 | + ror x10,x3,#19 | ||
3031 | + bic x28,x25,x23 | ||
3032 | + ror x12,x27,#28 | ||
3033 | + add x26,x26,x4 // h+=X[i] | ||
3034 | + eor x16,x16,x23,ror#18 | ||
3035 | + eor x11,x11,x6,ror#8 | ||
3036 | + orr x17,x17,x28 // Ch(e,f,g) | ||
3037 | + eor x28,x27,x20 // a^b, b^c in next round | ||
3038 | + eor x16,x16,x23,ror#41 // Sigma1(e) | ||
3039 | + eor x12,x12,x27,ror#34 | ||
3040 | + add x26,x26,x17 // h+=Ch(e,f,g) | ||
3041 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
3042 | + eor x10,x10,x3,ror#61 | ||
3043 | + eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) | ||
3044 | + add x26,x26,x16 // h+=Sigma1(e) | ||
3045 | + eor x19,x19,x20 // Maj(a,b,c) | ||
3046 | + eor x17,x12,x27,ror#39 // Sigma0(a) | ||
3047 | + eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) | ||
3048 | + add x5,x5,x14 | ||
3049 | + add x22,x22,x26 // d+=h | ||
3050 | + add x26,x26,x19 // h+=Maj(a,b,c) | ||
3051 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
3052 | + add x5,x5,x11 | ||
3053 | + add x26,x26,x17 // h+=Sigma0(a) | ||
3054 | + add x5,x5,x10 | ||
3055 | + ldr x10,[sp,#24] | ||
3056 | + str x13,[sp,#16] | ||
3057 | + ror x16,x22,#14 | ||
3058 | + add x25,x25,x19 // h+=K[i] | ||
3059 | + ror x12,x7,#1 | ||
3060 | + and x17,x23,x22 | ||
3061 | + ror x11,x4,#19 | ||
3062 | + bic x19,x24,x22 | ||
3063 | + ror x13,x26,#28 | ||
3064 | + add x25,x25,x5 // h+=X[i] | ||
3065 | + eor x16,x16,x22,ror#18 | ||
3066 | + eor x12,x12,x7,ror#8 | ||
3067 | + orr x17,x17,x19 // Ch(e,f,g) | ||
3068 | + eor x19,x26,x27 // a^b, b^c in next round | ||
3069 | + eor x16,x16,x22,ror#41 // Sigma1(e) | ||
3070 | + eor x13,x13,x26,ror#34 | ||
3071 | + add x25,x25,x17 // h+=Ch(e,f,g) | ||
3072 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
3073 | + eor x11,x11,x4,ror#61 | ||
3074 | + eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) | ||
3075 | + add x25,x25,x16 // h+=Sigma1(e) | ||
3076 | + eor x28,x28,x27 // Maj(a,b,c) | ||
3077 | + eor x17,x13,x26,ror#39 // Sigma0(a) | ||
3078 | + eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) | ||
3079 | + add x6,x6,x15 | ||
3080 | + add x21,x21,x25 // d+=h | ||
3081 | + add x25,x25,x28 // h+=Maj(a,b,c) | ||
3082 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
3083 | + add x6,x6,x12 | ||
3084 | + add x25,x25,x17 // h+=Sigma0(a) | ||
3085 | + add x6,x6,x11 | ||
3086 | + ldr x11,[sp,#0] | ||
3087 | + str x14,[sp,#24] | ||
3088 | + ror x16,x21,#14 | ||
3089 | + add x24,x24,x28 // h+=K[i] | ||
3090 | + ror x13,x8,#1 | ||
3091 | + and x17,x22,x21 | ||
3092 | + ror x12,x5,#19 | ||
3093 | + bic x28,x23,x21 | ||
3094 | + ror x14,x25,#28 | ||
3095 | + add x24,x24,x6 // h+=X[i] | ||
3096 | + eor x16,x16,x21,ror#18 | ||
3097 | + eor x13,x13,x8,ror#8 | ||
3098 | + orr x17,x17,x28 // Ch(e,f,g) | ||
3099 | + eor x28,x25,x26 // a^b, b^c in next round | ||
3100 | + eor x16,x16,x21,ror#41 // Sigma1(e) | ||
3101 | + eor x14,x14,x25,ror#34 | ||
3102 | + add x24,x24,x17 // h+=Ch(e,f,g) | ||
3103 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
3104 | + eor x12,x12,x5,ror#61 | ||
3105 | + eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) | ||
3106 | + add x24,x24,x16 // h+=Sigma1(e) | ||
3107 | + eor x19,x19,x26 // Maj(a,b,c) | ||
3108 | + eor x17,x14,x25,ror#39 // Sigma0(a) | ||
3109 | + eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) | ||
3110 | + add x7,x7,x0 | ||
3111 | + add x20,x20,x24 // d+=h | ||
3112 | + add x24,x24,x19 // h+=Maj(a,b,c) | ||
3113 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
3114 | + add x7,x7,x13 | ||
3115 | + add x24,x24,x17 // h+=Sigma0(a) | ||
3116 | + add x7,x7,x12 | ||
3117 | + ldr x12,[sp,#8] | ||
3118 | + str x15,[sp,#0] | ||
3119 | + ror x16,x20,#14 | ||
3120 | + add x23,x23,x19 // h+=K[i] | ||
3121 | + ror x14,x9,#1 | ||
3122 | + and x17,x21,x20 | ||
3123 | + ror x13,x6,#19 | ||
3124 | + bic x19,x22,x20 | ||
3125 | + ror x15,x24,#28 | ||
3126 | + add x23,x23,x7 // h+=X[i] | ||
3127 | + eor x16,x16,x20,ror#18 | ||
3128 | + eor x14,x14,x9,ror#8 | ||
3129 | + orr x17,x17,x19 // Ch(e,f,g) | ||
3130 | + eor x19,x24,x25 // a^b, b^c in next round | ||
3131 | + eor x16,x16,x20,ror#41 // Sigma1(e) | ||
3132 | + eor x15,x15,x24,ror#34 | ||
3133 | + add x23,x23,x17 // h+=Ch(e,f,g) | ||
3134 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
3135 | + eor x13,x13,x6,ror#61 | ||
3136 | + eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) | ||
3137 | + add x23,x23,x16 // h+=Sigma1(e) | ||
3138 | + eor x28,x28,x25 // Maj(a,b,c) | ||
3139 | + eor x17,x15,x24,ror#39 // Sigma0(a) | ||
3140 | + eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) | ||
3141 | + add x8,x8,x1 | ||
3142 | + add x27,x27,x23 // d+=h | ||
3143 | + add x23,x23,x28 // h+=Maj(a,b,c) | ||
3144 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
3145 | + add x8,x8,x14 | ||
3146 | + add x23,x23,x17 // h+=Sigma0(a) | ||
3147 | + add x8,x8,x13 | ||
3148 | + ldr x13,[sp,#16] | ||
3149 | + str x0,[sp,#8] | ||
3150 | + ror x16,x27,#14 | ||
3151 | + add x22,x22,x28 // h+=K[i] | ||
3152 | + ror x15,x10,#1 | ||
3153 | + and x17,x20,x27 | ||
3154 | + ror x14,x7,#19 | ||
3155 | + bic x28,x21,x27 | ||
3156 | + ror x0,x23,#28 | ||
3157 | + add x22,x22,x8 // h+=X[i] | ||
3158 | + eor x16,x16,x27,ror#18 | ||
3159 | + eor x15,x15,x10,ror#8 | ||
3160 | + orr x17,x17,x28 // Ch(e,f,g) | ||
3161 | + eor x28,x23,x24 // a^b, b^c in next round | ||
3162 | + eor x16,x16,x27,ror#41 // Sigma1(e) | ||
3163 | + eor x0,x0,x23,ror#34 | ||
3164 | + add x22,x22,x17 // h+=Ch(e,f,g) | ||
3165 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
3166 | + eor x14,x14,x7,ror#61 | ||
3167 | + eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) | ||
3168 | + add x22,x22,x16 // h+=Sigma1(e) | ||
3169 | + eor x19,x19,x24 // Maj(a,b,c) | ||
3170 | + eor x17,x0,x23,ror#39 // Sigma0(a) | ||
3171 | + eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) | ||
3172 | + add x9,x9,x2 | ||
3173 | + add x26,x26,x22 // d+=h | ||
3174 | + add x22,x22,x19 // h+=Maj(a,b,c) | ||
3175 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
3176 | + add x9,x9,x15 | ||
3177 | + add x22,x22,x17 // h+=Sigma0(a) | ||
3178 | + add x9,x9,x14 | ||
3179 | + ldr x14,[sp,#24] | ||
3180 | + str x1,[sp,#16] | ||
3181 | + ror x16,x26,#14 | ||
3182 | + add x21,x21,x19 // h+=K[i] | ||
3183 | + ror x0,x11,#1 | ||
3184 | + and x17,x27,x26 | ||
3185 | + ror x15,x8,#19 | ||
3186 | + bic x19,x20,x26 | ||
3187 | + ror x1,x22,#28 | ||
3188 | + add x21,x21,x9 // h+=X[i] | ||
3189 | + eor x16,x16,x26,ror#18 | ||
3190 | + eor x0,x0,x11,ror#8 | ||
3191 | + orr x17,x17,x19 // Ch(e,f,g) | ||
3192 | + eor x19,x22,x23 // a^b, b^c in next round | ||
3193 | + eor x16,x16,x26,ror#41 // Sigma1(e) | ||
3194 | + eor x1,x1,x22,ror#34 | ||
3195 | + add x21,x21,x17 // h+=Ch(e,f,g) | ||
3196 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
3197 | + eor x15,x15,x8,ror#61 | ||
3198 | + eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) | ||
3199 | + add x21,x21,x16 // h+=Sigma1(e) | ||
3200 | + eor x28,x28,x23 // Maj(a,b,c) | ||
3201 | + eor x17,x1,x22,ror#39 // Sigma0(a) | ||
3202 | + eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) | ||
3203 | + add x10,x10,x3 | ||
3204 | + add x25,x25,x21 // d+=h | ||
3205 | + add x21,x21,x28 // h+=Maj(a,b,c) | ||
3206 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
3207 | + add x10,x10,x0 | ||
3208 | + add x21,x21,x17 // h+=Sigma0(a) | ||
3209 | + add x10,x10,x15 | ||
3210 | + ldr x15,[sp,#0] | ||
3211 | + str x2,[sp,#24] | ||
3212 | + ror x16,x25,#14 | ||
3213 | + add x20,x20,x28 // h+=K[i] | ||
3214 | + ror x1,x12,#1 | ||
3215 | + and x17,x26,x25 | ||
3216 | + ror x0,x9,#19 | ||
3217 | + bic x28,x27,x25 | ||
3218 | + ror x2,x21,#28 | ||
3219 | + add x20,x20,x10 // h+=X[i] | ||
3220 | + eor x16,x16,x25,ror#18 | ||
3221 | + eor x1,x1,x12,ror#8 | ||
3222 | + orr x17,x17,x28 // Ch(e,f,g) | ||
3223 | + eor x28,x21,x22 // a^b, b^c in next round | ||
3224 | + eor x16,x16,x25,ror#41 // Sigma1(e) | ||
3225 | + eor x2,x2,x21,ror#34 | ||
3226 | + add x20,x20,x17 // h+=Ch(e,f,g) | ||
3227 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
3228 | + eor x0,x0,x9,ror#61 | ||
3229 | + eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) | ||
3230 | + add x20,x20,x16 // h+=Sigma1(e) | ||
3231 | + eor x19,x19,x22 // Maj(a,b,c) | ||
3232 | + eor x17,x2,x21,ror#39 // Sigma0(a) | ||
3233 | + eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) | ||
3234 | + add x11,x11,x4 | ||
3235 | + add x24,x24,x20 // d+=h | ||
3236 | + add x20,x20,x19 // h+=Maj(a,b,c) | ||
3237 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
3238 | + add x11,x11,x1 | ||
3239 | + add x20,x20,x17 // h+=Sigma0(a) | ||
3240 | + add x11,x11,x0 | ||
3241 | + ldr x0,[sp,#8] | ||
3242 | + str x3,[sp,#0] | ||
3243 | + ror x16,x24,#14 | ||
3244 | + add x27,x27,x19 // h+=K[i] | ||
3245 | + ror x2,x13,#1 | ||
3246 | + and x17,x25,x24 | ||
3247 | + ror x1,x10,#19 | ||
3248 | + bic x19,x26,x24 | ||
3249 | + ror x3,x20,#28 | ||
3250 | + add x27,x27,x11 // h+=X[i] | ||
3251 | + eor x16,x16,x24,ror#18 | ||
3252 | + eor x2,x2,x13,ror#8 | ||
3253 | + orr x17,x17,x19 // Ch(e,f,g) | ||
3254 | + eor x19,x20,x21 // a^b, b^c in next round | ||
3255 | + eor x16,x16,x24,ror#41 // Sigma1(e) | ||
3256 | + eor x3,x3,x20,ror#34 | ||
3257 | + add x27,x27,x17 // h+=Ch(e,f,g) | ||
3258 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
3259 | + eor x1,x1,x10,ror#61 | ||
3260 | + eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) | ||
3261 | + add x27,x27,x16 // h+=Sigma1(e) | ||
3262 | + eor x28,x28,x21 // Maj(a,b,c) | ||
3263 | + eor x17,x3,x20,ror#39 // Sigma0(a) | ||
3264 | + eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) | ||
3265 | + add x12,x12,x5 | ||
3266 | + add x23,x23,x27 // d+=h | ||
3267 | + add x27,x27,x28 // h+=Maj(a,b,c) | ||
3268 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
3269 | + add x12,x12,x2 | ||
3270 | + add x27,x27,x17 // h+=Sigma0(a) | ||
3271 | + add x12,x12,x1 | ||
3272 | + ldr x1,[sp,#16] | ||
3273 | + str x4,[sp,#8] | ||
3274 | + ror x16,x23,#14 | ||
3275 | + add x26,x26,x28 // h+=K[i] | ||
3276 | + ror x3,x14,#1 | ||
3277 | + and x17,x24,x23 | ||
3278 | + ror x2,x11,#19 | ||
3279 | + bic x28,x25,x23 | ||
3280 | + ror x4,x27,#28 | ||
3281 | + add x26,x26,x12 // h+=X[i] | ||
3282 | + eor x16,x16,x23,ror#18 | ||
3283 | + eor x3,x3,x14,ror#8 | ||
3284 | + orr x17,x17,x28 // Ch(e,f,g) | ||
3285 | + eor x28,x27,x20 // a^b, b^c in next round | ||
3286 | + eor x16,x16,x23,ror#41 // Sigma1(e) | ||
3287 | + eor x4,x4,x27,ror#34 | ||
3288 | + add x26,x26,x17 // h+=Ch(e,f,g) | ||
3289 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
3290 | + eor x2,x2,x11,ror#61 | ||
3291 | + eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) | ||
3292 | + add x26,x26,x16 // h+=Sigma1(e) | ||
3293 | + eor x19,x19,x20 // Maj(a,b,c) | ||
3294 | + eor x17,x4,x27,ror#39 // Sigma0(a) | ||
3295 | + eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) | ||
3296 | + add x13,x13,x6 | ||
3297 | + add x22,x22,x26 // d+=h | ||
3298 | + add x26,x26,x19 // h+=Maj(a,b,c) | ||
3299 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
3300 | + add x13,x13,x3 | ||
3301 | + add x26,x26,x17 // h+=Sigma0(a) | ||
3302 | + add x13,x13,x2 | ||
3303 | + ldr x2,[sp,#24] | ||
3304 | + str x5,[sp,#16] | ||
3305 | + ror x16,x22,#14 | ||
3306 | + add x25,x25,x19 // h+=K[i] | ||
3307 | + ror x4,x15,#1 | ||
3308 | + and x17,x23,x22 | ||
3309 | + ror x3,x12,#19 | ||
3310 | + bic x19,x24,x22 | ||
3311 | + ror x5,x26,#28 | ||
3312 | + add x25,x25,x13 // h+=X[i] | ||
3313 | + eor x16,x16,x22,ror#18 | ||
3314 | + eor x4,x4,x15,ror#8 | ||
3315 | + orr x17,x17,x19 // Ch(e,f,g) | ||
3316 | + eor x19,x26,x27 // a^b, b^c in next round | ||
3317 | + eor x16,x16,x22,ror#41 // Sigma1(e) | ||
3318 | + eor x5,x5,x26,ror#34 | ||
3319 | + add x25,x25,x17 // h+=Ch(e,f,g) | ||
3320 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
3321 | + eor x3,x3,x12,ror#61 | ||
3322 | + eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) | ||
3323 | + add x25,x25,x16 // h+=Sigma1(e) | ||
3324 | + eor x28,x28,x27 // Maj(a,b,c) | ||
3325 | + eor x17,x5,x26,ror#39 // Sigma0(a) | ||
3326 | + eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) | ||
3327 | + add x14,x14,x7 | ||
3328 | + add x21,x21,x25 // d+=h | ||
3329 | + add x25,x25,x28 // h+=Maj(a,b,c) | ||
3330 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
3331 | + add x14,x14,x4 | ||
3332 | + add x25,x25,x17 // h+=Sigma0(a) | ||
3333 | + add x14,x14,x3 | ||
3334 | + ldr x3,[sp,#0] | ||
3335 | + str x6,[sp,#24] | ||
3336 | + ror x16,x21,#14 | ||
3337 | + add x24,x24,x28 // h+=K[i] | ||
3338 | + ror x5,x0,#1 | ||
3339 | + and x17,x22,x21 | ||
3340 | + ror x4,x13,#19 | ||
3341 | + bic x28,x23,x21 | ||
3342 | + ror x6,x25,#28 | ||
3343 | + add x24,x24,x14 // h+=X[i] | ||
3344 | + eor x16,x16,x21,ror#18 | ||
3345 | + eor x5,x5,x0,ror#8 | ||
3346 | + orr x17,x17,x28 // Ch(e,f,g) | ||
3347 | + eor x28,x25,x26 // a^b, b^c in next round | ||
3348 | + eor x16,x16,x21,ror#41 // Sigma1(e) | ||
3349 | + eor x6,x6,x25,ror#34 | ||
3350 | + add x24,x24,x17 // h+=Ch(e,f,g) | ||
3351 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
3352 | + eor x4,x4,x13,ror#61 | ||
3353 | + eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) | ||
3354 | + add x24,x24,x16 // h+=Sigma1(e) | ||
3355 | + eor x19,x19,x26 // Maj(a,b,c) | ||
3356 | + eor x17,x6,x25,ror#39 // Sigma0(a) | ||
3357 | + eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) | ||
3358 | + add x15,x15,x8 | ||
3359 | + add x20,x20,x24 // d+=h | ||
3360 | + add x24,x24,x19 // h+=Maj(a,b,c) | ||
3361 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
3362 | + add x15,x15,x5 | ||
3363 | + add x24,x24,x17 // h+=Sigma0(a) | ||
3364 | + add x15,x15,x4 | ||
3365 | + ldr x4,[sp,#8] | ||
3366 | + str x7,[sp,#0] | ||
3367 | + ror x16,x20,#14 | ||
3368 | + add x23,x23,x19 // h+=K[i] | ||
3369 | + ror x6,x1,#1 | ||
3370 | + and x17,x21,x20 | ||
3371 | + ror x5,x14,#19 | ||
3372 | + bic x19,x22,x20 | ||
3373 | + ror x7,x24,#28 | ||
3374 | + add x23,x23,x15 // h+=X[i] | ||
3375 | + eor x16,x16,x20,ror#18 | ||
3376 | + eor x6,x6,x1,ror#8 | ||
3377 | + orr x17,x17,x19 // Ch(e,f,g) | ||
3378 | + eor x19,x24,x25 // a^b, b^c in next round | ||
3379 | + eor x16,x16,x20,ror#41 // Sigma1(e) | ||
3380 | + eor x7,x7,x24,ror#34 | ||
3381 | + add x23,x23,x17 // h+=Ch(e,f,g) | ||
3382 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
3383 | + eor x5,x5,x14,ror#61 | ||
3384 | + eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) | ||
3385 | + add x23,x23,x16 // h+=Sigma1(e) | ||
3386 | + eor x28,x28,x25 // Maj(a,b,c) | ||
3387 | + eor x17,x7,x24,ror#39 // Sigma0(a) | ||
3388 | + eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) | ||
3389 | + add x0,x0,x9 | ||
3390 | + add x27,x27,x23 // d+=h | ||
3391 | + add x23,x23,x28 // h+=Maj(a,b,c) | ||
3392 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
3393 | + add x0,x0,x6 | ||
3394 | + add x23,x23,x17 // h+=Sigma0(a) | ||
3395 | + add x0,x0,x5 | ||
3396 | + ldr x5,[sp,#16] | ||
3397 | + str x8,[sp,#8] | ||
3398 | + ror x16,x27,#14 | ||
3399 | + add x22,x22,x28 // h+=K[i] | ||
3400 | + ror x7,x2,#1 | ||
3401 | + and x17,x20,x27 | ||
3402 | + ror x6,x15,#19 | ||
3403 | + bic x28,x21,x27 | ||
3404 | + ror x8,x23,#28 | ||
3405 | + add x22,x22,x0 // h+=X[i] | ||
3406 | + eor x16,x16,x27,ror#18 | ||
3407 | + eor x7,x7,x2,ror#8 | ||
3408 | + orr x17,x17,x28 // Ch(e,f,g) | ||
3409 | + eor x28,x23,x24 // a^b, b^c in next round | ||
3410 | + eor x16,x16,x27,ror#41 // Sigma1(e) | ||
3411 | + eor x8,x8,x23,ror#34 | ||
3412 | + add x22,x22,x17 // h+=Ch(e,f,g) | ||
3413 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
3414 | + eor x6,x6,x15,ror#61 | ||
3415 | + eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) | ||
3416 | + add x22,x22,x16 // h+=Sigma1(e) | ||
3417 | + eor x19,x19,x24 // Maj(a,b,c) | ||
3418 | + eor x17,x8,x23,ror#39 // Sigma0(a) | ||
3419 | + eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) | ||
3420 | + add x1,x1,x10 | ||
3421 | + add x26,x26,x22 // d+=h | ||
3422 | + add x22,x22,x19 // h+=Maj(a,b,c) | ||
3423 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
3424 | + add x1,x1,x7 | ||
3425 | + add x22,x22,x17 // h+=Sigma0(a) | ||
3426 | + add x1,x1,x6 | ||
3427 | + ldr x6,[sp,#24] | ||
3428 | + str x9,[sp,#16] | ||
3429 | + ror x16,x26,#14 | ||
3430 | + add x21,x21,x19 // h+=K[i] | ||
3431 | + ror x8,x3,#1 | ||
3432 | + and x17,x27,x26 | ||
3433 | + ror x7,x0,#19 | ||
3434 | + bic x19,x20,x26 | ||
3435 | + ror x9,x22,#28 | ||
3436 | + add x21,x21,x1 // h+=X[i] | ||
3437 | + eor x16,x16,x26,ror#18 | ||
3438 | + eor x8,x8,x3,ror#8 | ||
3439 | + orr x17,x17,x19 // Ch(e,f,g) | ||
3440 | + eor x19,x22,x23 // a^b, b^c in next round | ||
3441 | + eor x16,x16,x26,ror#41 // Sigma1(e) | ||
3442 | + eor x9,x9,x22,ror#34 | ||
3443 | + add x21,x21,x17 // h+=Ch(e,f,g) | ||
3444 | + and x28,x28,x19 // (b^c)&=(a^b) | ||
3445 | + eor x7,x7,x0,ror#61 | ||
3446 | + eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) | ||
3447 | + add x21,x21,x16 // h+=Sigma1(e) | ||
3448 | + eor x28,x28,x23 // Maj(a,b,c) | ||
3449 | + eor x17,x9,x22,ror#39 // Sigma0(a) | ||
3450 | + eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) | ||
3451 | + add x2,x2,x11 | ||
3452 | + add x25,x25,x21 // d+=h | ||
3453 | + add x21,x21,x28 // h+=Maj(a,b,c) | ||
3454 | + ldr x28,[x30],#8 // *K++, x19 in next round | ||
3455 | + add x2,x2,x8 | ||
3456 | + add x21,x21,x17 // h+=Sigma0(a) | ||
3457 | + add x2,x2,x7 | ||
3458 | + ldr x7,[sp,#0] | ||
3459 | + str x10,[sp,#24] | ||
3460 | + ror x16,x25,#14 | ||
3461 | + add x20,x20,x28 // h+=K[i] | ||
3462 | + ror x9,x4,#1 | ||
3463 | + and x17,x26,x25 | ||
3464 | + ror x8,x1,#19 | ||
3465 | + bic x28,x27,x25 | ||
3466 | + ror x10,x21,#28 | ||
3467 | + add x20,x20,x2 // h+=X[i] | ||
3468 | + eor x16,x16,x25,ror#18 | ||
3469 | + eor x9,x9,x4,ror#8 | ||
3470 | + orr x17,x17,x28 // Ch(e,f,g) | ||
3471 | + eor x28,x21,x22 // a^b, b^c in next round | ||
3472 | + eor x16,x16,x25,ror#41 // Sigma1(e) | ||
3473 | + eor x10,x10,x21,ror#34 | ||
3474 | + add x20,x20,x17 // h+=Ch(e,f,g) | ||
3475 | + and x19,x19,x28 // (b^c)&=(a^b) | ||
3476 | + eor x8,x8,x1,ror#61 | ||
3477 | + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) | ||
3478 | + add x20,x20,x16 // h+=Sigma1(e) | ||
3479 | + eor x19,x19,x22 // Maj(a,b,c) | ||
3480 | + eor x17,x10,x21,ror#39 // Sigma0(a) | ||
3481 | + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) | ||
3482 | + add x3,x3,x12 | ||
3483 | + add x24,x24,x20 // d+=h | ||
3484 | + add x20,x20,x19 // h+=Maj(a,b,c) | ||
3485 | + ldr x19,[x30],#8 // *K++, x28 in next round | ||
3486 | + add x3,x3,x9 | ||
3487 | + add x20,x20,x17 // h+=Sigma0(a) | ||
3488 | + add x3,x3,x8 | ||
3489 | + cbnz x19,.Loop_16_xx | ||
3490 | + | ||
3491 | + ldp x0,x2,[x29,#96] | ||
3492 | + ldr x1,[x29,#112] | ||
3493 | + sub x30,x30,#648 // rewind | ||
3494 | + | ||
3495 | + ldp x3,x4,[x0] | ||
3496 | + ldp x5,x6,[x0,#2*8] | ||
3497 | + add x1,x1,#14*8 // advance input pointer | ||
3498 | + ldp x7,x8,[x0,#4*8] | ||
3499 | + add x20,x20,x3 | ||
3500 | + ldp x9,x10,[x0,#6*8] | ||
3501 | + add x21,x21,x4 | ||
3502 | + add x22,x22,x5 | ||
3503 | + add x23,x23,x6 | ||
3504 | + stp x20,x21,[x0] | ||
3505 | + add x24,x24,x7 | ||
3506 | + add x25,x25,x8 | ||
3507 | + stp x22,x23,[x0,#2*8] | ||
3508 | + add x26,x26,x9 | ||
3509 | + add x27,x27,x10 | ||
3510 | + cmp x1,x2 | ||
3511 | + stp x24,x25,[x0,#4*8] | ||
3512 | + stp x26,x27,[x0,#6*8] | ||
3513 | + b.ne .Loop | ||
3514 | + | ||
3515 | + ldp x19,x20,[x29,#16] | ||
3516 | + add sp,sp,#4*8 | ||
3517 | + ldp x21,x22,[x29,#32] | ||
3518 | + ldp x23,x24,[x29,#48] | ||
3519 | + ldp x25,x26,[x29,#64] | ||
3520 | + ldp x27,x28,[x29,#80] | ||
3521 | + ldp x29,x30,[sp],#128 | ||
3522 | + ret | ||
3523 | +.size sha512_block_data_order,.-sha512_block_data_order | ||
3524 | + | ||
3525 | +.align 6 | ||
3526 | +.type .LK512,%object | ||
3527 | +.LK512: | ||
3528 | + .quad 0x428a2f98d728ae22,0x7137449123ef65cd | ||
3529 | + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc | ||
3530 | + .quad 0x3956c25bf348b538,0x59f111f1b605d019 | ||
3531 | + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 | ||
3532 | + .quad 0xd807aa98a3030242,0x12835b0145706fbe | ||
3533 | + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 | ||
3534 | + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 | ||
3535 | + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 | ||
3536 | + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 | ||
3537 | + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 | ||
3538 | + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 | ||
3539 | + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 | ||
3540 | + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 | ||
3541 | + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 | ||
3542 | + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 | ||
3543 | + .quad 0x06ca6351e003826f,0x142929670a0e6e70 | ||
3544 | + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 | ||
3545 | + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df | ||
3546 | + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 | ||
3547 | + .quad 0x81c2c92e47edaee6,0x92722c851482353b | ||
3548 | + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 | ||
3549 | + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 | ||
3550 | + .quad 0xd192e819d6ef5218,0xd69906245565a910 | ||
3551 | + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 | ||
3552 | + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 | ||
3553 | + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 | ||
3554 | + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb | ||
3555 | + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 | ||
3556 | + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 | ||
3557 | + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec | ||
3558 | + .quad 0x90befffa23631e28,0xa4506cebde82bde9 | ||
3559 | + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b | ||
3560 | + .quad 0xca273eceea26619c,0xd186b8c721c0c207 | ||
3561 | + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 | ||
3562 | + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 | ||
3563 | + .quad 0x113f9804bef90dae,0x1b710b35131c471b | ||
3564 | + .quad 0x28db77f523047d84,0x32caab7b40c72493 | ||
3565 | + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c | ||
3566 | + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a | ||
3567 | + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 | ||
3568 | + .quad 0 // terminator | ||
3569 | +.size .LK512,.-.LK512 | ||
3570 | +#ifndef __KERNEL__ | ||
3571 | +.align 3 | ||
3572 | +.LOPENSSL_armcap_P: | ||
3573 | +# ifdef __ILP32__ | ||
3574 | + .long OPENSSL_armcap_P-. | ||
3575 | +# else | ||
3576 | + .quad OPENSSL_armcap_P-. | ||
3577 | +# endif | ||
3578 | +#endif | ||
3579 | +.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>" | ||
3580 | +.align 2 | ||
3581 | +#ifndef __KERNEL__ | ||
3582 | +.comm OPENSSL_armcap_P,4,4 | ||
3583 | +#endif | ||
3584 | diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h | ||
3585 | index 7193bf97b8da..e60375ce0dd2 100644 | ||
3586 | --- a/arch/arm64/include/asm/assembler.h | ||
3587 | +++ b/arch/arm64/include/asm/assembler.h | ||
3588 | @@ -86,6 +86,24 @@ | ||
3589 | dmb \opt | ||
3590 | .endm | ||
3591 | |||
3592 | +/* | ||
3593 | + * Value prediction barrier | ||
3594 | + */ | ||
3595 | + .macro csdb | ||
3596 | + hint #20 | ||
3597 | + .endm | ||
3598 | + | ||
3599 | +/* | ||
3600 | + * Sanitise a 64-bit bounded index wrt speculation, returning zero if out | ||
3601 | + * of bounds. | ||
3602 | + */ | ||
3603 | + .macro mask_nospec64, idx, limit, tmp | ||
3604 | + sub \tmp, \idx, \limit | ||
3605 | + bic \tmp, \tmp, \idx | ||
3606 | + and \idx, \idx, \tmp, asr #63 | ||
3607 | + csdb | ||
3608 | + .endm | ||
3609 | + | ||
3610 | /* | ||
3611 | * NOP sequence | ||
3612 | */ | ||
3613 | @@ -416,4 +434,5 @@ alternative_endif | ||
3614 | .macro pte_to_phys, phys, pte | ||
3615 | and \phys, \pte, #(((1 << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) | ||
3616 | .endm | ||
3617 | + | ||
3618 | #endif /* __ASM_ASSEMBLER_H */ | ||
3619 | diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h | ||
3620 | index 0fe7e43b7fbc..0b0755c961ac 100644 | ||
3621 | --- a/arch/arm64/include/asm/barrier.h | ||
3622 | +++ b/arch/arm64/include/asm/barrier.h | ||
3623 | @@ -31,6 +31,8 @@ | ||
3624 | #define dmb(opt) asm volatile("dmb " #opt : : : "memory") | ||
3625 | #define dsb(opt) asm volatile("dsb " #opt : : : "memory") | ||
3626 | |||
3627 | +#define csdb() asm volatile("hint #20" : : : "memory") | ||
3628 | + | ||
3629 | #define mb() dsb(sy) | ||
3630 | #define rmb() dsb(ld) | ||
3631 | #define wmb() dsb(st) | ||
3632 | @@ -38,6 +40,27 @@ | ||
3633 | #define dma_rmb() dmb(oshld) | ||
3634 | #define dma_wmb() dmb(oshst) | ||
3635 | |||
3636 | +/* | ||
3637 | + * Generate a mask for array_index_nospec() that is ~0UL when 0 <= idx < sz | |
3638 | + * and 0 otherwise. | ||
3639 | + */ | ||
3640 | +#define array_index_mask_nospec array_index_mask_nospec | ||
3641 | +static inline unsigned long array_index_mask_nospec(unsigned long idx, | ||
3642 | + unsigned long sz) | ||
3643 | +{ | ||
3644 | + unsigned long mask; | ||
3645 | + | ||
3646 | + asm volatile( | ||
3647 | + " cmp %1, %2\n" | ||
3648 | + " sbc %0, xzr, xzr\n" | ||
3649 | + : "=r" (mask) | ||
3650 | + : "r" (idx), "Ir" (sz) | ||
3651 | + : "cc"); | ||
3652 | + | ||
3653 | + csdb(); | ||
3654 | + return mask; | ||
3655 | +} | ||
3656 | + | ||
3657 | #define __smp_mb() dmb(ish) | ||
3658 | #define __smp_rmb() dmb(ishld) | ||
3659 | #define __smp_wmb() dmb(ishst) | ||
3660 | diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h | ||
3661 | index 7ddf233f05bd..ce67bf6a0886 100644 | ||
3662 | --- a/arch/arm64/include/asm/cpucaps.h | ||
3663 | +++ b/arch/arm64/include/asm/cpucaps.h | ||
3664 | @@ -35,7 +35,8 @@ | ||
3665 | #define ARM64_HYP_OFFSET_LOW 14 | ||
3666 | #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 | ||
3667 | #define ARM64_UNMAP_KERNEL_AT_EL0 16 | ||
3668 | +#define ARM64_HARDEN_BRANCH_PREDICTOR 17 | ||
3669 | |||
3670 | -#define ARM64_NCAPS 17 | ||
3671 | +#define ARM64_NCAPS 18 | ||
3672 | |||
3673 | #endif /* __ASM_CPUCAPS_H */ | ||
3674 | diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h | ||
3675 | index 1d47930c30dc..9ee3038a6b98 100644 | ||
3676 | --- a/arch/arm64/include/asm/cputype.h | ||
3677 | +++ b/arch/arm64/include/asm/cputype.h | ||
3678 | @@ -75,7 +75,10 @@ | ||
3679 | #define ARM_CPU_PART_AEM_V8 0xD0F | ||
3680 | #define ARM_CPU_PART_FOUNDATION 0xD00 | ||
3681 | #define ARM_CPU_PART_CORTEX_A57 0xD07 | ||
3682 | +#define ARM_CPU_PART_CORTEX_A72 0xD08 | ||
3683 | #define ARM_CPU_PART_CORTEX_A53 0xD03 | ||
3684 | +#define ARM_CPU_PART_CORTEX_A73 0xD09 | ||
3685 | +#define ARM_CPU_PART_CORTEX_A75 0xD0A | ||
3686 | |||
3687 | #define APM_CPU_PART_POTENZA 0x000 | ||
3688 | |||
3689 | @@ -87,6 +90,9 @@ | ||
3690 | |||
3691 | #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) | ||
3692 | #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) | ||
3693 | +#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) | ||
3694 | +#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) | ||
3695 | +#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) | ||
3696 | #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) | ||
3697 | #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) | ||
3698 | #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) | ||
3699 | diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h | ||
3700 | index 20dcb196b240..4e5f36a804b4 100644 | ||
3701 | --- a/arch/arm64/include/asm/futex.h | ||
3702 | +++ b/arch/arm64/include/asm/futex.h | ||
3703 | @@ -51,13 +51,14 @@ | ||
3704 | : "memory") | ||
3705 | |||
3706 | static inline int | ||
3707 | -futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) | ||
3708 | +futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *_uaddr) | ||
3709 | { | ||
3710 | int op = (encoded_op >> 28) & 7; | ||
3711 | int cmp = (encoded_op >> 24) & 15; | ||
3712 | int oparg = (int)(encoded_op << 8) >> 20; | ||
3713 | int cmparg = (int)(encoded_op << 20) >> 20; | ||
3714 | int oldval = 0, ret, tmp; | ||
3715 | + u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); | ||
3716 | |||
3717 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
3718 | oparg = 1U << (oparg & 0x1f); | ||
3719 | @@ -109,15 +110,17 @@ futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) | ||
3720 | } | ||
3721 | |||
3722 | static inline int | ||
3723 | -futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | ||
3724 | +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, | ||
3725 | u32 oldval, u32 newval) | ||
3726 | { | ||
3727 | int ret = 0; | ||
3728 | u32 val, tmp; | ||
3729 | + u32 __user *uaddr; | ||
3730 | |||
3731 | - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
3732 | + if (!access_ok(VERIFY_WRITE, _uaddr, sizeof(u32))) | ||
3733 | return -EFAULT; | ||
3734 | |||
3735 | + uaddr = __uaccess_mask_ptr(_uaddr); | ||
3736 | asm volatile("// futex_atomic_cmpxchg_inatomic\n" | ||
3737 | ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) | ||
3738 | " prfm pstl1strm, %2\n" | ||
3739 | diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h | ||
3740 | index e5050388e062..37d56e85036e 100644 | ||
3741 | --- a/arch/arm64/include/asm/kvm_host.h | ||
3742 | +++ b/arch/arm64/include/asm/kvm_host.h | ||
3743 | @@ -393,4 +393,9 @@ static inline void __cpu_init_stage2(void) | ||
3744 | "PARange is %d bits, unsupported configuration!", parange); | ||
3745 | } | ||
3746 | |||
3747 | +static inline bool kvm_arm_harden_branch_predictor(void) | ||
3748 | +{ | ||
3749 | + return cpus_have_cap(ARM64_HARDEN_BRANCH_PREDICTOR); | ||
3750 | +} | ||
3751 | + | ||
3752 | #endif /* __ARM64_KVM_HOST_H__ */ | ||
3753 | diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h | ||
3754 | index 6d22017ebbad..80bf33715ecb 100644 | ||
3755 | --- a/arch/arm64/include/asm/kvm_mmu.h | ||
3756 | +++ b/arch/arm64/include/asm/kvm_mmu.h | ||
3757 | @@ -313,5 +313,43 @@ static inline unsigned int kvm_get_vmid_bits(void) | ||
3758 | return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; | ||
3759 | } | ||
3760 | |||
3761 | +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR | ||
3762 | +#include <asm/mmu.h> | ||
3763 | + | ||
3764 | +static inline void *kvm_get_hyp_vector(void) | ||
3765 | +{ | ||
3766 | + struct bp_hardening_data *data = arm64_get_bp_hardening_data(); | ||
3767 | + void *vect = kvm_ksym_ref(__kvm_hyp_vector); | ||
3768 | + | ||
3769 | + if (data->fn) { | ||
3770 | + vect = __bp_harden_hyp_vecs_start + | ||
3771 | + data->hyp_vectors_slot * SZ_2K; | ||
3772 | + | ||
3773 | + if (!cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) | ||
3774 | + vect = lm_alias(vect); | ||
3775 | + } | ||
3776 | + | ||
3777 | + return vect; | ||
3778 | +} | ||
3779 | + | ||
3780 | +static inline int kvm_map_vectors(void) | ||
3781 | +{ | ||
3782 | + return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start), | ||
3783 | + kvm_ksym_ref(__bp_harden_hyp_vecs_end), | ||
3784 | + PAGE_HYP_EXEC); | ||
3785 | +} | ||
3786 | + | ||
3787 | +#else | ||
3788 | +static inline void *kvm_get_hyp_vector(void) | ||
3789 | +{ | ||
3790 | + return kvm_ksym_ref(__kvm_hyp_vector); | ||
3791 | +} | ||
3792 | + | ||
3793 | +static inline int kvm_map_vectors(void) | ||
3794 | +{ | ||
3795 | + return 0; | ||
3796 | +} | ||
3797 | +#endif | ||
3798 | + | ||
3799 | #endif /* __ASSEMBLY__ */ | ||
3800 | #endif /* __ARM64_KVM_MMU_H__ */ | ||
3801 | diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h | ||
3802 | deleted file mode 100644 | ||
3803 | index bc39e557c56c..000000000000 | ||
3804 | --- a/arch/arm64/include/asm/kvm_psci.h | ||
3805 | +++ /dev/null | ||
3806 | @@ -1,27 +0,0 @@ | ||
3807 | -/* | ||
3808 | - * Copyright (C) 2012,2013 - ARM Ltd | ||
3809 | - * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
3810 | - * | ||
3811 | - * This program is free software; you can redistribute it and/or modify | ||
3812 | - * it under the terms of the GNU General Public License version 2 as | ||
3813 | - * published by the Free Software Foundation. | ||
3814 | - * | ||
3815 | - * This program is distributed in the hope that it will be useful, | ||
3816 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
3817 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
3818 | - * GNU General Public License for more details. | ||
3819 | - * | ||
3820 | - * You should have received a copy of the GNU General Public License | ||
3821 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
3822 | - */ | ||
3823 | - | ||
3824 | -#ifndef __ARM64_KVM_PSCI_H__ | ||
3825 | -#define __ARM64_KVM_PSCI_H__ | ||
3826 | - | ||
3827 | -#define KVM_ARM_PSCI_0_1 1 | ||
3828 | -#define KVM_ARM_PSCI_0_2 2 | ||
3829 | - | ||
3830 | -int kvm_psci_version(struct kvm_vcpu *vcpu); | ||
3831 | -int kvm_psci_call(struct kvm_vcpu *vcpu); | ||
3832 | - | ||
3833 | -#endif /* __ARM64_KVM_PSCI_H__ */ | ||
3834 | diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h | ||
3835 | index 5e3faba689e0..ba917be5565a 100644 | ||
3836 | --- a/arch/arm64/include/asm/memory.h | ||
3837 | +++ b/arch/arm64/include/asm/memory.h | ||
3838 | @@ -60,8 +60,6 @@ | ||
3839 | * KIMAGE_VADDR - the virtual address of the start of the kernel image | ||
3840 | * VA_BITS - the maximum number of bits for virtual addresses. | ||
3841 | * VA_START - the first kernel virtual address. | ||
3842 | - * TASK_SIZE - the maximum size of a user space task. | ||
3843 | - * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. | ||
3844 | */ | ||
3845 | #define VA_BITS (CONFIG_ARM64_VA_BITS) | ||
3846 | #define VA_START (UL(0xffffffffffffffff) - \ | ||
3847 | @@ -76,19 +74,6 @@ | ||
3848 | #define PCI_IO_END (VMEMMAP_START - SZ_2M) | ||
3849 | #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) | ||
3850 | #define FIXADDR_TOP (PCI_IO_START - SZ_2M) | ||
3851 | -#define TASK_SIZE_64 (UL(1) << VA_BITS) | ||
3852 | - | ||
3853 | -#ifdef CONFIG_COMPAT | ||
3854 | -#define TASK_SIZE_32 UL(0x100000000) | ||
3855 | -#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ | ||
3856 | - TASK_SIZE_32 : TASK_SIZE_64) | ||
3857 | -#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ | ||
3858 | - TASK_SIZE_32 : TASK_SIZE_64) | ||
3859 | -#else | ||
3860 | -#define TASK_SIZE TASK_SIZE_64 | ||
3861 | -#endif /* CONFIG_COMPAT */ | ||
3862 | - | ||
3863 | -#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) | ||
3864 | |||
3865 | #define KERNEL_START _text | ||
3866 | #define KERNEL_END _end | ||
3867 | diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h | ||
3868 | index a813edf28737..d51158a61892 100644 | ||
3869 | --- a/arch/arm64/include/asm/mmu.h | ||
3870 | +++ b/arch/arm64/include/asm/mmu.h | ||
3871 | @@ -20,6 +20,8 @@ | ||
3872 | |||
3873 | #ifndef __ASSEMBLY__ | ||
3874 | |||
3875 | +#include <linux/percpu.h> | ||
3876 | + | ||
3877 | typedef struct { | ||
3878 | atomic64_t id; | ||
3879 | void *vdso; | ||
3880 | @@ -38,6 +40,43 @@ static inline bool arm64_kernel_unmapped_at_el0(void) | ||
3881 | cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0); | ||
3882 | } | ||
3883 | |||
3884 | +typedef void (*bp_hardening_cb_t)(void); | ||
3885 | + | ||
3886 | +struct bp_hardening_data { | ||
3887 | + int hyp_vectors_slot; | ||
3888 | + bp_hardening_cb_t fn; | ||
3889 | +}; | ||
3890 | + | ||
3891 | +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR | ||
3892 | +extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; | ||
3893 | + | ||
3894 | +DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); | ||
3895 | + | ||
3896 | +static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) | ||
3897 | +{ | ||
3898 | + return this_cpu_ptr(&bp_hardening_data); | ||
3899 | +} | ||
3900 | + | ||
3901 | +static inline void arm64_apply_bp_hardening(void) | ||
3902 | +{ | ||
3903 | + struct bp_hardening_data *d; | ||
3904 | + | ||
3905 | + if (!cpus_have_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) | ||
3906 | + return; | ||
3907 | + | ||
3908 | + d = arm64_get_bp_hardening_data(); | ||
3909 | + if (d->fn) | ||
3910 | + d->fn(); | ||
3911 | +} | ||
3912 | +#else | ||
3913 | +static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) | ||
3914 | +{ | ||
3915 | + return NULL; | ||
3916 | +} | ||
3917 | + | ||
3918 | +static inline void arm64_apply_bp_hardening(void) { } | ||
3919 | +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ | ||
3920 | + | ||
3921 | extern void paging_init(void); | ||
3922 | extern void bootmem_init(void); | ||
3923 | extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); | ||
3924 | diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h | ||
3925 | index 60e34824e18c..5917147af0c4 100644 | ||
3926 | --- a/arch/arm64/include/asm/processor.h | ||
3927 | +++ b/arch/arm64/include/asm/processor.h | ||
3928 | @@ -19,6 +19,13 @@ | ||
3929 | #ifndef __ASM_PROCESSOR_H | ||
3930 | #define __ASM_PROCESSOR_H | ||
3931 | |||
3932 | +#define TASK_SIZE_64 (UL(1) << VA_BITS) | ||
3933 | + | ||
3934 | +#define KERNEL_DS UL(-1) | ||
3935 | +#define USER_DS (TASK_SIZE_64 - 1) | ||
3936 | + | ||
3937 | +#ifndef __ASSEMBLY__ | ||
3938 | + | ||
3939 | /* | ||
3940 | * Default implementation of macro that returns current | ||
3941 | * instruction pointer ("program counter"). | ||
3942 | @@ -37,6 +44,22 @@ | ||
3943 | #include <asm/ptrace.h> | ||
3944 | #include <asm/types.h> | ||
3945 | |||
3946 | +/* | ||
3947 | + * TASK_SIZE - the maximum size of a user space task. | ||
3948 | + * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. | ||
3949 | + */ | ||
3950 | +#ifdef CONFIG_COMPAT | ||
3951 | +#define TASK_SIZE_32 UL(0x100000000) | ||
3952 | +#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ | ||
3953 | + TASK_SIZE_32 : TASK_SIZE_64) | ||
3954 | +#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ | ||
3955 | + TASK_SIZE_32 : TASK_SIZE_64) | ||
3956 | +#else | ||
3957 | +#define TASK_SIZE TASK_SIZE_64 | ||
3958 | +#endif /* CONFIG_COMPAT */ | ||
3959 | + | ||
3960 | +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) | ||
3961 | + | ||
3962 | #define STACK_TOP_MAX TASK_SIZE_64 | ||
3963 | #ifdef CONFIG_COMPAT | ||
3964 | #define AARCH32_VECTORS_BASE 0xffff0000 | ||
3965 | @@ -192,4 +215,5 @@ int cpu_enable_pan(void *__unused); | ||
3966 | int cpu_enable_uao(void *__unused); | ||
3967 | int cpu_enable_cache_maint_trap(void *__unused); | ||
3968 | |||
3969 | +#endif /* __ASSEMBLY__ */ | ||
3970 | #endif /* __ASM_PROCESSOR_H */ | ||
3971 | diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h | ||
3972 | index 7cb7f7cdcfbc..88bbe364b6ae 100644 | ||
3973 | --- a/arch/arm64/include/asm/sysreg.h | ||
3974 | +++ b/arch/arm64/include/asm/sysreg.h | ||
3975 | @@ -118,6 +118,8 @@ | ||
3976 | |||
3977 | /* id_aa64pfr0 */ | ||
3978 | #define ID_AA64PFR0_CSV3_SHIFT 60 | ||
3979 | +#define ID_AA64PFR0_CSV2_SHIFT 56 | ||
3980 | +#define ID_AA64PFR0_SVE_SHIFT 32 | ||
3981 | #define ID_AA64PFR0_GIC_SHIFT 24 | ||
3982 | #define ID_AA64PFR0_ASIMD_SHIFT 20 | ||
3983 | #define ID_AA64PFR0_FP_SHIFT 16 | ||
3984 | diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h | ||
3985 | index 811cf16a65f9..1d047d6c421b 100644 | ||
3986 | --- a/arch/arm64/include/asm/uaccess.h | ||
3987 | +++ b/arch/arm64/include/asm/uaccess.h | ||
3988 | @@ -28,6 +28,7 @@ | ||
3989 | |||
3990 | #include <asm/alternative.h> | ||
3991 | #include <asm/cpufeature.h> | ||
3992 | +#include <asm/processor.h> | ||
3993 | #include <asm/ptrace.h> | ||
3994 | #include <asm/sysreg.h> | ||
3995 | #include <asm/errno.h> | ||
3996 | @@ -59,16 +60,20 @@ struct exception_table_entry | ||
3997 | |||
3998 | extern int fixup_exception(struct pt_regs *regs); | ||
3999 | |||
4000 | -#define KERNEL_DS (-1UL) | ||
4001 | #define get_ds() (KERNEL_DS) | ||
4002 | - | ||
4003 | -#define USER_DS TASK_SIZE_64 | ||
4004 | #define get_fs() (current_thread_info()->addr_limit) | ||
4005 | |||
4006 | static inline void set_fs(mm_segment_t fs) | ||
4007 | { | ||
4008 | current_thread_info()->addr_limit = fs; | ||
4009 | |||
4010 | + /* | ||
4011 | + * Prevent a mispredicted conditional call to set_fs from forwarding | ||
4012 | + * the wrong address limit to access_ok under speculation. | ||
4013 | + */ | ||
4014 | + dsb(nsh); | ||
4015 | + isb(); | ||
4016 | + | ||
4017 | /* | ||
4018 | * Enable/disable UAO so that copy_to_user() etc can access | ||
4019 | * kernel memory with the unprivileged instructions. | ||
4020 | @@ -87,22 +92,32 @@ static inline void set_fs(mm_segment_t fs) | ||
4021 | * Returns 1 if the range is valid, 0 otherwise. | ||
4022 | * | ||
4023 | * This is equivalent to the following test: | ||
4024 | - * (u65)addr + (u65)size <= current->addr_limit | ||
4025 | - * | ||
4026 | - * This needs 65-bit arithmetic. | ||
4027 | + * (u65)addr + (u65)size <= (u65)current->addr_limit + 1 | ||
4028 | */ | ||
4029 | -#define __range_ok(addr, size) \ | ||
4030 | -({ \ | ||
4031 | - unsigned long __addr = (unsigned long __force)(addr); \ | ||
4032 | - unsigned long flag, roksum; \ | ||
4033 | - __chk_user_ptr(addr); \ | ||
4034 | - asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \ | ||
4035 | - : "=&r" (flag), "=&r" (roksum) \ | ||
4036 | - : "1" (__addr), "Ir" (size), \ | ||
4037 | - "r" (current_thread_info()->addr_limit) \ | ||
4038 | - : "cc"); \ | ||
4039 | - flag; \ | ||
4040 | -}) | ||
4041 | +static inline unsigned long __range_ok(unsigned long addr, unsigned long size) | ||
4042 | +{ | ||
4043 | + unsigned long limit = current_thread_info()->addr_limit; | ||
4044 | + | ||
4045 | + __chk_user_ptr(addr); | ||
4046 | + asm volatile( | ||
4047 | + // A + B <= C + 1 for all A,B,C, in four easy steps: | ||
4048 | + // 1: X = A + B; X' = X % 2^64 | ||
4049 | + " adds %0, %0, %2\n" | ||
4050 | + // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 | ||
4051 | + " csel %1, xzr, %1, hi\n" | ||
4052 | + // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' | ||
4053 | + // to compensate for the carry flag being set in step 4. For | ||
4054 | + // X > 2^64, X' merely has to remain nonzero, which it does. | ||
4055 | + " csinv %0, %0, xzr, cc\n" | ||
4056 | + // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1 | ||
4057 | + // comes from the carry in being clear. Otherwise, we are | ||
4058 | + // testing X' - C == 0, subject to the previous adjustments. | ||
4059 | + " sbcs xzr, %0, %1\n" | ||
4060 | + " cset %0, ls\n" | ||
4061 | + : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc"); | ||
4062 | + | ||
4063 | + return addr; | ||
4064 | +} | ||
4065 | |||
4066 | /* | ||
4067 | * When dealing with data aborts, watchpoints, or instruction traps we may end | ||
4068 | @@ -111,7 +126,7 @@ static inline void set_fs(mm_segment_t fs) | ||
4069 | */ | ||
4070 | #define untagged_addr(addr) sign_extend64(addr, 55) | ||
4071 | |||
4072 | -#define access_ok(type, addr, size) __range_ok(addr, size) | ||
4073 | +#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size) | ||
4074 | #define user_addr_max get_fs | ||
4075 | |||
4076 | #define _ASM_EXTABLE(from, to) \ | ||
4077 | @@ -120,6 +135,26 @@ static inline void set_fs(mm_segment_t fs) | ||
4078 | " .long (" #from " - .), (" #to " - .)\n" \ | ||
4079 | " .popsection\n" | ||
4080 | |||
4081 | +/* | ||
4082 | + * Sanitise a uaccess pointer such that it becomes NULL if above the | ||
4083 | + * current addr_limit. | ||
4084 | + */ | ||
4085 | +#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr) | ||
4086 | +static inline void __user *__uaccess_mask_ptr(const void __user *ptr) | ||
4087 | +{ | ||
4088 | + void __user *safe_ptr; | ||
4089 | + | ||
4090 | + asm volatile( | ||
4091 | + " bics xzr, %1, %2\n" | ||
4092 | + " csel %0, %1, xzr, eq\n" | ||
4093 | + : "=&r" (safe_ptr) | ||
4094 | + : "r" (ptr), "r" (current_thread_info()->addr_limit) | ||
4095 | + : "cc"); | ||
4096 | + | ||
4097 | + csdb(); | ||
4098 | + return safe_ptr; | ||
4099 | +} | ||
4100 | + | ||
4101 | /* | ||
4102 | * The "__xxx" versions of the user access functions do not verify the address | ||
4103 | * space - it must have been done previously with a separate "access_ok()" | ||
4104 | @@ -174,30 +209,35 @@ do { \ | ||
4105 | CONFIG_ARM64_PAN)); \ | ||
4106 | } while (0) | ||
4107 | |||
4108 | -#define __get_user(x, ptr) \ | ||
4109 | +#define __get_user_check(x, ptr, err) \ | ||
4110 | ({ \ | ||
4111 | - int __gu_err = 0; \ | ||
4112 | - __get_user_err((x), (ptr), __gu_err); \ | ||
4113 | - __gu_err; \ | ||
4114 | + __typeof__(*(ptr)) __user *__p = (ptr); \ | ||
4115 | + might_fault(); \ | ||
4116 | + if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \ | ||
4117 | + __p = uaccess_mask_ptr(__p); \ | ||
4118 | + __get_user_err((x), __p, (err)); \ | ||
4119 | + } else { \ | ||
4120 | + (x) = 0; (err) = -EFAULT; \ | ||
4121 | + } \ | ||
4122 | }) | ||
4123 | |||
4124 | #define __get_user_error(x, ptr, err) \ | ||
4125 | ({ \ | ||
4126 | - __get_user_err((x), (ptr), (err)); \ | ||
4127 | + __get_user_check((x), (ptr), (err)); \ | ||
4128 | (void)0; \ | ||
4129 | }) | ||
4130 | |||
4131 | -#define __get_user_unaligned __get_user | ||
4132 | - | ||
4133 | -#define get_user(x, ptr) \ | ||
4134 | +#define __get_user(x, ptr) \ | ||
4135 | ({ \ | ||
4136 | - __typeof__(*(ptr)) __user *__p = (ptr); \ | ||
4137 | - might_fault(); \ | ||
4138 | - access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \ | ||
4139 | - __get_user((x), __p) : \ | ||
4140 | - ((x) = 0, -EFAULT); \ | ||
4141 | + int __gu_err = 0; \ | ||
4142 | + __get_user_check((x), (ptr), __gu_err); \ | ||
4143 | + __gu_err; \ | ||
4144 | }) | ||
4145 | |||
4146 | +#define __get_user_unaligned __get_user | ||
4147 | + | ||
4148 | +#define get_user __get_user | ||
4149 | + | ||
4150 | #define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ | ||
4151 | asm volatile( \ | ||
4152 | "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ | ||
4153 | @@ -242,47 +282,51 @@ do { \ | ||
4154 | CONFIG_ARM64_PAN)); \ | ||
4155 | } while (0) | ||
4156 | |||
4157 | -#define __put_user(x, ptr) \ | ||
4158 | +#define __put_user_check(x, ptr, err) \ | ||
4159 | ({ \ | ||
4160 | - int __pu_err = 0; \ | ||
4161 | - __put_user_err((x), (ptr), __pu_err); \ | ||
4162 | - __pu_err; \ | ||
4163 | + __typeof__(*(ptr)) __user *__p = (ptr); \ | ||
4164 | + might_fault(); \ | ||
4165 | + if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \ | ||
4166 | + __p = uaccess_mask_ptr(__p); \ | ||
4167 | + __put_user_err((x), __p, (err)); \ | ||
4168 | + } else { \ | ||
4169 | + (err) = -EFAULT; \ | ||
4170 | + } \ | ||
4171 | }) | ||
4172 | |||
4173 | #define __put_user_error(x, ptr, err) \ | ||
4174 | ({ \ | ||
4175 | - __put_user_err((x), (ptr), (err)); \ | ||
4176 | + __put_user_check((x), (ptr), (err)); \ | ||
4177 | (void)0; \ | ||
4178 | }) | ||
4179 | |||
4180 | -#define __put_user_unaligned __put_user | ||
4181 | - | ||
4182 | -#define put_user(x, ptr) \ | ||
4183 | +#define __put_user(x, ptr) \ | ||
4184 | ({ \ | ||
4185 | - __typeof__(*(ptr)) __user *__p = (ptr); \ | ||
4186 | - might_fault(); \ | ||
4187 | - access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? \ | ||
4188 | - __put_user((x), __p) : \ | ||
4189 | - -EFAULT; \ | ||
4190 | + int __pu_err = 0; \ | ||
4191 | + __put_user_check((x), (ptr), __pu_err); \ | ||
4192 | + __pu_err; \ | ||
4193 | }) | ||
4194 | |||
4195 | +#define __put_user_unaligned __put_user | ||
4196 | + | ||
4197 | +#define put_user __put_user | ||
4198 | + | ||
4199 | extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); | ||
4200 | extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); | ||
4201 | -extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); | ||
4202 | -extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); | ||
4203 | +extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n); | ||
4204 | |||
4205 | static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) | ||
4206 | { | ||
4207 | kasan_check_write(to, n); | ||
4208 | check_object_size(to, n, false); | ||
4209 | - return __arch_copy_from_user(to, from, n); | ||
4210 | + return __arch_copy_from_user(to, __uaccess_mask_ptr(from), n); | ||
4211 | } | ||
4212 | |||
4213 | static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) | ||
4214 | { | ||
4215 | kasan_check_read(from, n); | ||
4216 | check_object_size(from, n, true); | ||
4217 | - return __arch_copy_to_user(to, from, n); | ||
4218 | + return __arch_copy_to_user(__uaccess_mask_ptr(to), from, n); | ||
4219 | } | ||
4220 | |||
4221 | static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) | ||
4222 | @@ -310,22 +354,25 @@ static inline unsigned long __must_check copy_to_user(void __user *to, const voi | ||
4223 | return n; | ||
4224 | } | ||
4225 | |||
4226 | -static inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n) | ||
4227 | +static inline unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n) | ||
4228 | { | ||
4229 | if (access_ok(VERIFY_READ, from, n) && access_ok(VERIFY_WRITE, to, n)) | ||
4230 | - n = __copy_in_user(to, from, n); | ||
4231 | + n = __arch_copy_in_user(__uaccess_mask_ptr(to), __uaccess_mask_ptr(from), n); | ||
4232 | return n; | ||
4233 | } | ||
4234 | +#define copy_in_user __copy_in_user | ||
4235 | |||
4236 | #define __copy_to_user_inatomic __copy_to_user | ||
4237 | #define __copy_from_user_inatomic __copy_from_user | ||
4238 | |||
4239 | -static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) | ||
4240 | +extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n); | ||
4241 | +static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n) | ||
4242 | { | ||
4243 | if (access_ok(VERIFY_WRITE, to, n)) | ||
4244 | - n = __clear_user(to, n); | ||
4245 | + n = __arch_clear_user(__uaccess_mask_ptr(to), n); | ||
4246 | return n; | ||
4247 | } | ||
4248 | +#define clear_user __clear_user | ||
4249 | |||
4250 | extern long strncpy_from_user(char *dest, const char __user *src, long count); | ||
4251 | |||
4252 | diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile | ||
4253 | index 7d66bbaafc0c..74b8fd860714 100644 | ||
4254 | --- a/arch/arm64/kernel/Makefile | ||
4255 | +++ b/arch/arm64/kernel/Makefile | ||
4256 | @@ -51,6 +51,10 @@ arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o | ||
4257 | arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ | ||
4258 | cpu-reset.o | ||
4259 | |||
4260 | +ifeq ($(CONFIG_KVM),y) | ||
4261 | +arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o | ||
4262 | +endif | ||
4263 | + | ||
4264 | obj-y += $(arm64-obj-y) vdso/ probes/ | ||
4265 | obj-m += $(arm64-obj-m) | ||
4266 | head-y := head.o | ||
4267 | diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c | ||
4268 | index e9c4dc9e0ada..66be504edb6c 100644 | ||
4269 | --- a/arch/arm64/kernel/arm64ksyms.c | ||
4270 | +++ b/arch/arm64/kernel/arm64ksyms.c | ||
4271 | @@ -37,8 +37,8 @@ EXPORT_SYMBOL(clear_page); | ||
4272 | /* user mem (segment) */ | ||
4273 | EXPORT_SYMBOL(__arch_copy_from_user); | ||
4274 | EXPORT_SYMBOL(__arch_copy_to_user); | ||
4275 | -EXPORT_SYMBOL(__clear_user); | ||
4276 | -EXPORT_SYMBOL(__copy_in_user); | ||
4277 | +EXPORT_SYMBOL(__arch_clear_user); | ||
4278 | +EXPORT_SYMBOL(__arch_copy_in_user); | ||
4279 | |||
4280 | /* physical memory */ | ||
4281 | EXPORT_SYMBOL(memstart_addr); | ||
4282 | diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S | ||
4283 | new file mode 100644 | ||
4284 | index 000000000000..dc4eb154e33b | ||
4285 | --- /dev/null | ||
4286 | +++ b/arch/arm64/kernel/bpi.S | ||
4287 | @@ -0,0 +1,75 @@ | ||
4288 | +/* | ||
4289 | + * Contains CPU specific branch predictor invalidation sequences | ||
4290 | + * | ||
4291 | + * Copyright (C) 2018 ARM Ltd. | ||
4292 | + * | ||
4293 | + * This program is free software; you can redistribute it and/or modify | ||
4294 | + * it under the terms of the GNU General Public License version 2 as | ||
4295 | + * published by the Free Software Foundation. | ||
4296 | + * | ||
4297 | + * This program is distributed in the hope that it will be useful, | ||
4298 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
4299 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
4300 | + * GNU General Public License for more details. | ||
4301 | + * | ||
4302 | + * You should have received a copy of the GNU General Public License | ||
4303 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
4304 | + */ | ||
4305 | + | ||
4306 | +#include <linux/linkage.h> | ||
4307 | +#include <linux/arm-smccc.h> | ||
4308 | + | ||
4309 | +.macro ventry target | ||
4310 | + .rept 31 | ||
4311 | + nop | ||
4312 | + .endr | ||
4313 | + b \target | ||
4314 | +.endm | ||
4315 | + | ||
4316 | +.macro vectors target | ||
4317 | + ventry \target + 0x000 | ||
4318 | + ventry \target + 0x080 | ||
4319 | + ventry \target + 0x100 | ||
4320 | + ventry \target + 0x180 | ||
4321 | + | ||
4322 | + ventry \target + 0x200 | ||
4323 | + ventry \target + 0x280 | ||
4324 | + ventry \target + 0x300 | ||
4325 | + ventry \target + 0x380 | ||
4326 | + | ||
4327 | + ventry \target + 0x400 | ||
4328 | + ventry \target + 0x480 | ||
4329 | + ventry \target + 0x500 | ||
4330 | + ventry \target + 0x580 | ||
4331 | + | ||
4332 | + ventry \target + 0x600 | ||
4333 | + ventry \target + 0x680 | ||
4334 | + ventry \target + 0x700 | ||
4335 | + ventry \target + 0x780 | ||
4336 | +.endm | ||
4337 | + | ||
4338 | + .align 11 | ||
4339 | +ENTRY(__bp_harden_hyp_vecs_start) | ||
4340 | + .rept 4 | ||
4341 | + vectors __kvm_hyp_vector | ||
4342 | + .endr | ||
4343 | +ENTRY(__bp_harden_hyp_vecs_end) | ||
4344 | + | ||
4345 | +.macro smccc_workaround_1 inst | ||
4346 | + sub sp, sp, #(8 * 4) | ||
4347 | + stp x2, x3, [sp, #(8 * 0)] | ||
4348 | + stp x0, x1, [sp, #(8 * 2)] | ||
4349 | + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 | ||
4350 | + \inst #0 | ||
4351 | + ldp x2, x3, [sp, #(8 * 0)] | ||
4352 | + ldp x0, x1, [sp, #(8 * 2)] | ||
4353 | + add sp, sp, #(8 * 4) | ||
4354 | +.endm | ||
4355 | + | ||
4356 | +ENTRY(__smccc_workaround_1_smc_start) | ||
4357 | + smccc_workaround_1 smc | ||
4358 | +ENTRY(__smccc_workaround_1_smc_end) | ||
4359 | + | ||
4360 | +ENTRY(__smccc_workaround_1_hvc_start) | ||
4361 | + smccc_workaround_1 hvc | ||
4362 | +ENTRY(__smccc_workaround_1_hvc_end) | ||
4363 | diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c | ||
4364 | index b75e917aac46..74107134cc30 100644 | ||
4365 | --- a/arch/arm64/kernel/cpu_errata.c | ||
4366 | +++ b/arch/arm64/kernel/cpu_errata.c | ||
4367 | @@ -46,6 +46,147 @@ static int cpu_enable_trap_ctr_access(void *__unused) | ||
4368 | return 0; | ||
4369 | } | ||
4370 | |||
4371 | +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR | ||
4372 | +#include <asm/mmu_context.h> | ||
4373 | +#include <asm/cacheflush.h> | ||
4374 | + | ||
4375 | +DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); | ||
4376 | + | ||
4377 | +#ifdef CONFIG_KVM | ||
4378 | +extern char __smccc_workaround_1_smc_start[]; | ||
4379 | +extern char __smccc_workaround_1_smc_end[]; | ||
4380 | +extern char __smccc_workaround_1_hvc_start[]; | ||
4381 | +extern char __smccc_workaround_1_hvc_end[]; | ||
4382 | + | ||
4383 | +static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, | ||
4384 | + const char *hyp_vecs_end) | ||
4385 | +{ | ||
4386 | + void *dst = __bp_harden_hyp_vecs_start + slot * SZ_2K; | ||
4387 | + int i; | ||
4388 | + | ||
4389 | + for (i = 0; i < SZ_2K; i += 0x80) | ||
4390 | + memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); | ||
4391 | + | ||
4392 | + flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); | ||
4393 | +} | ||
4394 | + | ||
4395 | +static void __install_bp_hardening_cb(bp_hardening_cb_t fn, | ||
4396 | + const char *hyp_vecs_start, | ||
4397 | + const char *hyp_vecs_end) | ||
4398 | +{ | ||
4399 | + static int last_slot = -1; | ||
4400 | + static DEFINE_SPINLOCK(bp_lock); | ||
4401 | + int cpu, slot = -1; | ||
4402 | + | ||
4403 | + spin_lock(&bp_lock); | ||
4404 | + for_each_possible_cpu(cpu) { | ||
4405 | + if (per_cpu(bp_hardening_data.fn, cpu) == fn) { | ||
4406 | + slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); | ||
4407 | + break; | ||
4408 | + } | ||
4409 | + } | ||
4410 | + | ||
4411 | + if (slot == -1) { | ||
4412 | + last_slot++; | ||
4413 | + BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start) | ||
4414 | + / SZ_2K) <= last_slot); | ||
4415 | + slot = last_slot; | ||
4416 | + __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); | ||
4417 | + } | ||
4418 | + | ||
4419 | + __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); | ||
4420 | + __this_cpu_write(bp_hardening_data.fn, fn); | ||
4421 | + spin_unlock(&bp_lock); | ||
4422 | +} | ||
4423 | +#else | ||
4424 | +#define __smccc_workaround_1_smc_start NULL | ||
4425 | +#define __smccc_workaround_1_smc_end NULL | ||
4426 | +#define __smccc_workaround_1_hvc_start NULL | ||
4427 | +#define __smccc_workaround_1_hvc_end NULL | ||
4428 | + | ||
4429 | +static void __install_bp_hardening_cb(bp_hardening_cb_t fn, | ||
4430 | + const char *hyp_vecs_start, | ||
4431 | + const char *hyp_vecs_end) | ||
4432 | +{ | ||
4433 | + __this_cpu_write(bp_hardening_data.fn, fn); | ||
4434 | +} | ||
4435 | +#endif /* CONFIG_KVM */ | ||
4436 | + | ||
4437 | +static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, | ||
4438 | + bp_hardening_cb_t fn, | ||
4439 | + const char *hyp_vecs_start, | ||
4440 | + const char *hyp_vecs_end) | ||
4441 | +{ | ||
4442 | + u64 pfr0; | ||
4443 | + | ||
4444 | + if (!entry->matches(entry, SCOPE_LOCAL_CPU)) | ||
4445 | + return; | ||
4446 | + | ||
4447 | + pfr0 = read_cpuid(ID_AA64PFR0_EL1); | ||
4448 | + if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) | ||
4449 | + return; | ||
4450 | + | ||
4451 | + __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); | ||
4452 | +} | ||
4453 | + | ||
4454 | +#include <uapi/linux/psci.h> | ||
4455 | +#include <linux/arm-smccc.h> | ||
4456 | +#include <linux/psci.h> | ||
4457 | + | ||
4458 | +static void call_smc_arch_workaround_1(void) | ||
4459 | +{ | ||
4460 | + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); | ||
4461 | +} | ||
4462 | + | ||
4463 | +static void call_hvc_arch_workaround_1(void) | ||
4464 | +{ | ||
4465 | + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); | ||
4466 | +} | ||
4467 | + | ||
4468 | +static int enable_smccc_arch_workaround_1(void *data) | ||
4469 | +{ | ||
4470 | + const struct arm64_cpu_capabilities *entry = data; | ||
4471 | + bp_hardening_cb_t cb; | ||
4472 | + void *smccc_start, *smccc_end; | ||
4473 | + struct arm_smccc_res res; | ||
4474 | + | ||
4475 | + if (!entry->matches(entry, SCOPE_LOCAL_CPU)) | ||
4476 | + return 0; | ||
4477 | + | ||
4478 | + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) | ||
4479 | + return 0; | ||
4480 | + | ||
4481 | + switch (psci_ops.conduit) { | ||
4482 | + case PSCI_CONDUIT_HVC: | ||
4483 | + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, | ||
4484 | + ARM_SMCCC_ARCH_WORKAROUND_1, &res); | ||
4485 | + if (res.a0) | ||
4486 | + return 0; | ||
4487 | + cb = call_hvc_arch_workaround_1; | ||
4488 | + smccc_start = __smccc_workaround_1_hvc_start; | ||
4489 | + smccc_end = __smccc_workaround_1_hvc_end; | ||
4490 | + break; | ||
4491 | + | ||
4492 | + case PSCI_CONDUIT_SMC: | ||
4493 | + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, | ||
4494 | + ARM_SMCCC_ARCH_WORKAROUND_1, &res); | ||
4495 | + if (res.a0) | ||
4496 | + return 0; | ||
4497 | + cb = call_smc_arch_workaround_1; | ||
4498 | + smccc_start = __smccc_workaround_1_smc_start; | ||
4499 | + smccc_end = __smccc_workaround_1_smc_end; | ||
4500 | + break; | ||
4501 | + | ||
4502 | + default: | ||
4503 | + return 0; | ||
4504 | + } | ||
4505 | + | ||
4506 | + install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); | ||
4507 | + | ||
4508 | + return 0; | ||
4509 | +} | ||
4510 | +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ | ||
4511 | + | ||
4512 | #define MIDR_RANGE(model, min, max) \ | ||
4513 | .def_scope = SCOPE_LOCAL_CPU, \ | ||
4514 | .matches = is_affected_midr_range, \ | ||
4515 | @@ -53,6 +194,13 @@ static int cpu_enable_trap_ctr_access(void *__unused) | ||
4516 | .midr_range_min = min, \ | ||
4517 | .midr_range_max = max | ||
4518 | |||
4519 | +#define MIDR_ALL_VERSIONS(model) \ | ||
4520 | + .def_scope = SCOPE_LOCAL_CPU, \ | ||
4521 | + .matches = is_affected_midr_range, \ | ||
4522 | + .midr_model = model, \ | ||
4523 | + .midr_range_min = 0, \ | ||
4524 | + .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) | ||
4525 | + | ||
4526 | const struct arm64_cpu_capabilities arm64_errata[] = { | ||
4527 | #if defined(CONFIG_ARM64_ERRATUM_826319) || \ | ||
4528 | defined(CONFIG_ARM64_ERRATUM_827319) || \ | ||
4529 | @@ -130,6 +278,38 @@ const struct arm64_cpu_capabilities arm64_errata[] = { | ||
4530 | .def_scope = SCOPE_LOCAL_CPU, | ||
4531 | .enable = cpu_enable_trap_ctr_access, | ||
4532 | }, | ||
4533 | +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR | ||
4534 | + { | ||
4535 | + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, | ||
4536 | + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), | ||
4537 | + .enable = enable_smccc_arch_workaround_1, | ||
4538 | + }, | ||
4539 | + { | ||
4540 | + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, | ||
4541 | + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), | ||
4542 | + .enable = enable_smccc_arch_workaround_1, | ||
4543 | + }, | ||
4544 | + { | ||
4545 | + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, | ||
4546 | + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), | ||
4547 | + .enable = enable_smccc_arch_workaround_1, | ||
4548 | + }, | ||
4549 | + { | ||
4550 | + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, | ||
4551 | + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), | ||
4552 | + .enable = enable_smccc_arch_workaround_1, | ||
4553 | + }, | ||
4554 | + { | ||
4555 | + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, | ||
4556 | + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), | ||
4557 | + .enable = enable_smccc_arch_workaround_1, | ||
4558 | + }, | ||
4559 | + { | ||
4560 | + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, | ||
4561 | + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), | ||
4562 | + .enable = enable_smccc_arch_workaround_1, | ||
4563 | + }, | ||
4564 | +#endif | ||
4565 | { | ||
4566 | } | ||
4567 | }; | ||
4568 | @@ -143,15 +323,18 @@ void verify_local_cpu_errata_workarounds(void) | ||
4569 | { | ||
4570 | const struct arm64_cpu_capabilities *caps = arm64_errata; | ||
4571 | |||
4572 | - for (; caps->matches; caps++) | ||
4573 | - if (!cpus_have_cap(caps->capability) && | ||
4574 | - caps->matches(caps, SCOPE_LOCAL_CPU)) { | ||
4575 | + for (; caps->matches; caps++) { | ||
4576 | + if (cpus_have_cap(caps->capability)) { | ||
4577 | + if (caps->enable) | ||
4578 | + caps->enable((void *)caps); | ||
4579 | + } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { | ||
4580 | pr_crit("CPU%d: Requires work around for %s, not detected" | ||
4581 | " at boot time\n", | ||
4582 | smp_processor_id(), | ||
4583 | caps->desc ? : "an erratum"); | ||
4584 | cpu_die_early(); | ||
4585 | } | ||
4586 | + } | ||
4587 | } | ||
4588 | |||
4589 | void update_cpu_errata_workarounds(void) | ||
4590 | diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c | ||
4591 | index 5056fc597ae9..a0ee01202503 100644 | ||
4592 | --- a/arch/arm64/kernel/cpufeature.c | ||
4593 | +++ b/arch/arm64/kernel/cpufeature.c | ||
4594 | @@ -94,7 +94,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { | ||
4595 | |||
4596 | static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { | ||
4597 | ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), | ||
4598 | - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 28, 0), | ||
4599 | + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), | ||
4600 | + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 24, 0), | ||
4601 | ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), | ||
4602 | ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), | ||
4603 | S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), | ||
4604 | @@ -1024,9 +1025,8 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, | ||
4605 | if (WARN_ON(preemptible())) | ||
4606 | return false; | ||
4607 | |||
4608 | - for (caps = cap_array; caps->desc; caps++) | ||
4609 | + for (caps = cap_array; caps->matches; caps++) | ||
4610 | if (caps->capability == cap && | ||
4611 | - caps->matches && | ||
4612 | caps->matches(caps, SCOPE_LOCAL_CPU)) | ||
4613 | return true; | ||
4614 | return false; | ||
4615 | @@ -1059,7 +1059,7 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) | ||
4616 | * uses an IPI, giving us a PSTATE that disappears when | ||
4617 | * we return. | ||
4618 | */ | ||
4619 | - stop_machine(caps->enable, NULL, cpu_online_mask); | ||
4620 | + stop_machine(caps->enable, (void *)caps, cpu_online_mask); | ||
4621 | } | ||
4622 | |||
4623 | /* | ||
4624 | @@ -1116,7 +1116,7 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) | ||
4625 | cpu_die_early(); | ||
4626 | } | ||
4627 | if (caps->enable) | ||
4628 | - caps->enable(NULL); | ||
4629 | + caps->enable((void *)caps); | ||
4630 | } | ||
4631 | } | ||
4632 | |||
4633 | diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S | ||
4634 | index 8d1600b18562..b79e302d2a3e 100644 | ||
4635 | --- a/arch/arm64/kernel/entry.S | ||
4636 | +++ b/arch/arm64/kernel/entry.S | ||
4637 | @@ -30,6 +30,7 @@ | ||
4638 | #include <asm/irq.h> | ||
4639 | #include <asm/memory.h> | ||
4640 | #include <asm/mmu.h> | ||
4641 | +#include <asm/processor.h> | ||
4642 | #include <asm/thread_info.h> | ||
4643 | #include <asm/asm-uaccess.h> | ||
4644 | #include <asm/unistd.h> | ||
4645 | @@ -125,10 +126,10 @@ alternative_else_nop_endif | ||
4646 | .else | ||
4647 | add x21, sp, #S_FRAME_SIZE | ||
4648 | get_thread_info tsk | ||
4649 | - /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ | ||
4650 | + /* Save the task's original addr_limit and set USER_DS */ | ||
4651 | ldr x20, [tsk, #TI_ADDR_LIMIT] | ||
4652 | str x20, [sp, #S_ORIG_ADDR_LIMIT] | ||
4653 | - mov x20, #TASK_SIZE_64 | ||
4654 | + mov x20, #USER_DS | ||
4655 | str x20, [tsk, #TI_ADDR_LIMIT] | ||
4656 | /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ | ||
4657 | .endif /* \el == 0 */ | ||
4658 | @@ -588,13 +589,15 @@ el0_ia: | ||
4659 | * Instruction abort handling | ||
4660 | */ | ||
4661 | mrs x26, far_el1 | ||
4662 | - // enable interrupts before calling the main handler | ||
4663 | - enable_dbg_and_irq | ||
4664 | + msr daifclr, #(8 | 4 | 1) | ||
4665 | +#ifdef CONFIG_TRACE_IRQFLAGS | ||
4666 | + bl trace_hardirqs_off | ||
4667 | +#endif | ||
4668 | ct_user_exit | ||
4669 | mov x0, x26 | ||
4670 | mov x1, x25 | ||
4671 | mov x2, sp | ||
4672 | - bl do_mem_abort | ||
4673 | + bl do_el0_ia_bp_hardening | ||
4674 | b ret_to_user | ||
4675 | el0_fpsimd_acc: | ||
4676 | /* | ||
4677 | @@ -621,8 +624,10 @@ el0_sp_pc: | ||
4678 | * Stack or PC alignment exception handling | ||
4679 | */ | ||
4680 | mrs x26, far_el1 | ||
4681 | - // enable interrupts before calling the main handler | ||
4682 | - enable_dbg_and_irq | ||
4683 | + enable_dbg | ||
4684 | +#ifdef CONFIG_TRACE_IRQFLAGS | ||
4685 | + bl trace_hardirqs_off | ||
4686 | +#endif | ||
4687 | ct_user_exit | ||
4688 | mov x0, x26 | ||
4689 | mov x1, x25 | ||
4690 | @@ -681,6 +686,11 @@ el0_irq_naked: | ||
4691 | #endif | ||
4692 | |||
4693 | ct_user_exit | ||
4694 | +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR | ||
4695 | + tbz x22, #55, 1f | ||
4696 | + bl do_el0_irq_bp_hardening | ||
4697 | +1: | ||
4698 | +#endif | ||
4699 | irq_handler | ||
4700 | |||
4701 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
4702 | @@ -794,6 +804,7 @@ el0_svc_naked: // compat entry point | ||
4703 | b.ne __sys_trace | ||
4704 | cmp scno, sc_nr // check upper syscall limit | ||
4705 | b.hs ni_sys | ||
4706 | + mask_nospec64 scno, sc_nr, x19 // enforce bounds for syscall number | ||
4707 | ldr x16, [stbl, scno, lsl #3] // address in the syscall table | ||
4708 | blr x16 // call sys_* routine | ||
4709 | b ret_fast_syscall | ||
4710 | diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c | ||
4711 | index 2e6e9e99977b..efe43c5f2dc1 100644 | ||
4712 | --- a/arch/arm64/kvm/handle_exit.c | ||
4713 | +++ b/arch/arm64/kvm/handle_exit.c | ||
4714 | @@ -22,12 +22,15 @@ | ||
4715 | #include <linux/kvm.h> | ||
4716 | #include <linux/kvm_host.h> | ||
4717 | |||
4718 | +#include <kvm/arm_psci.h> | ||
4719 | + | ||
4720 | #include <asm/esr.h> | ||
4721 | #include <asm/kvm_asm.h> | ||
4722 | #include <asm/kvm_coproc.h> | ||
4723 | #include <asm/kvm_emulate.h> | ||
4724 | #include <asm/kvm_mmu.h> | ||
4725 | -#include <asm/kvm_psci.h> | ||
4726 | +#include <asm/debug-monitors.h> | ||
4727 | +#include <asm/traps.h> | ||
4728 | |||
4729 | #define CREATE_TRACE_POINTS | ||
4730 | #include "trace.h" | ||
4731 | @@ -42,7 +45,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) | ||
4732 | kvm_vcpu_hvc_get_imm(vcpu)); | ||
4733 | vcpu->stat.hvc_exit_stat++; | ||
4734 | |||
4735 | - ret = kvm_psci_call(vcpu); | ||
4736 | + ret = kvm_hvc_call_handler(vcpu); | ||
4737 | if (ret < 0) { | ||
4738 | vcpu_set_reg(vcpu, 0, ~0UL); | ||
4739 | return 1; | ||
4740 | @@ -53,7 +56,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) | ||
4741 | |||
4742 | static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) | ||
4743 | { | ||
4744 | + /* | ||
4745 | + * "If an SMC instruction executed at Non-secure EL1 is | ||
4746 | + * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a | ||
4747 | + * Trap exception, not a Secure Monitor Call exception [...]" | ||
4748 | + * | ||
4749 | + * We need to advance the PC after the trap, as it would | ||
4750 | + * otherwise return to the same address... | ||
4751 | + */ | ||
4752 | vcpu_set_reg(vcpu, 0, ~0UL); | ||
4753 | + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); | ||
4754 | return 1; | ||
4755 | } | ||
4756 | |||
4757 | diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S | ||
4758 | index 4e92399f7105..4e9d50c3e658 100644 | ||
4759 | --- a/arch/arm64/kvm/hyp/hyp-entry.S | ||
4760 | +++ b/arch/arm64/kvm/hyp/hyp-entry.S | ||
4761 | @@ -15,6 +15,7 @@ | ||
4762 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
4763 | */ | ||
4764 | |||
4765 | +#include <linux/arm-smccc.h> | ||
4766 | #include <linux/linkage.h> | ||
4767 | |||
4768 | #include <asm/alternative.h> | ||
4769 | @@ -79,10 +80,11 @@ alternative_endif | ||
4770 | lsr x0, x1, #ESR_ELx_EC_SHIFT | ||
4771 | |||
4772 | cmp x0, #ESR_ELx_EC_HVC64 | ||
4773 | + ccmp x0, #ESR_ELx_EC_HVC32, #4, ne | ||
4774 | b.ne el1_trap | ||
4775 | |||
4776 | - mrs x1, vttbr_el2 // If vttbr is valid, the 64bit guest | ||
4777 | - cbnz x1, el1_trap // called HVC | ||
4778 | + mrs x1, vttbr_el2 // If vttbr is valid, the guest | ||
4779 | + cbnz x1, el1_hvc_guest // called HVC | ||
4780 | |||
4781 | /* Here, we're pretty sure the host called HVC. */ | ||
4782 | ldp x0, x1, [sp], #16 | ||
4783 | @@ -101,6 +103,20 @@ alternative_endif | ||
4784 | |||
4785 | 2: eret | ||
4786 | |||
4787 | +el1_hvc_guest: | ||
4788 | + /* | ||
4789 | + * Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1. | ||
4790 | + * The workaround has already been applied on the host, | ||
4791 | + * so let's quickly get back to the guest. We don't bother | ||
4792 | + * restoring x1, as it can be clobbered anyway. | ||
4793 | + */ | ||
4794 | + ldr x1, [sp] // Guest's x0 | ||
4795 | + eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1 | ||
4796 | + cbnz w1, el1_trap | ||
4797 | + mov x0, x1 | ||
4798 | + add sp, sp, #16 | ||
4799 | + eret | ||
4800 | + | ||
4801 | el1_trap: | ||
4802 | /* | ||
4803 | * x0: ESR_EC | ||
4804 | diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c | ||
4805 | index 9174ba917d65..c49d09387192 100644 | ||
4806 | --- a/arch/arm64/kvm/hyp/switch.c | ||
4807 | +++ b/arch/arm64/kvm/hyp/switch.c | ||
4808 | @@ -17,6 +17,9 @@ | ||
4809 | |||
4810 | #include <linux/types.h> | ||
4811 | #include <linux/jump_label.h> | ||
4812 | +#include <uapi/linux/psci.h> | ||
4813 | + | ||
4814 | +#include <kvm/arm_psci.h> | ||
4815 | |||
4816 | #include <asm/kvm_asm.h> | ||
4817 | #include <asm/kvm_emulate.h> | ||
4818 | @@ -50,7 +53,7 @@ static void __hyp_text __activate_traps_vhe(void) | ||
4819 | val &= ~CPACR_EL1_FPEN; | ||
4820 | write_sysreg(val, cpacr_el1); | ||
4821 | |||
4822 | - write_sysreg(__kvm_hyp_vector, vbar_el1); | ||
4823 | + write_sysreg(kvm_get_hyp_vector(), vbar_el1); | ||
4824 | } | ||
4825 | |||
4826 | static void __hyp_text __activate_traps_nvhe(void) | ||
4827 | diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S | ||
4828 | index 5d1cad3ce6d6..efbf610eaf4e 100644 | ||
4829 | --- a/arch/arm64/lib/clear_user.S | ||
4830 | +++ b/arch/arm64/lib/clear_user.S | ||
4831 | @@ -24,7 +24,7 @@ | ||
4832 | |||
4833 | .text | ||
4834 | |||
4835 | -/* Prototype: int __clear_user(void *addr, size_t sz) | ||
4836 | +/* Prototype: int __arch_clear_user(void *addr, size_t sz) | ||
4837 | * Purpose : clear some user memory | ||
4838 | * Params : addr - user memory address to clear | ||
4839 | * : sz - number of bytes to clear | ||
4840 | @@ -32,7 +32,7 @@ | ||
4841 | * | ||
4842 | * Alignment fixed up by hardware. | ||
4843 | */ | ||
4844 | -ENTRY(__clear_user) | ||
4845 | +ENTRY(__arch_clear_user) | ||
4846 | ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ | ||
4847 | CONFIG_ARM64_PAN) | ||
4848 | mov x2, x1 // save the size for fixup return | ||
4849 | @@ -57,7 +57,7 @@ uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 | ||
4850 | ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ | ||
4851 | CONFIG_ARM64_PAN) | ||
4852 | ret | ||
4853 | -ENDPROC(__clear_user) | ||
4854 | +ENDPROC(__arch_clear_user) | ||
4855 | |||
4856 | .section .fixup,"ax" | ||
4857 | .align 2 | ||
4858 | diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S | ||
4859 | index f7292dd08c84..841bf8f7fab7 100644 | ||
4860 | --- a/arch/arm64/lib/copy_in_user.S | ||
4861 | +++ b/arch/arm64/lib/copy_in_user.S | ||
4862 | @@ -67,7 +67,7 @@ | ||
4863 | .endm | ||
4864 | |||
4865 | end .req x5 | ||
4866 | -ENTRY(__copy_in_user) | ||
4867 | +ENTRY(__arch_copy_in_user) | ||
4868 | ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ | ||
4869 | CONFIG_ARM64_PAN) | ||
4870 | add end, x0, x2 | ||
4871 | @@ -76,7 +76,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ | ||
4872 | CONFIG_ARM64_PAN) | ||
4873 | mov x0, #0 | ||
4874 | ret | ||
4875 | -ENDPROC(__copy_in_user) | ||
4876 | +ENDPROC(__arch_copy_in_user) | ||
4877 | |||
4878 | .section .fixup,"ax" | ||
4879 | .align 2 | ||
4880 | diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c | ||
4881 | index f00f5eeb556f..62d976e843fc 100644 | ||
4882 | --- a/arch/arm64/mm/context.c | ||
4883 | +++ b/arch/arm64/mm/context.c | ||
4884 | @@ -230,9 +230,21 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) | ||
4885 | raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); | ||
4886 | |||
4887 | switch_mm_fastpath: | ||
4888 | + | ||
4889 | + arm64_apply_bp_hardening(); | ||
4890 | + | ||
4891 | cpu_switch_mm(mm->pgd, mm); | ||
4892 | } | ||
4893 | |||
4894 | +/* Errata workaround post TTBRx_EL1 update. */ | ||
4895 | +asmlinkage void post_ttbr_update_workaround(void) | ||
4896 | +{ | ||
4897 | + asm(ALTERNATIVE("nop; nop; nop", | ||
4898 | + "ic iallu; dsb nsh; isb", | ||
4899 | + ARM64_WORKAROUND_CAVIUM_27456, | ||
4900 | + CONFIG_CAVIUM_ERRATUM_27456)); | ||
4901 | +} | ||
4902 | + | ||
4903 | static int asids_init(void) | ||
4904 | { | ||
4905 | asid_bits = get_cpu_asid_bits(); | ||
4906 | diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c | ||
4907 | index 403fe9e57135..ad49ae8f3967 100644 | ||
4908 | --- a/arch/arm64/mm/fault.c | ||
4909 | +++ b/arch/arm64/mm/fault.c | ||
4910 | @@ -332,7 +332,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, | ||
4911 | mm_flags |= FAULT_FLAG_WRITE; | ||
4912 | } | ||
4913 | |||
4914 | - if (is_permission_fault(esr) && (addr < USER_DS)) { | ||
4915 | + if (is_permission_fault(esr) && (addr < TASK_SIZE)) { | ||
4916 | /* regs->orig_addr_limit may be 0 if we entered from EL0 */ | ||
4917 | if (regs->orig_addr_limit == KERNEL_DS) | ||
4918 | die("Accessing user space memory with fs=KERNEL_DS", regs, esr); | ||
4919 | @@ -590,6 +590,29 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, | ||
4920 | arm64_notify_die("", regs, &info, esr); | ||
4921 | } | ||
4922 | |||
4923 | +asmlinkage void __exception do_el0_irq_bp_hardening(void) | ||
4924 | +{ | ||
4925 | + /* PC has already been checked in entry.S */ | ||
4926 | + arm64_apply_bp_hardening(); | ||
4927 | +} | ||
4928 | + | ||
4929 | +asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, | ||
4930 | + unsigned int esr, | ||
4931 | + struct pt_regs *regs) | ||
4932 | +{ | ||
4933 | + /* | ||
4934 | + * We've taken an instruction abort from userspace and not yet | ||
4935 | + * re-enabled IRQs. If the address is a kernel address, apply | ||
4936 | + * BP hardening prior to enabling IRQs and pre-emption. | ||
4937 | + */ | ||
4938 | + if (addr > TASK_SIZE) | ||
4939 | + arm64_apply_bp_hardening(); | ||
4940 | + | ||
4941 | + local_irq_enable(); | ||
4942 | + do_mem_abort(addr, esr, regs); | ||
4943 | +} | ||
4944 | + | ||
4945 | + | ||
4946 | /* | ||
4947 | * Handle stack alignment exceptions. | ||
4948 | */ | ||
4949 | @@ -600,6 +623,12 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, | ||
4950 | struct siginfo info; | ||
4951 | struct task_struct *tsk = current; | ||
4952 | |||
4953 | + if (user_mode(regs)) { | ||
4954 | + if (instruction_pointer(regs) > TASK_SIZE) | ||
4955 | + arm64_apply_bp_hardening(); | ||
4956 | + local_irq_enable(); | ||
4957 | + } | ||
4958 | + | ||
4959 | if (show_unhandled_signals && unhandled_signal(tsk, SIGBUS)) | ||
4960 | pr_info_ratelimited("%s[%d]: %s exception: pc=%p sp=%p\n", | ||
4961 | tsk->comm, task_pid_nr(tsk), | ||
4962 | @@ -659,6 +688,9 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, | ||
4963 | if (interrupts_enabled(regs)) | ||
4964 | trace_hardirqs_off(); | ||
4965 | |||
4966 | + if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE) | ||
4967 | + arm64_apply_bp_hardening(); | ||
4968 | + | ||
4969 | if (!inf->fn(addr, esr, regs)) { | ||
4970 | rv = 1; | ||
4971 | } else { | ||
4972 | diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S | ||
4973 | index c07d9cc057e6..619da1cbd32b 100644 | ||
4974 | --- a/arch/arm64/mm/proc.S | ||
4975 | +++ b/arch/arm64/mm/proc.S | ||
4976 | @@ -139,12 +139,7 @@ ENTRY(cpu_do_switch_mm) | ||
4977 | isb | ||
4978 | msr ttbr0_el1, x0 // now update TTBR0 | ||
4979 | isb | ||
4980 | -alternative_if ARM64_WORKAROUND_CAVIUM_27456 | ||
4981 | - ic iallu | ||
4982 | - dsb nsh | ||
4983 | - isb | ||
4984 | -alternative_else_nop_endif | ||
4985 | - ret | ||
4986 | + b post_ttbr_update_workaround // Back to C code... | ||
4987 | ENDPROC(cpu_do_switch_mm) | ||
4988 | |||
4989 | .pushsection ".idmap.text", "awx" | ||
4990 | diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c | ||
4991 | index 700e2d2da096..2e68ca1fe0db 100644 | ||
4992 | --- a/arch/parisc/kernel/drivers.c | ||
4993 | +++ b/arch/parisc/kernel/drivers.c | ||
4994 | @@ -648,6 +648,10 @@ static int match_pci_device(struct device *dev, int index, | ||
4995 | (modpath->mod == PCI_FUNC(devfn))); | ||
4996 | } | ||
4997 | |||
4998 | + /* index might be out of bounds for bc[] */ | ||
4999 | + if (index >= 6) | ||
5000 | + return 0; | ||
5001 | + | ||
5002 | id = PCI_SLOT(pdev->devfn) | (PCI_FUNC(pdev->devfn) << 5); | ||
5003 | return (modpath->bc[index] == id); | ||
5004 | } | ||
5005 | diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c | ||
5006 | index 295bfb7124bc..39127b691b78 100644 | ||
5007 | --- a/arch/s390/kernel/ipl.c | ||
5008 | +++ b/arch/s390/kernel/ipl.c | ||
5009 | @@ -798,6 +798,7 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, | ||
5010 | /* copy and convert to ebcdic */ | ||
5011 | memcpy(ipb->hdr.loadparm, buf, lp_len); | ||
5012 | ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN); | ||
5013 | + ipb->hdr.flags |= DIAG308_FLAGS_LP_VALID; | ||
5014 | return len; | ||
5015 | } | ||
5016 | |||
5017 | diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c | ||
5018 | index b1815b20a99c..37032545c58e 100644 | ||
5019 | --- a/drivers/acpi/nfit/core.c | ||
5020 | +++ b/drivers/acpi/nfit/core.c | ||
5021 | @@ -2547,15 +2547,21 @@ static void acpi_nfit_scrub(struct work_struct *work) | ||
5022 | static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) | ||
5023 | { | ||
5024 | struct nfit_spa *nfit_spa; | ||
5025 | - int rc; | ||
5026 | |||
5027 | - list_for_each_entry(nfit_spa, &acpi_desc->spas, list) | ||
5028 | - if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) { | ||
5029 | - /* BLK regions don't need to wait for ars results */ | ||
5030 | - rc = acpi_nfit_register_region(acpi_desc, nfit_spa); | ||
5031 | - if (rc) | ||
5032 | - return rc; | ||
5033 | - } | ||
5034 | + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { | ||
5035 | + int rc, type = nfit_spa_type(nfit_spa->spa); | ||
5036 | + | ||
5037 | + /* PMEM and VMEM will be registered by the ARS workqueue */ | ||
5038 | + if (type == NFIT_SPA_PM || type == NFIT_SPA_VOLATILE) | ||
5039 | + continue; | ||
5040 | + /* BLK apertures belong to BLK region registration below */ | ||
5041 | + if (type == NFIT_SPA_BDW) | ||
5042 | + continue; | ||
5043 | + /* BLK regions don't need to wait for ARS results */ | ||
5044 | + rc = acpi_nfit_register_region(acpi_desc, nfit_spa); | ||
5045 | + if (rc) | ||
5046 | + return rc; | ||
5047 | + } | ||
5048 | |||
5049 | queue_work(nfit_wq, &acpi_desc->work); | ||
5050 | return 0; | ||
5051 | diff --git a/drivers/block/loop.c b/drivers/block/loop.c | ||
5052 | index dc318b9100c2..ff1c4d7aa025 100644 | ||
5053 | --- a/drivers/block/loop.c | ||
5054 | +++ b/drivers/block/loop.c | ||
5055 | @@ -1110,11 +1110,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) | ||
5056 | if (info->lo_encrypt_type) { | ||
5057 | unsigned int type = info->lo_encrypt_type; | ||
5058 | |||
5059 | - if (type >= MAX_LO_CRYPT) | ||
5060 | - return -EINVAL; | ||
5061 | + if (type >= MAX_LO_CRYPT) { | ||
5062 | + err = -EINVAL; | ||
5063 | + goto exit; | ||
5064 | + } | ||
5065 | xfer = xfer_funcs[type]; | ||
5066 | - if (xfer == NULL) | ||
5067 | - return -EINVAL; | ||
5068 | + if (xfer == NULL) { | ||
5069 | + err = -EINVAL; | ||
5070 | + goto exit; | ||
5071 | + } | ||
5072 | } else | ||
5073 | xfer = NULL; | ||
5074 | |||
5075 | diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c | ||
5076 | index 8263429e21b8..79a48c37fb35 100644 | ||
5077 | --- a/drivers/firmware/psci.c | ||
5078 | +++ b/drivers/firmware/psci.c | ||
5079 | @@ -59,7 +59,10 @@ bool psci_tos_resident_on(int cpu) | ||
5080 | return cpu == resident_cpu; | ||
5081 | } | ||
5082 | |||
5083 | -struct psci_operations psci_ops; | ||
5084 | +struct psci_operations psci_ops = { | ||
5085 | + .conduit = PSCI_CONDUIT_NONE, | ||
5086 | + .smccc_version = SMCCC_VERSION_1_0, | ||
5087 | +}; | ||
5088 | |||
5089 | typedef unsigned long (psci_fn)(unsigned long, unsigned long, | ||
5090 | unsigned long, unsigned long); | ||
5091 | @@ -210,6 +213,22 @@ static unsigned long psci_migrate_info_up_cpu(void) | ||
5092 | 0, 0, 0); | ||
5093 | } | ||
5094 | |||
5095 | +static void set_conduit(enum psci_conduit conduit) | ||
5096 | +{ | ||
5097 | + switch (conduit) { | ||
5098 | + case PSCI_CONDUIT_HVC: | ||
5099 | + invoke_psci_fn = __invoke_psci_fn_hvc; | ||
5100 | + break; | ||
5101 | + case PSCI_CONDUIT_SMC: | ||
5102 | + invoke_psci_fn = __invoke_psci_fn_smc; | ||
5103 | + break; | ||
5104 | + default: | ||
5105 | + WARN(1, "Unexpected PSCI conduit %d\n", conduit); | ||
5106 | + } | ||
5107 | + | ||
5108 | + psci_ops.conduit = conduit; | ||
5109 | +} | ||
5110 | + | ||
5111 | static int get_set_conduit_method(struct device_node *np) | ||
5112 | { | ||
5113 | const char *method; | ||
5114 | @@ -222,9 +241,9 @@ static int get_set_conduit_method(struct device_node *np) | ||
5115 | } | ||
5116 | |||
5117 | if (!strcmp("hvc", method)) { | ||
5118 | - invoke_psci_fn = __invoke_psci_fn_hvc; | ||
5119 | + set_conduit(PSCI_CONDUIT_HVC); | ||
5120 | } else if (!strcmp("smc", method)) { | ||
5121 | - invoke_psci_fn = __invoke_psci_fn_smc; | ||
5122 | + set_conduit(PSCI_CONDUIT_SMC); | ||
5123 | } else { | ||
5124 | pr_warn("invalid \"method\" property: %s\n", method); | ||
5125 | return -EINVAL; | ||
5126 | @@ -493,9 +512,36 @@ static void __init psci_init_migrate(void) | ||
5127 | pr_info("Trusted OS resident on physical CPU 0x%lx\n", cpuid); | ||
5128 | } | ||
5129 | |||
5130 | +static void __init psci_init_smccc(void) | ||
5131 | +{ | ||
5132 | + u32 ver = ARM_SMCCC_VERSION_1_0; | ||
5133 | + int feature; | ||
5134 | + | ||
5135 | + feature = psci_features(ARM_SMCCC_VERSION_FUNC_ID); | ||
5136 | + | ||
5137 | + if (feature != PSCI_RET_NOT_SUPPORTED) { | ||
5138 | + u32 ret; | ||
5139 | + ret = invoke_psci_fn(ARM_SMCCC_VERSION_FUNC_ID, 0, 0, 0); | ||
5140 | + if (ret == ARM_SMCCC_VERSION_1_1) { | ||
5141 | + psci_ops.smccc_version = SMCCC_VERSION_1_1; | ||
5142 | + ver = ret; | ||
5143 | + } | ||
5144 | + } | ||
5145 | + | ||
5146 | + /* | ||
5147 | + * Conveniently, the SMCCC and PSCI versions are encoded the | ||
5148 | + * same way. No, this isn't accidental. | ||
5149 | + */ | ||
5150 | + pr_info("SMC Calling Convention v%d.%d\n", | ||
5151 | + PSCI_VERSION_MAJOR(ver), PSCI_VERSION_MINOR(ver)); | ||
5152 | + | ||
5153 | +} | ||
5154 | + | ||
5155 | static void __init psci_0_2_set_functions(void) | ||
5156 | { | ||
5157 | pr_info("Using standard PSCI v0.2 function IDs\n"); | ||
5158 | + psci_ops.get_version = psci_get_version; | ||
5159 | + | ||
5160 | psci_function_id[PSCI_FN_CPU_SUSPEND] = | ||
5161 | PSCI_FN_NATIVE(0_2, CPU_SUSPEND); | ||
5162 | psci_ops.cpu_suspend = psci_cpu_suspend; | ||
5163 | @@ -539,6 +585,7 @@ static int __init psci_probe(void) | ||
5164 | psci_init_migrate(); | ||
5165 | |||
5166 | if (PSCI_VERSION_MAJOR(ver) >= 1) { | ||
5167 | + psci_init_smccc(); | ||
5168 | psci_init_cpu_suspend(); | ||
5169 | psci_init_system_suspend(); | ||
5170 | } | ||
5171 | @@ -652,9 +699,9 @@ int __init psci_acpi_init(void) | ||
5172 | pr_info("probing for conduit method from ACPI.\n"); | ||
5173 | |||
5174 | if (acpi_psci_use_hvc()) | ||
5175 | - invoke_psci_fn = __invoke_psci_fn_hvc; | ||
5176 | + set_conduit(PSCI_CONDUIT_HVC); | ||
5177 | else | ||
5178 | - invoke_psci_fn = __invoke_psci_fn_smc; | ||
5179 | + set_conduit(PSCI_CONDUIT_SMC); | ||
5180 | |||
5181 | return psci_probe(); | ||
5182 | } | ||
5183 | diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c | ||
5184 | index 41b72ce6613f..83e1345db9e2 100644 | ||
5185 | --- a/drivers/gpu/drm/radeon/radeon_object.c | ||
5186 | +++ b/drivers/gpu/drm/radeon/radeon_object.c | ||
5187 | @@ -238,9 +238,10 @@ int radeon_bo_create(struct radeon_device *rdev, | ||
5188 | * may be slow | ||
5189 | * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 | ||
5190 | */ | ||
5191 | - | ||
5192 | +#ifndef CONFIG_COMPILE_TEST | ||
5193 | #warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ | ||
5194 | thanks to write-combining | ||
5195 | +#endif | ||
5196 | |||
5197 | if (bo->flags & RADEON_GEM_GTT_WC) | ||
5198 | DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " | ||
5199 | diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c | ||
5200 | index d8bc4b910192..9360cdce740e 100644 | ||
5201 | --- a/drivers/hv/channel_mgmt.c | ||
5202 | +++ b/drivers/hv/channel_mgmt.c | ||
5203 | @@ -70,7 +70,7 @@ static const struct vmbus_device vmbus_devs[] = { | ||
5204 | /* PCIE */ | ||
5205 | { .dev_type = HV_PCIE, | ||
5206 | HV_PCIE_GUID, | ||
5207 | - .perf_device = true, | ||
5208 | + .perf_device = false, | ||
5209 | }, | ||
5210 | |||
5211 | /* Synthetic Frame Buffer */ | ||
5212 | diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c | ||
5213 | index a629f7c130f0..ac63e562071f 100644 | ||
5214 | --- a/drivers/hwmon/ina2xx.c | ||
5215 | +++ b/drivers/hwmon/ina2xx.c | ||
5216 | @@ -447,6 +447,7 @@ static int ina2xx_probe(struct i2c_client *client, | ||
5217 | |||
5218 | /* set the device type */ | ||
5219 | data->config = &ina2xx_config[id->driver_data]; | ||
5220 | + mutex_init(&data->config_lock); | ||
5221 | |||
5222 | if (of_property_read_u32(dev->of_node, "shunt-resistor", &val) < 0) { | ||
5223 | struct ina2xx_platform_data *pdata = dev_get_platdata(dev); | ||
5224 | @@ -473,8 +474,6 @@ static int ina2xx_probe(struct i2c_client *client, | ||
5225 | return -ENODEV; | ||
5226 | } | ||
5227 | |||
5228 | - mutex_init(&data->config_lock); | ||
5229 | - | ||
5230 | data->groups[group++] = &ina2xx_group; | ||
5231 | if (id->driver_data == ina226) | ||
5232 | data->groups[group++] = &ina226_group; | ||
5233 | diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c | ||
5234 | index 48a39222fdf9..a9fc64557c53 100644 | ||
5235 | --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c | ||
5236 | +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c | ||
5237 | @@ -101,7 +101,7 @@ static int get_v4l2_window32(struct v4l2_window __user *kp, | ||
5238 | static int put_v4l2_window32(struct v4l2_window __user *kp, | ||
5239 | struct v4l2_window32 __user *up) | ||
5240 | { | ||
5241 | - struct v4l2_clip __user *kclips = kp->clips; | ||
5242 | + struct v4l2_clip __user *kclips; | ||
5243 | struct v4l2_clip32 __user *uclips; | ||
5244 | compat_caddr_t p; | ||
5245 | u32 clipcount; | ||
5246 | @@ -116,6 +116,8 @@ static int put_v4l2_window32(struct v4l2_window __user *kp, | ||
5247 | if (!clipcount) | ||
5248 | return 0; | ||
5249 | |||
5250 | + if (get_user(kclips, &kp->clips)) | ||
5251 | + return -EFAULT; | ||
5252 | if (get_user(p, &up->clips)) | ||
5253 | return -EFAULT; | ||
5254 | uclips = compat_ptr(p); | ||
5255 | diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c | ||
5256 | index 4da73e2c37cf..2032a6de026b 100644 | ||
5257 | --- a/drivers/net/phy/micrel.c | ||
5258 | +++ b/drivers/net/phy/micrel.c | ||
5259 | @@ -268,12 +268,23 @@ static int kszphy_nand_tree_disable(struct phy_device *phydev) | ||
5260 | return ret; | ||
5261 | } | ||
5262 | |||
5263 | -/* Some config bits need to be set again on resume, handle them here. */ | ||
5264 | -static int kszphy_config_reset(struct phy_device *phydev) | ||
5265 | +static int kszphy_config_init(struct phy_device *phydev) | ||
5266 | { | ||
5267 | struct kszphy_priv *priv = phydev->priv; | ||
5268 | + const struct kszphy_type *type; | ||
5269 | int ret; | ||
5270 | |||
5271 | + if (!priv) | ||
5272 | + return 0; | ||
5273 | + | ||
5274 | + type = priv->type; | ||
5275 | + | ||
5276 | + if (type->has_broadcast_disable) | ||
5277 | + kszphy_broadcast_disable(phydev); | ||
5278 | + | ||
5279 | + if (type->has_nand_tree_disable) | ||
5280 | + kszphy_nand_tree_disable(phydev); | ||
5281 | + | ||
5282 | if (priv->rmii_ref_clk_sel) { | ||
5283 | ret = kszphy_rmii_clk_sel(phydev, priv->rmii_ref_clk_sel_val); | ||
5284 | if (ret) { | ||
5285 | @@ -284,7 +295,7 @@ static int kszphy_config_reset(struct phy_device *phydev) | ||
5286 | } | ||
5287 | |||
5288 | if (priv->led_mode >= 0) | ||
5289 | - kszphy_setup_led(phydev, priv->type->led_mode_reg, priv->led_mode); | ||
5290 | + kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode); | ||
5291 | |||
5292 | if (phy_interrupt_is_valid(phydev)) { | ||
5293 | int ctl = phy_read(phydev, MII_BMCR); | ||
5294 | @@ -300,25 +311,6 @@ static int kszphy_config_reset(struct phy_device *phydev) | ||
5295 | return 0; | ||
5296 | } | ||
5297 | |||
5298 | -static int kszphy_config_init(struct phy_device *phydev) | ||
5299 | -{ | ||
5300 | - struct kszphy_priv *priv = phydev->priv; | ||
5301 | - const struct kszphy_type *type; | ||
5302 | - | ||
5303 | - if (!priv) | ||
5304 | - return 0; | ||
5305 | - | ||
5306 | - type = priv->type; | ||
5307 | - | ||
5308 | - if (type->has_broadcast_disable) | ||
5309 | - kszphy_broadcast_disable(phydev); | ||
5310 | - | ||
5311 | - if (type->has_nand_tree_disable) | ||
5312 | - kszphy_nand_tree_disable(phydev); | ||
5313 | - | ||
5314 | - return kszphy_config_reset(phydev); | ||
5315 | -} | ||
5316 | - | ||
5317 | static int ksz8041_config_init(struct phy_device *phydev) | ||
5318 | { | ||
5319 | struct device_node *of_node = phydev->mdio.dev.of_node; | ||
5320 | @@ -723,14 +715,8 @@ static int kszphy_suspend(struct phy_device *phydev) | ||
5321 | |||
5322 | static int kszphy_resume(struct phy_device *phydev) | ||
5323 | { | ||
5324 | - int ret; | ||
5325 | - | ||
5326 | genphy_resume(phydev); | ||
5327 | |||
5328 | - ret = kszphy_config_reset(phydev); | ||
5329 | - if (ret) | ||
5330 | - return ret; | ||
5331 | - | ||
5332 | /* Enable PHY Interrupts */ | ||
5333 | if (phy_interrupt_is_valid(phydev)) { | ||
5334 | phydev->interrupts = PHY_INTERRUPT_ENABLED; | ||
5335 | diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c | ||
5336 | index 27ed25252aac..cfd81eb1b532 100644 | ||
5337 | --- a/drivers/net/slip/slhc.c | ||
5338 | +++ b/drivers/net/slip/slhc.c | ||
5339 | @@ -509,6 +509,10 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize) | ||
5340 | if(x < 0 || x > comp->rslot_limit) | ||
5341 | goto bad; | ||
5342 | |||
5343 | + /* Check if the cstate is initialized */ | ||
5344 | + if (!comp->rstate[x].initialized) | ||
5345 | + goto bad; | ||
5346 | + | ||
5347 | comp->flags &=~ SLF_TOSS; | ||
5348 | comp->recv_current = x; | ||
5349 | } else { | ||
5350 | @@ -673,6 +677,7 @@ slhc_remember(struct slcompress *comp, unsigned char *icp, int isize) | ||
5351 | if (cs->cs_tcp.doff > 5) | ||
5352 | memcpy(cs->cs_tcpopt, icp + ihl*4 + sizeof(struct tcphdr), (cs->cs_tcp.doff - 5) * 4); | ||
5353 | cs->cs_hsize = ihl*2 + cs->cs_tcp.doff*2; | ||
5354 | + cs->initialized = true; | ||
5355 | /* Put headers back on packet | ||
5356 | * Neither header checksum is recalculated | ||
5357 | */ | ||
5358 | diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c | ||
5359 | index 1fca0024f294..4fb468666b19 100644 | ||
5360 | --- a/drivers/net/usb/cdc_ether.c | ||
5361 | +++ b/drivers/net/usb/cdc_ether.c | ||
5362 | @@ -773,6 +773,12 @@ static const struct usb_device_id products[] = { | ||
5363 | USB_CDC_SUBCLASS_ETHERNET, | ||
5364 | USB_CDC_PROTO_NONE), | ||
5365 | .driver_info = (unsigned long)&wwan_info, | ||
5366 | +}, { | ||
5367 | + /* Cinterion AHS3 modem by GEMALTO */ | ||
5368 | + USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0055, USB_CLASS_COMM, | ||
5369 | + USB_CDC_SUBCLASS_ETHERNET, | ||
5370 | + USB_CDC_PROTO_NONE), | ||
5371 | + .driver_info = (unsigned long)&wwan_info, | ||
5372 | }, { | ||
5373 | /* Telit modules */ | ||
5374 | USB_VENDOR_AND_INTERFACE_INFO(0x1bc7, USB_CLASS_COMM, | ||
5375 | diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c | ||
5376 | index c53385a0052f..f5a96678494b 100644 | ||
5377 | --- a/drivers/net/usb/lan78xx.c | ||
5378 | +++ b/drivers/net/usb/lan78xx.c | ||
5379 | @@ -873,7 +873,8 @@ static int lan78xx_read_otp(struct lan78xx_net *dev, u32 offset, | ||
5380 | offset += 0x100; | ||
5381 | else | ||
5382 | ret = -EINVAL; | ||
5383 | - ret = lan78xx_read_raw_otp(dev, offset, length, data); | ||
5384 | + if (!ret) | ||
5385 | + ret = lan78xx_read_raw_otp(dev, offset, length, data); | ||
5386 | } | ||
5387 | |||
5388 | return ret; | ||
5389 | diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c | ||
5390 | index 231f84db9ab0..6113624ccec3 100644 | ||
5391 | --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c | ||
5392 | +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c | ||
5393 | @@ -1454,6 +1454,7 @@ static int rtl8187_probe(struct usb_interface *intf, | ||
5394 | goto err_free_dev; | ||
5395 | } | ||
5396 | mutex_init(&priv->io_mutex); | ||
5397 | + mutex_init(&priv->conf_mutex); | ||
5398 | |||
5399 | SET_IEEE80211_DEV(dev, &intf->dev); | ||
5400 | usb_set_intfdata(intf, dev); | ||
5401 | @@ -1627,7 +1628,6 @@ static int rtl8187_probe(struct usb_interface *intf, | ||
5402 | printk(KERN_ERR "rtl8187: Cannot register device\n"); | ||
5403 | goto err_free_dmabuf; | ||
5404 | } | ||
5405 | - mutex_init(&priv->conf_mutex); | ||
5406 | skb_queue_head_init(&priv->b_tx_status.queue); | ||
5407 | |||
5408 | wiphy_info(dev->wiphy, "hwaddr %pM, %s V%d + %s, rfkill mask %d\n", | ||
5409 | diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c | ||
5410 | index 71bf9bded485..66e9bb053629 100644 | ||
5411 | --- a/drivers/s390/cio/qdio_main.c | ||
5412 | +++ b/drivers/s390/cio/qdio_main.c | ||
5413 | @@ -126,7 +126,7 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq) | ||
5414 | static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, | ||
5415 | int start, int count, int auto_ack) | ||
5416 | { | ||
5417 | - int rc, tmp_count = count, tmp_start = start, nr = q->nr, retried = 0; | ||
5418 | + int rc, tmp_count = count, tmp_start = start, nr = q->nr; | ||
5419 | unsigned int ccq = 0; | ||
5420 | |||
5421 | qperf_inc(q, eqbs); | ||
5422 | @@ -149,14 +149,7 @@ static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, | ||
5423 | qperf_inc(q, eqbs_partial); | ||
5424 | DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "EQBS part:%02x", | ||
5425 | tmp_count); | ||
5426 | - /* | ||
5427 | - * Retry once, if that fails bail out and process the | ||
5428 | - * extracted buffers before trying again. | ||
5429 | - */ | ||
5430 | - if (!retried++) | ||
5431 | - goto again; | ||
5432 | - else | ||
5433 | - return count - tmp_count; | ||
5434 | + return count - tmp_count; | ||
5435 | } | ||
5436 | |||
5437 | DBF_ERROR("%4x EQBS ERROR", SCH_NO(q)); | ||
5438 | @@ -212,7 +205,10 @@ static int qdio_do_sqbs(struct qdio_q *q, unsigned char state, int start, | ||
5439 | return 0; | ||
5440 | } | ||
5441 | |||
5442 | -/* returns number of examined buffers and their common state in *state */ | ||
5443 | +/* | ||
5444 | + * Returns number of examined buffers and their common state in *state. | ||
5445 | + * Requested number of buffers-to-examine must be > 0. | ||
5446 | + */ | ||
5447 | static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr, | ||
5448 | unsigned char *state, unsigned int count, | ||
5449 | int auto_ack, int merge_pending) | ||
5450 | @@ -223,17 +219,23 @@ static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr, | ||
5451 | if (is_qebsm(q)) | ||
5452 | return qdio_do_eqbs(q, state, bufnr, count, auto_ack); | ||
5453 | |||
5454 | - for (i = 0; i < count; i++) { | ||
5455 | - if (!__state) { | ||
5456 | - __state = q->slsb.val[bufnr]; | ||
5457 | - if (merge_pending && __state == SLSB_P_OUTPUT_PENDING) | ||
5458 | - __state = SLSB_P_OUTPUT_EMPTY; | ||
5459 | - } else if (merge_pending) { | ||
5460 | - if ((q->slsb.val[bufnr] & __state) != __state) | ||
5461 | - break; | ||
5462 | - } else if (q->slsb.val[bufnr] != __state) | ||
5463 | - break; | ||
5464 | + /* get initial state: */ | ||
5465 | + __state = q->slsb.val[bufnr]; | ||
5466 | + if (merge_pending && __state == SLSB_P_OUTPUT_PENDING) | ||
5467 | + __state = SLSB_P_OUTPUT_EMPTY; | ||
5468 | + | ||
5469 | + for (i = 1; i < count; i++) { | ||
5470 | bufnr = next_buf(bufnr); | ||
5471 | + | ||
5472 | + /* merge PENDING into EMPTY: */ | ||
5473 | + if (merge_pending && | ||
5474 | + q->slsb.val[bufnr] == SLSB_P_OUTPUT_PENDING && | ||
5475 | + __state == SLSB_P_OUTPUT_EMPTY) | ||
5476 | + continue; | ||
5477 | + | ||
5478 | + /* stop if next state differs from initial state: */ | ||
5479 | + if (q->slsb.val[bufnr] != __state) | ||
5480 | + break; | ||
5481 | } | ||
5482 | *state = __state; | ||
5483 | return i; | ||
5484 | diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c | ||
5485 | index e2c37aeed45a..fce49ebc575d 100644 | ||
5486 | --- a/drivers/vhost/vhost.c | ||
5487 | +++ b/drivers/vhost/vhost.c | ||
5488 | @@ -1175,10 +1175,12 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, | ||
5489 | /* Caller should have vq mutex and device mutex */ | ||
5490 | int vhost_vq_access_ok(struct vhost_virtqueue *vq) | ||
5491 | { | ||
5492 | - int ret = vq_log_access_ok(vq, vq->log_base); | ||
5493 | + if (!vq_log_access_ok(vq, vq->log_base)) | ||
5494 | + return 0; | ||
5495 | |||
5496 | - if (ret || vq->iotlb) | ||
5497 | - return ret; | ||
5498 | + /* Access validation occurs at prefetch time with IOTLB */ | ||
5499 | + if (vq->iotlb) | ||
5500 | + return 1; | ||
5501 | |||
5502 | return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); | ||
5503 | } | ||
5504 | diff --git a/fs/namei.c b/fs/namei.c | ||
5505 | index 891670e0956b..85ac38b99065 100644 | ||
5506 | --- a/fs/namei.c | ||
5507 | +++ b/fs/namei.c | ||
5508 | @@ -221,9 +221,10 @@ getname_kernel(const char * filename) | ||
5509 | if (len <= EMBEDDED_NAME_MAX) { | ||
5510 | result->name = (char *)result->iname; | ||
5511 | } else if (len <= PATH_MAX) { | ||
5512 | + const size_t size = offsetof(struct filename, iname[1]); | ||
5513 | struct filename *tmp; | ||
5514 | |||
5515 | - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); | ||
5516 | + tmp = kmalloc(size, GFP_KERNEL); | ||
5517 | if (unlikely(!tmp)) { | ||
5518 | __putname(result); | ||
5519 | return ERR_PTR(-ENOMEM); | ||
5520 | diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h | ||
5521 | new file mode 100644 | ||
5522 | index 000000000000..e518e4e3dfb5 | ||
5523 | --- /dev/null | ||
5524 | +++ b/include/kvm/arm_psci.h | ||
5525 | @@ -0,0 +1,51 @@ | ||
5526 | +/* | ||
5527 | + * Copyright (C) 2012,2013 - ARM Ltd | ||
5528 | + * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
5529 | + * | ||
5530 | + * This program is free software; you can redistribute it and/or modify | ||
5531 | + * it under the terms of the GNU General Public License version 2 as | ||
5532 | + * published by the Free Software Foundation. | ||
5533 | + * | ||
5534 | + * This program is distributed in the hope that it will be useful, | ||
5535 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
5536 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
5537 | + * GNU General Public License for more details. | ||
5538 | + * | ||
5539 | + * You should have received a copy of the GNU General Public License | ||
5540 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
5541 | + */ | ||
5542 | + | ||
5543 | +#ifndef __KVM_ARM_PSCI_H__ | ||
5544 | +#define __KVM_ARM_PSCI_H__ | ||
5545 | + | ||
5546 | +#include <linux/kvm_host.h> | ||
5547 | +#include <uapi/linux/psci.h> | ||
5548 | + | ||
5549 | +#define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1) | ||
5550 | +#define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2) | ||
5551 | +#define KVM_ARM_PSCI_1_0 PSCI_VERSION(1, 0) | ||
5552 | + | ||
5553 | +#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0 | ||
5554 | + | ||
5555 | +/* | ||
5556 | + * We need the KVM pointer independently from the vcpu as we can call | ||
5557 | + * this from HYP, and need to apply kern_hyp_va on it... | ||
5558 | + */ | ||
5559 | +static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm) | ||
5560 | +{ | ||
5561 | + /* | ||
5562 | + * Our PSCI implementation stays the same across versions from | ||
5563 | + * v0.2 onward, only adding the few mandatory functions (such | ||
5564 | + * as FEATURES with 1.0) that are required by newer | ||
5565 | + * revisions. It is thus safe to return the latest. | ||
5566 | + */ | ||
5567 | + if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) | ||
5568 | + return KVM_ARM_PSCI_LATEST; | ||
5569 | + | ||
5570 | + return KVM_ARM_PSCI_0_1; | ||
5571 | +} | ||
5572 | + | ||
5573 | + | ||
5574 | +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); | ||
5575 | + | ||
5576 | +#endif /* __KVM_ARM_PSCI_H__ */ | ||
5577 | diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h | ||
5578 | index 4c5bca38c653..a031897fca76 100644 | ||
5579 | --- a/include/linux/arm-smccc.h | ||
5580 | +++ b/include/linux/arm-smccc.h | ||
5581 | @@ -14,14 +14,16 @@ | ||
5582 | #ifndef __LINUX_ARM_SMCCC_H | ||
5583 | #define __LINUX_ARM_SMCCC_H | ||
5584 | |||
5585 | +#include <uapi/linux/const.h> | ||
5586 | + | ||
5587 | /* | ||
5588 | * This file provides common defines for ARM SMC Calling Convention as | ||
5589 | * specified in | ||
5590 | * http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html | ||
5591 | */ | ||
5592 | |||
5593 | -#define ARM_SMCCC_STD_CALL 0 | ||
5594 | -#define ARM_SMCCC_FAST_CALL 1 | ||
5595 | +#define ARM_SMCCC_STD_CALL _AC(0,U) | ||
5596 | +#define ARM_SMCCC_FAST_CALL _AC(1,U) | ||
5597 | #define ARM_SMCCC_TYPE_SHIFT 31 | ||
5598 | |||
5599 | #define ARM_SMCCC_SMC_32 0 | ||
5600 | @@ -60,6 +62,24 @@ | ||
5601 | #define ARM_SMCCC_QUIRK_NONE 0 | ||
5602 | #define ARM_SMCCC_QUIRK_QCOM_A6 1 /* Save/restore register a6 */ | ||
5603 | |||
5604 | +#define ARM_SMCCC_VERSION_1_0 0x10000 | ||
5605 | +#define ARM_SMCCC_VERSION_1_1 0x10001 | ||
5606 | + | ||
5607 | +#define ARM_SMCCC_VERSION_FUNC_ID \ | ||
5608 | + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ | ||
5609 | + ARM_SMCCC_SMC_32, \ | ||
5610 | + 0, 0) | ||
5611 | + | ||
5612 | +#define ARM_SMCCC_ARCH_FEATURES_FUNC_ID \ | ||
5613 | + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ | ||
5614 | + ARM_SMCCC_SMC_32, \ | ||
5615 | + 0, 1) | ||
5616 | + | ||
5617 | +#define ARM_SMCCC_ARCH_WORKAROUND_1 \ | ||
5618 | + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ | ||
5619 | + ARM_SMCCC_SMC_32, \ | ||
5620 | + 0, 0x8000) | ||
5621 | + | ||
5622 | #ifndef __ASSEMBLY__ | ||
5623 | |||
5624 | #include <linux/linkage.h> | ||
5625 | @@ -130,5 +150,146 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, | ||
5626 | |||
5627 | #define arm_smccc_hvc_quirk(...) __arm_smccc_hvc(__VA_ARGS__) | ||
5628 | |||
5629 | +/* SMCCC v1.1 implementation madness follows */ | ||
5630 | +#ifdef CONFIG_ARM64 | ||
5631 | + | ||
5632 | +#define SMCCC_SMC_INST "smc #0" | ||
5633 | +#define SMCCC_HVC_INST "hvc #0" | ||
5634 | + | ||
5635 | +#elif defined(CONFIG_ARM) | ||
5636 | +#include <asm/opcodes-sec.h> | ||
5637 | +#include <asm/opcodes-virt.h> | ||
5638 | + | ||
5639 | +#define SMCCC_SMC_INST __SMC(0) | ||
5640 | +#define SMCCC_HVC_INST __HVC(0) | ||
5641 | + | ||
5642 | +#endif | ||
5643 | + | ||
5644 | +#define ___count_args(_0, _1, _2, _3, _4, _5, _6, _7, _8, x, ...) x | ||
5645 | + | ||
5646 | +#define __count_args(...) \ | ||
5647 | + ___count_args(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0) | ||
5648 | + | ||
5649 | +#define __constraint_write_0 \ | ||
5650 | + "+r" (r0), "=&r" (r1), "=&r" (r2), "=&r" (r3) | ||
5651 | +#define __constraint_write_1 \ | ||
5652 | + "+r" (r0), "+r" (r1), "=&r" (r2), "=&r" (r3) | ||
5653 | +#define __constraint_write_2 \ | ||
5654 | + "+r" (r0), "+r" (r1), "+r" (r2), "=&r" (r3) | ||
5655 | +#define __constraint_write_3 \ | ||
5656 | + "+r" (r0), "+r" (r1), "+r" (r2), "+r" (r3) | ||
5657 | +#define __constraint_write_4 __constraint_write_3 | ||
5658 | +#define __constraint_write_5 __constraint_write_4 | ||
5659 | +#define __constraint_write_6 __constraint_write_5 | ||
5660 | +#define __constraint_write_7 __constraint_write_6 | ||
5661 | + | ||
5662 | +#define __constraint_read_0 | ||
5663 | +#define __constraint_read_1 | ||
5664 | +#define __constraint_read_2 | ||
5665 | +#define __constraint_read_3 | ||
5666 | +#define __constraint_read_4 "r" (r4) | ||
5667 | +#define __constraint_read_5 __constraint_read_4, "r" (r5) | ||
5668 | +#define __constraint_read_6 __constraint_read_5, "r" (r6) | ||
5669 | +#define __constraint_read_7 __constraint_read_6, "r" (r7) | ||
5670 | + | ||
5671 | +#define __declare_arg_0(a0, res) \ | ||
5672 | + struct arm_smccc_res *___res = res; \ | ||
5673 | + register u32 r0 asm("r0") = a0; \ | ||
5674 | + register unsigned long r1 asm("r1"); \ | ||
5675 | + register unsigned long r2 asm("r2"); \ | ||
5676 | + register unsigned long r3 asm("r3") | ||
5677 | + | ||
5678 | +#define __declare_arg_1(a0, a1, res) \ | ||
5679 | + struct arm_smccc_res *___res = res; \ | ||
5680 | + register u32 r0 asm("r0") = a0; \ | ||
5681 | + register typeof(a1) r1 asm("r1") = a1; \ | ||
5682 | + register unsigned long r2 asm("r2"); \ | ||
5683 | + register unsigned long r3 asm("r3") | ||
5684 | + | ||
5685 | +#define __declare_arg_2(a0, a1, a2, res) \ | ||
5686 | + struct arm_smccc_res *___res = res; \ | ||
5687 | + register u32 r0 asm("r0") = a0; \ | ||
5688 | + register typeof(a1) r1 asm("r1") = a1; \ | ||
5689 | + register typeof(a2) r2 asm("r2") = a2; \ | ||
5690 | + register unsigned long r3 asm("r3") | ||
5691 | + | ||
5692 | +#define __declare_arg_3(a0, a1, a2, a3, res) \ | ||
5693 | + struct arm_smccc_res *___res = res; \ | ||
5694 | + register u32 r0 asm("r0") = a0; \ | ||
5695 | + register typeof(a1) r1 asm("r1") = a1; \ | ||
5696 | + register typeof(a2) r2 asm("r2") = a2; \ | ||
5697 | + register typeof(a3) r3 asm("r3") = a3 | ||
5698 | + | ||
5699 | +#define __declare_arg_4(a0, a1, a2, a3, a4, res) \ | ||
5700 | + __declare_arg_3(a0, a1, a2, a3, res); \ | ||
5701 | + register typeof(a4) r4 asm("r4") = a4 | ||
5702 | + | ||
5703 | +#define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \ | ||
5704 | + __declare_arg_4(a0, a1, a2, a3, a4, res); \ | ||
5705 | + register typeof(a5) r5 asm("r5") = a5 | ||
5706 | + | ||
5707 | +#define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \ | ||
5708 | + __declare_arg_5(a0, a1, a2, a3, a4, a5, res); \ | ||
5709 | + register typeof(a6) r6 asm("r6") = a6 | ||
5710 | + | ||
5711 | +#define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \ | ||
5712 | + __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \ | ||
5713 | + register typeof(a7) r7 asm("r7") = a7 | ||
5714 | + | ||
5715 | +#define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__) | ||
5716 | +#define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__) | ||
5717 | + | ||
5718 | +#define ___constraints(count) \ | ||
5719 | + : __constraint_write_ ## count \ | ||
5720 | + : __constraint_read_ ## count \ | ||
5721 | + : "memory" | ||
5722 | +#define __constraints(count) ___constraints(count) | ||
5723 | + | ||
5724 | +/* | ||
5725 | + * We have an output list that is not necessarily used, and GCC feels | ||
5726 | + * entitled to optimise the whole sequence away. "volatile" is what | ||
5727 | + * makes it stick. | ||
5728 | + */ | ||
5729 | +#define __arm_smccc_1_1(inst, ...) \ | ||
5730 | + do { \ | ||
5731 | + __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ | ||
5732 | + asm volatile(inst "\n" \ | ||
5733 | + __constraints(__count_args(__VA_ARGS__))); \ | ||
5734 | + if (___res) \ | ||
5735 | + *___res = (typeof(*___res)){r0, r1, r2, r3}; \ | ||
5736 | + } while (0) | ||
5737 | + | ||
5738 | +/* | ||
5739 | + * arm_smccc_1_1_smc() - make an SMCCC v1.1 compliant SMC call | ||
5740 | + * | ||
5741 | + * This is a variadic macro taking one to eight source arguments, and | ||
5742 | + * an optional return structure. | ||
5743 | + * | ||
5744 | + * @a0-a7: arguments passed in registers 0 to 7 | ||
5745 | + * @res: result values from registers 0 to 3 | ||
5746 | + * | ||
5747 | + * This macro is used to make SMC calls following SMC Calling Convention v1.1. | ||
5748 | + * The contents of the supplied parameters are copied to registers 0 to 7 prior | ||
5749 | + * to the SMC instruction. The return values are updated with the content | ||
5750 | + * from register 0 to 3 on return from the SMC instruction if not NULL. | ||
5751 | + */ | ||
5752 | +#define arm_smccc_1_1_smc(...) __arm_smccc_1_1(SMCCC_SMC_INST, __VA_ARGS__) | ||
5753 | + | ||
5754 | +/* | ||
5755 | + * arm_smccc_1_1_hvc() - make an SMCCC v1.1 compliant HVC call | ||
5756 | + * | ||
5757 | + * This is a variadic macro taking one to eight source arguments, and | ||
5758 | + * an optional return structure. | ||
5759 | + * | ||
5760 | + * @a0-a7: arguments passed in registers 0 to 7 | ||
5761 | + * @res: result values from registers 0 to 3 | ||
5762 | + * | ||
5763 | + * This macro is used to make HVC calls following SMC Calling Convention v1.1. | ||
5764 | + * The contents of the supplied parameters are copied to registers 0 to 7 prior | ||
5765 | + * to the HVC instruction. The return values are updated with the content | ||
5766 | + * from register 0 to 3 on return from the HVC instruction if not NULL. | ||
5767 | + */ | ||
5768 | +#define arm_smccc_1_1_hvc(...) __arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__) | ||
5769 | + | ||
5770 | #endif /*__ASSEMBLY__*/ | ||
5771 | #endif /*__LINUX_ARM_SMCCC_H*/ | ||
5772 | diff --git a/include/linux/mm.h b/include/linux/mm.h | ||
5773 | index 8e506783631b..4a07ff4f38e1 100644 | ||
5774 | --- a/include/linux/mm.h | ||
5775 | +++ b/include/linux/mm.h | ||
5776 | @@ -76,6 +76,10 @@ extern int mmap_rnd_compat_bits __read_mostly; | ||
5777 | #define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x))) | ||
5778 | #endif | ||
5779 | |||
5780 | +#ifndef lm_alias | ||
5781 | +#define lm_alias(x) __va(__pa_symbol(x)) | ||
5782 | +#endif | ||
5783 | + | ||
5784 | /* | ||
5785 | * To prevent common memory management code establishing | ||
5786 | * a zero page mapping on a read fault. | ||
5787 | diff --git a/include/linux/psci.h b/include/linux/psci.h | ||
5788 | index bdea1cb5e1db..347077cf19c6 100644 | ||
5789 | --- a/include/linux/psci.h | ||
5790 | +++ b/include/linux/psci.h | ||
5791 | @@ -25,7 +25,19 @@ bool psci_tos_resident_on(int cpu); | ||
5792 | int psci_cpu_init_idle(unsigned int cpu); | ||
5793 | int psci_cpu_suspend_enter(unsigned long index); | ||
5794 | |||
5795 | +enum psci_conduit { | ||
5796 | + PSCI_CONDUIT_NONE, | ||
5797 | + PSCI_CONDUIT_SMC, | ||
5798 | + PSCI_CONDUIT_HVC, | ||
5799 | +}; | ||
5800 | + | ||
5801 | +enum smccc_version { | ||
5802 | + SMCCC_VERSION_1_0, | ||
5803 | + SMCCC_VERSION_1_1, | ||
5804 | +}; | ||
5805 | + | ||
5806 | struct psci_operations { | ||
5807 | + u32 (*get_version)(void); | ||
5808 | int (*cpu_suspend)(u32 state, unsigned long entry_point); | ||
5809 | int (*cpu_off)(u32 state); | ||
5810 | int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); | ||
5811 | @@ -33,6 +45,8 @@ struct psci_operations { | ||
5812 | int (*affinity_info)(unsigned long target_affinity, | ||
5813 | unsigned long lowest_affinity_level); | ||
5814 | int (*migrate_info_type)(void); | ||
5815 | + enum psci_conduit conduit; | ||
5816 | + enum smccc_version smccc_version; | ||
5817 | }; | ||
5818 | |||
5819 | extern struct psci_operations psci_ops; | ||
5820 | diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h | ||
5821 | index 554671c81f4a..4931787193c3 100644 | ||
5822 | --- a/include/net/bluetooth/hci_core.h | ||
5823 | +++ b/include/net/bluetooth/hci_core.h | ||
5824 | @@ -893,7 +893,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, | ||
5825 | u16 conn_timeout); | ||
5826 | struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, | ||
5827 | u8 dst_type, u8 sec_level, u16 conn_timeout, | ||
5828 | - u8 role); | ||
5829 | + u8 role, bdaddr_t *direct_rpa); | ||
5830 | struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, | ||
5831 | u8 sec_level, u8 auth_type); | ||
5832 | struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, | ||
5833 | diff --git a/include/net/slhc_vj.h b/include/net/slhc_vj.h | ||
5834 | index 8716d5942b65..8fcf8908a694 100644 | ||
5835 | --- a/include/net/slhc_vj.h | ||
5836 | +++ b/include/net/slhc_vj.h | ||
5837 | @@ -127,6 +127,7 @@ typedef __u32 int32; | ||
5838 | */ | ||
5839 | struct cstate { | ||
5840 | byte_t cs_this; /* connection id number (xmit) */ | ||
5841 | + bool initialized; /* true if initialized */ | ||
5842 | struct cstate *next; /* next in ring (xmit) */ | ||
5843 | struct iphdr cs_ip; /* ip/tcp hdr from most recent packet */ | ||
5844 | struct tcphdr cs_tcp; | ||
5845 | diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h | ||
5846 | index 3d7a0fc021a7..39930ca998cd 100644 | ||
5847 | --- a/include/uapi/linux/psci.h | ||
5848 | +++ b/include/uapi/linux/psci.h | ||
5849 | @@ -87,6 +87,9 @@ | ||
5850 | (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT) | ||
5851 | #define PSCI_VERSION_MINOR(ver) \ | ||
5852 | ((ver) & PSCI_VERSION_MINOR_MASK) | ||
5853 | +#define PSCI_VERSION(maj, min) \ | ||
5854 | + ((((maj) << PSCI_VERSION_MAJOR_SHIFT) & PSCI_VERSION_MAJOR_MASK) | \ | ||
5855 | + ((min) & PSCI_VERSION_MINOR_MASK)) | ||
5856 | |||
5857 | /* PSCI features decoding (>=1.0) */ | ||
5858 | #define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT 1 | ||
5859 | diff --git a/kernel/events/core.c b/kernel/events/core.c | ||
5860 | index c4100c38a467..74710fad35d5 100644 | ||
5861 | --- a/kernel/events/core.c | ||
5862 | +++ b/kernel/events/core.c | ||
5863 | @@ -4091,6 +4091,9 @@ static void _free_event(struct perf_event *event) | ||
5864 | if (event->ctx) | ||
5865 | put_ctx(event->ctx); | ||
5866 | |||
5867 | + if (event->hw.target) | ||
5868 | + put_task_struct(event->hw.target); | ||
5869 | + | ||
5870 | exclusive_event_destroy(event); | ||
5871 | module_put(event->pmu->module); | ||
5872 | |||
5873 | @@ -9214,6 +9217,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | ||
5874 | * and we cannot use the ctx information because we need the | ||
5875 | * pmu before we get a ctx. | ||
5876 | */ | ||
5877 | + get_task_struct(task); | ||
5878 | event->hw.target = task; | ||
5879 | } | ||
5880 | |||
5881 | @@ -9331,6 +9335,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | ||
5882 | perf_detach_cgroup(event); | ||
5883 | if (event->ns) | ||
5884 | put_pid_ns(event->ns); | ||
5885 | + if (event->hw.target) | ||
5886 | + put_task_struct(event->hw.target); | ||
5887 | kfree(event); | ||
5888 | |||
5889 | return ERR_PTR(err); | ||
5890 | diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c | ||
5891 | index dc59eae54717..cc061495f653 100644 | ||
5892 | --- a/net/bluetooth/hci_conn.c | ||
5893 | +++ b/net/bluetooth/hci_conn.c | ||
5894 | @@ -749,18 +749,31 @@ static bool conn_use_rpa(struct hci_conn *conn) | ||
5895 | } | ||
5896 | |||
5897 | static void hci_req_add_le_create_conn(struct hci_request *req, | ||
5898 | - struct hci_conn *conn) | ||
5899 | + struct hci_conn *conn, | ||
5900 | + bdaddr_t *direct_rpa) | ||
5901 | { | ||
5902 | struct hci_cp_le_create_conn cp; | ||
5903 | struct hci_dev *hdev = conn->hdev; | ||
5904 | u8 own_addr_type; | ||
5905 | |||
5906 | - /* Update random address, but set require_privacy to false so | ||
5907 | - * that we never connect with an non-resolvable address. | ||
5908 | + /* If direct address was provided we use it instead of current | ||
5909 | + * address. | ||
5910 | */ | ||
5911 | - if (hci_update_random_address(req, false, conn_use_rpa(conn), | ||
5912 | - &own_addr_type)) | ||
5913 | - return; | ||
5914 | + if (direct_rpa) { | ||
5915 | + if (bacmp(&req->hdev->random_addr, direct_rpa)) | ||
5916 | + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, | ||
5917 | + direct_rpa); | ||
5918 | + | ||
5919 | + /* direct address is always RPA */ | ||
5920 | + own_addr_type = ADDR_LE_DEV_RANDOM; | ||
5921 | + } else { | ||
5922 | + /* Update random address, but set require_privacy to false so | ||
5923 | + * that we never connect with a non-resolvable address. | ||
5924 | + */ | ||
5925 | + if (hci_update_random_address(req, false, conn_use_rpa(conn), | ||
5926 | + &own_addr_type)) | ||
5927 | + return; | ||
5928 | + } | ||
5929 | |||
5930 | memset(&cp, 0, sizeof(cp)); | ||
5931 | |||
5932 | @@ -825,7 +838,7 @@ static void hci_req_directed_advertising(struct hci_request *req, | ||
5933 | |||
5934 | struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, | ||
5935 | u8 dst_type, u8 sec_level, u16 conn_timeout, | ||
5936 | - u8 role) | ||
5937 | + u8 role, bdaddr_t *direct_rpa) | ||
5938 | { | ||
5939 | struct hci_conn_params *params; | ||
5940 | struct hci_conn *conn; | ||
5941 | @@ -940,7 +953,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, | ||
5942 | hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED); | ||
5943 | } | ||
5944 | |||
5945 | - hci_req_add_le_create_conn(&req, conn); | ||
5946 | + hci_req_add_le_create_conn(&req, conn, direct_rpa); | ||
5947 | |||
5948 | create_conn: | ||
5949 | err = hci_req_run(&req, create_le_conn_complete); | ||
5950 | diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c | ||
5951 | index e17aacbc5630..d2f9eb169ba8 100644 | ||
5952 | --- a/net/bluetooth/hci_event.c | ||
5953 | +++ b/net/bluetooth/hci_event.c | ||
5954 | @@ -4646,7 +4646,8 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, | ||
5955 | /* This function requires the caller holds hdev->lock */ | ||
5956 | static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, | ||
5957 | bdaddr_t *addr, | ||
5958 | - u8 addr_type, u8 adv_type) | ||
5959 | + u8 addr_type, u8 adv_type, | ||
5960 | + bdaddr_t *direct_rpa) | ||
5961 | { | ||
5962 | struct hci_conn *conn; | ||
5963 | struct hci_conn_params *params; | ||
5964 | @@ -4697,7 +4698,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, | ||
5965 | } | ||
5966 | |||
5967 | conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW, | ||
5968 | - HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER); | ||
5969 | + HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER, | ||
5970 | + direct_rpa); | ||
5971 | if (!IS_ERR(conn)) { | ||
5972 | /* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned | ||
5973 | * by higher layer that tried to connect, if no then | ||
5974 | @@ -4807,8 +4809,13 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, | ||
5975 | bdaddr_type = irk->addr_type; | ||
5976 | } | ||
5977 | |||
5978 | - /* Check if we have been requested to connect to this device */ | ||
5979 | - conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type); | ||
5980 | + /* Check if we have been requested to connect to this device. | ||
5981 | + * | ||
5982 | + * direct_addr is set only for directed advertising reports (it is NULL | ||
5983 | + * for advertising reports) and is already verified to be RPA above. | ||
5984 | + */ | ||
5985 | + conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type, | ||
5986 | + direct_addr); | ||
5987 | if (conn && type == LE_ADV_IND) { | ||
5988 | /* Store report for later inclusion by | ||
5989 | * mgmt_device_connected | ||
5990 | diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c | ||
5991 | index 2bbca23a9d05..1fc23cb4a3e0 100644 | ||
5992 | --- a/net/bluetooth/l2cap_core.c | ||
5993 | +++ b/net/bluetooth/l2cap_core.c | ||
5994 | @@ -7148,7 +7148,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, | ||
5995 | hcon = hci_connect_le(hdev, dst, dst_type, | ||
5996 | chan->sec_level, | ||
5997 | HCI_LE_CONN_TIMEOUT, | ||
5998 | - HCI_ROLE_SLAVE); | ||
5999 | + HCI_ROLE_SLAVE, NULL); | ||
6000 | else | ||
6001 | hcon = hci_connect_le_scan(hdev, dst, dst_type, | ||
6002 | chan->sec_level, | ||
6003 | diff --git a/net/rds/send.c b/net/rds/send.c | ||
6004 | index ef53d164e146..50241d30e16d 100644 | ||
6005 | --- a/net/rds/send.c | ||
6006 | +++ b/net/rds/send.c | ||
6007 | @@ -1,5 +1,5 @@ | ||
6008 | /* | ||
6009 | - * Copyright (c) 2006 Oracle. All rights reserved. | ||
6010 | + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. | ||
6011 | * | ||
6012 | * This software is available to you under a choice of one of two | ||
6013 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6014 | @@ -983,10 +983,15 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) | ||
6015 | if (conn->c_npaths == 0 && hash != 0) { | ||
6016 | rds_send_ping(conn); | ||
6017 | |||
6018 | - if (conn->c_npaths == 0) { | ||
6019 | - wait_event_interruptible(conn->c_hs_waitq, | ||
6020 | - (conn->c_npaths != 0)); | ||
6021 | - } | ||
6022 | + /* The underlying connection is not up yet. Need to wait | ||
6023 | + * until it is up to be sure that the non-zero c_path can be | ||
6024 | + * used. But if we are interrupted, we have to use the zero | ||
6025 | + * c_path in case the connection ends up being non-MP capable. | ||
6026 | + */ | ||
6027 | + if (conn->c_npaths == 0) | ||
6028 | + if (wait_event_interruptible(conn->c_hs_waitq, | ||
6029 | + conn->c_npaths != 0)) | ||
6030 | + hash = 0; | ||
6031 | if (conn->c_npaths == 1) | ||
6032 | hash = 0; | ||
6033 | } | ||
6034 | diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c | ||
6035 | index 79aec90259cd..4afd4149a632 100644 | ||
6036 | --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c | ||
6037 | +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c | ||
6038 | @@ -237,9 +237,6 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, | ||
6039 | |||
6040 | ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); | ||
6041 | |||
6042 | - err = crypto_ahash_init(req); | ||
6043 | - if (err) | ||
6044 | - goto out; | ||
6045 | err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength); | ||
6046 | if (err) | ||
6047 | goto out; | ||
6048 | diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c | ||
6049 | index 150334064071..ff5bc6363a79 100644 | ||
6050 | --- a/tools/perf/tests/code-reading.c | ||
6051 | +++ b/tools/perf/tests/code-reading.c | ||
6052 | @@ -224,8 +224,6 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, | ||
6053 | unsigned char buf2[BUFSZ]; | ||
6054 | size_t ret_len; | ||
6055 | u64 objdump_addr; | ||
6056 | - const char *objdump_name; | ||
6057 | - char decomp_name[KMOD_DECOMP_LEN]; | ||
6058 | int ret; | ||
6059 | |||
6060 | pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr); | ||
6061 | @@ -286,25 +284,9 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, | ||
6062 | state->done[state->done_cnt++] = al.map->start; | ||
6063 | } | ||
6064 | |||
6065 | - objdump_name = al.map->dso->long_name; | ||
6066 | - if (dso__needs_decompress(al.map->dso)) { | ||
6067 | - if (dso__decompress_kmodule_path(al.map->dso, objdump_name, | ||
6068 | - decomp_name, | ||
6069 | - sizeof(decomp_name)) < 0) { | ||
6070 | - pr_debug("decompression failed\n"); | ||
6071 | - return -1; | ||
6072 | - } | ||
6073 | - | ||
6074 | - objdump_name = decomp_name; | ||
6075 | - } | ||
6076 | - | ||
6077 | /* Read the object code using objdump */ | ||
6078 | objdump_addr = map__rip_2objdump(al.map, al.addr); | ||
6079 | - ret = read_via_objdump(objdump_name, objdump_addr, buf2, len); | ||
6080 | - | ||
6081 | - if (dso__needs_decompress(al.map->dso)) | ||
6082 | - unlink(objdump_name); | ||
6083 | - | ||
6084 | + ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len); | ||
6085 | if (ret > 0) { | ||
6086 | /* | ||
6087 | * The kernel maps are inaccurate - assume objdump is right in | ||
6088 | diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | ||
6089 | index 7e27207d0f45..cac39532c057 100644 | ||
6090 | --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | ||
6091 | +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | ||
6092 | @@ -1300,6 +1300,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) | ||
6093 | intel_pt_clear_tx_flags(decoder); | ||
6094 | decoder->have_tma = false; | ||
6095 | decoder->cbr = 0; | ||
6096 | + decoder->timestamp_insn_cnt = 0; | ||
6097 | decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; | ||
6098 | decoder->overflow = true; | ||
6099 | return -EOVERFLOW; | ||
6100 | @@ -1522,6 +1523,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) | ||
6101 | case INTEL_PT_PSBEND: | ||
6102 | intel_pt_log("ERROR: Missing TIP after FUP\n"); | ||
6103 | decoder->pkt_state = INTEL_PT_STATE_ERR3; | ||
6104 | + decoder->pkt_step = 0; | ||
6105 | return -ENOENT; | ||
6106 | |||
6107 | case INTEL_PT_OVF: | ||
6108 | @@ -2182,14 +2184,6 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) | ||
6109 | return &decoder->state; | ||
6110 | } | ||
6111 | |||
6112 | -static bool intel_pt_at_psb(unsigned char *buf, size_t len) | ||
6113 | -{ | ||
6114 | - if (len < INTEL_PT_PSB_LEN) | ||
6115 | - return false; | ||
6116 | - return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR, | ||
6117 | - INTEL_PT_PSB_LEN); | ||
6118 | -} | ||
6119 | - | ||
6120 | /** | ||
6121 | * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet. | ||
6122 | * @buf: pointer to buffer pointer | ||
6123 | @@ -2278,6 +2272,7 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) | ||
6124 | * @buf: buffer | ||
6125 | * @len: size of buffer | ||
6126 | * @tsc: TSC value returned | ||
6127 | + * @rem: returns remaining size when TSC is found | ||
6128 | * | ||
6129 | * Find a TSC packet in @buf and return the TSC value. This function assumes | ||
6130 | * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a | ||
6131 | @@ -2285,7 +2280,8 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) | ||
6132 | * | ||
6133 | * Return: %true if TSC is found, false otherwise. | ||
6134 | */ | ||
6135 | -static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) | ||
6136 | +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc, | ||
6137 | + size_t *rem) | ||
6138 | { | ||
6139 | struct intel_pt_pkt packet; | ||
6140 | int ret; | ||
6141 | @@ -2296,6 +2292,7 @@ static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) | ||
6142 | return false; | ||
6143 | if (packet.type == INTEL_PT_TSC) { | ||
6144 | *tsc = packet.payload; | ||
6145 | + *rem = len; | ||
6146 | return true; | ||
6147 | } | ||
6148 | if (packet.type == INTEL_PT_PSBEND) | ||
6149 | @@ -2346,6 +2343,8 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) | ||
6150 | * @len_a: size of first buffer | ||
6151 | * @buf_b: second buffer | ||
6152 | * @len_b: size of second buffer | ||
6153 | + * @consecutive: returns true if there is data in buf_b that is consecutive | ||
6154 | + * to buf_a | ||
6155 | * | ||
6156 | * If the trace contains TSC we can look at the last TSC of @buf_a and the | ||
6157 | * first TSC of @buf_b in order to determine if the buffers overlap, and then | ||
6158 | @@ -2358,33 +2357,41 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) | ||
6159 | static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, | ||
6160 | size_t len_a, | ||
6161 | unsigned char *buf_b, | ||
6162 | - size_t len_b) | ||
6163 | + size_t len_b, bool *consecutive) | ||
6164 | { | ||
6165 | uint64_t tsc_a, tsc_b; | ||
6166 | unsigned char *p; | ||
6167 | - size_t len; | ||
6168 | + size_t len, rem_a, rem_b; | ||
6169 | |||
6170 | p = intel_pt_last_psb(buf_a, len_a); | ||
6171 | if (!p) | ||
6172 | return buf_b; /* No PSB in buf_a => no overlap */ | ||
6173 | |||
6174 | len = len_a - (p - buf_a); | ||
6175 | - if (!intel_pt_next_tsc(p, len, &tsc_a)) { | ||
6176 | + if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) { | ||
6177 | /* The last PSB+ in buf_a is incomplete, so go back one more */ | ||
6178 | len_a -= len; | ||
6179 | p = intel_pt_last_psb(buf_a, len_a); | ||
6180 | if (!p) | ||
6181 | return buf_b; /* No full PSB+ => assume no overlap */ | ||
6182 | len = len_a - (p - buf_a); | ||
6183 | - if (!intel_pt_next_tsc(p, len, &tsc_a)) | ||
6184 | + if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) | ||
6185 | return buf_b; /* No TSC in buf_a => assume no overlap */ | ||
6186 | } | ||
6187 | |||
6188 | while (1) { | ||
6189 | /* Ignore PSB+ with no TSC */ | ||
6190 | - if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) && | ||
6191 | - intel_pt_tsc_cmp(tsc_a, tsc_b) < 0) | ||
6192 | - return buf_b; /* tsc_a < tsc_b => no overlap */ | ||
6193 | + if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) { | ||
6194 | + int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b); | ||
6195 | + | ||
6196 | + /* Same TSC, so buffers are consecutive */ | ||
6197 | + if (!cmp && rem_b >= rem_a) { | ||
6198 | + *consecutive = true; | ||
6199 | + return buf_b + len_b - (rem_b - rem_a); | ||
6200 | + } | ||
6201 | + if (cmp < 0) | ||
6202 | + return buf_b; /* tsc_a < tsc_b => no overlap */ | ||
6203 | + } | ||
6204 | |||
6205 | if (!intel_pt_step_psb(&buf_b, &len_b)) | ||
6206 | return buf_b + len_b; /* No PSB in buf_b => no data */ | ||
6207 | @@ -2398,6 +2405,8 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, | ||
6208 | * @buf_b: second buffer | ||
6209 | * @len_b: size of second buffer | ||
6210 | * @have_tsc: can use TSC packets to detect overlap | ||
6211 | + * @consecutive: returns true if there is data in buf_b that is consecutive | ||
6212 | + * to buf_a | ||
6213 | * | ||
6214 | * When trace samples or snapshots are recorded there is the possibility that | ||
6215 | * the data overlaps. Note that, for the purposes of decoding, data is only | ||
6216 | @@ -2408,7 +2417,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, | ||
6217 | */ | ||
6218 | unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, | ||
6219 | unsigned char *buf_b, size_t len_b, | ||
6220 | - bool have_tsc) | ||
6221 | + bool have_tsc, bool *consecutive) | ||
6222 | { | ||
6223 | unsigned char *found; | ||
6224 | |||
6225 | @@ -2420,7 +2429,8 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, | ||
6226 | return buf_b; /* No overlap */ | ||
6227 | |||
6228 | if (have_tsc) { | ||
6229 | - found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b); | ||
6230 | + found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b, | ||
6231 | + consecutive); | ||
6232 | if (found) | ||
6233 | return found; | ||
6234 | } | ||
6235 | @@ -2435,28 +2445,16 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, | ||
6236 | } | ||
6237 | |||
6238 | /* Now len_b >= len_a */ | ||
6239 | - if (len_b > len_a) { | ||
6240 | - /* The leftover buffer 'b' must start at a PSB */ | ||
6241 | - while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { | ||
6242 | - if (!intel_pt_step_psb(&buf_a, &len_a)) | ||
6243 | - return buf_b; /* No overlap */ | ||
6244 | - } | ||
6245 | - } | ||
6246 | - | ||
6247 | while (1) { | ||
6248 | /* Potential overlap so check the bytes */ | ||
6249 | found = memmem(buf_a, len_a, buf_b, len_a); | ||
6250 | - if (found) | ||
6251 | + if (found) { | ||
6252 | + *consecutive = true; | ||
6253 | return buf_b + len_a; | ||
6254 | + } | ||
6255 | |||
6256 | /* Try again at next PSB in buffer 'a' */ | ||
6257 | if (!intel_pt_step_psb(&buf_a, &len_a)) | ||
6258 | return buf_b; /* No overlap */ | ||
6259 | - | ||
6260 | - /* The leftover buffer 'b' must start at a PSB */ | ||
6261 | - while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { | ||
6262 | - if (!intel_pt_step_psb(&buf_a, &len_a)) | ||
6263 | - return buf_b; /* No overlap */ | ||
6264 | - } | ||
6265 | } | ||
6266 | } | ||
6267 | diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | ||
6268 | index 89399985fa4d..9ae4df1dcedc 100644 | ||
6269 | --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | ||
6270 | +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | ||
6271 | @@ -103,7 +103,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); | ||
6272 | |||
6273 | unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, | ||
6274 | unsigned char *buf_b, size_t len_b, | ||
6275 | - bool have_tsc); | ||
6276 | + bool have_tsc, bool *consecutive); | ||
6277 | |||
6278 | int intel_pt__strerror(int code, char *buf, size_t buflen); | ||
6279 | |||
6280 | diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c | ||
6281 | index dc041d4368c8..b1161d725ce9 100644 | ||
6282 | --- a/tools/perf/util/intel-pt.c | ||
6283 | +++ b/tools/perf/util/intel-pt.c | ||
6284 | @@ -131,6 +131,7 @@ struct intel_pt_queue { | ||
6285 | bool stop; | ||
6286 | bool step_through_buffers; | ||
6287 | bool use_buffer_pid_tid; | ||
6288 | + bool sync_switch; | ||
6289 | pid_t pid, tid; | ||
6290 | int cpu; | ||
6291 | int switch_state; | ||
6292 | @@ -194,14 +195,17 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, | ||
6293 | static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, | ||
6294 | struct auxtrace_buffer *b) | ||
6295 | { | ||
6296 | + bool consecutive = false; | ||
6297 | void *start; | ||
6298 | |||
6299 | start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, | ||
6300 | - pt->have_tsc); | ||
6301 | + pt->have_tsc, &consecutive); | ||
6302 | if (!start) | ||
6303 | return -EINVAL; | ||
6304 | b->use_size = b->data + b->size - start; | ||
6305 | b->use_data = start; | ||
6306 | + if (b->use_size && consecutive) | ||
6307 | + b->consecutive = true; | ||
6308 | return 0; | ||
6309 | } | ||
6310 | |||
6311 | @@ -928,10 +932,12 @@ static int intel_pt_setup_queue(struct intel_pt *pt, | ||
6312 | if (pt->timeless_decoding || !pt->have_sched_switch) | ||
6313 | ptq->use_buffer_pid_tid = true; | ||
6314 | } | ||
6315 | + | ||
6316 | + ptq->sync_switch = pt->sync_switch; | ||
6317 | } | ||
6318 | |||
6319 | if (!ptq->on_heap && | ||
6320 | - (!pt->sync_switch || | ||
6321 | + (!ptq->sync_switch || | ||
6322 | ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) { | ||
6323 | const struct intel_pt_state *state; | ||
6324 | int ret; | ||
6325 | @@ -1333,7 +1339,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) | ||
6326 | if (pt->synth_opts.last_branch) | ||
6327 | intel_pt_update_last_branch_rb(ptq); | ||
6328 | |||
6329 | - if (!pt->sync_switch) | ||
6330 | + if (!ptq->sync_switch) | ||
6331 | return 0; | ||
6332 | |||
6333 | if (intel_pt_is_switch_ip(ptq, state->to_ip)) { | ||
6334 | @@ -1414,6 +1420,21 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) | ||
6335 | return switch_ip; | ||
6336 | } | ||
6337 | |||
6338 | +static void intel_pt_enable_sync_switch(struct intel_pt *pt) | ||
6339 | +{ | ||
6340 | + unsigned int i; | ||
6341 | + | ||
6342 | + pt->sync_switch = true; | ||
6343 | + | ||
6344 | + for (i = 0; i < pt->queues.nr_queues; i++) { | ||
6345 | + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; | ||
6346 | + struct intel_pt_queue *ptq = queue->priv; | ||
6347 | + | ||
6348 | + if (ptq) | ||
6349 | + ptq->sync_switch = true; | ||
6350 | + } | ||
6351 | +} | ||
6352 | + | ||
6353 | static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) | ||
6354 | { | ||
6355 | const struct intel_pt_state *state = ptq->state; | ||
6356 | @@ -1430,7 +1451,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) | ||
6357 | if (pt->switch_ip) { | ||
6358 | intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", | ||
6359 | pt->switch_ip, pt->ptss_ip); | ||
6360 | - pt->sync_switch = true; | ||
6361 | + intel_pt_enable_sync_switch(pt); | ||
6362 | } | ||
6363 | } | ||
6364 | } | ||
6365 | @@ -1446,9 +1467,9 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) | ||
6366 | if (state->err) { | ||
6367 | if (state->err == INTEL_PT_ERR_NODATA) | ||
6368 | return 1; | ||
6369 | - if (pt->sync_switch && | ||
6370 | + if (ptq->sync_switch && | ||
6371 | state->from_ip >= pt->kernel_start) { | ||
6372 | - pt->sync_switch = false; | ||
6373 | + ptq->sync_switch = false; | ||
6374 | intel_pt_next_tid(pt, ptq); | ||
6375 | } | ||
6376 | if (pt->synth_opts.errors) { | ||
6377 | @@ -1474,7 +1495,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) | ||
6378 | state->timestamp, state->est_timestamp); | ||
6379 | ptq->timestamp = state->est_timestamp; | ||
6380 | /* Use estimated TSC in unknown switch state */ | ||
6381 | - } else if (pt->sync_switch && | ||
6382 | + } else if (ptq->sync_switch && | ||
6383 | ptq->switch_state == INTEL_PT_SS_UNKNOWN && | ||
6384 | intel_pt_is_switch_ip(ptq, state->to_ip) && | ||
6385 | ptq->next_tid == -1) { | ||
6386 | @@ -1621,7 +1642,7 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, | ||
6387 | return 1; | ||
6388 | |||
6389 | ptq = intel_pt_cpu_to_ptq(pt, cpu); | ||
6390 | - if (!ptq) | ||
6391 | + if (!ptq || !ptq->sync_switch) | ||
6392 | return 1; | ||
6393 | |||
6394 | switch (ptq->switch_state) { |