Magellan Linux

Contents of /trunk/kernel-alx-legacy/patches-4.9/0194-4.9.95-all-fixes.patch



Revision 3608
Fri Aug 14 07:34:29 2020 UTC by niro
File size: 186555 byte(s)
-added kernel-alx-legacy pkg
1 diff --git a/Makefile b/Makefile
2 index 02188cf8e9af..1aeec9df709d 100644
3 --- a/Makefile
4 +++ b/Makefile
5 @@ -1,6 +1,6 @@
6 VERSION = 4
7 PATCHLEVEL = 9
8 -SUBLEVEL = 94
9 +SUBLEVEL = 95
10 EXTRAVERSION =
11 NAME = Roaring Lionus
12
13 diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
14 index d5423ab15ed5..9fe1043e72d2 100644
15 --- a/arch/arm/include/asm/kvm_host.h
16 +++ b/arch/arm/include/asm/kvm_host.h
17 @@ -318,4 +318,10 @@ static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
18 return -ENXIO;
19 }
20
21 +static inline bool kvm_arm_harden_branch_predictor(void)
22 +{
23 + /* No way to detect it yet, pretend it is not there. */
24 + return false;
25 +}
26 +
27 #endif /* __ARM_KVM_HOST_H__ */
28 diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
29 index a58bbaa3ec60..d10e36235438 100644
30 --- a/arch/arm/include/asm/kvm_mmu.h
31 +++ b/arch/arm/include/asm/kvm_mmu.h
32 @@ -223,6 +223,16 @@ static inline unsigned int kvm_get_vmid_bits(void)
33 return 8;
34 }
35
36 +static inline void *kvm_get_hyp_vector(void)
37 +{
38 + return kvm_ksym_ref(__kvm_hyp_vector);
39 +}
40 +
41 +static inline int kvm_map_vectors(void)
42 +{
43 + return 0;
44 +}
45 +
46 #endif /* !__ASSEMBLY__ */
47
48 #endif /* __ARM_KVM_MMU_H__ */
49 diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
50 deleted file mode 100644
51 index 6bda945d31fa..000000000000
52 --- a/arch/arm/include/asm/kvm_psci.h
53 +++ /dev/null
54 @@ -1,27 +0,0 @@
55 -/*
56 - * Copyright (C) 2012 - ARM Ltd
57 - * Author: Marc Zyngier <marc.zyngier@arm.com>
58 - *
59 - * This program is free software; you can redistribute it and/or modify
60 - * it under the terms of the GNU General Public License version 2 as
61 - * published by the Free Software Foundation.
62 - *
63 - * This program is distributed in the hope that it will be useful,
64 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
65 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
66 - * GNU General Public License for more details.
67 - *
68 - * You should have received a copy of the GNU General Public License
69 - * along with this program. If not, see <http://www.gnu.org/licenses/>.
70 - */
71 -
72 -#ifndef __ARM_KVM_PSCI_H__
73 -#define __ARM_KVM_PSCI_H__
74 -
75 -#define KVM_ARM_PSCI_0_1 1
76 -#define KVM_ARM_PSCI_0_2 2
77 -
78 -int kvm_psci_version(struct kvm_vcpu *vcpu);
79 -int kvm_psci_call(struct kvm_vcpu *vcpu);
80 -
81 -#endif /* __ARM_KVM_PSCI_H__ */
82 diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
83 index c38bfbeec306..ef6595c7d697 100644
84 --- a/arch/arm/kvm/arm.c
85 +++ b/arch/arm/kvm/arm.c
86 @@ -29,6 +29,7 @@
87 #include <linux/kvm.h>
88 #include <trace/events/kvm.h>
89 #include <kvm/arm_pmu.h>
90 +#include <kvm/arm_psci.h>
91
92 #define CREATE_TRACE_POINTS
93 #include "trace.h"
94 @@ -44,7 +45,6 @@
95 #include <asm/kvm_mmu.h>
96 #include <asm/kvm_emulate.h>
97 #include <asm/kvm_coproc.h>
98 -#include <asm/kvm_psci.h>
99 #include <asm/sections.h>
100
101 #ifdef REQUIRES_VIRT
102 @@ -1088,7 +1088,7 @@ static void cpu_init_hyp_mode(void *dummy)
103 pgd_ptr = kvm_mmu_get_httbr();
104 stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
105 hyp_stack_ptr = stack_page + PAGE_SIZE;
106 - vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector);
107 + vector_ptr = (unsigned long)kvm_get_hyp_vector();
108
109 __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
110 __cpu_init_stage2();
111 @@ -1345,6 +1345,13 @@ static int init_hyp_mode(void)
112 goto out_err;
113 }
114
115 +
116 + err = kvm_map_vectors();
117 + if (err) {
118 + kvm_err("Cannot map vectors\n");
119 + goto out_err;
120 + }
121 +
122 /*
123 * Map the Hyp stack pages
124 */
125 diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
126 index 4e57ebca6e69..de1aedce2a8b 100644
127 --- a/arch/arm/kvm/handle_exit.c
128 +++ b/arch/arm/kvm/handle_exit.c
129 @@ -21,7 +21,7 @@
130 #include <asm/kvm_emulate.h>
131 #include <asm/kvm_coproc.h>
132 #include <asm/kvm_mmu.h>
133 -#include <asm/kvm_psci.h>
134 +#include <kvm/arm_psci.h>
135 #include <trace/events/kvm.h>
136
137 #include "trace.h"
138 @@ -36,7 +36,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
139 kvm_vcpu_hvc_get_imm(vcpu));
140 vcpu->stat.hvc_exit_stat++;
141
142 - ret = kvm_psci_call(vcpu);
143 + ret = kvm_hvc_call_handler(vcpu);
144 if (ret < 0) {
145 vcpu_set_reg(vcpu, 0, ~0UL);
146 return 1;
147 diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
148 index a08d7a93aebb..3d962257c166 100644
149 --- a/arch/arm/kvm/psci.c
150 +++ b/arch/arm/kvm/psci.c
151 @@ -15,16 +15,16 @@
152 * along with this program. If not, see <http://www.gnu.org/licenses/>.
153 */
154
155 +#include <linux/arm-smccc.h>
156 #include <linux/preempt.h>
157 #include <linux/kvm_host.h>
158 #include <linux/wait.h>
159
160 #include <asm/cputype.h>
161 #include <asm/kvm_emulate.h>
162 -#include <asm/kvm_psci.h>
163 #include <asm/kvm_host.h>
164
165 -#include <uapi/linux/psci.h>
166 +#include <kvm/arm_psci.h>
167
168 /*
169 * This is an implementation of the Power State Coordination Interface
170 @@ -33,6 +33,38 @@
171
172 #define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
173
174 +static u32 smccc_get_function(struct kvm_vcpu *vcpu)
175 +{
176 + return vcpu_get_reg(vcpu, 0);
177 +}
178 +
179 +static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu)
180 +{
181 + return vcpu_get_reg(vcpu, 1);
182 +}
183 +
184 +static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu)
185 +{
186 + return vcpu_get_reg(vcpu, 2);
187 +}
188 +
189 +static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu)
190 +{
191 + return vcpu_get_reg(vcpu, 3);
192 +}
193 +
194 +static void smccc_set_retval(struct kvm_vcpu *vcpu,
195 + unsigned long a0,
196 + unsigned long a1,
197 + unsigned long a2,
198 + unsigned long a3)
199 +{
200 + vcpu_set_reg(vcpu, 0, a0);
201 + vcpu_set_reg(vcpu, 1, a1);
202 + vcpu_set_reg(vcpu, 2, a2);
203 + vcpu_set_reg(vcpu, 3, a3);
204 +}
205 +
206 static unsigned long psci_affinity_mask(unsigned long affinity_level)
207 {
208 if (affinity_level <= 3)
209 @@ -75,7 +107,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
210 unsigned long context_id;
211 phys_addr_t target_pc;
212
213 - cpu_id = vcpu_get_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
214 + cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK;
215 if (vcpu_mode_is_32bit(source_vcpu))
216 cpu_id &= ~((u32) 0);
217
218 @@ -88,14 +120,14 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
219 if (!vcpu)
220 return PSCI_RET_INVALID_PARAMS;
221 if (!vcpu->arch.power_off) {
222 - if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
223 + if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1)
224 return PSCI_RET_ALREADY_ON;
225 else
226 return PSCI_RET_INVALID_PARAMS;
227 }
228
229 - target_pc = vcpu_get_reg(source_vcpu, 2);
230 - context_id = vcpu_get_reg(source_vcpu, 3);
231 + target_pc = smccc_get_arg2(source_vcpu);
232 + context_id = smccc_get_arg3(source_vcpu);
233
234 kvm_reset_vcpu(vcpu);
235
236 @@ -114,7 +146,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
237 * NOTE: We always update r0 (or x0) because for PSCI v0.1
238 * the general puspose registers are undefined upon CPU_ON.
239 */
240 - vcpu_set_reg(vcpu, 0, context_id);
241 + smccc_set_retval(vcpu, context_id, 0, 0, 0);
242 vcpu->arch.power_off = false;
243 smp_mb(); /* Make sure the above is visible */
244
245 @@ -134,8 +166,8 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
246 struct kvm *kvm = vcpu->kvm;
247 struct kvm_vcpu *tmp;
248
249 - target_affinity = vcpu_get_reg(vcpu, 1);
250 - lowest_affinity_level = vcpu_get_reg(vcpu, 2);
251 + target_affinity = smccc_get_arg1(vcpu);
252 + lowest_affinity_level = smccc_get_arg2(vcpu);
253
254 /* Determine target affinity mask */
255 target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
256 @@ -198,18 +230,10 @@ static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
257 kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
258 }
259
260 -int kvm_psci_version(struct kvm_vcpu *vcpu)
261 -{
262 - if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
263 - return KVM_ARM_PSCI_0_2;
264 -
265 - return KVM_ARM_PSCI_0_1;
266 -}
267 -
268 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
269 {
270 struct kvm *kvm = vcpu->kvm;
271 - unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
272 + unsigned long psci_fn = smccc_get_function(vcpu);
273 unsigned long val;
274 int ret = 1;
275
276 @@ -219,7 +243,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
277 * Bits[31:16] = Major Version = 0
278 * Bits[15:0] = Minor Version = 2
279 */
280 - val = 2;
281 + val = KVM_ARM_PSCI_0_2;
282 break;
283 case PSCI_0_2_FN_CPU_SUSPEND:
284 case PSCI_0_2_FN64_CPU_SUSPEND:
285 @@ -276,14 +300,56 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
286 break;
287 }
288
289 - vcpu_set_reg(vcpu, 0, val);
290 + smccc_set_retval(vcpu, val, 0, 0, 0);
291 + return ret;
292 +}
293 +
294 +static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu)
295 +{
296 + u32 psci_fn = smccc_get_function(vcpu);
297 + u32 feature;
298 + unsigned long val;
299 + int ret = 1;
300 +
301 + switch(psci_fn) {
302 + case PSCI_0_2_FN_PSCI_VERSION:
303 + val = KVM_ARM_PSCI_1_0;
304 + break;
305 + case PSCI_1_0_FN_PSCI_FEATURES:
306 + feature = smccc_get_arg1(vcpu);
307 + switch(feature) {
308 + case PSCI_0_2_FN_PSCI_VERSION:
309 + case PSCI_0_2_FN_CPU_SUSPEND:
310 + case PSCI_0_2_FN64_CPU_SUSPEND:
311 + case PSCI_0_2_FN_CPU_OFF:
312 + case PSCI_0_2_FN_CPU_ON:
313 + case PSCI_0_2_FN64_CPU_ON:
314 + case PSCI_0_2_FN_AFFINITY_INFO:
315 + case PSCI_0_2_FN64_AFFINITY_INFO:
316 + case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
317 + case PSCI_0_2_FN_SYSTEM_OFF:
318 + case PSCI_0_2_FN_SYSTEM_RESET:
319 + case PSCI_1_0_FN_PSCI_FEATURES:
320 + case ARM_SMCCC_VERSION_FUNC_ID:
321 + val = 0;
322 + break;
323 + default:
324 + val = PSCI_RET_NOT_SUPPORTED;
325 + break;
326 + }
327 + break;
328 + default:
329 + return kvm_psci_0_2_call(vcpu);
330 + }
331 +
332 + smccc_set_retval(vcpu, val, 0, 0, 0);
333 return ret;
334 }
335
336 static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
337 {
338 struct kvm *kvm = vcpu->kvm;
339 - unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
340 + unsigned long psci_fn = smccc_get_function(vcpu);
341 unsigned long val;
342
343 switch (psci_fn) {
344 @@ -301,7 +367,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
345 break;
346 }
347
348 - vcpu_set_reg(vcpu, 0, val);
349 + smccc_set_retval(vcpu, val, 0, 0, 0);
350 return 1;
351 }
352
353 @@ -319,9 +385,11 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
354 * Errors:
355 * -EINVAL: Unrecognized PSCI function
356 */
357 -int kvm_psci_call(struct kvm_vcpu *vcpu)
358 +static int kvm_psci_call(struct kvm_vcpu *vcpu)
359 {
360 - switch (kvm_psci_version(vcpu)) {
361 + switch (kvm_psci_version(vcpu, vcpu->kvm)) {
362 + case KVM_ARM_PSCI_1_0:
363 + return kvm_psci_1_0_call(vcpu);
364 case KVM_ARM_PSCI_0_2:
365 return kvm_psci_0_2_call(vcpu);
366 case KVM_ARM_PSCI_0_1:
367 @@ -330,3 +398,30 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
368 return -EINVAL;
369 };
370 }
371 +
372 +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
373 +{
374 + u32 func_id = smccc_get_function(vcpu);
375 + u32 val = PSCI_RET_NOT_SUPPORTED;
376 + u32 feature;
377 +
378 + switch (func_id) {
379 + case ARM_SMCCC_VERSION_FUNC_ID:
380 + val = ARM_SMCCC_VERSION_1_1;
381 + break;
382 + case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
383 + feature = smccc_get_arg1(vcpu);
384 + switch(feature) {
385 + case ARM_SMCCC_ARCH_WORKAROUND_1:
386 + if (kvm_arm_harden_branch_predictor())
387 + val = 0;
388 + break;
389 + }
390 + break;
391 + default:
392 + return kvm_psci_call(vcpu);
393 + }
394 +
395 + smccc_set_retval(vcpu, val, 0, 0, 0);
396 + return 1;
397 +}
398 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
399 index c8471cf46cbb..90e58bbbd858 100644
400 --- a/arch/arm64/Kconfig
401 +++ b/arch/arm64/Kconfig
402 @@ -745,6 +745,23 @@ config UNMAP_KERNEL_AT_EL0
403
404 If unsure, say Y.
405
406 +config HARDEN_BRANCH_PREDICTOR
407 + bool "Harden the branch predictor against aliasing attacks" if EXPERT
408 + default y
409 + help
410 + Speculation attacks against some high-performance processors rely on
411 + being able to manipulate the branch predictor for a victim context by
412 + executing aliasing branches in the attacker context. Such attacks
413 + can be partially mitigated against by clearing internal branch
414 + predictor state and limiting the prediction logic in some situations.
415 +
416 + This config option will take CPU-specific actions to harden the
417 + branch predictor against aliasing attacks and may rely on specific
418 + instruction sequences or control bits being set by the system
419 + firmware.
420 +
421 + If unsure, say Y.
422 +
423 menuconfig ARMV8_DEPRECATED
424 bool "Emulate deprecated/obsolete ARMv8 instructions"
425 depends on COMPAT
426 diff --git a/arch/arm64/crypto/sha256-core.S b/arch/arm64/crypto/sha256-core.S
427 new file mode 100644
428 index 000000000000..3ce82cc860bc
429 --- /dev/null
430 +++ b/arch/arm64/crypto/sha256-core.S
431 @@ -0,0 +1,2061 @@
432 +// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
433 +//
434 +// Licensed under the OpenSSL license (the "License"). You may not use
435 +// this file except in compliance with the License. You can obtain a copy
436 +// in the file LICENSE in the source distribution or at
437 +// https://www.openssl.org/source/license.html
438 +
439 +// ====================================================================
440 +// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
441 +// project. The module is, however, dual licensed under OpenSSL and
442 +// CRYPTOGAMS licenses depending on where you obtain it. For further
443 +// details see http://www.openssl.org/~appro/cryptogams/.
444 +//
445 +// Permission to use under GPLv2 terms is granted.
446 +// ====================================================================
447 +//
448 +// SHA256/512 for ARMv8.
449 +//
450 +// Performance in cycles per processed byte and improvement coefficient
451 +// over code generated with "default" compiler:
452 +//
453 +// SHA256-hw SHA256(*) SHA512
454 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
455 +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***))
456 +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
457 +// Denver 2.01 10.5 (+26%) 6.70 (+8%)
458 +// X-Gene 20.0 (+100%) 12.8 (+300%(***))
459 +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
460 +//
461 +// (*) Software SHA256 results are of lesser relevance, presented
462 +// mostly for informational purposes.
463 +// (**) The result is a trade-off: it's possible to improve it by
464 +// 10% (or by 1 cycle per round), but at the cost of 20% loss
465 +// on Cortex-A53 (or by 4 cycles per round).
466 +// (***) Super-impressive coefficients over gcc-generated code are
467 +// indication of some compiler "pathology", most notably code
468 +// generated with -mgeneral-regs-only is significanty faster
469 +// and the gap is only 40-90%.
470 +//
471 +// October 2016.
472 +//
473 +// Originally it was reckoned that it makes no sense to implement NEON
474 +// version of SHA256 for 64-bit processors. This is because performance
475 +// improvement on most wide-spread Cortex-A5x processors was observed
476 +// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
477 +// observed that 32-bit NEON SHA256 performs significantly better than
478 +// 64-bit scalar version on *some* of the more recent processors. As
479 +// result 64-bit NEON version of SHA256 was added to provide best
480 +// all-round performance. For example it executes ~30% faster on X-Gene
481 +// and Mongoose. [For reference, NEON version of SHA512 is bound to
482 +// deliver much less improvement, likely *negative* on Cortex-A5x.
483 +// Which is why NEON support is limited to SHA256.]
484 +
485 +#ifndef __KERNEL__
486 +# include "arm_arch.h"
487 +#endif
488 +
489 +.text
490 +
491 +.extern OPENSSL_armcap_P
492 +.globl sha256_block_data_order
493 +.type sha256_block_data_order,%function
494 +.align 6
495 +sha256_block_data_order:
496 +#ifndef __KERNEL__
497 +# ifdef __ILP32__
498 + ldrsw x16,.LOPENSSL_armcap_P
499 +# else
500 + ldr x16,.LOPENSSL_armcap_P
501 +# endif
502 + adr x17,.LOPENSSL_armcap_P
503 + add x16,x16,x17
504 + ldr w16,[x16]
505 + tst w16,#ARMV8_SHA256
506 + b.ne .Lv8_entry
507 + tst w16,#ARMV7_NEON
508 + b.ne .Lneon_entry
509 +#endif
510 + stp x29,x30,[sp,#-128]!
511 + add x29,sp,#0
512 +
513 + stp x19,x20,[sp,#16]
514 + stp x21,x22,[sp,#32]
515 + stp x23,x24,[sp,#48]
516 + stp x25,x26,[sp,#64]
517 + stp x27,x28,[sp,#80]
518 + sub sp,sp,#4*4
519 +
520 + ldp w20,w21,[x0] // load context
521 + ldp w22,w23,[x0,#2*4]
522 + ldp w24,w25,[x0,#4*4]
523 + add x2,x1,x2,lsl#6 // end of input
524 + ldp w26,w27,[x0,#6*4]
525 + adr x30,.LK256
526 + stp x0,x2,[x29,#96]
527 +
528 +.Loop:
529 + ldp w3,w4,[x1],#2*4
530 + ldr w19,[x30],#4 // *K++
531 + eor w28,w21,w22 // magic seed
532 + str x1,[x29,#112]
533 +#ifndef __AARCH64EB__
534 + rev w3,w3 // 0
535 +#endif
536 + ror w16,w24,#6
537 + add w27,w27,w19 // h+=K[i]
538 + eor w6,w24,w24,ror#14
539 + and w17,w25,w24
540 + bic w19,w26,w24
541 + add w27,w27,w3 // h+=X[i]
542 + orr w17,w17,w19 // Ch(e,f,g)
543 + eor w19,w20,w21 // a^b, b^c in next round
544 + eor w16,w16,w6,ror#11 // Sigma1(e)
545 + ror w6,w20,#2
546 + add w27,w27,w17 // h+=Ch(e,f,g)
547 + eor w17,w20,w20,ror#9
548 + add w27,w27,w16 // h+=Sigma1(e)
549 + and w28,w28,w19 // (b^c)&=(a^b)
550 + add w23,w23,w27 // d+=h
551 + eor w28,w28,w21 // Maj(a,b,c)
552 + eor w17,w6,w17,ror#13 // Sigma0(a)
553 + add w27,w27,w28 // h+=Maj(a,b,c)
554 + ldr w28,[x30],#4 // *K++, w19 in next round
555 + //add w27,w27,w17 // h+=Sigma0(a)
556 +#ifndef __AARCH64EB__
557 + rev w4,w4 // 1
558 +#endif
559 + ldp w5,w6,[x1],#2*4
560 + add w27,w27,w17 // h+=Sigma0(a)
561 + ror w16,w23,#6
562 + add w26,w26,w28 // h+=K[i]
563 + eor w7,w23,w23,ror#14
564 + and w17,w24,w23
565 + bic w28,w25,w23
566 + add w26,w26,w4 // h+=X[i]
567 + orr w17,w17,w28 // Ch(e,f,g)
568 + eor w28,w27,w20 // a^b, b^c in next round
569 + eor w16,w16,w7,ror#11 // Sigma1(e)
570 + ror w7,w27,#2
571 + add w26,w26,w17 // h+=Ch(e,f,g)
572 + eor w17,w27,w27,ror#9
573 + add w26,w26,w16 // h+=Sigma1(e)
574 + and w19,w19,w28 // (b^c)&=(a^b)
575 + add w22,w22,w26 // d+=h
576 + eor w19,w19,w20 // Maj(a,b,c)
577 + eor w17,w7,w17,ror#13 // Sigma0(a)
578 + add w26,w26,w19 // h+=Maj(a,b,c)
579 + ldr w19,[x30],#4 // *K++, w28 in next round
580 + //add w26,w26,w17 // h+=Sigma0(a)
581 +#ifndef __AARCH64EB__
582 + rev w5,w5 // 2
583 +#endif
584 + add w26,w26,w17 // h+=Sigma0(a)
585 + ror w16,w22,#6
586 + add w25,w25,w19 // h+=K[i]
587 + eor w8,w22,w22,ror#14
588 + and w17,w23,w22
589 + bic w19,w24,w22
590 + add w25,w25,w5 // h+=X[i]
591 + orr w17,w17,w19 // Ch(e,f,g)
592 + eor w19,w26,w27 // a^b, b^c in next round
593 + eor w16,w16,w8,ror#11 // Sigma1(e)
594 + ror w8,w26,#2
595 + add w25,w25,w17 // h+=Ch(e,f,g)
596 + eor w17,w26,w26,ror#9
597 + add w25,w25,w16 // h+=Sigma1(e)
598 + and w28,w28,w19 // (b^c)&=(a^b)
599 + add w21,w21,w25 // d+=h
600 + eor w28,w28,w27 // Maj(a,b,c)
601 + eor w17,w8,w17,ror#13 // Sigma0(a)
602 + add w25,w25,w28 // h+=Maj(a,b,c)
603 + ldr w28,[x30],#4 // *K++, w19 in next round
604 + //add w25,w25,w17 // h+=Sigma0(a)
605 +#ifndef __AARCH64EB__
606 + rev w6,w6 // 3
607 +#endif
608 + ldp w7,w8,[x1],#2*4
609 + add w25,w25,w17 // h+=Sigma0(a)
610 + ror w16,w21,#6
611 + add w24,w24,w28 // h+=K[i]
612 + eor w9,w21,w21,ror#14
613 + and w17,w22,w21
614 + bic w28,w23,w21
615 + add w24,w24,w6 // h+=X[i]
616 + orr w17,w17,w28 // Ch(e,f,g)
617 + eor w28,w25,w26 // a^b, b^c in next round
618 + eor w16,w16,w9,ror#11 // Sigma1(e)
619 + ror w9,w25,#2
620 + add w24,w24,w17 // h+=Ch(e,f,g)
621 + eor w17,w25,w25,ror#9
622 + add w24,w24,w16 // h+=Sigma1(e)
623 + and w19,w19,w28 // (b^c)&=(a^b)
624 + add w20,w20,w24 // d+=h
625 + eor w19,w19,w26 // Maj(a,b,c)
626 + eor w17,w9,w17,ror#13 // Sigma0(a)
627 + add w24,w24,w19 // h+=Maj(a,b,c)
628 + ldr w19,[x30],#4 // *K++, w28 in next round
629 + //add w24,w24,w17 // h+=Sigma0(a)
630 +#ifndef __AARCH64EB__
631 + rev w7,w7 // 4
632 +#endif
633 + add w24,w24,w17 // h+=Sigma0(a)
634 + ror w16,w20,#6
635 + add w23,w23,w19 // h+=K[i]
636 + eor w10,w20,w20,ror#14
637 + and w17,w21,w20
638 + bic w19,w22,w20
639 + add w23,w23,w7 // h+=X[i]
640 + orr w17,w17,w19 // Ch(e,f,g)
641 + eor w19,w24,w25 // a^b, b^c in next round
642 + eor w16,w16,w10,ror#11 // Sigma1(e)
643 + ror w10,w24,#2
644 + add w23,w23,w17 // h+=Ch(e,f,g)
645 + eor w17,w24,w24,ror#9
646 + add w23,w23,w16 // h+=Sigma1(e)
647 + and w28,w28,w19 // (b^c)&=(a^b)
648 + add w27,w27,w23 // d+=h
649 + eor w28,w28,w25 // Maj(a,b,c)
650 + eor w17,w10,w17,ror#13 // Sigma0(a)
651 + add w23,w23,w28 // h+=Maj(a,b,c)
652 + ldr w28,[x30],#4 // *K++, w19 in next round
653 + //add w23,w23,w17 // h+=Sigma0(a)
654 +#ifndef __AARCH64EB__
655 + rev w8,w8 // 5
656 +#endif
657 + ldp w9,w10,[x1],#2*4
658 + add w23,w23,w17 // h+=Sigma0(a)
659 + ror w16,w27,#6
660 + add w22,w22,w28 // h+=K[i]
661 + eor w11,w27,w27,ror#14
662 + and w17,w20,w27
663 + bic w28,w21,w27
664 + add w22,w22,w8 // h+=X[i]
665 + orr w17,w17,w28 // Ch(e,f,g)
666 + eor w28,w23,w24 // a^b, b^c in next round
667 + eor w16,w16,w11,ror#11 // Sigma1(e)
668 + ror w11,w23,#2
669 + add w22,w22,w17 // h+=Ch(e,f,g)
670 + eor w17,w23,w23,ror#9
671 + add w22,w22,w16 // h+=Sigma1(e)
672 + and w19,w19,w28 // (b^c)&=(a^b)
673 + add w26,w26,w22 // d+=h
674 + eor w19,w19,w24 // Maj(a,b,c)
675 + eor w17,w11,w17,ror#13 // Sigma0(a)
676 + add w22,w22,w19 // h+=Maj(a,b,c)
677 + ldr w19,[x30],#4 // *K++, w28 in next round
678 + //add w22,w22,w17 // h+=Sigma0(a)
679 +#ifndef __AARCH64EB__
680 + rev w9,w9 // 6
681 +#endif
682 + add w22,w22,w17 // h+=Sigma0(a)
683 + ror w16,w26,#6
684 + add w21,w21,w19 // h+=K[i]
685 + eor w12,w26,w26,ror#14
686 + and w17,w27,w26
687 + bic w19,w20,w26
688 + add w21,w21,w9 // h+=X[i]
689 + orr w17,w17,w19 // Ch(e,f,g)
690 + eor w19,w22,w23 // a^b, b^c in next round
691 + eor w16,w16,w12,ror#11 // Sigma1(e)
692 + ror w12,w22,#2
693 + add w21,w21,w17 // h+=Ch(e,f,g)
694 + eor w17,w22,w22,ror#9
695 + add w21,w21,w16 // h+=Sigma1(e)
696 + and w28,w28,w19 // (b^c)&=(a^b)
697 + add w25,w25,w21 // d+=h
698 + eor w28,w28,w23 // Maj(a,b,c)
699 + eor w17,w12,w17,ror#13 // Sigma0(a)
700 + add w21,w21,w28 // h+=Maj(a,b,c)
701 + ldr w28,[x30],#4 // *K++, w19 in next round
702 + //add w21,w21,w17 // h+=Sigma0(a)
703 +#ifndef __AARCH64EB__
704 + rev w10,w10 // 7
705 +#endif
706 + ldp w11,w12,[x1],#2*4
707 + add w21,w21,w17 // h+=Sigma0(a)
708 + ror w16,w25,#6
709 + add w20,w20,w28 // h+=K[i]
710 + eor w13,w25,w25,ror#14
711 + and w17,w26,w25
712 + bic w28,w27,w25
713 + add w20,w20,w10 // h+=X[i]
714 + orr w17,w17,w28 // Ch(e,f,g)
715 + eor w28,w21,w22 // a^b, b^c in next round
716 + eor w16,w16,w13,ror#11 // Sigma1(e)
717 + ror w13,w21,#2
718 + add w20,w20,w17 // h+=Ch(e,f,g)
719 + eor w17,w21,w21,ror#9
720 + add w20,w20,w16 // h+=Sigma1(e)
721 + and w19,w19,w28 // (b^c)&=(a^b)
722 + add w24,w24,w20 // d+=h
723 + eor w19,w19,w22 // Maj(a,b,c)
724 + eor w17,w13,w17,ror#13 // Sigma0(a)
725 + add w20,w20,w19 // h+=Maj(a,b,c)
726 + ldr w19,[x30],#4 // *K++, w28 in next round
727 + //add w20,w20,w17 // h+=Sigma0(a)
728 +#ifndef __AARCH64EB__
729 + rev w11,w11 // 8
730 +#endif
731 + add w20,w20,w17 // h+=Sigma0(a)
732 + ror w16,w24,#6
733 + add w27,w27,w19 // h+=K[i]
734 + eor w14,w24,w24,ror#14
735 + and w17,w25,w24
736 + bic w19,w26,w24
737 + add w27,w27,w11 // h+=X[i]
738 + orr w17,w17,w19 // Ch(e,f,g)
739 + eor w19,w20,w21 // a^b, b^c in next round
740 + eor w16,w16,w14,ror#11 // Sigma1(e)
741 + ror w14,w20,#2
742 + add w27,w27,w17 // h+=Ch(e,f,g)
743 + eor w17,w20,w20,ror#9
744 + add w27,w27,w16 // h+=Sigma1(e)
745 + and w28,w28,w19 // (b^c)&=(a^b)
746 + add w23,w23,w27 // d+=h
747 + eor w28,w28,w21 // Maj(a,b,c)
748 + eor w17,w14,w17,ror#13 // Sigma0(a)
749 + add w27,w27,w28 // h+=Maj(a,b,c)
750 + ldr w28,[x30],#4 // *K++, w19 in next round
751 + //add w27,w27,w17 // h+=Sigma0(a)
752 +#ifndef __AARCH64EB__
753 + rev w12,w12 // 9
754 +#endif
755 + ldp w13,w14,[x1],#2*4
756 + add w27,w27,w17 // h+=Sigma0(a)
757 + ror w16,w23,#6
758 + add w26,w26,w28 // h+=K[i]
759 + eor w15,w23,w23,ror#14
760 + and w17,w24,w23
761 + bic w28,w25,w23
762 + add w26,w26,w12 // h+=X[i]
763 + orr w17,w17,w28 // Ch(e,f,g)
764 + eor w28,w27,w20 // a^b, b^c in next round
765 + eor w16,w16,w15,ror#11 // Sigma1(e)
766 + ror w15,w27,#2
767 + add w26,w26,w17 // h+=Ch(e,f,g)
768 + eor w17,w27,w27,ror#9
769 + add w26,w26,w16 // h+=Sigma1(e)
770 + and w19,w19,w28 // (b^c)&=(a^b)
771 + add w22,w22,w26 // d+=h
772 + eor w19,w19,w20 // Maj(a,b,c)
773 + eor w17,w15,w17,ror#13 // Sigma0(a)
774 + add w26,w26,w19 // h+=Maj(a,b,c)
775 + ldr w19,[x30],#4 // *K++, w28 in next round
776 + //add w26,w26,w17 // h+=Sigma0(a)
777 +#ifndef __AARCH64EB__
778 + rev w13,w13 // 10
779 +#endif
780 + add w26,w26,w17 // h+=Sigma0(a)
781 + ror w16,w22,#6
782 + add w25,w25,w19 // h+=K[i]
783 + eor w0,w22,w22,ror#14
784 + and w17,w23,w22
785 + bic w19,w24,w22
786 + add w25,w25,w13 // h+=X[i]
787 + orr w17,w17,w19 // Ch(e,f,g)
788 + eor w19,w26,w27 // a^b, b^c in next round
789 + eor w16,w16,w0,ror#11 // Sigma1(e)
790 + ror w0,w26,#2
791 + add w25,w25,w17 // h+=Ch(e,f,g)
792 + eor w17,w26,w26,ror#9
793 + add w25,w25,w16 // h+=Sigma1(e)
794 + and w28,w28,w19 // (b^c)&=(a^b)
795 + add w21,w21,w25 // d+=h
796 + eor w28,w28,w27 // Maj(a,b,c)
797 + eor w17,w0,w17,ror#13 // Sigma0(a)
798 + add w25,w25,w28 // h+=Maj(a,b,c)
799 + ldr w28,[x30],#4 // *K++, w19 in next round
800 + //add w25,w25,w17 // h+=Sigma0(a)
801 +#ifndef __AARCH64EB__
802 + rev w14,w14 // 11
803 +#endif
804 + ldp w15,w0,[x1],#2*4
805 + add w25,w25,w17 // h+=Sigma0(a)
806 + str w6,[sp,#12]
807 + ror w16,w21,#6
808 + add w24,w24,w28 // h+=K[i]
809 + eor w6,w21,w21,ror#14
810 + and w17,w22,w21
811 + bic w28,w23,w21
812 + add w24,w24,w14 // h+=X[i]
813 + orr w17,w17,w28 // Ch(e,f,g)
814 + eor w28,w25,w26 // a^b, b^c in next round
815 + eor w16,w16,w6,ror#11 // Sigma1(e)
816 + ror w6,w25,#2
817 + add w24,w24,w17 // h+=Ch(e,f,g)
818 + eor w17,w25,w25,ror#9
819 + add w24,w24,w16 // h+=Sigma1(e)
820 + and w19,w19,w28 // (b^c)&=(a^b)
821 + add w20,w20,w24 // d+=h
822 + eor w19,w19,w26 // Maj(a,b,c)
823 + eor w17,w6,w17,ror#13 // Sigma0(a)
824 + add w24,w24,w19 // h+=Maj(a,b,c)
825 + ldr w19,[x30],#4 // *K++, w28 in next round
826 + //add w24,w24,w17 // h+=Sigma0(a)
827 +#ifndef __AARCH64EB__
828 + rev w15,w15 // 12
829 +#endif
830 + add w24,w24,w17 // h+=Sigma0(a)
831 + str w7,[sp,#0]
832 + ror w16,w20,#6
833 + add w23,w23,w19 // h+=K[i]
834 + eor w7,w20,w20,ror#14
835 + and w17,w21,w20
836 + bic w19,w22,w20
837 + add w23,w23,w15 // h+=X[i]
838 + orr w17,w17,w19 // Ch(e,f,g)
839 + eor w19,w24,w25 // a^b, b^c in next round
840 + eor w16,w16,w7,ror#11 // Sigma1(e)
841 + ror w7,w24,#2
842 + add w23,w23,w17 // h+=Ch(e,f,g)
843 + eor w17,w24,w24,ror#9
844 + add w23,w23,w16 // h+=Sigma1(e)
845 + and w28,w28,w19 // (b^c)&=(a^b)
846 + add w27,w27,w23 // d+=h
847 + eor w28,w28,w25 // Maj(a,b,c)
848 + eor w17,w7,w17,ror#13 // Sigma0(a)
849 + add w23,w23,w28 // h+=Maj(a,b,c)
850 + ldr w28,[x30],#4 // *K++, w19 in next round
851 + //add w23,w23,w17 // h+=Sigma0(a)
852 +#ifndef __AARCH64EB__
853 + rev w0,w0 // 13
854 +#endif
855 + ldp w1,w2,[x1]
856 + add w23,w23,w17 // h+=Sigma0(a)
857 + str w8,[sp,#4]
858 + ror w16,w27,#6
859 + add w22,w22,w28 // h+=K[i]
860 + eor w8,w27,w27,ror#14
861 + and w17,w20,w27
862 + bic w28,w21,w27
863 + add w22,w22,w0 // h+=X[i]
864 + orr w17,w17,w28 // Ch(e,f,g)
865 + eor w28,w23,w24 // a^b, b^c in next round
866 + eor w16,w16,w8,ror#11 // Sigma1(e)
867 + ror w8,w23,#2
868 + add w22,w22,w17 // h+=Ch(e,f,g)
869 + eor w17,w23,w23,ror#9
870 + add w22,w22,w16 // h+=Sigma1(e)
871 + and w19,w19,w28 // (b^c)&=(a^b)
872 + add w26,w26,w22 // d+=h
873 + eor w19,w19,w24 // Maj(a,b,c)
874 + eor w17,w8,w17,ror#13 // Sigma0(a)
875 + add w22,w22,w19 // h+=Maj(a,b,c)
876 + ldr w19,[x30],#4 // *K++, w28 in next round
877 + //add w22,w22,w17 // h+=Sigma0(a)
878 +#ifndef __AARCH64EB__
879 + rev w1,w1 // 14
880 +#endif
881 + ldr w6,[sp,#12]
882 + add w22,w22,w17 // h+=Sigma0(a)
883 + str w9,[sp,#8]
884 + ror w16,w26,#6
885 + add w21,w21,w19 // h+=K[i]
886 + eor w9,w26,w26,ror#14
887 + and w17,w27,w26
888 + bic w19,w20,w26
889 + add w21,w21,w1 // h+=X[i]
890 + orr w17,w17,w19 // Ch(e,f,g)
891 + eor w19,w22,w23 // a^b, b^c in next round
892 + eor w16,w16,w9,ror#11 // Sigma1(e)
893 + ror w9,w22,#2
894 + add w21,w21,w17 // h+=Ch(e,f,g)
895 + eor w17,w22,w22,ror#9
896 + add w21,w21,w16 // h+=Sigma1(e)
897 + and w28,w28,w19 // (b^c)&=(a^b)
898 + add w25,w25,w21 // d+=h
899 + eor w28,w28,w23 // Maj(a,b,c)
900 + eor w17,w9,w17,ror#13 // Sigma0(a)
901 + add w21,w21,w28 // h+=Maj(a,b,c)
902 + ldr w28,[x30],#4 // *K++, w19 in next round
903 + //add w21,w21,w17 // h+=Sigma0(a)
904 +#ifndef __AARCH64EB__
905 + rev w2,w2 // 15
906 +#endif
907 + ldr w7,[sp,#0]
908 + add w21,w21,w17 // h+=Sigma0(a)
909 + str w10,[sp,#12]
910 + ror w16,w25,#6
911 + add w20,w20,w28 // h+=K[i]
912 + ror w9,w4,#7
913 + and w17,w26,w25
914 + ror w8,w1,#17
915 + bic w28,w27,w25
916 + ror w10,w21,#2
917 + add w20,w20,w2 // h+=X[i]
918 + eor w16,w16,w25,ror#11
919 + eor w9,w9,w4,ror#18
920 + orr w17,w17,w28 // Ch(e,f,g)
921 + eor w28,w21,w22 // a^b, b^c in next round
922 + eor w16,w16,w25,ror#25 // Sigma1(e)
923 + eor w10,w10,w21,ror#13
924 + add w20,w20,w17 // h+=Ch(e,f,g)
925 + and w19,w19,w28 // (b^c)&=(a^b)
926 + eor w8,w8,w1,ror#19
927 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
928 + add w20,w20,w16 // h+=Sigma1(e)
929 + eor w19,w19,w22 // Maj(a,b,c)
930 + eor w17,w10,w21,ror#22 // Sigma0(a)
931 + eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
932 + add w3,w3,w12
933 + add w24,w24,w20 // d+=h
934 + add w20,w20,w19 // h+=Maj(a,b,c)
935 + ldr w19,[x30],#4 // *K++, w28 in next round
936 + add w3,w3,w9
937 + add w20,w20,w17 // h+=Sigma0(a)
938 + add w3,w3,w8
939 +.Loop_16_xx:
940 + ldr w8,[sp,#4]
941 + str w11,[sp,#0]
942 + ror w16,w24,#6
943 + add w27,w27,w19 // h+=K[i]
944 + ror w10,w5,#7
945 + and w17,w25,w24
946 + ror w9,w2,#17
947 + bic w19,w26,w24
948 + ror w11,w20,#2
949 + add w27,w27,w3 // h+=X[i]
950 + eor w16,w16,w24,ror#11
951 + eor w10,w10,w5,ror#18
952 + orr w17,w17,w19 // Ch(e,f,g)
953 + eor w19,w20,w21 // a^b, b^c in next round
954 + eor w16,w16,w24,ror#25 // Sigma1(e)
955 + eor w11,w11,w20,ror#13
956 + add w27,w27,w17 // h+=Ch(e,f,g)
957 + and w28,w28,w19 // (b^c)&=(a^b)
958 + eor w9,w9,w2,ror#19
959 + eor w10,w10,w5,lsr#3 // sigma0(X[i+1])
960 + add w27,w27,w16 // h+=Sigma1(e)
961 + eor w28,w28,w21 // Maj(a,b,c)
962 + eor w17,w11,w20,ror#22 // Sigma0(a)
963 + eor w9,w9,w2,lsr#10 // sigma1(X[i+14])
964 + add w4,w4,w13
965 + add w23,w23,w27 // d+=h
966 + add w27,w27,w28 // h+=Maj(a,b,c)
967 + ldr w28,[x30],#4 // *K++, w19 in next round
968 + add w4,w4,w10
969 + add w27,w27,w17 // h+=Sigma0(a)
970 + add w4,w4,w9
971 + ldr w9,[sp,#8]
972 + str w12,[sp,#4]
973 + ror w16,w23,#6
974 + add w26,w26,w28 // h+=K[i]
975 + ror w11,w6,#7
976 + and w17,w24,w23
977 + ror w10,w3,#17
978 + bic w28,w25,w23
979 + ror w12,w27,#2
980 + add w26,w26,w4 // h+=X[i]
981 + eor w16,w16,w23,ror#11
982 + eor w11,w11,w6,ror#18
983 + orr w17,w17,w28 // Ch(e,f,g)
984 + eor w28,w27,w20 // a^b, b^c in next round
985 + eor w16,w16,w23,ror#25 // Sigma1(e)
986 + eor w12,w12,w27,ror#13
987 + add w26,w26,w17 // h+=Ch(e,f,g)
988 + and w19,w19,w28 // (b^c)&=(a^b)
989 + eor w10,w10,w3,ror#19
990 + eor w11,w11,w6,lsr#3 // sigma0(X[i+1])
991 + add w26,w26,w16 // h+=Sigma1(e)
992 + eor w19,w19,w20 // Maj(a,b,c)
993 + eor w17,w12,w27,ror#22 // Sigma0(a)
994 + eor w10,w10,w3,lsr#10 // sigma1(X[i+14])
995 + add w5,w5,w14
996 + add w22,w22,w26 // d+=h
997 + add w26,w26,w19 // h+=Maj(a,b,c)
998 + ldr w19,[x30],#4 // *K++, w28 in next round
999 + add w5,w5,w11
1000 + add w26,w26,w17 // h+=Sigma0(a)
1001 + add w5,w5,w10
1002 + ldr w10,[sp,#12]
1003 + str w13,[sp,#8]
1004 + ror w16,w22,#6
1005 + add w25,w25,w19 // h+=K[i]
1006 + ror w12,w7,#7
1007 + and w17,w23,w22
1008 + ror w11,w4,#17
1009 + bic w19,w24,w22
1010 + ror w13,w26,#2
1011 + add w25,w25,w5 // h+=X[i]
1012 + eor w16,w16,w22,ror#11
1013 + eor w12,w12,w7,ror#18
1014 + orr w17,w17,w19 // Ch(e,f,g)
1015 + eor w19,w26,w27 // a^b, b^c in next round
1016 + eor w16,w16,w22,ror#25 // Sigma1(e)
1017 + eor w13,w13,w26,ror#13
1018 + add w25,w25,w17 // h+=Ch(e,f,g)
1019 + and w28,w28,w19 // (b^c)&=(a^b)
1020 + eor w11,w11,w4,ror#19
1021 + eor w12,w12,w7,lsr#3 // sigma0(X[i+1])
1022 + add w25,w25,w16 // h+=Sigma1(e)
1023 + eor w28,w28,w27 // Maj(a,b,c)
1024 + eor w17,w13,w26,ror#22 // Sigma0(a)
1025 + eor w11,w11,w4,lsr#10 // sigma1(X[i+14])
1026 + add w6,w6,w15
1027 + add w21,w21,w25 // d+=h
1028 + add w25,w25,w28 // h+=Maj(a,b,c)
1029 + ldr w28,[x30],#4 // *K++, w19 in next round
1030 + add w6,w6,w12
1031 + add w25,w25,w17 // h+=Sigma0(a)
1032 + add w6,w6,w11
1033 + ldr w11,[sp,#0]
1034 + str w14,[sp,#12]
1035 + ror w16,w21,#6
1036 + add w24,w24,w28 // h+=K[i]
1037 + ror w13,w8,#7
1038 + and w17,w22,w21
1039 + ror w12,w5,#17
1040 + bic w28,w23,w21
1041 + ror w14,w25,#2
1042 + add w24,w24,w6 // h+=X[i]
1043 + eor w16,w16,w21,ror#11
1044 + eor w13,w13,w8,ror#18
1045 + orr w17,w17,w28 // Ch(e,f,g)
1046 + eor w28,w25,w26 // a^b, b^c in next round
1047 + eor w16,w16,w21,ror#25 // Sigma1(e)
1048 + eor w14,w14,w25,ror#13
1049 + add w24,w24,w17 // h+=Ch(e,f,g)
1050 + and w19,w19,w28 // (b^c)&=(a^b)
1051 + eor w12,w12,w5,ror#19
1052 + eor w13,w13,w8,lsr#3 // sigma0(X[i+1])
1053 + add w24,w24,w16 // h+=Sigma1(e)
1054 + eor w19,w19,w26 // Maj(a,b,c)
1055 + eor w17,w14,w25,ror#22 // Sigma0(a)
1056 + eor w12,w12,w5,lsr#10 // sigma1(X[i+14])
1057 + add w7,w7,w0
1058 + add w20,w20,w24 // d+=h
1059 + add w24,w24,w19 // h+=Maj(a,b,c)
1060 + ldr w19,[x30],#4 // *K++, w28 in next round
1061 + add w7,w7,w13
1062 + add w24,w24,w17 // h+=Sigma0(a)
1063 + add w7,w7,w12
1064 + ldr w12,[sp,#4]
1065 + str w15,[sp,#0]
1066 + ror w16,w20,#6
1067 + add w23,w23,w19 // h+=K[i]
1068 + ror w14,w9,#7
1069 + and w17,w21,w20
1070 + ror w13,w6,#17
1071 + bic w19,w22,w20
1072 + ror w15,w24,#2
1073 + add w23,w23,w7 // h+=X[i]
1074 + eor w16,w16,w20,ror#11
1075 + eor w14,w14,w9,ror#18
1076 + orr w17,w17,w19 // Ch(e,f,g)
1077 + eor w19,w24,w25 // a^b, b^c in next round
1078 + eor w16,w16,w20,ror#25 // Sigma1(e)
1079 + eor w15,w15,w24,ror#13
1080 + add w23,w23,w17 // h+=Ch(e,f,g)
1081 + and w28,w28,w19 // (b^c)&=(a^b)
1082 + eor w13,w13,w6,ror#19
1083 + eor w14,w14,w9,lsr#3 // sigma0(X[i+1])
1084 + add w23,w23,w16 // h+=Sigma1(e)
1085 + eor w28,w28,w25 // Maj(a,b,c)
1086 + eor w17,w15,w24,ror#22 // Sigma0(a)
1087 + eor w13,w13,w6,lsr#10 // sigma1(X[i+14])
1088 + add w8,w8,w1
1089 + add w27,w27,w23 // d+=h
1090 + add w23,w23,w28 // h+=Maj(a,b,c)
1091 + ldr w28,[x30],#4 // *K++, w19 in next round
1092 + add w8,w8,w14
1093 + add w23,w23,w17 // h+=Sigma0(a)
1094 + add w8,w8,w13
1095 + ldr w13,[sp,#8]
1096 + str w0,[sp,#4]
1097 + ror w16,w27,#6
1098 + add w22,w22,w28 // h+=K[i]
1099 + ror w15,w10,#7
1100 + and w17,w20,w27
1101 + ror w14,w7,#17
1102 + bic w28,w21,w27
1103 + ror w0,w23,#2
1104 + add w22,w22,w8 // h+=X[i]
1105 + eor w16,w16,w27,ror#11
1106 + eor w15,w15,w10,ror#18
1107 + orr w17,w17,w28 // Ch(e,f,g)
1108 + eor w28,w23,w24 // a^b, b^c in next round
1109 + eor w16,w16,w27,ror#25 // Sigma1(e)
1110 + eor w0,w0,w23,ror#13
1111 + add w22,w22,w17 // h+=Ch(e,f,g)
1112 + and w19,w19,w28 // (b^c)&=(a^b)
1113 + eor w14,w14,w7,ror#19
1114 + eor w15,w15,w10,lsr#3 // sigma0(X[i+1])
1115 + add w22,w22,w16 // h+=Sigma1(e)
1116 + eor w19,w19,w24 // Maj(a,b,c)
1117 + eor w17,w0,w23,ror#22 // Sigma0(a)
1118 + eor w14,w14,w7,lsr#10 // sigma1(X[i+14])
1119 + add w9,w9,w2
1120 + add w26,w26,w22 // d+=h
1121 + add w22,w22,w19 // h+=Maj(a,b,c)
1122 + ldr w19,[x30],#4 // *K++, w28 in next round
1123 + add w9,w9,w15
1124 + add w22,w22,w17 // h+=Sigma0(a)
1125 + add w9,w9,w14
1126 + ldr w14,[sp,#12]
1127 + str w1,[sp,#8]
1128 + ror w16,w26,#6
1129 + add w21,w21,w19 // h+=K[i]
1130 + ror w0,w11,#7
1131 + and w17,w27,w26
1132 + ror w15,w8,#17
1133 + bic w19,w20,w26
1134 + ror w1,w22,#2
1135 + add w21,w21,w9 // h+=X[i]
1136 + eor w16,w16,w26,ror#11
1137 + eor w0,w0,w11,ror#18
1138 + orr w17,w17,w19 // Ch(e,f,g)
1139 + eor w19,w22,w23 // a^b, b^c in next round
1140 + eor w16,w16,w26,ror#25 // Sigma1(e)
1141 + eor w1,w1,w22,ror#13
1142 + add w21,w21,w17 // h+=Ch(e,f,g)
1143 + and w28,w28,w19 // (b^c)&=(a^b)
1144 + eor w15,w15,w8,ror#19
1145 + eor w0,w0,w11,lsr#3 // sigma0(X[i+1])
1146 + add w21,w21,w16 // h+=Sigma1(e)
1147 + eor w28,w28,w23 // Maj(a,b,c)
1148 + eor w17,w1,w22,ror#22 // Sigma0(a)
1149 + eor w15,w15,w8,lsr#10 // sigma1(X[i+14])
1150 + add w10,w10,w3
1151 + add w25,w25,w21 // d+=h
1152 + add w21,w21,w28 // h+=Maj(a,b,c)
1153 + ldr w28,[x30],#4 // *K++, w19 in next round
1154 + add w10,w10,w0
1155 + add w21,w21,w17 // h+=Sigma0(a)
1156 + add w10,w10,w15
1157 + ldr w15,[sp,#0]
1158 + str w2,[sp,#12]
1159 + ror w16,w25,#6
1160 + add w20,w20,w28 // h+=K[i]
1161 + ror w1,w12,#7
1162 + and w17,w26,w25
1163 + ror w0,w9,#17
1164 + bic w28,w27,w25
1165 + ror w2,w21,#2
1166 + add w20,w20,w10 // h+=X[i]
1167 + eor w16,w16,w25,ror#11
1168 + eor w1,w1,w12,ror#18
1169 + orr w17,w17,w28 // Ch(e,f,g)
1170 + eor w28,w21,w22 // a^b, b^c in next round
1171 + eor w16,w16,w25,ror#25 // Sigma1(e)
1172 + eor w2,w2,w21,ror#13
1173 + add w20,w20,w17 // h+=Ch(e,f,g)
1174 + and w19,w19,w28 // (b^c)&=(a^b)
1175 + eor w0,w0,w9,ror#19
1176 + eor w1,w1,w12,lsr#3 // sigma0(X[i+1])
1177 + add w20,w20,w16 // h+=Sigma1(e)
1178 + eor w19,w19,w22 // Maj(a,b,c)
1179 + eor w17,w2,w21,ror#22 // Sigma0(a)
1180 + eor w0,w0,w9,lsr#10 // sigma1(X[i+14])
1181 + add w11,w11,w4
1182 + add w24,w24,w20 // d+=h
1183 + add w20,w20,w19 // h+=Maj(a,b,c)
1184 + ldr w19,[x30],#4 // *K++, w28 in next round
1185 + add w11,w11,w1
1186 + add w20,w20,w17 // h+=Sigma0(a)
1187 + add w11,w11,w0
1188 + ldr w0,[sp,#4]
1189 + str w3,[sp,#0]
1190 + ror w16,w24,#6
1191 + add w27,w27,w19 // h+=K[i]
1192 + ror w2,w13,#7
1193 + and w17,w25,w24
1194 + ror w1,w10,#17
1195 + bic w19,w26,w24
1196 + ror w3,w20,#2
1197 + add w27,w27,w11 // h+=X[i]
1198 + eor w16,w16,w24,ror#11
1199 + eor w2,w2,w13,ror#18
1200 + orr w17,w17,w19 // Ch(e,f,g)
1201 + eor w19,w20,w21 // a^b, b^c in next round
1202 + eor w16,w16,w24,ror#25 // Sigma1(e)
1203 + eor w3,w3,w20,ror#13
1204 + add w27,w27,w17 // h+=Ch(e,f,g)
1205 + and w28,w28,w19 // (b^c)&=(a^b)
1206 + eor w1,w1,w10,ror#19
1207 + eor w2,w2,w13,lsr#3 // sigma0(X[i+1])
1208 + add w27,w27,w16 // h+=Sigma1(e)
1209 + eor w28,w28,w21 // Maj(a,b,c)
1210 + eor w17,w3,w20,ror#22 // Sigma0(a)
1211 + eor w1,w1,w10,lsr#10 // sigma1(X[i+14])
1212 + add w12,w12,w5
1213 + add w23,w23,w27 // d+=h
1214 + add w27,w27,w28 // h+=Maj(a,b,c)
1215 + ldr w28,[x30],#4 // *K++, w19 in next round
1216 + add w12,w12,w2
1217 + add w27,w27,w17 // h+=Sigma0(a)
1218 + add w12,w12,w1
1219 + ldr w1,[sp,#8]
1220 + str w4,[sp,#4]
1221 + ror w16,w23,#6
1222 + add w26,w26,w28 // h+=K[i]
1223 + ror w3,w14,#7
1224 + and w17,w24,w23
1225 + ror w2,w11,#17
1226 + bic w28,w25,w23
1227 + ror w4,w27,#2
1228 + add w26,w26,w12 // h+=X[i]
1229 + eor w16,w16,w23,ror#11
1230 + eor w3,w3,w14,ror#18
1231 + orr w17,w17,w28 // Ch(e,f,g)
1232 + eor w28,w27,w20 // a^b, b^c in next round
1233 + eor w16,w16,w23,ror#25 // Sigma1(e)
1234 + eor w4,w4,w27,ror#13
1235 + add w26,w26,w17 // h+=Ch(e,f,g)
1236 + and w19,w19,w28 // (b^c)&=(a^b)
1237 + eor w2,w2,w11,ror#19
1238 + eor w3,w3,w14,lsr#3 // sigma0(X[i+1])
1239 + add w26,w26,w16 // h+=Sigma1(e)
1240 + eor w19,w19,w20 // Maj(a,b,c)
1241 + eor w17,w4,w27,ror#22 // Sigma0(a)
1242 + eor w2,w2,w11,lsr#10 // sigma1(X[i+14])
1243 + add w13,w13,w6
1244 + add w22,w22,w26 // d+=h
1245 + add w26,w26,w19 // h+=Maj(a,b,c)
1246 + ldr w19,[x30],#4 // *K++, w28 in next round
1247 + add w13,w13,w3
1248 + add w26,w26,w17 // h+=Sigma0(a)
1249 + add w13,w13,w2
1250 + ldr w2,[sp,#12]
1251 + str w5,[sp,#8]
1252 + ror w16,w22,#6
1253 + add w25,w25,w19 // h+=K[i]
1254 + ror w4,w15,#7
1255 + and w17,w23,w22
1256 + ror w3,w12,#17
1257 + bic w19,w24,w22
1258 + ror w5,w26,#2
1259 + add w25,w25,w13 // h+=X[i]
1260 + eor w16,w16,w22,ror#11
1261 + eor w4,w4,w15,ror#18
1262 + orr w17,w17,w19 // Ch(e,f,g)
1263 + eor w19,w26,w27 // a^b, b^c in next round
1264 + eor w16,w16,w22,ror#25 // Sigma1(e)
1265 + eor w5,w5,w26,ror#13
1266 + add w25,w25,w17 // h+=Ch(e,f,g)
1267 + and w28,w28,w19 // (b^c)&=(a^b)
1268 + eor w3,w3,w12,ror#19
1269 + eor w4,w4,w15,lsr#3 // sigma0(X[i+1])
1270 + add w25,w25,w16 // h+=Sigma1(e)
1271 + eor w28,w28,w27 // Maj(a,b,c)
1272 + eor w17,w5,w26,ror#22 // Sigma0(a)
1273 + eor w3,w3,w12,lsr#10 // sigma1(X[i+14])
1274 + add w14,w14,w7
1275 + add w21,w21,w25 // d+=h
1276 + add w25,w25,w28 // h+=Maj(a,b,c)
1277 + ldr w28,[x30],#4 // *K++, w19 in next round
1278 + add w14,w14,w4
1279 + add w25,w25,w17 // h+=Sigma0(a)
1280 + add w14,w14,w3
1281 + ldr w3,[sp,#0]
1282 + str w6,[sp,#12]
1283 + ror w16,w21,#6
1284 + add w24,w24,w28 // h+=K[i]
1285 + ror w5,w0,#7
1286 + and w17,w22,w21
1287 + ror w4,w13,#17
1288 + bic w28,w23,w21
1289 + ror w6,w25,#2
1290 + add w24,w24,w14 // h+=X[i]
1291 + eor w16,w16,w21,ror#11
1292 + eor w5,w5,w0,ror#18
1293 + orr w17,w17,w28 // Ch(e,f,g)
1294 + eor w28,w25,w26 // a^b, b^c in next round
1295 + eor w16,w16,w21,ror#25 // Sigma1(e)
1296 + eor w6,w6,w25,ror#13
1297 + add w24,w24,w17 // h+=Ch(e,f,g)
1298 + and w19,w19,w28 // (b^c)&=(a^b)
1299 + eor w4,w4,w13,ror#19
1300 + eor w5,w5,w0,lsr#3 // sigma0(X[i+1])
1301 + add w24,w24,w16 // h+=Sigma1(e)
1302 + eor w19,w19,w26 // Maj(a,b,c)
1303 + eor w17,w6,w25,ror#22 // Sigma0(a)
1304 + eor w4,w4,w13,lsr#10 // sigma1(X[i+14])
1305 + add w15,w15,w8
1306 + add w20,w20,w24 // d+=h
1307 + add w24,w24,w19 // h+=Maj(a,b,c)
1308 + ldr w19,[x30],#4 // *K++, w28 in next round
1309 + add w15,w15,w5
1310 + add w24,w24,w17 // h+=Sigma0(a)
1311 + add w15,w15,w4
1312 + ldr w4,[sp,#4]
1313 + str w7,[sp,#0]
1314 + ror w16,w20,#6
1315 + add w23,w23,w19 // h+=K[i]
1316 + ror w6,w1,#7
1317 + and w17,w21,w20
1318 + ror w5,w14,#17
1319 + bic w19,w22,w20
1320 + ror w7,w24,#2
1321 + add w23,w23,w15 // h+=X[i]
1322 + eor w16,w16,w20,ror#11
1323 + eor w6,w6,w1,ror#18
1324 + orr w17,w17,w19 // Ch(e,f,g)
1325 + eor w19,w24,w25 // a^b, b^c in next round
1326 + eor w16,w16,w20,ror#25 // Sigma1(e)
1327 + eor w7,w7,w24,ror#13
1328 + add w23,w23,w17 // h+=Ch(e,f,g)
1329 + and w28,w28,w19 // (b^c)&=(a^b)
1330 + eor w5,w5,w14,ror#19
1331 + eor w6,w6,w1,lsr#3 // sigma0(X[i+1])
1332 + add w23,w23,w16 // h+=Sigma1(e)
1333 + eor w28,w28,w25 // Maj(a,b,c)
1334 + eor w17,w7,w24,ror#22 // Sigma0(a)
1335 + eor w5,w5,w14,lsr#10 // sigma1(X[i+14])
1336 + add w0,w0,w9
1337 + add w27,w27,w23 // d+=h
1338 + add w23,w23,w28 // h+=Maj(a,b,c)
1339 + ldr w28,[x30],#4 // *K++, w19 in next round
1340 + add w0,w0,w6
1341 + add w23,w23,w17 // h+=Sigma0(a)
1342 + add w0,w0,w5
1343 + ldr w5,[sp,#8]
1344 + str w8,[sp,#4]
1345 + ror w16,w27,#6
1346 + add w22,w22,w28 // h+=K[i]
1347 + ror w7,w2,#7
1348 + and w17,w20,w27
1349 + ror w6,w15,#17
1350 + bic w28,w21,w27
1351 + ror w8,w23,#2
1352 + add w22,w22,w0 // h+=X[i]
1353 + eor w16,w16,w27,ror#11
1354 + eor w7,w7,w2,ror#18
1355 + orr w17,w17,w28 // Ch(e,f,g)
1356 + eor w28,w23,w24 // a^b, b^c in next round
1357 + eor w16,w16,w27,ror#25 // Sigma1(e)
1358 + eor w8,w8,w23,ror#13
1359 + add w22,w22,w17 // h+=Ch(e,f,g)
1360 + and w19,w19,w28 // (b^c)&=(a^b)
1361 + eor w6,w6,w15,ror#19
1362 + eor w7,w7,w2,lsr#3 // sigma0(X[i+1])
1363 + add w22,w22,w16 // h+=Sigma1(e)
1364 + eor w19,w19,w24 // Maj(a,b,c)
1365 + eor w17,w8,w23,ror#22 // Sigma0(a)
1366 + eor w6,w6,w15,lsr#10 // sigma1(X[i+14])
1367 + add w1,w1,w10
1368 + add w26,w26,w22 // d+=h
1369 + add w22,w22,w19 // h+=Maj(a,b,c)
1370 + ldr w19,[x30],#4 // *K++, w28 in next round
1371 + add w1,w1,w7
1372 + add w22,w22,w17 // h+=Sigma0(a)
1373 + add w1,w1,w6
1374 + ldr w6,[sp,#12]
1375 + str w9,[sp,#8]
1376 + ror w16,w26,#6
1377 + add w21,w21,w19 // h+=K[i]
1378 + ror w8,w3,#7
1379 + and w17,w27,w26
1380 + ror w7,w0,#17
1381 + bic w19,w20,w26
1382 + ror w9,w22,#2
1383 + add w21,w21,w1 // h+=X[i]
1384 + eor w16,w16,w26,ror#11
1385 + eor w8,w8,w3,ror#18
1386 + orr w17,w17,w19 // Ch(e,f,g)
1387 + eor w19,w22,w23 // a^b, b^c in next round
1388 + eor w16,w16,w26,ror#25 // Sigma1(e)
1389 + eor w9,w9,w22,ror#13
1390 + add w21,w21,w17 // h+=Ch(e,f,g)
1391 + and w28,w28,w19 // (b^c)&=(a^b)
1392 + eor w7,w7,w0,ror#19
1393 + eor w8,w8,w3,lsr#3 // sigma0(X[i+1])
1394 + add w21,w21,w16 // h+=Sigma1(e)
1395 + eor w28,w28,w23 // Maj(a,b,c)
1396 + eor w17,w9,w22,ror#22 // Sigma0(a)
1397 + eor w7,w7,w0,lsr#10 // sigma1(X[i+14])
1398 + add w2,w2,w11
1399 + add w25,w25,w21 // d+=h
1400 + add w21,w21,w28 // h+=Maj(a,b,c)
1401 + ldr w28,[x30],#4 // *K++, w19 in next round
1402 + add w2,w2,w8
1403 + add w21,w21,w17 // h+=Sigma0(a)
1404 + add w2,w2,w7
1405 + ldr w7,[sp,#0]
1406 + str w10,[sp,#12]
1407 + ror w16,w25,#6
1408 + add w20,w20,w28 // h+=K[i]
1409 + ror w9,w4,#7
1410 + and w17,w26,w25
1411 + ror w8,w1,#17
1412 + bic w28,w27,w25
1413 + ror w10,w21,#2
1414 + add w20,w20,w2 // h+=X[i]
1415 + eor w16,w16,w25,ror#11
1416 + eor w9,w9,w4,ror#18
1417 + orr w17,w17,w28 // Ch(e,f,g)
1418 + eor w28,w21,w22 // a^b, b^c in next round
1419 + eor w16,w16,w25,ror#25 // Sigma1(e)
1420 + eor w10,w10,w21,ror#13
1421 + add w20,w20,w17 // h+=Ch(e,f,g)
1422 + and w19,w19,w28 // (b^c)&=(a^b)
1423 + eor w8,w8,w1,ror#19
1424 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
1425 + add w20,w20,w16 // h+=Sigma1(e)
1426 + eor w19,w19,w22 // Maj(a,b,c)
1427 + eor w17,w10,w21,ror#22 // Sigma0(a)
1428 + eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
1429 + add w3,w3,w12
1430 + add w24,w24,w20 // d+=h
1431 + add w20,w20,w19 // h+=Maj(a,b,c)
1432 + ldr w19,[x30],#4 // *K++, w28 in next round
1433 + add w3,w3,w9
1434 + add w20,w20,w17 // h+=Sigma0(a)
1435 + add w3,w3,w8
1436 + cbnz w19,.Loop_16_xx
1437 +
1438 + ldp x0,x2,[x29,#96]
1439 + ldr x1,[x29,#112]
1440 + sub x30,x30,#260 // rewind
1441 +
1442 + ldp w3,w4,[x0]
1443 + ldp w5,w6,[x0,#2*4]
1444 + add x1,x1,#14*4 // advance input pointer
1445 + ldp w7,w8,[x0,#4*4]
1446 + add w20,w20,w3
1447 + ldp w9,w10,[x0,#6*4]
1448 + add w21,w21,w4
1449 + add w22,w22,w5
1450 + add w23,w23,w6
1451 + stp w20,w21,[x0]
1452 + add w24,w24,w7
1453 + add w25,w25,w8
1454 + stp w22,w23,[x0,#2*4]
1455 + add w26,w26,w9
1456 + add w27,w27,w10
1457 + cmp x1,x2
1458 + stp w24,w25,[x0,#4*4]
1459 + stp w26,w27,[x0,#6*4]
1460 + b.ne .Loop
1461 +
1462 + ldp x19,x20,[x29,#16]
1463 + add sp,sp,#4*4
1464 + ldp x21,x22,[x29,#32]
1465 + ldp x23,x24,[x29,#48]
1466 + ldp x25,x26,[x29,#64]
1467 + ldp x27,x28,[x29,#80]
1468 + ldp x29,x30,[sp],#128
1469 + ret
1470 +.size sha256_block_data_order,.-sha256_block_data_order
1471 +
1472 +.align 6
1473 +.type .LK256,%object
1474 +.LK256:
1475 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
1476 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
1477 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
1478 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
1479 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
1480 + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
1481 + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
1482 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
1483 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
1484 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
1485 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
1486 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
1487 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
1488 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
1489 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
1490 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
1491 + .long 0 //terminator
1492 +.size .LK256,.-.LK256
1493 +#ifndef __KERNEL__
1494 +.align 3
1495 +.LOPENSSL_armcap_P:
1496 +# ifdef __ILP32__
1497 + .long OPENSSL_armcap_P-.
1498 +# else
1499 + .quad OPENSSL_armcap_P-.
1500 +# endif
1501 +#endif
1502 +.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1503 +.align 2
1504 +#ifndef __KERNEL__
1505 +.type sha256_block_armv8,%function
1506 +.align 6
1507 +sha256_block_armv8:
1508 +.Lv8_entry:
1509 + stp x29,x30,[sp,#-16]!
1510 + add x29,sp,#0
1511 +
1512 + ld1 {v0.4s,v1.4s},[x0]
1513 + adr x3,.LK256
1514 +
1515 +.Loop_hw:
1516 + ld1 {v4.16b-v7.16b},[x1],#64
1517 + sub x2,x2,#1
1518 + ld1 {v16.4s},[x3],#16
1519 + rev32 v4.16b,v4.16b
1520 + rev32 v5.16b,v5.16b
1521 + rev32 v6.16b,v6.16b
1522 + rev32 v7.16b,v7.16b
1523 + orr v18.16b,v0.16b,v0.16b // offload
1524 + orr v19.16b,v1.16b,v1.16b
1525 + ld1 {v17.4s},[x3],#16
1526 + add v16.4s,v16.4s,v4.4s
1527 + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
1528 + orr v2.16b,v0.16b,v0.16b
1529 + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
1530 + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
1531 + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
1532 + ld1 {v16.4s},[x3],#16
1533 + add v17.4s,v17.4s,v5.4s
1534 + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
1535 + orr v2.16b,v0.16b,v0.16b
1536 + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
1537 + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
1538 + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
1539 + ld1 {v17.4s},[x3],#16
1540 + add v16.4s,v16.4s,v6.4s
1541 + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
1542 + orr v2.16b,v0.16b,v0.16b
1543 + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
1544 + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
1545 + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
1546 + ld1 {v16.4s},[x3],#16
1547 + add v17.4s,v17.4s,v7.4s
1548 + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
1549 + orr v2.16b,v0.16b,v0.16b
1550 + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
1551 + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
1552 + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
1553 + ld1 {v17.4s},[x3],#16
1554 + add v16.4s,v16.4s,v4.4s
1555 + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
1556 + orr v2.16b,v0.16b,v0.16b
1557 + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
1558 + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
1559 + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
1560 + ld1 {v16.4s},[x3],#16
1561 + add v17.4s,v17.4s,v5.4s
1562 + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
1563 + orr v2.16b,v0.16b,v0.16b
1564 + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
1565 + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
1566 + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
1567 + ld1 {v17.4s},[x3],#16
1568 + add v16.4s,v16.4s,v6.4s
1569 + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
1570 + orr v2.16b,v0.16b,v0.16b
1571 + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
1572 + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
1573 + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
1574 + ld1 {v16.4s},[x3],#16
1575 + add v17.4s,v17.4s,v7.4s
1576 + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
1577 + orr v2.16b,v0.16b,v0.16b
1578 + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
1579 + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
1580 + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
1581 + ld1 {v17.4s},[x3],#16
1582 + add v16.4s,v16.4s,v4.4s
1583 + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
1584 + orr v2.16b,v0.16b,v0.16b
1585 + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
1586 + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
1587 + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
1588 + ld1 {v16.4s},[x3],#16
1589 + add v17.4s,v17.4s,v5.4s
1590 + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
1591 + orr v2.16b,v0.16b,v0.16b
1592 + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
1593 + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
1594 + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
1595 + ld1 {v17.4s},[x3],#16
1596 + add v16.4s,v16.4s,v6.4s
1597 + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
1598 + orr v2.16b,v0.16b,v0.16b
1599 + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
1600 + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
1601 + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
1602 + ld1 {v16.4s},[x3],#16
1603 + add v17.4s,v17.4s,v7.4s
1604 + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
1605 + orr v2.16b,v0.16b,v0.16b
1606 + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
1607 + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
1608 + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
1609 + ld1 {v17.4s},[x3],#16
1610 + add v16.4s,v16.4s,v4.4s
1611 + orr v2.16b,v0.16b,v0.16b
1612 + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
1613 + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
1614 +
1615 + ld1 {v16.4s},[x3],#16
1616 + add v17.4s,v17.4s,v5.4s
1617 + orr v2.16b,v0.16b,v0.16b
1618 + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
1619 + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
1620 +
1621 + ld1 {v17.4s},[x3]
1622 + add v16.4s,v16.4s,v6.4s
1623 + sub x3,x3,#64*4-16 // rewind
1624 + orr v2.16b,v0.16b,v0.16b
1625 + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
1626 + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
1627 +
1628 + add v17.4s,v17.4s,v7.4s
1629 + orr v2.16b,v0.16b,v0.16b
1630 + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
1631 + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
1632 +
1633 + add v0.4s,v0.4s,v18.4s
1634 + add v1.4s,v1.4s,v19.4s
1635 +
1636 + cbnz x2,.Loop_hw
1637 +
1638 + st1 {v0.4s,v1.4s},[x0]
1639 +
1640 + ldr x29,[sp],#16
1641 + ret
1642 +.size sha256_block_armv8,.-sha256_block_armv8
1643 +#endif
1644 +#ifdef __KERNEL__
1645 +.globl sha256_block_neon
1646 +#endif
1647 +.type sha256_block_neon,%function
1648 +.align 4
1649 +sha256_block_neon:
1650 +.Lneon_entry:
1651 + stp x29, x30, [sp, #-16]!
1652 + mov x29, sp
1653 + sub sp,sp,#16*4
1654 +
1655 + adr x16,.LK256
1656 + add x2,x1,x2,lsl#6 // len to point at the end of inp
1657 +
1658 + ld1 {v0.16b},[x1], #16
1659 + ld1 {v1.16b},[x1], #16
1660 + ld1 {v2.16b},[x1], #16
1661 + ld1 {v3.16b},[x1], #16
1662 + ld1 {v4.4s},[x16], #16
1663 + ld1 {v5.4s},[x16], #16
1664 + ld1 {v6.4s},[x16], #16
1665 + ld1 {v7.4s},[x16], #16
1666 + rev32 v0.16b,v0.16b // yes, even on
1667 + rev32 v1.16b,v1.16b // big-endian
1668 + rev32 v2.16b,v2.16b
1669 + rev32 v3.16b,v3.16b
1670 + mov x17,sp
1671 + add v4.4s,v4.4s,v0.4s
1672 + add v5.4s,v5.4s,v1.4s
1673 + add v6.4s,v6.4s,v2.4s
1674 + st1 {v4.4s-v5.4s},[x17], #32
1675 + add v7.4s,v7.4s,v3.4s
1676 + st1 {v6.4s-v7.4s},[x17]
1677 + sub x17,x17,#32
1678 +
1679 + ldp w3,w4,[x0]
1680 + ldp w5,w6,[x0,#8]
1681 + ldp w7,w8,[x0,#16]
1682 + ldp w9,w10,[x0,#24]
1683 + ldr w12,[sp,#0]
1684 + mov w13,wzr
1685 + eor w14,w4,w5
1686 + mov w15,wzr
1687 + b .L_00_48
1688 +
1689 +.align 4
1690 +.L_00_48:
1691 + ext v4.16b,v0.16b,v1.16b,#4
1692 + add w10,w10,w12
1693 + add w3,w3,w15
1694 + and w12,w8,w7
1695 + bic w15,w9,w7
1696 + ext v7.16b,v2.16b,v3.16b,#4
1697 + eor w11,w7,w7,ror#5
1698 + add w3,w3,w13
1699 + mov d19,v3.d[1]
1700 + orr w12,w12,w15
1701 + eor w11,w11,w7,ror#19
1702 + ushr v6.4s,v4.4s,#7
1703 + eor w15,w3,w3,ror#11
1704 + ushr v5.4s,v4.4s,#3
1705 + add w10,w10,w12
1706 + add v0.4s,v0.4s,v7.4s
1707 + ror w11,w11,#6
1708 + sli v6.4s,v4.4s,#25
1709 + eor w13,w3,w4
1710 + eor w15,w15,w3,ror#20
1711 + ushr v7.4s,v4.4s,#18
1712 + add w10,w10,w11
1713 + ldr w12,[sp,#4]
1714 + and w14,w14,w13
1715 + eor v5.16b,v5.16b,v6.16b
1716 + ror w15,w15,#2
1717 + add w6,w6,w10
1718 + sli v7.4s,v4.4s,#14
1719 + eor w14,w14,w4
1720 + ushr v16.4s,v19.4s,#17
1721 + add w9,w9,w12
1722 + add w10,w10,w15
1723 + and w12,w7,w6
1724 + eor v5.16b,v5.16b,v7.16b
1725 + bic w15,w8,w6
1726 + eor w11,w6,w6,ror#5
1727 + sli v16.4s,v19.4s,#15
1728 + add w10,w10,w14
1729 + orr w12,w12,w15
1730 + ushr v17.4s,v19.4s,#10
1731 + eor w11,w11,w6,ror#19
1732 + eor w15,w10,w10,ror#11
1733 + ushr v7.4s,v19.4s,#19
1734 + add w9,w9,w12
1735 + ror w11,w11,#6
1736 + add v0.4s,v0.4s,v5.4s
1737 + eor w14,w10,w3
1738 + eor w15,w15,w10,ror#20
1739 + sli v7.4s,v19.4s,#13
1740 + add w9,w9,w11
1741 + ldr w12,[sp,#8]
1742 + and w13,w13,w14
1743 + eor v17.16b,v17.16b,v16.16b
1744 + ror w15,w15,#2
1745 + add w5,w5,w9
1746 + eor w13,w13,w3
1747 + eor v17.16b,v17.16b,v7.16b
1748 + add w8,w8,w12
1749 + add w9,w9,w15
1750 + and w12,w6,w5
1751 + add v0.4s,v0.4s,v17.4s
1752 + bic w15,w7,w5
1753 + eor w11,w5,w5,ror#5
1754 + add w9,w9,w13
1755 + ushr v18.4s,v0.4s,#17
1756 + orr w12,w12,w15
1757 + ushr v19.4s,v0.4s,#10
1758 + eor w11,w11,w5,ror#19
1759 + eor w15,w9,w9,ror#11
1760 + sli v18.4s,v0.4s,#15
1761 + add w8,w8,w12
1762 + ushr v17.4s,v0.4s,#19
1763 + ror w11,w11,#6
1764 + eor w13,w9,w10
1765 + eor v19.16b,v19.16b,v18.16b
1766 + eor w15,w15,w9,ror#20
1767 + add w8,w8,w11
1768 + sli v17.4s,v0.4s,#13
1769 + ldr w12,[sp,#12]
1770 + and w14,w14,w13
1771 + ror w15,w15,#2
1772 + ld1 {v4.4s},[x16], #16
1773 + add w4,w4,w8
1774 + eor v19.16b,v19.16b,v17.16b
1775 + eor w14,w14,w10
1776 + eor v17.16b,v17.16b,v17.16b
1777 + add w7,w7,w12
1778 + add w8,w8,w15
1779 + and w12,w5,w4
1780 + mov v17.d[1],v19.d[0]
1781 + bic w15,w6,w4
1782 + eor w11,w4,w4,ror#5
1783 + add w8,w8,w14
1784 + add v0.4s,v0.4s,v17.4s
1785 + orr w12,w12,w15
1786 + eor w11,w11,w4,ror#19
1787 + eor w15,w8,w8,ror#11
1788 + add v4.4s,v4.4s,v0.4s
1789 + add w7,w7,w12
1790 + ror w11,w11,#6
1791 + eor w14,w8,w9
1792 + eor w15,w15,w8,ror#20
1793 + add w7,w7,w11
1794 + ldr w12,[sp,#16]
1795 + and w13,w13,w14
1796 + ror w15,w15,#2
1797 + add w3,w3,w7
1798 + eor w13,w13,w9
1799 + st1 {v4.4s},[x17], #16
1800 + ext v4.16b,v1.16b,v2.16b,#4
1801 + add w6,w6,w12
1802 + add w7,w7,w15
1803 + and w12,w4,w3
1804 + bic w15,w5,w3
1805 + ext v7.16b,v3.16b,v0.16b,#4
1806 + eor w11,w3,w3,ror#5
1807 + add w7,w7,w13
1808 + mov d19,v0.d[1]
1809 + orr w12,w12,w15
1810 + eor w11,w11,w3,ror#19
1811 + ushr v6.4s,v4.4s,#7
1812 + eor w15,w7,w7,ror#11
1813 + ushr v5.4s,v4.4s,#3
1814 + add w6,w6,w12
1815 + add v1.4s,v1.4s,v7.4s
1816 + ror w11,w11,#6
1817 + sli v6.4s,v4.4s,#25
1818 + eor w13,w7,w8
1819 + eor w15,w15,w7,ror#20
1820 + ushr v7.4s,v4.4s,#18
1821 + add w6,w6,w11
1822 + ldr w12,[sp,#20]
1823 + and w14,w14,w13
1824 + eor v5.16b,v5.16b,v6.16b
1825 + ror w15,w15,#2
1826 + add w10,w10,w6
1827 + sli v7.4s,v4.4s,#14
1828 + eor w14,w14,w8
1829 + ushr v16.4s,v19.4s,#17
1830 + add w5,w5,w12
1831 + add w6,w6,w15
1832 + and w12,w3,w10
1833 + eor v5.16b,v5.16b,v7.16b
1834 + bic w15,w4,w10
1835 + eor w11,w10,w10,ror#5
1836 + sli v16.4s,v19.4s,#15
1837 + add w6,w6,w14
1838 + orr w12,w12,w15
1839 + ushr v17.4s,v19.4s,#10
1840 + eor w11,w11,w10,ror#19
1841 + eor w15,w6,w6,ror#11
1842 + ushr v7.4s,v19.4s,#19
1843 + add w5,w5,w12
1844 + ror w11,w11,#6
1845 + add v1.4s,v1.4s,v5.4s
1846 + eor w14,w6,w7
1847 + eor w15,w15,w6,ror#20
1848 + sli v7.4s,v19.4s,#13
1849 + add w5,w5,w11
1850 + ldr w12,[sp,#24]
1851 + and w13,w13,w14
1852 + eor v17.16b,v17.16b,v16.16b
1853 + ror w15,w15,#2
1854 + add w9,w9,w5
1855 + eor w13,w13,w7
1856 + eor v17.16b,v17.16b,v7.16b
1857 + add w4,w4,w12
1858 + add w5,w5,w15
1859 + and w12,w10,w9
1860 + add v1.4s,v1.4s,v17.4s
1861 + bic w15,w3,w9
1862 + eor w11,w9,w9,ror#5
1863 + add w5,w5,w13
1864 + ushr v18.4s,v1.4s,#17
1865 + orr w12,w12,w15
1866 + ushr v19.4s,v1.4s,#10
1867 + eor w11,w11,w9,ror#19
1868 + eor w15,w5,w5,ror#11
1869 + sli v18.4s,v1.4s,#15
1870 + add w4,w4,w12
1871 + ushr v17.4s,v1.4s,#19
1872 + ror w11,w11,#6
1873 + eor w13,w5,w6
1874 + eor v19.16b,v19.16b,v18.16b
1875 + eor w15,w15,w5,ror#20
1876 + add w4,w4,w11
1877 + sli v17.4s,v1.4s,#13
1878 + ldr w12,[sp,#28]
1879 + and w14,w14,w13
1880 + ror w15,w15,#2
1881 + ld1 {v4.4s},[x16], #16
1882 + add w8,w8,w4
1883 + eor v19.16b,v19.16b,v17.16b
1884 + eor w14,w14,w6
1885 + eor v17.16b,v17.16b,v17.16b
1886 + add w3,w3,w12
1887 + add w4,w4,w15
1888 + and w12,w9,w8
1889 + mov v17.d[1],v19.d[0]
1890 + bic w15,w10,w8
1891 + eor w11,w8,w8,ror#5
1892 + add w4,w4,w14
1893 + add v1.4s,v1.4s,v17.4s
1894 + orr w12,w12,w15
1895 + eor w11,w11,w8,ror#19
1896 + eor w15,w4,w4,ror#11
1897 + add v4.4s,v4.4s,v1.4s
1898 + add w3,w3,w12
1899 + ror w11,w11,#6
1900 + eor w14,w4,w5
1901 + eor w15,w15,w4,ror#20
1902 + add w3,w3,w11
1903 + ldr w12,[sp,#32]
1904 + and w13,w13,w14
1905 + ror w15,w15,#2
1906 + add w7,w7,w3
1907 + eor w13,w13,w5
1908 + st1 {v4.4s},[x17], #16
1909 + ext v4.16b,v2.16b,v3.16b,#4
1910 + add w10,w10,w12
1911 + add w3,w3,w15
1912 + and w12,w8,w7
1913 + bic w15,w9,w7
1914 + ext v7.16b,v0.16b,v1.16b,#4
1915 + eor w11,w7,w7,ror#5
1916 + add w3,w3,w13
1917 + mov d19,v1.d[1]
1918 + orr w12,w12,w15
1919 + eor w11,w11,w7,ror#19
1920 + ushr v6.4s,v4.4s,#7
1921 + eor w15,w3,w3,ror#11
1922 + ushr v5.4s,v4.4s,#3
1923 + add w10,w10,w12
1924 + add v2.4s,v2.4s,v7.4s
1925 + ror w11,w11,#6
1926 + sli v6.4s,v4.4s,#25
1927 + eor w13,w3,w4
1928 + eor w15,w15,w3,ror#20
1929 + ushr v7.4s,v4.4s,#18
1930 + add w10,w10,w11
1931 + ldr w12,[sp,#36]
1932 + and w14,w14,w13
1933 + eor v5.16b,v5.16b,v6.16b
1934 + ror w15,w15,#2
1935 + add w6,w6,w10
1936 + sli v7.4s,v4.4s,#14
1937 + eor w14,w14,w4
1938 + ushr v16.4s,v19.4s,#17
1939 + add w9,w9,w12
1940 + add w10,w10,w15
1941 + and w12,w7,w6
1942 + eor v5.16b,v5.16b,v7.16b
1943 + bic w15,w8,w6
1944 + eor w11,w6,w6,ror#5
1945 + sli v16.4s,v19.4s,#15
1946 + add w10,w10,w14
1947 + orr w12,w12,w15
1948 + ushr v17.4s,v19.4s,#10
1949 + eor w11,w11,w6,ror#19
1950 + eor w15,w10,w10,ror#11
1951 + ushr v7.4s,v19.4s,#19
1952 + add w9,w9,w12
1953 + ror w11,w11,#6
1954 + add v2.4s,v2.4s,v5.4s
1955 + eor w14,w10,w3
1956 + eor w15,w15,w10,ror#20
1957 + sli v7.4s,v19.4s,#13
1958 + add w9,w9,w11
1959 + ldr w12,[sp,#40]
1960 + and w13,w13,w14
1961 + eor v17.16b,v17.16b,v16.16b
1962 + ror w15,w15,#2
1963 + add w5,w5,w9
1964 + eor w13,w13,w3
1965 + eor v17.16b,v17.16b,v7.16b
1966 + add w8,w8,w12
1967 + add w9,w9,w15
1968 + and w12,w6,w5
1969 + add v2.4s,v2.4s,v17.4s
1970 + bic w15,w7,w5
1971 + eor w11,w5,w5,ror#5
1972 + add w9,w9,w13
1973 + ushr v18.4s,v2.4s,#17
1974 + orr w12,w12,w15
1975 + ushr v19.4s,v2.4s,#10
1976 + eor w11,w11,w5,ror#19
1977 + eor w15,w9,w9,ror#11
1978 + sli v18.4s,v2.4s,#15
1979 + add w8,w8,w12
1980 + ushr v17.4s,v2.4s,#19
1981 + ror w11,w11,#6
1982 + eor w13,w9,w10
1983 + eor v19.16b,v19.16b,v18.16b
1984 + eor w15,w15,w9,ror#20
1985 + add w8,w8,w11
1986 + sli v17.4s,v2.4s,#13
1987 + ldr w12,[sp,#44]
1988 + and w14,w14,w13
1989 + ror w15,w15,#2
1990 + ld1 {v4.4s},[x16], #16
1991 + add w4,w4,w8
1992 + eor v19.16b,v19.16b,v17.16b
1993 + eor w14,w14,w10
1994 + eor v17.16b,v17.16b,v17.16b
1995 + add w7,w7,w12
1996 + add w8,w8,w15
1997 + and w12,w5,w4
1998 + mov v17.d[1],v19.d[0]
1999 + bic w15,w6,w4
2000 + eor w11,w4,w4,ror#5
2001 + add w8,w8,w14
2002 + add v2.4s,v2.4s,v17.4s
2003 + orr w12,w12,w15
2004 + eor w11,w11,w4,ror#19
2005 + eor w15,w8,w8,ror#11
2006 + add v4.4s,v4.4s,v2.4s
2007 + add w7,w7,w12
2008 + ror w11,w11,#6
2009 + eor w14,w8,w9
2010 + eor w15,w15,w8,ror#20
2011 + add w7,w7,w11
2012 + ldr w12,[sp,#48]
2013 + and w13,w13,w14
2014 + ror w15,w15,#2
2015 + add w3,w3,w7
2016 + eor w13,w13,w9
2017 + st1 {v4.4s},[x17], #16
2018 + ext v4.16b,v3.16b,v0.16b,#4
2019 + add w6,w6,w12
2020 + add w7,w7,w15
2021 + and w12,w4,w3
2022 + bic w15,w5,w3
2023 + ext v7.16b,v1.16b,v2.16b,#4
2024 + eor w11,w3,w3,ror#5
2025 + add w7,w7,w13
2026 + mov d19,v2.d[1]
2027 + orr w12,w12,w15
2028 + eor w11,w11,w3,ror#19
2029 + ushr v6.4s,v4.4s,#7
2030 + eor w15,w7,w7,ror#11
2031 + ushr v5.4s,v4.4s,#3
2032 + add w6,w6,w12
2033 + add v3.4s,v3.4s,v7.4s
2034 + ror w11,w11,#6
2035 + sli v6.4s,v4.4s,#25
2036 + eor w13,w7,w8
2037 + eor w15,w15,w7,ror#20
2038 + ushr v7.4s,v4.4s,#18
2039 + add w6,w6,w11
2040 + ldr w12,[sp,#52]
2041 + and w14,w14,w13
2042 + eor v5.16b,v5.16b,v6.16b
2043 + ror w15,w15,#2
2044 + add w10,w10,w6
2045 + sli v7.4s,v4.4s,#14
2046 + eor w14,w14,w8
2047 + ushr v16.4s,v19.4s,#17
2048 + add w5,w5,w12
2049 + add w6,w6,w15
2050 + and w12,w3,w10
2051 + eor v5.16b,v5.16b,v7.16b
2052 + bic w15,w4,w10
2053 + eor w11,w10,w10,ror#5
2054 + sli v16.4s,v19.4s,#15
2055 + add w6,w6,w14
2056 + orr w12,w12,w15
2057 + ushr v17.4s,v19.4s,#10
2058 + eor w11,w11,w10,ror#19
2059 + eor w15,w6,w6,ror#11
2060 + ushr v7.4s,v19.4s,#19
2061 + add w5,w5,w12
2062 + ror w11,w11,#6
2063 + add v3.4s,v3.4s,v5.4s
2064 + eor w14,w6,w7
2065 + eor w15,w15,w6,ror#20
2066 + sli v7.4s,v19.4s,#13
2067 + add w5,w5,w11
2068 + ldr w12,[sp,#56]
2069 + and w13,w13,w14
2070 + eor v17.16b,v17.16b,v16.16b
2071 + ror w15,w15,#2
2072 + add w9,w9,w5
2073 + eor w13,w13,w7
2074 + eor v17.16b,v17.16b,v7.16b
2075 + add w4,w4,w12
2076 + add w5,w5,w15
2077 + and w12,w10,w9
2078 + add v3.4s,v3.4s,v17.4s
2079 + bic w15,w3,w9
2080 + eor w11,w9,w9,ror#5
2081 + add w5,w5,w13
2082 + ushr v18.4s,v3.4s,#17
2083 + orr w12,w12,w15
2084 + ushr v19.4s,v3.4s,#10
2085 + eor w11,w11,w9,ror#19
2086 + eor w15,w5,w5,ror#11
2087 + sli v18.4s,v3.4s,#15
2088 + add w4,w4,w12
2089 + ushr v17.4s,v3.4s,#19
2090 + ror w11,w11,#6
2091 + eor w13,w5,w6
2092 + eor v19.16b,v19.16b,v18.16b
2093 + eor w15,w15,w5,ror#20
2094 + add w4,w4,w11
2095 + sli v17.4s,v3.4s,#13
2096 + ldr w12,[sp,#60]
2097 + and w14,w14,w13
2098 + ror w15,w15,#2
2099 + ld1 {v4.4s},[x16], #16
2100 + add w8,w8,w4
2101 + eor v19.16b,v19.16b,v17.16b
2102 + eor w14,w14,w6
2103 + eor v17.16b,v17.16b,v17.16b
2104 + add w3,w3,w12
2105 + add w4,w4,w15
2106 + and w12,w9,w8
2107 + mov v17.d[1],v19.d[0]
2108 + bic w15,w10,w8
2109 + eor w11,w8,w8,ror#5
2110 + add w4,w4,w14
2111 + add v3.4s,v3.4s,v17.4s
2112 + orr w12,w12,w15
2113 + eor w11,w11,w8,ror#19
2114 + eor w15,w4,w4,ror#11
2115 + add v4.4s,v4.4s,v3.4s
2116 + add w3,w3,w12
2117 + ror w11,w11,#6
2118 + eor w14,w4,w5
2119 + eor w15,w15,w4,ror#20
2120 + add w3,w3,w11
2121 + ldr w12,[x16]
2122 + and w13,w13,w14
2123 + ror w15,w15,#2
2124 + add w7,w7,w3
2125 + eor w13,w13,w5
2126 + st1 {v4.4s},[x17], #16
2127 + cmp w12,#0 // check for K256 terminator
2128 + ldr w12,[sp,#0]
2129 + sub x17,x17,#64
2130 + bne .L_00_48
2131 +
2132 + sub x16,x16,#256 // rewind x16
2133 + cmp x1,x2
2134 + mov x17, #64
2135 + csel x17, x17, xzr, eq
2136 + sub x1,x1,x17 // avoid SEGV
2137 + mov x17,sp
2138 + add w10,w10,w12
2139 + add w3,w3,w15
2140 + and w12,w8,w7
2141 + ld1 {v0.16b},[x1],#16
2142 + bic w15,w9,w7
2143 + eor w11,w7,w7,ror#5
2144 + ld1 {v4.4s},[x16],#16
2145 + add w3,w3,w13
2146 + orr w12,w12,w15
2147 + eor w11,w11,w7,ror#19
2148 + eor w15,w3,w3,ror#11
2149 + rev32 v0.16b,v0.16b
2150 + add w10,w10,w12
2151 + ror w11,w11,#6
2152 + eor w13,w3,w4
2153 + eor w15,w15,w3,ror#20
2154 + add v4.4s,v4.4s,v0.4s
2155 + add w10,w10,w11
2156 + ldr w12,[sp,#4]
2157 + and w14,w14,w13
2158 + ror w15,w15,#2
2159 + add w6,w6,w10
2160 + eor w14,w14,w4
2161 + add w9,w9,w12
2162 + add w10,w10,w15
2163 + and w12,w7,w6
2164 + bic w15,w8,w6
2165 + eor w11,w6,w6,ror#5
2166 + add w10,w10,w14
2167 + orr w12,w12,w15
2168 + eor w11,w11,w6,ror#19
2169 + eor w15,w10,w10,ror#11
2170 + add w9,w9,w12
2171 + ror w11,w11,#6
2172 + eor w14,w10,w3
2173 + eor w15,w15,w10,ror#20
2174 + add w9,w9,w11
2175 + ldr w12,[sp,#8]
2176 + and w13,w13,w14
2177 + ror w15,w15,#2
2178 + add w5,w5,w9
2179 + eor w13,w13,w3
2180 + add w8,w8,w12
2181 + add w9,w9,w15
2182 + and w12,w6,w5
2183 + bic w15,w7,w5
2184 + eor w11,w5,w5,ror#5
2185 + add w9,w9,w13
2186 + orr w12,w12,w15
2187 + eor w11,w11,w5,ror#19
2188 + eor w15,w9,w9,ror#11
2189 + add w8,w8,w12
2190 + ror w11,w11,#6
2191 + eor w13,w9,w10
2192 + eor w15,w15,w9,ror#20
2193 + add w8,w8,w11
2194 + ldr w12,[sp,#12]
2195 + and w14,w14,w13
2196 + ror w15,w15,#2
2197 + add w4,w4,w8
2198 + eor w14,w14,w10
2199 + add w7,w7,w12
2200 + add w8,w8,w15
2201 + and w12,w5,w4
2202 + bic w15,w6,w4
2203 + eor w11,w4,w4,ror#5
2204 + add w8,w8,w14
2205 + orr w12,w12,w15
2206 + eor w11,w11,w4,ror#19
2207 + eor w15,w8,w8,ror#11
2208 + add w7,w7,w12
2209 + ror w11,w11,#6
2210 + eor w14,w8,w9
2211 + eor w15,w15,w8,ror#20
2212 + add w7,w7,w11
2213 + ldr w12,[sp,#16]
2214 + and w13,w13,w14
2215 + ror w15,w15,#2
2216 + add w3,w3,w7
2217 + eor w13,w13,w9
2218 + st1 {v4.4s},[x17], #16
2219 + add w6,w6,w12
2220 + add w7,w7,w15
2221 + and w12,w4,w3
2222 + ld1 {v1.16b},[x1],#16
2223 + bic w15,w5,w3
2224 + eor w11,w3,w3,ror#5
2225 + ld1 {v4.4s},[x16],#16
2226 + add w7,w7,w13
2227 + orr w12,w12,w15
2228 + eor w11,w11,w3,ror#19
2229 + eor w15,w7,w7,ror#11
2230 + rev32 v1.16b,v1.16b
2231 + add w6,w6,w12
2232 + ror w11,w11,#6
2233 + eor w13,w7,w8
2234 + eor w15,w15,w7,ror#20
2235 + add v4.4s,v4.4s,v1.4s
2236 + add w6,w6,w11
2237 + ldr w12,[sp,#20]
2238 + and w14,w14,w13
2239 + ror w15,w15,#2
2240 + add w10,w10,w6
2241 + eor w14,w14,w8
2242 + add w5,w5,w12
2243 + add w6,w6,w15
2244 + and w12,w3,w10
2245 + bic w15,w4,w10
2246 + eor w11,w10,w10,ror#5
2247 + add w6,w6,w14
2248 + orr w12,w12,w15
2249 + eor w11,w11,w10,ror#19
2250 + eor w15,w6,w6,ror#11
2251 + add w5,w5,w12
2252 + ror w11,w11,#6
2253 + eor w14,w6,w7
2254 + eor w15,w15,w6,ror#20
2255 + add w5,w5,w11
2256 + ldr w12,[sp,#24]
2257 + and w13,w13,w14
2258 + ror w15,w15,#2
2259 + add w9,w9,w5
2260 + eor w13,w13,w7
2261 + add w4,w4,w12
2262 + add w5,w5,w15
2263 + and w12,w10,w9
2264 + bic w15,w3,w9
2265 + eor w11,w9,w9,ror#5
2266 + add w5,w5,w13
2267 + orr w12,w12,w15
2268 + eor w11,w11,w9,ror#19
2269 + eor w15,w5,w5,ror#11
2270 + add w4,w4,w12
2271 + ror w11,w11,#6
2272 + eor w13,w5,w6
2273 + eor w15,w15,w5,ror#20
2274 + add w4,w4,w11
2275 + ldr w12,[sp,#28]
2276 + and w14,w14,w13
2277 + ror w15,w15,#2
2278 + add w8,w8,w4
2279 + eor w14,w14,w6
2280 + add w3,w3,w12
2281 + add w4,w4,w15
2282 + and w12,w9,w8
2283 + bic w15,w10,w8
2284 + eor w11,w8,w8,ror#5
2285 + add w4,w4,w14
2286 + orr w12,w12,w15
2287 + eor w11,w11,w8,ror#19
2288 + eor w15,w4,w4,ror#11
2289 + add w3,w3,w12
2290 + ror w11,w11,#6
2291 + eor w14,w4,w5
2292 + eor w15,w15,w4,ror#20
2293 + add w3,w3,w11
2294 + ldr w12,[sp,#32]
2295 + and w13,w13,w14
2296 + ror w15,w15,#2
2297 + add w7,w7,w3
2298 + eor w13,w13,w5
2299 + st1 {v4.4s},[x17], #16
2300 + add w10,w10,w12
2301 + add w3,w3,w15
2302 + and w12,w8,w7
2303 + ld1 {v2.16b},[x1],#16
2304 + bic w15,w9,w7
2305 + eor w11,w7,w7,ror#5
2306 + ld1 {v4.4s},[x16],#16
2307 + add w3,w3,w13
2308 + orr w12,w12,w15
2309 + eor w11,w11,w7,ror#19
2310 + eor w15,w3,w3,ror#11
2311 + rev32 v2.16b,v2.16b
2312 + add w10,w10,w12
2313 + ror w11,w11,#6
2314 + eor w13,w3,w4
2315 + eor w15,w15,w3,ror#20
2316 + add v4.4s,v4.4s,v2.4s
2317 + add w10,w10,w11
2318 + ldr w12,[sp,#36]
2319 + and w14,w14,w13
2320 + ror w15,w15,#2
2321 + add w6,w6,w10
2322 + eor w14,w14,w4
2323 + add w9,w9,w12
2324 + add w10,w10,w15
2325 + and w12,w7,w6
2326 + bic w15,w8,w6
2327 + eor w11,w6,w6,ror#5
2328 + add w10,w10,w14
2329 + orr w12,w12,w15
2330 + eor w11,w11,w6,ror#19
2331 + eor w15,w10,w10,ror#11
2332 + add w9,w9,w12
2333 + ror w11,w11,#6
2334 + eor w14,w10,w3
2335 + eor w15,w15,w10,ror#20
2336 + add w9,w9,w11
2337 + ldr w12,[sp,#40]
2338 + and w13,w13,w14
2339 + ror w15,w15,#2
2340 + add w5,w5,w9
2341 + eor w13,w13,w3
2342 + add w8,w8,w12
2343 + add w9,w9,w15
2344 + and w12,w6,w5
2345 + bic w15,w7,w5
2346 + eor w11,w5,w5,ror#5
2347 + add w9,w9,w13
2348 + orr w12,w12,w15
2349 + eor w11,w11,w5,ror#19
2350 + eor w15,w9,w9,ror#11
2351 + add w8,w8,w12
2352 + ror w11,w11,#6
2353 + eor w13,w9,w10
2354 + eor w15,w15,w9,ror#20
2355 + add w8,w8,w11
2356 + ldr w12,[sp,#44]
2357 + and w14,w14,w13
2358 + ror w15,w15,#2
2359 + add w4,w4,w8
2360 + eor w14,w14,w10
2361 + add w7,w7,w12
2362 + add w8,w8,w15
2363 + and w12,w5,w4
2364 + bic w15,w6,w4
2365 + eor w11,w4,w4,ror#5
2366 + add w8,w8,w14
2367 + orr w12,w12,w15
2368 + eor w11,w11,w4,ror#19
2369 + eor w15,w8,w8,ror#11
2370 + add w7,w7,w12
2371 + ror w11,w11,#6
2372 + eor w14,w8,w9
2373 + eor w15,w15,w8,ror#20
2374 + add w7,w7,w11
2375 + ldr w12,[sp,#48]
2376 + and w13,w13,w14
2377 + ror w15,w15,#2
2378 + add w3,w3,w7
2379 + eor w13,w13,w9
2380 + st1 {v4.4s},[x17], #16
2381 + add w6,w6,w12
2382 + add w7,w7,w15
2383 + and w12,w4,w3
2384 + ld1 {v3.16b},[x1],#16
2385 + bic w15,w5,w3
2386 + eor w11,w3,w3,ror#5
2387 + ld1 {v4.4s},[x16],#16
2388 + add w7,w7,w13
2389 + orr w12,w12,w15
2390 + eor w11,w11,w3,ror#19
2391 + eor w15,w7,w7,ror#11
2392 + rev32 v3.16b,v3.16b
2393 + add w6,w6,w12
2394 + ror w11,w11,#6
2395 + eor w13,w7,w8
2396 + eor w15,w15,w7,ror#20
2397 + add v4.4s,v4.4s,v3.4s
2398 + add w6,w6,w11
2399 + ldr w12,[sp,#52]
2400 + and w14,w14,w13
2401 + ror w15,w15,#2
2402 + add w10,w10,w6
2403 + eor w14,w14,w8
2404 + add w5,w5,w12
2405 + add w6,w6,w15
2406 + and w12,w3,w10
2407 + bic w15,w4,w10
2408 + eor w11,w10,w10,ror#5
2409 + add w6,w6,w14
2410 + orr w12,w12,w15
2411 + eor w11,w11,w10,ror#19
2412 + eor w15,w6,w6,ror#11
2413 + add w5,w5,w12
2414 + ror w11,w11,#6
2415 + eor w14,w6,w7
2416 + eor w15,w15,w6,ror#20
2417 + add w5,w5,w11
2418 + ldr w12,[sp,#56]
2419 + and w13,w13,w14
2420 + ror w15,w15,#2
2421 + add w9,w9,w5
2422 + eor w13,w13,w7
2423 + add w4,w4,w12
2424 + add w5,w5,w15
2425 + and w12,w10,w9
2426 + bic w15,w3,w9
2427 + eor w11,w9,w9,ror#5
2428 + add w5,w5,w13
2429 + orr w12,w12,w15
2430 + eor w11,w11,w9,ror#19
2431 + eor w15,w5,w5,ror#11
2432 + add w4,w4,w12
2433 + ror w11,w11,#6
2434 + eor w13,w5,w6
2435 + eor w15,w15,w5,ror#20
2436 + add w4,w4,w11
2437 + ldr w12,[sp,#60]
2438 + and w14,w14,w13
2439 + ror w15,w15,#2
2440 + add w8,w8,w4
2441 + eor w14,w14,w6
2442 + add w3,w3,w12
2443 + add w4,w4,w15
2444 + and w12,w9,w8
2445 + bic w15,w10,w8
2446 + eor w11,w8,w8,ror#5
2447 + add w4,w4,w14
2448 + orr w12,w12,w15
2449 + eor w11,w11,w8,ror#19
2450 + eor w15,w4,w4,ror#11
2451 + add w3,w3,w12
2452 + ror w11,w11,#6
2453 + eor w14,w4,w5
2454 + eor w15,w15,w4,ror#20
2455 + add w3,w3,w11
2456 + and w13,w13,w14
2457 + ror w15,w15,#2
2458 + add w7,w7,w3
2459 + eor w13,w13,w5
2460 + st1 {v4.4s},[x17], #16
2461 + add w3,w3,w15 // h+=Sigma0(a) from the past
2462 + ldp w11,w12,[x0,#0]
2463 + add w3,w3,w13 // h+=Maj(a,b,c) from the past
2464 + ldp w13,w14,[x0,#8]
2465 + add w3,w3,w11 // accumulate
2466 + add w4,w4,w12
2467 + ldp w11,w12,[x0,#16]
2468 + add w5,w5,w13
2469 + add w6,w6,w14
2470 + ldp w13,w14,[x0,#24]
2471 + add w7,w7,w11
2472 + add w8,w8,w12
2473 + ldr w12,[sp,#0]
2474 + stp w3,w4,[x0,#0]
2475 + add w9,w9,w13
2476 + mov w13,wzr
2477 + stp w5,w6,[x0,#8]
2478 + add w10,w10,w14
2479 + stp w7,w8,[x0,#16]
2480 + eor w14,w4,w5
2481 + stp w9,w10,[x0,#24]
2482 + mov w15,wzr
2483 + mov x17,sp
2484 + b.ne .L_00_48
2485 +
2486 + ldr x29,[x29]
2487 + add sp,sp,#16*4+16
2488 + ret
2489 +.size sha256_block_neon,.-sha256_block_neon
2490 +#ifndef __KERNEL__
2491 +.comm OPENSSL_armcap_P,4,4
2492 +#endif
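For reference (an illustrative aside, not part of the patch itself): in the NEON schedule above, each ushr #n paired with an sli #(32-n) into the same destination is a 32-bit rotate right by n, while the unpaired ushr is a plain shift. Under that reading the vector code computes the standard SHA-256 message-expansion functions; a minimal C sketch, with helper names of my own choosing, would be:

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned int n)
{
	return (x >> n) | (x << (32 - n));
}

/* ushr #7 + sli #25 -> ror 7; ushr #18 + sli #14 -> ror 18; ushr #3 -> shift */
static inline uint32_t sha256_sigma0(uint32_t x)
{
	return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}

/* ushr #17 + sli #15 -> ror 17; ushr #19 + sli #13 -> ror 19; ushr #10 -> shift */
static inline uint32_t sha256_sigma1(uint32_t x)
{
	return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}

/* Message expansion, which the vector loop evaluates four words at a time:
 *   W[i] = sha256_sigma1(W[i-2]) + W[i-7] + sha256_sigma0(W[i-15]) + W[i-16]
 */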
2493 diff --git a/arch/arm64/crypto/sha512-core.S b/arch/arm64/crypto/sha512-core.S
2494 new file mode 100644
2495 index 000000000000..bd0f59f06c9d
2496 --- /dev/null
2497 +++ b/arch/arm64/crypto/sha512-core.S
2498 @@ -0,0 +1,1085 @@
2499 +// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
2500 +//
2501 +// Licensed under the OpenSSL license (the "License"). You may not use
2502 +// this file except in compliance with the License. You can obtain a copy
2503 +// in the file LICENSE in the source distribution or at
2504 +// https://www.openssl.org/source/license.html
2505 +
2506 +// ====================================================================
2507 +// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
2508 +// project. The module is, however, dual licensed under OpenSSL and
2509 +// CRYPTOGAMS licenses depending on where you obtain it. For further
2510 +// details see http://www.openssl.org/~appro/cryptogams/.
2511 +//
2512 +// Permission to use under GPLv2 terms is granted.
2513 +// ====================================================================
2514 +//
2515 +// SHA256/512 for ARMv8.
2516 +//
2517 +// Performance in cycles per processed byte and improvement coefficient
2518 +// over code generated with "default" compiler:
2519 +//
2520 +// SHA256-hw SHA256(*) SHA512
2521 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
2522 +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***))
2523 +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
2524 +// Denver 2.01 10.5 (+26%) 6.70 (+8%)
2525 +// X-Gene 20.0 (+100%) 12.8 (+300%(***))
2526 +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
2527 +//
2528 +// (*) Software SHA256 results are of lesser relevance, presented
2529 +// mostly for informational purposes.
2530 +// (**) The result is a trade-off: it's possible to improve it by
2531 +// 10% (or by 1 cycle per round), but at the cost of 20% loss
2532 +// on Cortex-A53 (or by 4 cycles per round).
2533 +// (***) Super-impressive coefficients over gcc-generated code are
2534 +// indication of some compiler "pathology", most notably code
2535 +// generated with -mgeneral-regs-only is significantly faster
2536 +// and the gap is only 40-90%.
2537 +//
2538 +// October 2016.
2539 +//
2540 +// Originally it was reckoned that it makes no sense to implement NEON
2541 +// version of SHA256 for 64-bit processors. This is because performance
2542 +// improvement on most wide-spread Cortex-A5x processors was observed
2543 +// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
2544 +// observed that 32-bit NEON SHA256 performs significantly better than
2545 +// 64-bit scalar version on *some* of the more recent processors. As
2546 +// result 64-bit NEON version of SHA256 was added to provide best
2547 +// all-round performance. For example it executes ~30% faster on X-Gene
2548 +// and Mongoose. [For reference, NEON version of SHA512 is bound to
2549 +// deliver much less improvement, likely *negative* on Cortex-A5x.
2550 +// Which is why NEON support is limited to SHA256.]
2551 +
2552 +#ifndef __KERNEL__
2553 +# include "arm_arch.h"
2554 +#endif
2555 +
2556 +.text
2557 +
2558 +.extern OPENSSL_armcap_P
2559 +.globl sha512_block_data_order
2560 +.type sha512_block_data_order,%function
2561 +.align 6
2562 +sha512_block_data_order:
2563 + stp x29,x30,[sp,#-128]!
2564 + add x29,sp,#0
2565 +
2566 + stp x19,x20,[sp,#16]
2567 + stp x21,x22,[sp,#32]
2568 + stp x23,x24,[sp,#48]
2569 + stp x25,x26,[sp,#64]
2570 + stp x27,x28,[sp,#80]
2571 + sub sp,sp,#4*8
2572 +
2573 + ldp x20,x21,[x0] // load context
2574 + ldp x22,x23,[x0,#2*8]
2575 + ldp x24,x25,[x0,#4*8]
2576 + add x2,x1,x2,lsl#7 // end of input
2577 + ldp x26,x27,[x0,#6*8]
2578 + adr x30,.LK512
2579 + stp x0,x2,[x29,#96]
2580 +
2581 +.Loop:
2582 + ldp x3,x4,[x1],#2*8
2583 + ldr x19,[x30],#8 // *K++
2584 + eor x28,x21,x22 // magic seed
2585 + str x1,[x29,#112]
2586 +#ifndef __AARCH64EB__
2587 + rev x3,x3 // 0
2588 +#endif
2589 + ror x16,x24,#14
2590 + add x27,x27,x19 // h+=K[i]
2591 + eor x6,x24,x24,ror#23
2592 + and x17,x25,x24
2593 + bic x19,x26,x24
2594 + add x27,x27,x3 // h+=X[i]
2595 + orr x17,x17,x19 // Ch(e,f,g)
2596 + eor x19,x20,x21 // a^b, b^c in next round
2597 + eor x16,x16,x6,ror#18 // Sigma1(e)
2598 + ror x6,x20,#28
2599 + add x27,x27,x17 // h+=Ch(e,f,g)
2600 + eor x17,x20,x20,ror#5
2601 + add x27,x27,x16 // h+=Sigma1(e)
2602 + and x28,x28,x19 // (b^c)&=(a^b)
2603 + add x23,x23,x27 // d+=h
2604 + eor x28,x28,x21 // Maj(a,b,c)
2605 + eor x17,x6,x17,ror#34 // Sigma0(a)
2606 + add x27,x27,x28 // h+=Maj(a,b,c)
2607 + ldr x28,[x30],#8 // *K++, x19 in next round
2608 + //add x27,x27,x17 // h+=Sigma0(a)
2609 +#ifndef __AARCH64EB__
2610 + rev x4,x4 // 1
2611 +#endif
2612 + ldp x5,x6,[x1],#2*8
2613 + add x27,x27,x17 // h+=Sigma0(a)
2614 + ror x16,x23,#14
2615 + add x26,x26,x28 // h+=K[i]
2616 + eor x7,x23,x23,ror#23
2617 + and x17,x24,x23
2618 + bic x28,x25,x23
2619 + add x26,x26,x4 // h+=X[i]
2620 + orr x17,x17,x28 // Ch(e,f,g)
2621 + eor x28,x27,x20 // a^b, b^c in next round
2622 + eor x16,x16,x7,ror#18 // Sigma1(e)
2623 + ror x7,x27,#28
2624 + add x26,x26,x17 // h+=Ch(e,f,g)
2625 + eor x17,x27,x27,ror#5
2626 + add x26,x26,x16 // h+=Sigma1(e)
2627 + and x19,x19,x28 // (b^c)&=(a^b)
2628 + add x22,x22,x26 // d+=h
2629 + eor x19,x19,x20 // Maj(a,b,c)
2630 + eor x17,x7,x17,ror#34 // Sigma0(a)
2631 + add x26,x26,x19 // h+=Maj(a,b,c)
2632 + ldr x19,[x30],#8 // *K++, x28 in next round
2633 + //add x26,x26,x17 // h+=Sigma0(a)
2634 +#ifndef __AARCH64EB__
2635 + rev x5,x5 // 2
2636 +#endif
2637 + add x26,x26,x17 // h+=Sigma0(a)
2638 + ror x16,x22,#14
2639 + add x25,x25,x19 // h+=K[i]
2640 + eor x8,x22,x22,ror#23
2641 + and x17,x23,x22
2642 + bic x19,x24,x22
2643 + add x25,x25,x5 // h+=X[i]
2644 + orr x17,x17,x19 // Ch(e,f,g)
2645 + eor x19,x26,x27 // a^b, b^c in next round
2646 + eor x16,x16,x8,ror#18 // Sigma1(e)
2647 + ror x8,x26,#28
2648 + add x25,x25,x17 // h+=Ch(e,f,g)
2649 + eor x17,x26,x26,ror#5
2650 + add x25,x25,x16 // h+=Sigma1(e)
2651 + and x28,x28,x19 // (b^c)&=(a^b)
2652 + add x21,x21,x25 // d+=h
2653 + eor x28,x28,x27 // Maj(a,b,c)
2654 + eor x17,x8,x17,ror#34 // Sigma0(a)
2655 + add x25,x25,x28 // h+=Maj(a,b,c)
2656 + ldr x28,[x30],#8 // *K++, x19 in next round
2657 + //add x25,x25,x17 // h+=Sigma0(a)
2658 +#ifndef __AARCH64EB__
2659 + rev x6,x6 // 3
2660 +#endif
2661 + ldp x7,x8,[x1],#2*8
2662 + add x25,x25,x17 // h+=Sigma0(a)
2663 + ror x16,x21,#14
2664 + add x24,x24,x28 // h+=K[i]
2665 + eor x9,x21,x21,ror#23
2666 + and x17,x22,x21
2667 + bic x28,x23,x21
2668 + add x24,x24,x6 // h+=X[i]
2669 + orr x17,x17,x28 // Ch(e,f,g)
2670 + eor x28,x25,x26 // a^b, b^c in next round
2671 + eor x16,x16,x9,ror#18 // Sigma1(e)
2672 + ror x9,x25,#28
2673 + add x24,x24,x17 // h+=Ch(e,f,g)
2674 + eor x17,x25,x25,ror#5
2675 + add x24,x24,x16 // h+=Sigma1(e)
2676 + and x19,x19,x28 // (b^c)&=(a^b)
2677 + add x20,x20,x24 // d+=h
2678 + eor x19,x19,x26 // Maj(a,b,c)
2679 + eor x17,x9,x17,ror#34 // Sigma0(a)
2680 + add x24,x24,x19 // h+=Maj(a,b,c)
2681 + ldr x19,[x30],#8 // *K++, x28 in next round
2682 + //add x24,x24,x17 // h+=Sigma0(a)
2683 +#ifndef __AARCH64EB__
2684 + rev x7,x7 // 4
2685 +#endif
2686 + add x24,x24,x17 // h+=Sigma0(a)
2687 + ror x16,x20,#14
2688 + add x23,x23,x19 // h+=K[i]
2689 + eor x10,x20,x20,ror#23
2690 + and x17,x21,x20
2691 + bic x19,x22,x20
2692 + add x23,x23,x7 // h+=X[i]
2693 + orr x17,x17,x19 // Ch(e,f,g)
2694 + eor x19,x24,x25 // a^b, b^c in next round
2695 + eor x16,x16,x10,ror#18 // Sigma1(e)
2696 + ror x10,x24,#28
2697 + add x23,x23,x17 // h+=Ch(e,f,g)
2698 + eor x17,x24,x24,ror#5
2699 + add x23,x23,x16 // h+=Sigma1(e)
2700 + and x28,x28,x19 // (b^c)&=(a^b)
2701 + add x27,x27,x23 // d+=h
2702 + eor x28,x28,x25 // Maj(a,b,c)
2703 + eor x17,x10,x17,ror#34 // Sigma0(a)
2704 + add x23,x23,x28 // h+=Maj(a,b,c)
2705 + ldr x28,[x30],#8 // *K++, x19 in next round
2706 + //add x23,x23,x17 // h+=Sigma0(a)
2707 +#ifndef __AARCH64EB__
2708 + rev x8,x8 // 5
2709 +#endif
2710 + ldp x9,x10,[x1],#2*8
2711 + add x23,x23,x17 // h+=Sigma0(a)
2712 + ror x16,x27,#14
2713 + add x22,x22,x28 // h+=K[i]
2714 + eor x11,x27,x27,ror#23
2715 + and x17,x20,x27
2716 + bic x28,x21,x27
2717 + add x22,x22,x8 // h+=X[i]
2718 + orr x17,x17,x28 // Ch(e,f,g)
2719 + eor x28,x23,x24 // a^b, b^c in next round
2720 + eor x16,x16,x11,ror#18 // Sigma1(e)
2721 + ror x11,x23,#28
2722 + add x22,x22,x17 // h+=Ch(e,f,g)
2723 + eor x17,x23,x23,ror#5
2724 + add x22,x22,x16 // h+=Sigma1(e)
2725 + and x19,x19,x28 // (b^c)&=(a^b)
2726 + add x26,x26,x22 // d+=h
2727 + eor x19,x19,x24 // Maj(a,b,c)
2728 + eor x17,x11,x17,ror#34 // Sigma0(a)
2729 + add x22,x22,x19 // h+=Maj(a,b,c)
2730 + ldr x19,[x30],#8 // *K++, x28 in next round
2731 + //add x22,x22,x17 // h+=Sigma0(a)
2732 +#ifndef __AARCH64EB__
2733 + rev x9,x9 // 6
2734 +#endif
2735 + add x22,x22,x17 // h+=Sigma0(a)
2736 + ror x16,x26,#14
2737 + add x21,x21,x19 // h+=K[i]
2738 + eor x12,x26,x26,ror#23
2739 + and x17,x27,x26
2740 + bic x19,x20,x26
2741 + add x21,x21,x9 // h+=X[i]
2742 + orr x17,x17,x19 // Ch(e,f,g)
2743 + eor x19,x22,x23 // a^b, b^c in next round
2744 + eor x16,x16,x12,ror#18 // Sigma1(e)
2745 + ror x12,x22,#28
2746 + add x21,x21,x17 // h+=Ch(e,f,g)
2747 + eor x17,x22,x22,ror#5
2748 + add x21,x21,x16 // h+=Sigma1(e)
2749 + and x28,x28,x19 // (b^c)&=(a^b)
2750 + add x25,x25,x21 // d+=h
2751 + eor x28,x28,x23 // Maj(a,b,c)
2752 + eor x17,x12,x17,ror#34 // Sigma0(a)
2753 + add x21,x21,x28 // h+=Maj(a,b,c)
2754 + ldr x28,[x30],#8 // *K++, x19 in next round
2755 + //add x21,x21,x17 // h+=Sigma0(a)
2756 +#ifndef __AARCH64EB__
2757 + rev x10,x10 // 7
2758 +#endif
2759 + ldp x11,x12,[x1],#2*8
2760 + add x21,x21,x17 // h+=Sigma0(a)
2761 + ror x16,x25,#14
2762 + add x20,x20,x28 // h+=K[i]
2763 + eor x13,x25,x25,ror#23
2764 + and x17,x26,x25
2765 + bic x28,x27,x25
2766 + add x20,x20,x10 // h+=X[i]
2767 + orr x17,x17,x28 // Ch(e,f,g)
2768 + eor x28,x21,x22 // a^b, b^c in next round
2769 + eor x16,x16,x13,ror#18 // Sigma1(e)
2770 + ror x13,x21,#28
2771 + add x20,x20,x17 // h+=Ch(e,f,g)
2772 + eor x17,x21,x21,ror#5
2773 + add x20,x20,x16 // h+=Sigma1(e)
2774 + and x19,x19,x28 // (b^c)&=(a^b)
2775 + add x24,x24,x20 // d+=h
2776 + eor x19,x19,x22 // Maj(a,b,c)
2777 + eor x17,x13,x17,ror#34 // Sigma0(a)
2778 + add x20,x20,x19 // h+=Maj(a,b,c)
2779 + ldr x19,[x30],#8 // *K++, x28 in next round
2780 + //add x20,x20,x17 // h+=Sigma0(a)
2781 +#ifndef __AARCH64EB__
2782 + rev x11,x11 // 8
2783 +#endif
2784 + add x20,x20,x17 // h+=Sigma0(a)
2785 + ror x16,x24,#14
2786 + add x27,x27,x19 // h+=K[i]
2787 + eor x14,x24,x24,ror#23
2788 + and x17,x25,x24
2789 + bic x19,x26,x24
2790 + add x27,x27,x11 // h+=X[i]
2791 + orr x17,x17,x19 // Ch(e,f,g)
2792 + eor x19,x20,x21 // a^b, b^c in next round
2793 + eor x16,x16,x14,ror#18 // Sigma1(e)
2794 + ror x14,x20,#28
2795 + add x27,x27,x17 // h+=Ch(e,f,g)
2796 + eor x17,x20,x20,ror#5
2797 + add x27,x27,x16 // h+=Sigma1(e)
2798 + and x28,x28,x19 // (b^c)&=(a^b)
2799 + add x23,x23,x27 // d+=h
2800 + eor x28,x28,x21 // Maj(a,b,c)
2801 + eor x17,x14,x17,ror#34 // Sigma0(a)
2802 + add x27,x27,x28 // h+=Maj(a,b,c)
2803 + ldr x28,[x30],#8 // *K++, x19 in next round
2804 + //add x27,x27,x17 // h+=Sigma0(a)
2805 +#ifndef __AARCH64EB__
2806 + rev x12,x12 // 9
2807 +#endif
2808 + ldp x13,x14,[x1],#2*8
2809 + add x27,x27,x17 // h+=Sigma0(a)
2810 + ror x16,x23,#14
2811 + add x26,x26,x28 // h+=K[i]
2812 + eor x15,x23,x23,ror#23
2813 + and x17,x24,x23
2814 + bic x28,x25,x23
2815 + add x26,x26,x12 // h+=X[i]
2816 + orr x17,x17,x28 // Ch(e,f,g)
2817 + eor x28,x27,x20 // a^b, b^c in next round
2818 + eor x16,x16,x15,ror#18 // Sigma1(e)
2819 + ror x15,x27,#28
2820 + add x26,x26,x17 // h+=Ch(e,f,g)
2821 + eor x17,x27,x27,ror#5
2822 + add x26,x26,x16 // h+=Sigma1(e)
2823 + and x19,x19,x28 // (b^c)&=(a^b)
2824 + add x22,x22,x26 // d+=h
2825 + eor x19,x19,x20 // Maj(a,b,c)
2826 + eor x17,x15,x17,ror#34 // Sigma0(a)
2827 + add x26,x26,x19 // h+=Maj(a,b,c)
2828 + ldr x19,[x30],#8 // *K++, x28 in next round
2829 + //add x26,x26,x17 // h+=Sigma0(a)
2830 +#ifndef __AARCH64EB__
2831 + rev x13,x13 // 10
2832 +#endif
2833 + add x26,x26,x17 // h+=Sigma0(a)
2834 + ror x16,x22,#14
2835 + add x25,x25,x19 // h+=K[i]
2836 + eor x0,x22,x22,ror#23
2837 + and x17,x23,x22
2838 + bic x19,x24,x22
2839 + add x25,x25,x13 // h+=X[i]
2840 + orr x17,x17,x19 // Ch(e,f,g)
2841 + eor x19,x26,x27 // a^b, b^c in next round
2842 + eor x16,x16,x0,ror#18 // Sigma1(e)
2843 + ror x0,x26,#28
2844 + add x25,x25,x17 // h+=Ch(e,f,g)
2845 + eor x17,x26,x26,ror#5
2846 + add x25,x25,x16 // h+=Sigma1(e)
2847 + and x28,x28,x19 // (b^c)&=(a^b)
2848 + add x21,x21,x25 // d+=h
2849 + eor x28,x28,x27 // Maj(a,b,c)
2850 + eor x17,x0,x17,ror#34 // Sigma0(a)
2851 + add x25,x25,x28 // h+=Maj(a,b,c)
2852 + ldr x28,[x30],#8 // *K++, x19 in next round
2853 + //add x25,x25,x17 // h+=Sigma0(a)
2854 +#ifndef __AARCH64EB__
2855 + rev x14,x14 // 11
2856 +#endif
2857 + ldp x15,x0,[x1],#2*8
2858 + add x25,x25,x17 // h+=Sigma0(a)
2859 + str x6,[sp,#24]
2860 + ror x16,x21,#14
2861 + add x24,x24,x28 // h+=K[i]
2862 + eor x6,x21,x21,ror#23
2863 + and x17,x22,x21
2864 + bic x28,x23,x21
2865 + add x24,x24,x14 // h+=X[i]
2866 + orr x17,x17,x28 // Ch(e,f,g)
2867 + eor x28,x25,x26 // a^b, b^c in next round
2868 + eor x16,x16,x6,ror#18 // Sigma1(e)
2869 + ror x6,x25,#28
2870 + add x24,x24,x17 // h+=Ch(e,f,g)
2871 + eor x17,x25,x25,ror#5
2872 + add x24,x24,x16 // h+=Sigma1(e)
2873 + and x19,x19,x28 // (b^c)&=(a^b)
2874 + add x20,x20,x24 // d+=h
2875 + eor x19,x19,x26 // Maj(a,b,c)
2876 + eor x17,x6,x17,ror#34 // Sigma0(a)
2877 + add x24,x24,x19 // h+=Maj(a,b,c)
2878 + ldr x19,[x30],#8 // *K++, x28 in next round
2879 + //add x24,x24,x17 // h+=Sigma0(a)
2880 +#ifndef __AARCH64EB__
2881 + rev x15,x15 // 12
2882 +#endif
2883 + add x24,x24,x17 // h+=Sigma0(a)
2884 + str x7,[sp,#0]
2885 + ror x16,x20,#14
2886 + add x23,x23,x19 // h+=K[i]
2887 + eor x7,x20,x20,ror#23
2888 + and x17,x21,x20
2889 + bic x19,x22,x20
2890 + add x23,x23,x15 // h+=X[i]
2891 + orr x17,x17,x19 // Ch(e,f,g)
2892 + eor x19,x24,x25 // a^b, b^c in next round
2893 + eor x16,x16,x7,ror#18 // Sigma1(e)
2894 + ror x7,x24,#28
2895 + add x23,x23,x17 // h+=Ch(e,f,g)
2896 + eor x17,x24,x24,ror#5
2897 + add x23,x23,x16 // h+=Sigma1(e)
2898 + and x28,x28,x19 // (b^c)&=(a^b)
2899 + add x27,x27,x23 // d+=h
2900 + eor x28,x28,x25 // Maj(a,b,c)
2901 + eor x17,x7,x17,ror#34 // Sigma0(a)
2902 + add x23,x23,x28 // h+=Maj(a,b,c)
2903 + ldr x28,[x30],#8 // *K++, x19 in next round
2904 + //add x23,x23,x17 // h+=Sigma0(a)
2905 +#ifndef __AARCH64EB__
2906 + rev x0,x0 // 13
2907 +#endif
2908 + ldp x1,x2,[x1]
2909 + add x23,x23,x17 // h+=Sigma0(a)
2910 + str x8,[sp,#8]
2911 + ror x16,x27,#14
2912 + add x22,x22,x28 // h+=K[i]
2913 + eor x8,x27,x27,ror#23
2914 + and x17,x20,x27
2915 + bic x28,x21,x27
2916 + add x22,x22,x0 // h+=X[i]
2917 + orr x17,x17,x28 // Ch(e,f,g)
2918 + eor x28,x23,x24 // a^b, b^c in next round
2919 + eor x16,x16,x8,ror#18 // Sigma1(e)
2920 + ror x8,x23,#28
2921 + add x22,x22,x17 // h+=Ch(e,f,g)
2922 + eor x17,x23,x23,ror#5
2923 + add x22,x22,x16 // h+=Sigma1(e)
2924 + and x19,x19,x28 // (b^c)&=(a^b)
2925 + add x26,x26,x22 // d+=h
2926 + eor x19,x19,x24 // Maj(a,b,c)
2927 + eor x17,x8,x17,ror#34 // Sigma0(a)
2928 + add x22,x22,x19 // h+=Maj(a,b,c)
2929 + ldr x19,[x30],#8 // *K++, x28 in next round
2930 + //add x22,x22,x17 // h+=Sigma0(a)
2931 +#ifndef __AARCH64EB__
2932 + rev x1,x1 // 14
2933 +#endif
2934 + ldr x6,[sp,#24]
2935 + add x22,x22,x17 // h+=Sigma0(a)
2936 + str x9,[sp,#16]
2937 + ror x16,x26,#14
2938 + add x21,x21,x19 // h+=K[i]
2939 + eor x9,x26,x26,ror#23
2940 + and x17,x27,x26
2941 + bic x19,x20,x26
2942 + add x21,x21,x1 // h+=X[i]
2943 + orr x17,x17,x19 // Ch(e,f,g)
2944 + eor x19,x22,x23 // a^b, b^c in next round
2945 + eor x16,x16,x9,ror#18 // Sigma1(e)
2946 + ror x9,x22,#28
2947 + add x21,x21,x17 // h+=Ch(e,f,g)
2948 + eor x17,x22,x22,ror#5
2949 + add x21,x21,x16 // h+=Sigma1(e)
2950 + and x28,x28,x19 // (b^c)&=(a^b)
2951 + add x25,x25,x21 // d+=h
2952 + eor x28,x28,x23 // Maj(a,b,c)
2953 + eor x17,x9,x17,ror#34 // Sigma0(a)
2954 + add x21,x21,x28 // h+=Maj(a,b,c)
2955 + ldr x28,[x30],#8 // *K++, x19 in next round
2956 + //add x21,x21,x17 // h+=Sigma0(a)
2957 +#ifndef __AARCH64EB__
2958 + rev x2,x2 // 15
2959 +#endif
2960 + ldr x7,[sp,#0]
2961 + add x21,x21,x17 // h+=Sigma0(a)
2962 + str x10,[sp,#24]
2963 + ror x16,x25,#14
2964 + add x20,x20,x28 // h+=K[i]
2965 + ror x9,x4,#1
2966 + and x17,x26,x25
2967 + ror x8,x1,#19
2968 + bic x28,x27,x25
2969 + ror x10,x21,#28
2970 + add x20,x20,x2 // h+=X[i]
2971 + eor x16,x16,x25,ror#18
2972 + eor x9,x9,x4,ror#8
2973 + orr x17,x17,x28 // Ch(e,f,g)
2974 + eor x28,x21,x22 // a^b, b^c in next round
2975 + eor x16,x16,x25,ror#41 // Sigma1(e)
2976 + eor x10,x10,x21,ror#34
2977 + add x20,x20,x17 // h+=Ch(e,f,g)
2978 + and x19,x19,x28 // (b^c)&=(a^b)
2979 + eor x8,x8,x1,ror#61
2980 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
2981 + add x20,x20,x16 // h+=Sigma1(e)
2982 + eor x19,x19,x22 // Maj(a,b,c)
2983 + eor x17,x10,x21,ror#39 // Sigma0(a)
2984 + eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
2985 + add x3,x3,x12
2986 + add x24,x24,x20 // d+=h
2987 + add x20,x20,x19 // h+=Maj(a,b,c)
2988 + ldr x19,[x30],#8 // *K++, x28 in next round
2989 + add x3,x3,x9
2990 + add x20,x20,x17 // h+=Sigma0(a)
2991 + add x3,x3,x8
2992 +.Loop_16_xx:
2993 + ldr x8,[sp,#8]
2994 + str x11,[sp,#0]
2995 + ror x16,x24,#14
2996 + add x27,x27,x19 // h+=K[i]
2997 + ror x10,x5,#1
2998 + and x17,x25,x24
2999 + ror x9,x2,#19
3000 + bic x19,x26,x24
3001 + ror x11,x20,#28
3002 + add x27,x27,x3 // h+=X[i]
3003 + eor x16,x16,x24,ror#18
3004 + eor x10,x10,x5,ror#8
3005 + orr x17,x17,x19 // Ch(e,f,g)
3006 + eor x19,x20,x21 // a^b, b^c in next round
3007 + eor x16,x16,x24,ror#41 // Sigma1(e)
3008 + eor x11,x11,x20,ror#34
3009 + add x27,x27,x17 // h+=Ch(e,f,g)
3010 + and x28,x28,x19 // (b^c)&=(a^b)
3011 + eor x9,x9,x2,ror#61
3012 + eor x10,x10,x5,lsr#7 // sigma0(X[i+1])
3013 + add x27,x27,x16 // h+=Sigma1(e)
3014 + eor x28,x28,x21 // Maj(a,b,c)
3015 + eor x17,x11,x20,ror#39 // Sigma0(a)
3016 + eor x9,x9,x2,lsr#6 // sigma1(X[i+14])
3017 + add x4,x4,x13
3018 + add x23,x23,x27 // d+=h
3019 + add x27,x27,x28 // h+=Maj(a,b,c)
3020 + ldr x28,[x30],#8 // *K++, x19 in next round
3021 + add x4,x4,x10
3022 + add x27,x27,x17 // h+=Sigma0(a)
3023 + add x4,x4,x9
3024 + ldr x9,[sp,#16]
3025 + str x12,[sp,#8]
3026 + ror x16,x23,#14
3027 + add x26,x26,x28 // h+=K[i]
3028 + ror x11,x6,#1
3029 + and x17,x24,x23
3030 + ror x10,x3,#19
3031 + bic x28,x25,x23
3032 + ror x12,x27,#28
3033 + add x26,x26,x4 // h+=X[i]
3034 + eor x16,x16,x23,ror#18
3035 + eor x11,x11,x6,ror#8
3036 + orr x17,x17,x28 // Ch(e,f,g)
3037 + eor x28,x27,x20 // a^b, b^c in next round
3038 + eor x16,x16,x23,ror#41 // Sigma1(e)
3039 + eor x12,x12,x27,ror#34
3040 + add x26,x26,x17 // h+=Ch(e,f,g)
3041 + and x19,x19,x28 // (b^c)&=(a^b)
3042 + eor x10,x10,x3,ror#61
3043 + eor x11,x11,x6,lsr#7 // sigma0(X[i+1])
3044 + add x26,x26,x16 // h+=Sigma1(e)
3045 + eor x19,x19,x20 // Maj(a,b,c)
3046 + eor x17,x12,x27,ror#39 // Sigma0(a)
3047 + eor x10,x10,x3,lsr#6 // sigma1(X[i+14])
3048 + add x5,x5,x14
3049 + add x22,x22,x26 // d+=h
3050 + add x26,x26,x19 // h+=Maj(a,b,c)
3051 + ldr x19,[x30],#8 // *K++, x28 in next round
3052 + add x5,x5,x11
3053 + add x26,x26,x17 // h+=Sigma0(a)
3054 + add x5,x5,x10
3055 + ldr x10,[sp,#24]
3056 + str x13,[sp,#16]
3057 + ror x16,x22,#14
3058 + add x25,x25,x19 // h+=K[i]
3059 + ror x12,x7,#1
3060 + and x17,x23,x22
3061 + ror x11,x4,#19
3062 + bic x19,x24,x22
3063 + ror x13,x26,#28
3064 + add x25,x25,x5 // h+=X[i]
3065 + eor x16,x16,x22,ror#18
3066 + eor x12,x12,x7,ror#8
3067 + orr x17,x17,x19 // Ch(e,f,g)
3068 + eor x19,x26,x27 // a^b, b^c in next round
3069 + eor x16,x16,x22,ror#41 // Sigma1(e)
3070 + eor x13,x13,x26,ror#34
3071 + add x25,x25,x17 // h+=Ch(e,f,g)
3072 + and x28,x28,x19 // (b^c)&=(a^b)
3073 + eor x11,x11,x4,ror#61
3074 + eor x12,x12,x7,lsr#7 // sigma0(X[i+1])
3075 + add x25,x25,x16 // h+=Sigma1(e)
3076 + eor x28,x28,x27 // Maj(a,b,c)
3077 + eor x17,x13,x26,ror#39 // Sigma0(a)
3078 + eor x11,x11,x4,lsr#6 // sigma1(X[i+14])
3079 + add x6,x6,x15
3080 + add x21,x21,x25 // d+=h
3081 + add x25,x25,x28 // h+=Maj(a,b,c)
3082 + ldr x28,[x30],#8 // *K++, x19 in next round
3083 + add x6,x6,x12
3084 + add x25,x25,x17 // h+=Sigma0(a)
3085 + add x6,x6,x11
3086 + ldr x11,[sp,#0]
3087 + str x14,[sp,#24]
3088 + ror x16,x21,#14
3089 + add x24,x24,x28 // h+=K[i]
3090 + ror x13,x8,#1
3091 + and x17,x22,x21
3092 + ror x12,x5,#19
3093 + bic x28,x23,x21
3094 + ror x14,x25,#28
3095 + add x24,x24,x6 // h+=X[i]
3096 + eor x16,x16,x21,ror#18
3097 + eor x13,x13,x8,ror#8
3098 + orr x17,x17,x28 // Ch(e,f,g)
3099 + eor x28,x25,x26 // a^b, b^c in next round
3100 + eor x16,x16,x21,ror#41 // Sigma1(e)
3101 + eor x14,x14,x25,ror#34
3102 + add x24,x24,x17 // h+=Ch(e,f,g)
3103 + and x19,x19,x28 // (b^c)&=(a^b)
3104 + eor x12,x12,x5,ror#61
3105 + eor x13,x13,x8,lsr#7 // sigma0(X[i+1])
3106 + add x24,x24,x16 // h+=Sigma1(e)
3107 + eor x19,x19,x26 // Maj(a,b,c)
3108 + eor x17,x14,x25,ror#39 // Sigma0(a)
3109 + eor x12,x12,x5,lsr#6 // sigma1(X[i+14])
3110 + add x7,x7,x0
3111 + add x20,x20,x24 // d+=h
3112 + add x24,x24,x19 // h+=Maj(a,b,c)
3113 + ldr x19,[x30],#8 // *K++, x28 in next round
3114 + add x7,x7,x13
3115 + add x24,x24,x17 // h+=Sigma0(a)
3116 + add x7,x7,x12
3117 + ldr x12,[sp,#8]
3118 + str x15,[sp,#0]
3119 + ror x16,x20,#14
3120 + add x23,x23,x19 // h+=K[i]
3121 + ror x14,x9,#1
3122 + and x17,x21,x20
3123 + ror x13,x6,#19
3124 + bic x19,x22,x20
3125 + ror x15,x24,#28
3126 + add x23,x23,x7 // h+=X[i]
3127 + eor x16,x16,x20,ror#18
3128 + eor x14,x14,x9,ror#8
3129 + orr x17,x17,x19 // Ch(e,f,g)
3130 + eor x19,x24,x25 // a^b, b^c in next round
3131 + eor x16,x16,x20,ror#41 // Sigma1(e)
3132 + eor x15,x15,x24,ror#34
3133 + add x23,x23,x17 // h+=Ch(e,f,g)
3134 + and x28,x28,x19 // (b^c)&=(a^b)
3135 + eor x13,x13,x6,ror#61
3136 + eor x14,x14,x9,lsr#7 // sigma0(X[i+1])
3137 + add x23,x23,x16 // h+=Sigma1(e)
3138 + eor x28,x28,x25 // Maj(a,b,c)
3139 + eor x17,x15,x24,ror#39 // Sigma0(a)
3140 + eor x13,x13,x6,lsr#6 // sigma1(X[i+14])
3141 + add x8,x8,x1
3142 + add x27,x27,x23 // d+=h
3143 + add x23,x23,x28 // h+=Maj(a,b,c)
3144 + ldr x28,[x30],#8 // *K++, x19 in next round
3145 + add x8,x8,x14
3146 + add x23,x23,x17 // h+=Sigma0(a)
3147 + add x8,x8,x13
3148 + ldr x13,[sp,#16]
3149 + str x0,[sp,#8]
3150 + ror x16,x27,#14
3151 + add x22,x22,x28 // h+=K[i]
3152 + ror x15,x10,#1
3153 + and x17,x20,x27
3154 + ror x14,x7,#19
3155 + bic x28,x21,x27
3156 + ror x0,x23,#28
3157 + add x22,x22,x8 // h+=X[i]
3158 + eor x16,x16,x27,ror#18
3159 + eor x15,x15,x10,ror#8
3160 + orr x17,x17,x28 // Ch(e,f,g)
3161 + eor x28,x23,x24 // a^b, b^c in next round
3162 + eor x16,x16,x27,ror#41 // Sigma1(e)
3163 + eor x0,x0,x23,ror#34
3164 + add x22,x22,x17 // h+=Ch(e,f,g)
3165 + and x19,x19,x28 // (b^c)&=(a^b)
3166 + eor x14,x14,x7,ror#61
3167 + eor x15,x15,x10,lsr#7 // sigma0(X[i+1])
3168 + add x22,x22,x16 // h+=Sigma1(e)
3169 + eor x19,x19,x24 // Maj(a,b,c)
3170 + eor x17,x0,x23,ror#39 // Sigma0(a)
3171 + eor x14,x14,x7,lsr#6 // sigma1(X[i+14])
3172 + add x9,x9,x2
3173 + add x26,x26,x22 // d+=h
3174 + add x22,x22,x19 // h+=Maj(a,b,c)
3175 + ldr x19,[x30],#8 // *K++, x28 in next round
3176 + add x9,x9,x15
3177 + add x22,x22,x17 // h+=Sigma0(a)
3178 + add x9,x9,x14
3179 + ldr x14,[sp,#24]
3180 + str x1,[sp,#16]
3181 + ror x16,x26,#14
3182 + add x21,x21,x19 // h+=K[i]
3183 + ror x0,x11,#1
3184 + and x17,x27,x26
3185 + ror x15,x8,#19
3186 + bic x19,x20,x26
3187 + ror x1,x22,#28
3188 + add x21,x21,x9 // h+=X[i]
3189 + eor x16,x16,x26,ror#18
3190 + eor x0,x0,x11,ror#8
3191 + orr x17,x17,x19 // Ch(e,f,g)
3192 + eor x19,x22,x23 // a^b, b^c in next round
3193 + eor x16,x16,x26,ror#41 // Sigma1(e)
3194 + eor x1,x1,x22,ror#34
3195 + add x21,x21,x17 // h+=Ch(e,f,g)
3196 + and x28,x28,x19 // (b^c)&=(a^b)
3197 + eor x15,x15,x8,ror#61
3198 + eor x0,x0,x11,lsr#7 // sigma0(X[i+1])
3199 + add x21,x21,x16 // h+=Sigma1(e)
3200 + eor x28,x28,x23 // Maj(a,b,c)
3201 + eor x17,x1,x22,ror#39 // Sigma0(a)
3202 + eor x15,x15,x8,lsr#6 // sigma1(X[i+14])
3203 + add x10,x10,x3
3204 + add x25,x25,x21 // d+=h
3205 + add x21,x21,x28 // h+=Maj(a,b,c)
3206 + ldr x28,[x30],#8 // *K++, x19 in next round
3207 + add x10,x10,x0
3208 + add x21,x21,x17 // h+=Sigma0(a)
3209 + add x10,x10,x15
3210 + ldr x15,[sp,#0]
3211 + str x2,[sp,#24]
3212 + ror x16,x25,#14
3213 + add x20,x20,x28 // h+=K[i]
3214 + ror x1,x12,#1
3215 + and x17,x26,x25
3216 + ror x0,x9,#19
3217 + bic x28,x27,x25
3218 + ror x2,x21,#28
3219 + add x20,x20,x10 // h+=X[i]
3220 + eor x16,x16,x25,ror#18
3221 + eor x1,x1,x12,ror#8
3222 + orr x17,x17,x28 // Ch(e,f,g)
3223 + eor x28,x21,x22 // a^b, b^c in next round
3224 + eor x16,x16,x25,ror#41 // Sigma1(e)
3225 + eor x2,x2,x21,ror#34
3226 + add x20,x20,x17 // h+=Ch(e,f,g)
3227 + and x19,x19,x28 // (b^c)&=(a^b)
3228 + eor x0,x0,x9,ror#61
3229 + eor x1,x1,x12,lsr#7 // sigma0(X[i+1])
3230 + add x20,x20,x16 // h+=Sigma1(e)
3231 + eor x19,x19,x22 // Maj(a,b,c)
3232 + eor x17,x2,x21,ror#39 // Sigma0(a)
3233 + eor x0,x0,x9,lsr#6 // sigma1(X[i+14])
3234 + add x11,x11,x4
3235 + add x24,x24,x20 // d+=h
3236 + add x20,x20,x19 // h+=Maj(a,b,c)
3237 + ldr x19,[x30],#8 // *K++, x28 in next round
3238 + add x11,x11,x1
3239 + add x20,x20,x17 // h+=Sigma0(a)
3240 + add x11,x11,x0
3241 + ldr x0,[sp,#8]
3242 + str x3,[sp,#0]
3243 + ror x16,x24,#14
3244 + add x27,x27,x19 // h+=K[i]
3245 + ror x2,x13,#1
3246 + and x17,x25,x24
3247 + ror x1,x10,#19
3248 + bic x19,x26,x24
3249 + ror x3,x20,#28
3250 + add x27,x27,x11 // h+=X[i]
3251 + eor x16,x16,x24,ror#18
3252 + eor x2,x2,x13,ror#8
3253 + orr x17,x17,x19 // Ch(e,f,g)
3254 + eor x19,x20,x21 // a^b, b^c in next round
3255 + eor x16,x16,x24,ror#41 // Sigma1(e)
3256 + eor x3,x3,x20,ror#34
3257 + add x27,x27,x17 // h+=Ch(e,f,g)
3258 + and x28,x28,x19 // (b^c)&=(a^b)
3259 + eor x1,x1,x10,ror#61
3260 + eor x2,x2,x13,lsr#7 // sigma0(X[i+1])
3261 + add x27,x27,x16 // h+=Sigma1(e)
3262 + eor x28,x28,x21 // Maj(a,b,c)
3263 + eor x17,x3,x20,ror#39 // Sigma0(a)
3264 + eor x1,x1,x10,lsr#6 // sigma1(X[i+14])
3265 + add x12,x12,x5
3266 + add x23,x23,x27 // d+=h
3267 + add x27,x27,x28 // h+=Maj(a,b,c)
3268 + ldr x28,[x30],#8 // *K++, x19 in next round
3269 + add x12,x12,x2
3270 + add x27,x27,x17 // h+=Sigma0(a)
3271 + add x12,x12,x1
3272 + ldr x1,[sp,#16]
3273 + str x4,[sp,#8]
3274 + ror x16,x23,#14
3275 + add x26,x26,x28 // h+=K[i]
3276 + ror x3,x14,#1
3277 + and x17,x24,x23
3278 + ror x2,x11,#19
3279 + bic x28,x25,x23
3280 + ror x4,x27,#28
3281 + add x26,x26,x12 // h+=X[i]
3282 + eor x16,x16,x23,ror#18
3283 + eor x3,x3,x14,ror#8
3284 + orr x17,x17,x28 // Ch(e,f,g)
3285 + eor x28,x27,x20 // a^b, b^c in next round
3286 + eor x16,x16,x23,ror#41 // Sigma1(e)
3287 + eor x4,x4,x27,ror#34
3288 + add x26,x26,x17 // h+=Ch(e,f,g)
3289 + and x19,x19,x28 // (b^c)&=(a^b)
3290 + eor x2,x2,x11,ror#61
3291 + eor x3,x3,x14,lsr#7 // sigma0(X[i+1])
3292 + add x26,x26,x16 // h+=Sigma1(e)
3293 + eor x19,x19,x20 // Maj(a,b,c)
3294 + eor x17,x4,x27,ror#39 // Sigma0(a)
3295 + eor x2,x2,x11,lsr#6 // sigma1(X[i+14])
3296 + add x13,x13,x6
3297 + add x22,x22,x26 // d+=h
3298 + add x26,x26,x19 // h+=Maj(a,b,c)
3299 + ldr x19,[x30],#8 // *K++, x28 in next round
3300 + add x13,x13,x3
3301 + add x26,x26,x17 // h+=Sigma0(a)
3302 + add x13,x13,x2
3303 + ldr x2,[sp,#24]
3304 + str x5,[sp,#16]
3305 + ror x16,x22,#14
3306 + add x25,x25,x19 // h+=K[i]
3307 + ror x4,x15,#1
3308 + and x17,x23,x22
3309 + ror x3,x12,#19
3310 + bic x19,x24,x22
3311 + ror x5,x26,#28
3312 + add x25,x25,x13 // h+=X[i]
3313 + eor x16,x16,x22,ror#18
3314 + eor x4,x4,x15,ror#8
3315 + orr x17,x17,x19 // Ch(e,f,g)
3316 + eor x19,x26,x27 // a^b, b^c in next round
3317 + eor x16,x16,x22,ror#41 // Sigma1(e)
3318 + eor x5,x5,x26,ror#34
3319 + add x25,x25,x17 // h+=Ch(e,f,g)
3320 + and x28,x28,x19 // (b^c)&=(a^b)
3321 + eor x3,x3,x12,ror#61
3322 + eor x4,x4,x15,lsr#7 // sigma0(X[i+1])
3323 + add x25,x25,x16 // h+=Sigma1(e)
3324 + eor x28,x28,x27 // Maj(a,b,c)
3325 + eor x17,x5,x26,ror#39 // Sigma0(a)
3326 + eor x3,x3,x12,lsr#6 // sigma1(X[i+14])
3327 + add x14,x14,x7
3328 + add x21,x21,x25 // d+=h
3329 + add x25,x25,x28 // h+=Maj(a,b,c)
3330 + ldr x28,[x30],#8 // *K++, x19 in next round
3331 + add x14,x14,x4
3332 + add x25,x25,x17 // h+=Sigma0(a)
3333 + add x14,x14,x3
3334 + ldr x3,[sp,#0]
3335 + str x6,[sp,#24]
3336 + ror x16,x21,#14
3337 + add x24,x24,x28 // h+=K[i]
3338 + ror x5,x0,#1
3339 + and x17,x22,x21
3340 + ror x4,x13,#19
3341 + bic x28,x23,x21
3342 + ror x6,x25,#28
3343 + add x24,x24,x14 // h+=X[i]
3344 + eor x16,x16,x21,ror#18
3345 + eor x5,x5,x0,ror#8
3346 + orr x17,x17,x28 // Ch(e,f,g)
3347 + eor x28,x25,x26 // a^b, b^c in next round
3348 + eor x16,x16,x21,ror#41 // Sigma1(e)
3349 + eor x6,x6,x25,ror#34
3350 + add x24,x24,x17 // h+=Ch(e,f,g)
3351 + and x19,x19,x28 // (b^c)&=(a^b)
3352 + eor x4,x4,x13,ror#61
3353 + eor x5,x5,x0,lsr#7 // sigma0(X[i+1])
3354 + add x24,x24,x16 // h+=Sigma1(e)
3355 + eor x19,x19,x26 // Maj(a,b,c)
3356 + eor x17,x6,x25,ror#39 // Sigma0(a)
3357 + eor x4,x4,x13,lsr#6 // sigma1(X[i+14])
3358 + add x15,x15,x8
3359 + add x20,x20,x24 // d+=h
3360 + add x24,x24,x19 // h+=Maj(a,b,c)
3361 + ldr x19,[x30],#8 // *K++, x28 in next round
3362 + add x15,x15,x5
3363 + add x24,x24,x17 // h+=Sigma0(a)
3364 + add x15,x15,x4
3365 + ldr x4,[sp,#8]
3366 + str x7,[sp,#0]
3367 + ror x16,x20,#14
3368 + add x23,x23,x19 // h+=K[i]
3369 + ror x6,x1,#1
3370 + and x17,x21,x20
3371 + ror x5,x14,#19
3372 + bic x19,x22,x20
3373 + ror x7,x24,#28
3374 + add x23,x23,x15 // h+=X[i]
3375 + eor x16,x16,x20,ror#18
3376 + eor x6,x6,x1,ror#8
3377 + orr x17,x17,x19 // Ch(e,f,g)
3378 + eor x19,x24,x25 // a^b, b^c in next round
3379 + eor x16,x16,x20,ror#41 // Sigma1(e)
3380 + eor x7,x7,x24,ror#34
3381 + add x23,x23,x17 // h+=Ch(e,f,g)
3382 + and x28,x28,x19 // (b^c)&=(a^b)
3383 + eor x5,x5,x14,ror#61
3384 + eor x6,x6,x1,lsr#7 // sigma0(X[i+1])
3385 + add x23,x23,x16 // h+=Sigma1(e)
3386 + eor x28,x28,x25 // Maj(a,b,c)
3387 + eor x17,x7,x24,ror#39 // Sigma0(a)
3388 + eor x5,x5,x14,lsr#6 // sigma1(X[i+14])
3389 + add x0,x0,x9
3390 + add x27,x27,x23 // d+=h
3391 + add x23,x23,x28 // h+=Maj(a,b,c)
3392 + ldr x28,[x30],#8 // *K++, x19 in next round
3393 + add x0,x0,x6
3394 + add x23,x23,x17 // h+=Sigma0(a)
3395 + add x0,x0,x5
3396 + ldr x5,[sp,#16]
3397 + str x8,[sp,#8]
3398 + ror x16,x27,#14
3399 + add x22,x22,x28 // h+=K[i]
3400 + ror x7,x2,#1
3401 + and x17,x20,x27
3402 + ror x6,x15,#19
3403 + bic x28,x21,x27
3404 + ror x8,x23,#28
3405 + add x22,x22,x0 // h+=X[i]
3406 + eor x16,x16,x27,ror#18
3407 + eor x7,x7,x2,ror#8
3408 + orr x17,x17,x28 // Ch(e,f,g)
3409 + eor x28,x23,x24 // a^b, b^c in next round
3410 + eor x16,x16,x27,ror#41 // Sigma1(e)
3411 + eor x8,x8,x23,ror#34
3412 + add x22,x22,x17 // h+=Ch(e,f,g)
3413 + and x19,x19,x28 // (b^c)&=(a^b)
3414 + eor x6,x6,x15,ror#61
3415 + eor x7,x7,x2,lsr#7 // sigma0(X[i+1])
3416 + add x22,x22,x16 // h+=Sigma1(e)
3417 + eor x19,x19,x24 // Maj(a,b,c)
3418 + eor x17,x8,x23,ror#39 // Sigma0(a)
3419 + eor x6,x6,x15,lsr#6 // sigma1(X[i+14])
3420 + add x1,x1,x10
3421 + add x26,x26,x22 // d+=h
3422 + add x22,x22,x19 // h+=Maj(a,b,c)
3423 + ldr x19,[x30],#8 // *K++, x28 in next round
3424 + add x1,x1,x7
3425 + add x22,x22,x17 // h+=Sigma0(a)
3426 + add x1,x1,x6
3427 + ldr x6,[sp,#24]
3428 + str x9,[sp,#16]
3429 + ror x16,x26,#14
3430 + add x21,x21,x19 // h+=K[i]
3431 + ror x8,x3,#1
3432 + and x17,x27,x26
3433 + ror x7,x0,#19
3434 + bic x19,x20,x26
3435 + ror x9,x22,#28
3436 + add x21,x21,x1 // h+=X[i]
3437 + eor x16,x16,x26,ror#18
3438 + eor x8,x8,x3,ror#8
3439 + orr x17,x17,x19 // Ch(e,f,g)
3440 + eor x19,x22,x23 // a^b, b^c in next round
3441 + eor x16,x16,x26,ror#41 // Sigma1(e)
3442 + eor x9,x9,x22,ror#34
3443 + add x21,x21,x17 // h+=Ch(e,f,g)
3444 + and x28,x28,x19 // (b^c)&=(a^b)
3445 + eor x7,x7,x0,ror#61
3446 + eor x8,x8,x3,lsr#7 // sigma0(X[i+1])
3447 + add x21,x21,x16 // h+=Sigma1(e)
3448 + eor x28,x28,x23 // Maj(a,b,c)
3449 + eor x17,x9,x22,ror#39 // Sigma0(a)
3450 + eor x7,x7,x0,lsr#6 // sigma1(X[i+14])
3451 + add x2,x2,x11
3452 + add x25,x25,x21 // d+=h
3453 + add x21,x21,x28 // h+=Maj(a,b,c)
3454 + ldr x28,[x30],#8 // *K++, x19 in next round
3455 + add x2,x2,x8
3456 + add x21,x21,x17 // h+=Sigma0(a)
3457 + add x2,x2,x7
3458 + ldr x7,[sp,#0]
3459 + str x10,[sp,#24]
3460 + ror x16,x25,#14
3461 + add x20,x20,x28 // h+=K[i]
3462 + ror x9,x4,#1
3463 + and x17,x26,x25
3464 + ror x8,x1,#19
3465 + bic x28,x27,x25
3466 + ror x10,x21,#28
3467 + add x20,x20,x2 // h+=X[i]
3468 + eor x16,x16,x25,ror#18
3469 + eor x9,x9,x4,ror#8
3470 + orr x17,x17,x28 // Ch(e,f,g)
3471 + eor x28,x21,x22 // a^b, b^c in next round
3472 + eor x16,x16,x25,ror#41 // Sigma1(e)
3473 + eor x10,x10,x21,ror#34
3474 + add x20,x20,x17 // h+=Ch(e,f,g)
3475 + and x19,x19,x28 // (b^c)&=(a^b)
3476 + eor x8,x8,x1,ror#61
3477 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
3478 + add x20,x20,x16 // h+=Sigma1(e)
3479 + eor x19,x19,x22 // Maj(a,b,c)
3480 + eor x17,x10,x21,ror#39 // Sigma0(a)
3481 + eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
3482 + add x3,x3,x12
3483 + add x24,x24,x20 // d+=h
3484 + add x20,x20,x19 // h+=Maj(a,b,c)
3485 + ldr x19,[x30],#8 // *K++, x28 in next round
3486 + add x3,x3,x9
3487 + add x20,x20,x17 // h+=Sigma0(a)
3488 + add x3,x3,x8
3489 + cbnz x19,.Loop_16_xx
3490 +
3491 + ldp x0,x2,[x29,#96]
3492 + ldr x1,[x29,#112]
3493 + sub x30,x30,#648 // rewind
3494 +
3495 + ldp x3,x4,[x0]
3496 + ldp x5,x6,[x0,#2*8]
3497 + add x1,x1,#14*8 // advance input pointer
3498 + ldp x7,x8,[x0,#4*8]
3499 + add x20,x20,x3
3500 + ldp x9,x10,[x0,#6*8]
3501 + add x21,x21,x4
3502 + add x22,x22,x5
3503 + add x23,x23,x6
3504 + stp x20,x21,[x0]
3505 + add x24,x24,x7
3506 + add x25,x25,x8
3507 + stp x22,x23,[x0,#2*8]
3508 + add x26,x26,x9
3509 + add x27,x27,x10
3510 + cmp x1,x2
3511 + stp x24,x25,[x0,#4*8]
3512 + stp x26,x27,[x0,#6*8]
3513 + b.ne .Loop
3514 +
3515 + ldp x19,x20,[x29,#16]
3516 + add sp,sp,#4*8
3517 + ldp x21,x22,[x29,#32]
3518 + ldp x23,x24,[x29,#48]
3519 + ldp x25,x26,[x29,#64]
3520 + ldp x27,x28,[x29,#80]
3521 + ldp x29,x30,[sp],#128
3522 + ret
3523 +.size sha512_block_data_order,.-sha512_block_data_order
3524 +
3525 +.align 6
3526 +.type .LK512,%object
3527 +.LK512:
3528 + .quad 0x428a2f98d728ae22,0x7137449123ef65cd
3529 + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
3530 + .quad 0x3956c25bf348b538,0x59f111f1b605d019
3531 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
3532 + .quad 0xd807aa98a3030242,0x12835b0145706fbe
3533 + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
3534 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
3535 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694
3536 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
3537 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
3538 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
3539 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
3540 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210
3541 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
3542 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
3543 + .quad 0x06ca6351e003826f,0x142929670a0e6e70
3544 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
3545 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
3546 + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
3547 + .quad 0x81c2c92e47edaee6,0x92722c851482353b
3548 + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
3549 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
3550 + .quad 0xd192e819d6ef5218,0xd69906245565a910
3551 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8
3552 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
3553 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
3554 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
3555 + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
3556 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
3557 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
3558 + .quad 0x90befffa23631e28,0xa4506cebde82bde9
3559 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
3560 + .quad 0xca273eceea26619c,0xd186b8c721c0c207
3561 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
3562 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
3563 + .quad 0x113f9804bef90dae,0x1b710b35131c471b
3564 + .quad 0x28db77f523047d84,0x32caab7b40c72493
3565 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
3566 + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
3567 + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
3568 + .quad 0 // terminator
3569 +.size .LK512,.-.LK512
3570 +#ifndef __KERNEL__
3571 +.align 3
3572 +.LOPENSSL_armcap_P:
3573 +# ifdef __ILP32__
3574 + .long OPENSSL_armcap_P-.
3575 +# else
3576 + .quad OPENSSL_armcap_P-.
3577 +# endif
3578 +#endif
3579 +.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
3580 +.align 2
3581 +#ifndef __KERNEL__
3582 +.comm OPENSSL_armcap_P,4,4
3583 +#endif
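For reference (an illustrative aside, not part of the patch itself): the scalar SHA-512 rounds above are annotated with // Ch(e,f,g), // Maj(a,b,c), // Sigma1(e), // Sigma0(a), // sigma0(X[i+1]) and // sigma1(X[i+14]). These are the standard SHA-512 round primitives; the rotation amounts below match the ror #14/#18/#41, ror #28/#34/#39, ror #1/#8 with lsr #7, and ror #19/#61 with lsr #6 operands visible in the code. The C helper names are mine, not the kernel's:

#include <stdint.h>

static inline uint64_t ror64(uint64_t x, unsigned int n)
{
	return (x >> n) | (x << (64 - n));
}

static inline uint64_t Ch(uint64_t e, uint64_t f, uint64_t g)
{
	return (e & f) ^ (~e & g);
}

static inline uint64_t Maj(uint64_t a, uint64_t b, uint64_t c)
{
	return (a & b) ^ (a & c) ^ (b & c);
}

static inline uint64_t Sigma0(uint64_t a) { return ror64(a, 28) ^ ror64(a, 34) ^ ror64(a, 39); }
static inline uint64_t Sigma1(uint64_t e) { return ror64(e, 14) ^ ror64(e, 18) ^ ror64(e, 41); }
static inline uint64_t sigma0(uint64_t x) { return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); }
static inline uint64_t sigma1(uint64_t x) { return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); }

/* One round, matching the h+=K[i]; h+=X[i]; h+=Ch(e,f,g); h+=Sigma1(e);
 * d+=h; h+=Maj(a,b,c); h+=Sigma0(a) sequence commented in the assembly:
 *   T1 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i];
 *   T2 = Sigma0(a) + Maj(a, b, c);
 *   d += T1;  h = T1 + T2;
 */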
3584 diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
3585 index 7193bf97b8da..e60375ce0dd2 100644
3586 --- a/arch/arm64/include/asm/assembler.h
3587 +++ b/arch/arm64/include/asm/assembler.h
3588 @@ -86,6 +86,24 @@
3589 dmb \opt
3590 .endm
3591
3592 +/*
3593 + * Value prediction barrier
3594 + */
3595 + .macro csdb
3596 + hint #20
3597 + .endm
3598 +
3599 +/*
3600 + * Sanitise a 64-bit bounded index wrt speculation, returning zero if out
3601 + * of bounds.
3602 + */
3603 + .macro mask_nospec64, idx, limit, tmp
3604 + sub \tmp, \idx, \limit
3605 + bic \tmp, \tmp, \idx
3606 + and \idx, \idx, \tmp, asr #63
3607 + csdb
3608 + .endm
3609 +
3610 /*
3611 * NOP sequence
3612 */
3613 @@ -416,4 +434,5 @@ alternative_endif
3614 .macro pte_to_phys, phys, pte
3615 and \phys, \pte, #(((1 << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
3616 .endm
3617 +
3618 #endif /* __ASM_ASSEMBLER_H */
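For reference (an illustrative aside, not part of the patch itself): the mask_nospec64 macro added above clamps a bounded index using only data dependencies, so a mispredicted bounds check cannot steer a speculative out-of-bounds access. A C rendering of the same arithmetic, assuming 64-bit unsigned long and an arithmetic right shift on signed values, could look like the sketch below; the function name is illustrative, and the trailing csdb barrier in the real macro has no portable C equivalent:

/* Returns idx when it is a valid index below limit, and 0 otherwise. */
static inline unsigned long mask_nospec64_sketch(unsigned long idx, unsigned long limit)
{
	unsigned long tmp = idx - limit;	/* sub: top bit set when idx < limit      */

	tmp &= ~idx;				/* bic: ...unless idx has its top bit set  */
	return idx & (unsigned long)((long)tmp >> 63);	/* asr #63: all-ones or zero mask */
}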
3619 diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
3620 index 0fe7e43b7fbc..0b0755c961ac 100644
3621 --- a/arch/arm64/include/asm/barrier.h
3622 +++ b/arch/arm64/include/asm/barrier.h
3623 @@ -31,6 +31,8 @@
3624 #define dmb(opt) asm volatile("dmb " #opt : : : "memory")
3625 #define dsb(opt) asm volatile("dsb " #opt : : : "memory")
3626
3627 +#define csdb() asm volatile("hint #20" : : : "memory")
3628 +
3629 #define mb() dsb(sy)
3630 #define rmb() dsb(ld)
3631 #define wmb() dsb(st)
3632 @@ -38,6 +40,27 @@
3633 #define dma_rmb() dmb(oshld)
3634 #define dma_wmb() dmb(oshst)
3635
3636 +/*
3637 + * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz
3638 + * and 0 otherwise.
3639 + */
3640 +#define array_index_mask_nospec array_index_mask_nospec
3641 +static inline unsigned long array_index_mask_nospec(unsigned long idx,
3642 + unsigned long sz)
3643 +{
3644 + unsigned long mask;
3645 +
3646 + asm volatile(
3647 + " cmp %1, %2\n"
3648 + " sbc %0, xzr, xzr\n"
3649 + : "=r" (mask)
3650 + : "r" (idx), "Ir" (sz)
3651 + : "cc");
3652 +
3653 + csdb();
3654 + return mask;
3655 +}
3656 +
3657 #define __smp_mb() dmb(ish)
3658 #define __smp_rmb() dmb(ishld)
3659 #define __smp_wmb() dmb(ishst)
3660 diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
3661 index 7ddf233f05bd..ce67bf6a0886 100644
3662 --- a/arch/arm64/include/asm/cpucaps.h
3663 +++ b/arch/arm64/include/asm/cpucaps.h
3664 @@ -35,7 +35,8 @@
3665 #define ARM64_HYP_OFFSET_LOW 14
3666 #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
3667 #define ARM64_UNMAP_KERNEL_AT_EL0 16
3668 +#define ARM64_HARDEN_BRANCH_PREDICTOR 17
3669
3670 -#define ARM64_NCAPS 17
3671 +#define ARM64_NCAPS 18
3672
3673 #endif /* __ASM_CPUCAPS_H */
3674 diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
3675 index 1d47930c30dc..9ee3038a6b98 100644
3676 --- a/arch/arm64/include/asm/cputype.h
3677 +++ b/arch/arm64/include/asm/cputype.h
3678 @@ -75,7 +75,10 @@
3679 #define ARM_CPU_PART_AEM_V8 0xD0F
3680 #define ARM_CPU_PART_FOUNDATION 0xD00
3681 #define ARM_CPU_PART_CORTEX_A57 0xD07
3682 +#define ARM_CPU_PART_CORTEX_A72 0xD08
3683 #define ARM_CPU_PART_CORTEX_A53 0xD03
3684 +#define ARM_CPU_PART_CORTEX_A73 0xD09
3685 +#define ARM_CPU_PART_CORTEX_A75 0xD0A
3686
3687 #define APM_CPU_PART_POTENZA 0x000
3688
3689 @@ -87,6 +90,9 @@
3690
3691 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
3692 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
3693 +#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
3694 +#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
3695 +#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
3696 #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
3697 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
3698 #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
3699 diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
3700 index 20dcb196b240..4e5f36a804b4 100644
3701 --- a/arch/arm64/include/asm/futex.h
3702 +++ b/arch/arm64/include/asm/futex.h
3703 @@ -51,13 +51,14 @@
3704 : "memory")
3705
3706 static inline int
3707 -futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
3708 +futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *_uaddr)
3709 {
3710 int op = (encoded_op >> 28) & 7;
3711 int cmp = (encoded_op >> 24) & 15;
3712 int oparg = (int)(encoded_op << 8) >> 20;
3713 int cmparg = (int)(encoded_op << 20) >> 20;
3714 int oldval = 0, ret, tmp;
3715 + u32 __user *uaddr = __uaccess_mask_ptr(_uaddr);
3716
3717 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
3718 oparg = 1U << (oparg & 0x1f);
3719 @@ -109,15 +110,17 @@ futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
3720 }
3721
3722 static inline int
3723 -futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
3724 +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
3725 u32 oldval, u32 newval)
3726 {
3727 int ret = 0;
3728 u32 val, tmp;
3729 + u32 __user *uaddr;
3730
3731 - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
3732 + if (!access_ok(VERIFY_WRITE, _uaddr, sizeof(u32)))
3733 return -EFAULT;
3734
3735 + uaddr = __uaccess_mask_ptr(_uaddr);
3736 asm volatile("// futex_atomic_cmpxchg_inatomic\n"
3737 ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
3738 " prfm pstl1strm, %2\n"
3739 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
3740 index e5050388e062..37d56e85036e 100644
3741 --- a/arch/arm64/include/asm/kvm_host.h
3742 +++ b/arch/arm64/include/asm/kvm_host.h
3743 @@ -393,4 +393,9 @@ static inline void __cpu_init_stage2(void)
3744 "PARange is %d bits, unsupported configuration!", parange);
3745 }
3746
3747 +static inline bool kvm_arm_harden_branch_predictor(void)
3748 +{
3749 + return cpus_have_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
3750 +}
3751 +
3752 #endif /* __ARM64_KVM_HOST_H__ */
3753 diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
3754 index 6d22017ebbad..80bf33715ecb 100644
3755 --- a/arch/arm64/include/asm/kvm_mmu.h
3756 +++ b/arch/arm64/include/asm/kvm_mmu.h
3757 @@ -313,5 +313,43 @@ static inline unsigned int kvm_get_vmid_bits(void)
3758 return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
3759 }
3760
3761 +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
3762 +#include <asm/mmu.h>
3763 +
3764 +static inline void *kvm_get_hyp_vector(void)
3765 +{
3766 + struct bp_hardening_data *data = arm64_get_bp_hardening_data();
3767 + void *vect = kvm_ksym_ref(__kvm_hyp_vector);
3768 +
3769 + if (data->fn) {
3770 + vect = __bp_harden_hyp_vecs_start +
3771 + data->hyp_vectors_slot * SZ_2K;
3772 +
3773 + if (!cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3774 + vect = lm_alias(vect);
3775 + }
3776 +
3777 + return vect;
3778 +}
3779 +
3780 +static inline int kvm_map_vectors(void)
3781 +{
3782 + return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start),
3783 + kvm_ksym_ref(__bp_harden_hyp_vecs_end),
3784 + PAGE_HYP_EXEC);
3785 +}
3786 +
3787 +#else
3788 +static inline void *kvm_get_hyp_vector(void)
3789 +{
3790 + return kvm_ksym_ref(__kvm_hyp_vector);
3791 +}
3792 +
3793 +static inline int kvm_map_vectors(void)
3794 +{
3795 + return 0;
3796 +}
3797 +#endif
3798 +
3799 #endif /* __ASSEMBLY__ */
3800 #endif /* __ARM64_KVM_MMU_H__ */
3801 diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
3802 deleted file mode 100644
3803 index bc39e557c56c..000000000000
3804 --- a/arch/arm64/include/asm/kvm_psci.h
3805 +++ /dev/null
3806 @@ -1,27 +0,0 @@
3807 -/*
3808 - * Copyright (C) 2012,2013 - ARM Ltd
3809 - * Author: Marc Zyngier <marc.zyngier@arm.com>
3810 - *
3811 - * This program is free software; you can redistribute it and/or modify
3812 - * it under the terms of the GNU General Public License version 2 as
3813 - * published by the Free Software Foundation.
3814 - *
3815 - * This program is distributed in the hope that it will be useful,
3816 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
3817 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3818 - * GNU General Public License for more details.
3819 - *
3820 - * You should have received a copy of the GNU General Public License
3821 - * along with this program. If not, see <http://www.gnu.org/licenses/>.
3822 - */
3823 -
3824 -#ifndef __ARM64_KVM_PSCI_H__
3825 -#define __ARM64_KVM_PSCI_H__
3826 -
3827 -#define KVM_ARM_PSCI_0_1 1
3828 -#define KVM_ARM_PSCI_0_2 2
3829 -
3830 -int kvm_psci_version(struct kvm_vcpu *vcpu);
3831 -int kvm_psci_call(struct kvm_vcpu *vcpu);
3832 -
3833 -#endif /* __ARM64_KVM_PSCI_H__ */
3834 diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
3835 index 5e3faba689e0..ba917be5565a 100644
3836 --- a/arch/arm64/include/asm/memory.h
3837 +++ b/arch/arm64/include/asm/memory.h
3838 @@ -60,8 +60,6 @@
3839 * KIMAGE_VADDR - the virtual address of the start of the kernel image
3840 * VA_BITS - the maximum number of bits for virtual addresses.
3841 * VA_START - the first kernel virtual address.
3842 - * TASK_SIZE - the maximum size of a user space task.
3843 - * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
3844 */
3845 #define VA_BITS (CONFIG_ARM64_VA_BITS)
3846 #define VA_START (UL(0xffffffffffffffff) - \
3847 @@ -76,19 +74,6 @@
3848 #define PCI_IO_END (VMEMMAP_START - SZ_2M)
3849 #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
3850 #define FIXADDR_TOP (PCI_IO_START - SZ_2M)
3851 -#define TASK_SIZE_64 (UL(1) << VA_BITS)
3852 -
3853 -#ifdef CONFIG_COMPAT
3854 -#define TASK_SIZE_32 UL(0x100000000)
3855 -#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
3856 - TASK_SIZE_32 : TASK_SIZE_64)
3857 -#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
3858 - TASK_SIZE_32 : TASK_SIZE_64)
3859 -#else
3860 -#define TASK_SIZE TASK_SIZE_64
3861 -#endif /* CONFIG_COMPAT */
3862 -
3863 -#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
3864
3865 #define KERNEL_START _text
3866 #define KERNEL_END _end
3867 diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
3868 index a813edf28737..d51158a61892 100644
3869 --- a/arch/arm64/include/asm/mmu.h
3870 +++ b/arch/arm64/include/asm/mmu.h
3871 @@ -20,6 +20,8 @@
3872
3873 #ifndef __ASSEMBLY__
3874
3875 +#include <linux/percpu.h>
3876 +
3877 typedef struct {
3878 atomic64_t id;
3879 void *vdso;
3880 @@ -38,6 +40,43 @@ static inline bool arm64_kernel_unmapped_at_el0(void)
3881 cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0);
3882 }
3883
3884 +typedef void (*bp_hardening_cb_t)(void);
3885 +
3886 +struct bp_hardening_data {
3887 + int hyp_vectors_slot;
3888 + bp_hardening_cb_t fn;
3889 +};
3890 +
3891 +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
3892 +extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[];
3893 +
3894 +DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
3895 +
3896 +static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
3897 +{
3898 + return this_cpu_ptr(&bp_hardening_data);
3899 +}
3900 +
3901 +static inline void arm64_apply_bp_hardening(void)
3902 +{
3903 + struct bp_hardening_data *d;
3904 +
3905 + if (!cpus_have_cap(ARM64_HARDEN_BRANCH_PREDICTOR))
3906 + return;
3907 +
3908 + d = arm64_get_bp_hardening_data();
3909 + if (d->fn)
3910 + d->fn();
3911 +}
3912 +#else
3913 +static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
3914 +{
3915 + return NULL;
3916 +}
3917 +
3918 +static inline void arm64_apply_bp_hardening(void) { }
3919 +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
3920 +
3921 extern void paging_init(void);
3922 extern void bootmem_init(void);
3923 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
3924 diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
3925 index 60e34824e18c..5917147af0c4 100644
3926 --- a/arch/arm64/include/asm/processor.h
3927 +++ b/arch/arm64/include/asm/processor.h
3928 @@ -19,6 +19,13 @@
3929 #ifndef __ASM_PROCESSOR_H
3930 #define __ASM_PROCESSOR_H
3931
3932 +#define TASK_SIZE_64 (UL(1) << VA_BITS)
3933 +
3934 +#define KERNEL_DS UL(-1)
3935 +#define USER_DS (TASK_SIZE_64 - 1)
3936 +
3937 +#ifndef __ASSEMBLY__
3938 +
3939 /*
3940 * Default implementation of macro that returns current
3941 * instruction pointer ("program counter").
3942 @@ -37,6 +44,22 @@
3943 #include <asm/ptrace.h>
3944 #include <asm/types.h>
3945
3946 +/*
3947 + * TASK_SIZE - the maximum size of a user space task.
3948 + * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
3949 + */
3950 +#ifdef CONFIG_COMPAT
3951 +#define TASK_SIZE_32 UL(0x100000000)
3952 +#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
3953 + TASK_SIZE_32 : TASK_SIZE_64)
3954 +#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
3955 + TASK_SIZE_32 : TASK_SIZE_64)
3956 +#else
3957 +#define TASK_SIZE TASK_SIZE_64
3958 +#endif /* CONFIG_COMPAT */
3959 +
3960 +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
3961 +
3962 #define STACK_TOP_MAX TASK_SIZE_64
3963 #ifdef CONFIG_COMPAT
3964 #define AARCH32_VECTORS_BASE 0xffff0000
3965 @@ -192,4 +215,5 @@ int cpu_enable_pan(void *__unused);
3966 int cpu_enable_uao(void *__unused);
3967 int cpu_enable_cache_maint_trap(void *__unused);
3968
3969 +#endif /* __ASSEMBLY__ */
3970 #endif /* __ASM_PROCESSOR_H */
3971 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
3972 index 7cb7f7cdcfbc..88bbe364b6ae 100644
3973 --- a/arch/arm64/include/asm/sysreg.h
3974 +++ b/arch/arm64/include/asm/sysreg.h
3975 @@ -118,6 +118,8 @@
3976
3977 /* id_aa64pfr0 */
3978 #define ID_AA64PFR0_CSV3_SHIFT 60
3979 +#define ID_AA64PFR0_CSV2_SHIFT 56
3980 +#define ID_AA64PFR0_SVE_SHIFT 32
3981 #define ID_AA64PFR0_GIC_SHIFT 24
3982 #define ID_AA64PFR0_ASIMD_SHIFT 20
3983 #define ID_AA64PFR0_FP_SHIFT 16
3984 diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
3985 index 811cf16a65f9..1d047d6c421b 100644
3986 --- a/arch/arm64/include/asm/uaccess.h
3987 +++ b/arch/arm64/include/asm/uaccess.h
3988 @@ -28,6 +28,7 @@
3989
3990 #include <asm/alternative.h>
3991 #include <asm/cpufeature.h>
3992 +#include <asm/processor.h>
3993 #include <asm/ptrace.h>
3994 #include <asm/sysreg.h>
3995 #include <asm/errno.h>
3996 @@ -59,16 +60,20 @@ struct exception_table_entry
3997
3998 extern int fixup_exception(struct pt_regs *regs);
3999
4000 -#define KERNEL_DS (-1UL)
4001 #define get_ds() (KERNEL_DS)
4002 -
4003 -#define USER_DS TASK_SIZE_64
4004 #define get_fs() (current_thread_info()->addr_limit)
4005
4006 static inline void set_fs(mm_segment_t fs)
4007 {
4008 current_thread_info()->addr_limit = fs;
4009
4010 + /*
4011 + * Prevent a mispredicted conditional call to set_fs from forwarding
4012 + * the wrong address limit to access_ok under speculation.
4013 + */
4014 + dsb(nsh);
4015 + isb();
4016 +
4017 /*
4018 * Enable/disable UAO so that copy_to_user() etc can access
4019 * kernel memory with the unprivileged instructions.
4020 @@ -87,22 +92,32 @@ static inline void set_fs(mm_segment_t fs)
4021 * Returns 1 if the range is valid, 0 otherwise.
4022 *
4023 * This is equivalent to the following test:
4024 - * (u65)addr + (u65)size <= current->addr_limit
4025 - *
4026 - * This needs 65-bit arithmetic.
4027 + * (u65)addr + (u65)size <= (u65)current->addr_limit + 1
4028 */
4029 -#define __range_ok(addr, size) \
4030 -({ \
4031 - unsigned long __addr = (unsigned long __force)(addr); \
4032 - unsigned long flag, roksum; \
4033 - __chk_user_ptr(addr); \
4034 - asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \
4035 - : "=&r" (flag), "=&r" (roksum) \
4036 - : "1" (__addr), "Ir" (size), \
4037 - "r" (current_thread_info()->addr_limit) \
4038 - : "cc"); \
4039 - flag; \
4040 -})
4041 +static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
4042 +{
4043 + unsigned long limit = current_thread_info()->addr_limit;
4044 +
4045 + __chk_user_ptr(addr);
4046 + asm volatile(
4047 + // A + B <= C + 1 for all A,B,C, in four easy steps:
4048 + // 1: X = A + B; X' = X % 2^64
4049 + " adds %0, %0, %2\n"
4050 + // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4
4051 + " csel %1, xzr, %1, hi\n"
4052 + // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X'
4053 + // to compensate for the carry flag being set in step 4. For
4054 + // X > 2^64, X' merely has to remain nonzero, which it does.
4055 + " csinv %0, %0, xzr, cc\n"
4056 + // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1
4057 + // comes from the carry in being clear. Otherwise, we are
4058 + // testing X' - C == 0, subject to the previous adjustments.
4059 + " sbcs xzr, %0, %1\n"
4060 + " cset %0, ls\n"
4061 + : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc");
4062 +
4063 + return addr;
4064 +}
4065
4066 /*
4067 * When dealing with data aborts, watchpoints, or instruction traps we may end
4068 @@ -111,7 +126,7 @@ static inline void set_fs(mm_segment_t fs)
4069 */
4070 #define untagged_addr(addr) sign_extend64(addr, 55)
4071
4072 -#define access_ok(type, addr, size) __range_ok(addr, size)
4073 +#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size)
4074 #define user_addr_max get_fs
4075
4076 #define _ASM_EXTABLE(from, to) \
4077 @@ -120,6 +135,26 @@ static inline void set_fs(mm_segment_t fs)
4078 " .long (" #from " - .), (" #to " - .)\n" \
4079 " .popsection\n"
4080
4081 +/*
4082 + * Sanitise a uaccess pointer such that it becomes NULL if above the
4083 + * current addr_limit.
4084 + */
4085 +#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
4086 +static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
4087 +{
4088 + void __user *safe_ptr;
4089 +
4090 + asm volatile(
4091 + " bics xzr, %1, %2\n"
4092 + " csel %0, %1, xzr, eq\n"
4093 + : "=&r" (safe_ptr)
4094 + : "r" (ptr), "r" (current_thread_info()->addr_limit)
4095 + : "cc");
4096 +
4097 + csdb();
4098 + return safe_ptr;
4099 +}
4100 +
4101 /*
4102 * The "__xxx" versions of the user access functions do not verify the address
4103 * space - it must have been done previously with a separate "access_ok()"
4104 @@ -174,30 +209,35 @@ do { \
4105 CONFIG_ARM64_PAN)); \
4106 } while (0)
4107
4108 -#define __get_user(x, ptr) \
4109 +#define __get_user_check(x, ptr, err) \
4110 ({ \
4111 - int __gu_err = 0; \
4112 - __get_user_err((x), (ptr), __gu_err); \
4113 - __gu_err; \
4114 + __typeof__(*(ptr)) __user *__p = (ptr); \
4115 + might_fault(); \
4116 + if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \
4117 + __p = uaccess_mask_ptr(__p); \
4118 + __get_user_err((x), __p, (err)); \
4119 + } else { \
4120 + (x) = 0; (err) = -EFAULT; \
4121 + } \
4122 })
4123
4124 #define __get_user_error(x, ptr, err) \
4125 ({ \
4126 - __get_user_err((x), (ptr), (err)); \
4127 + __get_user_check((x), (ptr), (err)); \
4128 (void)0; \
4129 })
4130
4131 -#define __get_user_unaligned __get_user
4132 -
4133 -#define get_user(x, ptr) \
4134 +#define __get_user(x, ptr) \
4135 ({ \
4136 - __typeof__(*(ptr)) __user *__p = (ptr); \
4137 - might_fault(); \
4138 - access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \
4139 - __get_user((x), __p) : \
4140 - ((x) = 0, -EFAULT); \
4141 + int __gu_err = 0; \
4142 + __get_user_check((x), (ptr), __gu_err); \
4143 + __gu_err; \
4144 })
4145
4146 +#define __get_user_unaligned __get_user
4147 +
4148 +#define get_user __get_user
4149 +
4150 #define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
4151 asm volatile( \
4152 "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
4153 @@ -242,47 +282,51 @@ do { \
4154 CONFIG_ARM64_PAN)); \
4155 } while (0)
4156
4157 -#define __put_user(x, ptr) \
4158 +#define __put_user_check(x, ptr, err) \
4159 ({ \
4160 - int __pu_err = 0; \
4161 - __put_user_err((x), (ptr), __pu_err); \
4162 - __pu_err; \
4163 + __typeof__(*(ptr)) __user *__p = (ptr); \
4164 + might_fault(); \
4165 + if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \
4166 + __p = uaccess_mask_ptr(__p); \
4167 + __put_user_err((x), __p, (err)); \
4168 + } else { \
4169 + (err) = -EFAULT; \
4170 + } \
4171 })
4172
4173 #define __put_user_error(x, ptr, err) \
4174 ({ \
4175 - __put_user_err((x), (ptr), (err)); \
4176 + __put_user_check((x), (ptr), (err)); \
4177 (void)0; \
4178 })
4179
4180 -#define __put_user_unaligned __put_user
4181 -
4182 -#define put_user(x, ptr) \
4183 +#define __put_user(x, ptr) \
4184 ({ \
4185 - __typeof__(*(ptr)) __user *__p = (ptr); \
4186 - might_fault(); \
4187 - access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? \
4188 - __put_user((x), __p) : \
4189 - -EFAULT; \
4190 + int __pu_err = 0; \
4191 + __put_user_check((x), (ptr), __pu_err); \
4192 + __pu_err; \
4193 })
4194
4195 +#define __put_user_unaligned __put_user
4196 +
4197 +#define put_user __put_user
4198 +
4199 extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
4200 extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n);
4201 -extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n);
4202 -extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n);
4203 +extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n);
4204
4205 static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n)
4206 {
4207 kasan_check_write(to, n);
4208 check_object_size(to, n, false);
4209 - return __arch_copy_from_user(to, from, n);
4210 + return __arch_copy_from_user(to, __uaccess_mask_ptr(from), n);
4211 }
4212
4213 static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n)
4214 {
4215 kasan_check_read(from, n);
4216 check_object_size(from, n, true);
4217 - return __arch_copy_to_user(to, from, n);
4218 + return __arch_copy_to_user(__uaccess_mask_ptr(to), from, n);
4219 }
4220
4221 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
4222 @@ -310,22 +354,25 @@ static inline unsigned long __must_check copy_to_user(void __user *to, const voi
4223 return n;
4224 }
4225
4226 -static inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n)
4227 +static inline unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n)
4228 {
4229 if (access_ok(VERIFY_READ, from, n) && access_ok(VERIFY_WRITE, to, n))
4230 - n = __copy_in_user(to, from, n);
4231 + n = __arch_copy_in_user(__uaccess_mask_ptr(to), __uaccess_mask_ptr(from), n);
4232 return n;
4233 }
4234 +#define copy_in_user __copy_in_user
4235
4236 #define __copy_to_user_inatomic __copy_to_user
4237 #define __copy_from_user_inatomic __copy_from_user
4238
4239 -static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
4240 +extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n);
4241 +static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n)
4242 {
4243 if (access_ok(VERIFY_WRITE, to, n))
4244 - n = __clear_user(to, n);
4245 + n = __arch_clear_user(__uaccess_mask_ptr(to), n);
4246 return n;
4247 }
4248 +#define clear_user __clear_user
4249
4250 extern long strncpy_from_user(char *dest, const char __user *src, long count);
4251
4252 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
4253 index 7d66bbaafc0c..74b8fd860714 100644
4254 --- a/arch/arm64/kernel/Makefile
4255 +++ b/arch/arm64/kernel/Makefile
4256 @@ -51,6 +51,10 @@ arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
4257 arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
4258 cpu-reset.o
4259
4260 +ifeq ($(CONFIG_KVM),y)
4261 +arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o
4262 +endif
4263 +
4264 obj-y += $(arm64-obj-y) vdso/ probes/
4265 obj-m += $(arm64-obj-m)
4266 head-y := head.o
4267 diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
4268 index e9c4dc9e0ada..66be504edb6c 100644
4269 --- a/arch/arm64/kernel/arm64ksyms.c
4270 +++ b/arch/arm64/kernel/arm64ksyms.c
4271 @@ -37,8 +37,8 @@ EXPORT_SYMBOL(clear_page);
4272 /* user mem (segment) */
4273 EXPORT_SYMBOL(__arch_copy_from_user);
4274 EXPORT_SYMBOL(__arch_copy_to_user);
4275 -EXPORT_SYMBOL(__clear_user);
4276 -EXPORT_SYMBOL(__copy_in_user);
4277 +EXPORT_SYMBOL(__arch_clear_user);
4278 +EXPORT_SYMBOL(__arch_copy_in_user);
4279
4280 /* physical memory */
4281 EXPORT_SYMBOL(memstart_addr);
4282 diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
4283 new file mode 100644
4284 index 000000000000..dc4eb154e33b
4285 --- /dev/null
4286 +++ b/arch/arm64/kernel/bpi.S
4287 @@ -0,0 +1,75 @@
4288 +/*
4289 + * Contains CPU specific branch predictor invalidation sequences
4290 + *
4291 + * Copyright (C) 2018 ARM Ltd.
4292 + *
4293 + * This program is free software; you can redistribute it and/or modify
4294 + * it under the terms of the GNU General Public License version 2 as
4295 + * published by the Free Software Foundation.
4296 + *
4297 + * This program is distributed in the hope that it will be useful,
4298 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4299 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4300 + * GNU General Public License for more details.
4301 + *
4302 + * You should have received a copy of the GNU General Public License
4303 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
4304 + */
4305 +
4306 +#include <linux/linkage.h>
4307 +#include <linux/arm-smccc.h>
4308 +
4309 +.macro ventry target
4310 + .rept 31
4311 + nop
4312 + .endr
4313 + b \target
4314 +.endm
4315 +
4316 +.macro vectors target
4317 + ventry \target + 0x000
4318 + ventry \target + 0x080
4319 + ventry \target + 0x100
4320 + ventry \target + 0x180
4321 +
4322 + ventry \target + 0x200
4323 + ventry \target + 0x280
4324 + ventry \target + 0x300
4325 + ventry \target + 0x380
4326 +
4327 + ventry \target + 0x400
4328 + ventry \target + 0x480
4329 + ventry \target + 0x500
4330 + ventry \target + 0x580
4331 +
4332 + ventry \target + 0x600
4333 + ventry \target + 0x680
4334 + ventry \target + 0x700
4335 + ventry \target + 0x780
4336 +.endm
4337 +
4338 + .align 11
4339 +ENTRY(__bp_harden_hyp_vecs_start)
4340 + .rept 4
4341 + vectors __kvm_hyp_vector
4342 + .endr
4343 +ENTRY(__bp_harden_hyp_vecs_end)
4344 +
4345 +.macro smccc_workaround_1 inst
4346 + sub sp, sp, #(8 * 4)
4347 + stp x2, x3, [sp, #(8 * 0)]
4348 + stp x0, x1, [sp, #(8 * 2)]
4349 + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
4350 + \inst #0
4351 + ldp x2, x3, [sp, #(8 * 0)]
4352 + ldp x0, x1, [sp, #(8 * 2)]
4353 + add sp, sp, #(8 * 4)
4354 +.endm
4355 +
4356 +ENTRY(__smccc_workaround_1_smc_start)
4357 + smccc_workaround_1 smc
4358 +ENTRY(__smccc_workaround_1_smc_end)
4359 +
4360 +ENTRY(__smccc_workaround_1_hvc_start)
4361 + smccc_workaround_1 hvc
4362 +ENTRY(__smccc_workaround_1_hvc_end)
4363 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
4364 index b75e917aac46..74107134cc30 100644
4365 --- a/arch/arm64/kernel/cpu_errata.c
4366 +++ b/arch/arm64/kernel/cpu_errata.c
4367 @@ -46,6 +46,147 @@ static int cpu_enable_trap_ctr_access(void *__unused)
4368 return 0;
4369 }
4370
4371 +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
4372 +#include <asm/mmu_context.h>
4373 +#include <asm/cacheflush.h>
4374 +
4375 +DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
4376 +
4377 +#ifdef CONFIG_KVM
4378 +extern char __smccc_workaround_1_smc_start[];
4379 +extern char __smccc_workaround_1_smc_end[];
4380 +extern char __smccc_workaround_1_hvc_start[];
4381 +extern char __smccc_workaround_1_hvc_end[];
4382 +
4383 +static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
4384 + const char *hyp_vecs_end)
4385 +{
4386 + void *dst = __bp_harden_hyp_vecs_start + slot * SZ_2K;
4387 + int i;
4388 +
4389 + for (i = 0; i < SZ_2K; i += 0x80)
4390 + memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
4391 +
4392 + flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
4393 +}
4394 +
4395 +static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
4396 + const char *hyp_vecs_start,
4397 + const char *hyp_vecs_end)
4398 +{
4399 + static int last_slot = -1;
4400 + static DEFINE_SPINLOCK(bp_lock);
4401 + int cpu, slot = -1;
4402 +
4403 + spin_lock(&bp_lock);
4404 + for_each_possible_cpu(cpu) {
4405 + if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
4406 + slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
4407 + break;
4408 + }
4409 + }
4410 +
4411 + if (slot == -1) {
4412 + last_slot++;
4413 + BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start)
4414 + / SZ_2K) <= last_slot);
4415 + slot = last_slot;
4416 + __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
4417 + }
4418 +
4419 + __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
4420 + __this_cpu_write(bp_hardening_data.fn, fn);
4421 + spin_unlock(&bp_lock);
4422 +}
4423 +#else
4424 +#define __smccc_workaround_1_smc_start NULL
4425 +#define __smccc_workaround_1_smc_end NULL
4426 +#define __smccc_workaround_1_hvc_start NULL
4427 +#define __smccc_workaround_1_hvc_end NULL
4428 +
4429 +static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
4430 + const char *hyp_vecs_start,
4431 + const char *hyp_vecs_end)
4432 +{
4433 + __this_cpu_write(bp_hardening_data.fn, fn);
4434 +}
4435 +#endif /* CONFIG_KVM */
4436 +
4437 +static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry,
4438 + bp_hardening_cb_t fn,
4439 + const char *hyp_vecs_start,
4440 + const char *hyp_vecs_end)
4441 +{
4442 + u64 pfr0;
4443 +
4444 + if (!entry->matches(entry, SCOPE_LOCAL_CPU))
4445 + return;
4446 +
4447 + pfr0 = read_cpuid(ID_AA64PFR0_EL1);
4448 + if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT))
4449 + return;
4450 +
4451 + __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end);
4452 +}
4453 +
4454 +#include <uapi/linux/psci.h>
4455 +#include <linux/arm-smccc.h>
4456 +#include <linux/psci.h>
4457 +
4458 +static void call_smc_arch_workaround_1(void)
4459 +{
4460 + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
4461 +}
4462 +
4463 +static void call_hvc_arch_workaround_1(void)
4464 +{
4465 + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
4466 +}
4467 +
4468 +static int enable_smccc_arch_workaround_1(void *data)
4469 +{
4470 + const struct arm64_cpu_capabilities *entry = data;
4471 + bp_hardening_cb_t cb;
4472 + void *smccc_start, *smccc_end;
4473 + struct arm_smccc_res res;
4474 +
4475 + if (!entry->matches(entry, SCOPE_LOCAL_CPU))
4476 + return 0;
4477 +
4478 + if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
4479 + return 0;
4480 +
4481 + switch (psci_ops.conduit) {
4482 + case PSCI_CONDUIT_HVC:
4483 + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
4484 + ARM_SMCCC_ARCH_WORKAROUND_1, &res);
4485 + if (res.a0)
4486 + return 0;
4487 + cb = call_hvc_arch_workaround_1;
4488 + smccc_start = __smccc_workaround_1_hvc_start;
4489 + smccc_end = __smccc_workaround_1_hvc_end;
4490 + break;
4491 +
4492 + case PSCI_CONDUIT_SMC:
4493 + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
4494 + ARM_SMCCC_ARCH_WORKAROUND_1, &res);
4495 + if (res.a0)
4496 + return 0;
4497 + cb = call_smc_arch_workaround_1;
4498 + smccc_start = __smccc_workaround_1_smc_start;
4499 + smccc_end = __smccc_workaround_1_smc_end;
4500 + break;
4501 +
4502 + default:
4503 + return 0;
4504 + }
4505 +
4506 + install_bp_hardening_cb(entry, cb, smccc_start, smccc_end);
4507 +
4508 + return 0;
4509 +}
4510 +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
4511 +
4512 #define MIDR_RANGE(model, min, max) \
4513 .def_scope = SCOPE_LOCAL_CPU, \
4514 .matches = is_affected_midr_range, \
4515 @@ -53,6 +194,13 @@ static int cpu_enable_trap_ctr_access(void *__unused)
4516 .midr_range_min = min, \
4517 .midr_range_max = max
4518
4519 +#define MIDR_ALL_VERSIONS(model) \
4520 + .def_scope = SCOPE_LOCAL_CPU, \
4521 + .matches = is_affected_midr_range, \
4522 + .midr_model = model, \
4523 + .midr_range_min = 0, \
4524 + .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK)
4525 +
4526 const struct arm64_cpu_capabilities arm64_errata[] = {
4527 #if defined(CONFIG_ARM64_ERRATUM_826319) || \
4528 defined(CONFIG_ARM64_ERRATUM_827319) || \
4529 @@ -130,6 +278,38 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
4530 .def_scope = SCOPE_LOCAL_CPU,
4531 .enable = cpu_enable_trap_ctr_access,
4532 },
4533 +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
4534 + {
4535 + .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
4536 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
4537 + .enable = enable_smccc_arch_workaround_1,
4538 + },
4539 + {
4540 + .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
4541 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
4542 + .enable = enable_smccc_arch_workaround_1,
4543 + },
4544 + {
4545 + .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
4546 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
4547 + .enable = enable_smccc_arch_workaround_1,
4548 + },
4549 + {
4550 + .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
4551 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
4552 + .enable = enable_smccc_arch_workaround_1,
4553 + },
4554 + {
4555 + .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
4556 + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
4557 + .enable = enable_smccc_arch_workaround_1,
4558 + },
4559 + {
4560 + .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
4561 + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
4562 + .enable = enable_smccc_arch_workaround_1,
4563 + },
4564 +#endif
4565 {
4566 }
4567 };
4568 @@ -143,15 +323,18 @@ void verify_local_cpu_errata_workarounds(void)
4569 {
4570 const struct arm64_cpu_capabilities *caps = arm64_errata;
4571
4572 - for (; caps->matches; caps++)
4573 - if (!cpus_have_cap(caps->capability) &&
4574 - caps->matches(caps, SCOPE_LOCAL_CPU)) {
4575 + for (; caps->matches; caps++) {
4576 + if (cpus_have_cap(caps->capability)) {
4577 + if (caps->enable)
4578 + caps->enable((void *)caps);
4579 + } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) {
4580 pr_crit("CPU%d: Requires work around for %s, not detected"
4581 " at boot time\n",
4582 smp_processor_id(),
4583 caps->desc ? : "an erratum");
4584 cpu_die_early();
4585 }
4586 + }
4587 }
4588
4589 void update_cpu_errata_workarounds(void)
4590 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
4591 index 5056fc597ae9..a0ee01202503 100644
4592 --- a/arch/arm64/kernel/cpufeature.c
4593 +++ b/arch/arm64/kernel/cpufeature.c
4594 @@ -94,7 +94,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
4595
4596 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
4597 ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
4598 - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 28, 0),
4599 + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
4600 + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 24, 0),
4601 ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
4602 ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
4603 S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
4604 @@ -1024,9 +1025,8 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array,
4605 if (WARN_ON(preemptible()))
4606 return false;
4607
4608 - for (caps = cap_array; caps->desc; caps++)
4609 + for (caps = cap_array; caps->matches; caps++)
4610 if (caps->capability == cap &&
4611 - caps->matches &&
4612 caps->matches(caps, SCOPE_LOCAL_CPU))
4613 return true;
4614 return false;
4615 @@ -1059,7 +1059,7 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
4616 * uses an IPI, giving us a PSTATE that disappears when
4617 * we return.
4618 */
4619 - stop_machine(caps->enable, NULL, cpu_online_mask);
4620 + stop_machine(caps->enable, (void *)caps, cpu_online_mask);
4621 }
4622
4623 /*
4624 @@ -1116,7 +1116,7 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list)
4625 cpu_die_early();
4626 }
4627 if (caps->enable)
4628 - caps->enable(NULL);
4629 + caps->enable((void *)caps);
4630 }
4631 }
4632
4633 diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
4634 index 8d1600b18562..b79e302d2a3e 100644
4635 --- a/arch/arm64/kernel/entry.S
4636 +++ b/arch/arm64/kernel/entry.S
4637 @@ -30,6 +30,7 @@
4638 #include <asm/irq.h>
4639 #include <asm/memory.h>
4640 #include <asm/mmu.h>
4641 +#include <asm/processor.h>
4642 #include <asm/thread_info.h>
4643 #include <asm/asm-uaccess.h>
4644 #include <asm/unistd.h>
4645 @@ -125,10 +126,10 @@ alternative_else_nop_endif
4646 .else
4647 add x21, sp, #S_FRAME_SIZE
4648 get_thread_info tsk
4649 - /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
4650 + /* Save the task's original addr_limit and set USER_DS */
4651 ldr x20, [tsk, #TI_ADDR_LIMIT]
4652 str x20, [sp, #S_ORIG_ADDR_LIMIT]
4653 - mov x20, #TASK_SIZE_64
4654 + mov x20, #USER_DS
4655 str x20, [tsk, #TI_ADDR_LIMIT]
4656 /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
4657 .endif /* \el == 0 */
4658 @@ -588,13 +589,15 @@ el0_ia:
4659 * Instruction abort handling
4660 */
4661 mrs x26, far_el1
4662 - // enable interrupts before calling the main handler
4663 - enable_dbg_and_irq
4664 + msr daifclr, #(8 | 4 | 1)
4665 +#ifdef CONFIG_TRACE_IRQFLAGS
4666 + bl trace_hardirqs_off
4667 +#endif
4668 ct_user_exit
4669 mov x0, x26
4670 mov x1, x25
4671 mov x2, sp
4672 - bl do_mem_abort
4673 + bl do_el0_ia_bp_hardening
4674 b ret_to_user
4675 el0_fpsimd_acc:
4676 /*
4677 @@ -621,8 +624,10 @@ el0_sp_pc:
4678 * Stack or PC alignment exception handling
4679 */
4680 mrs x26, far_el1
4681 - // enable interrupts before calling the main handler
4682 - enable_dbg_and_irq
4683 + enable_dbg
4684 +#ifdef CONFIG_TRACE_IRQFLAGS
4685 + bl trace_hardirqs_off
4686 +#endif
4687 ct_user_exit
4688 mov x0, x26
4689 mov x1, x25
4690 @@ -681,6 +686,11 @@ el0_irq_naked:
4691 #endif
4692
4693 ct_user_exit
4694 +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
4695 + tbz x22, #55, 1f
4696 + bl do_el0_irq_bp_hardening
4697 +1:
4698 +#endif
4699 irq_handler
4700
4701 #ifdef CONFIG_TRACE_IRQFLAGS
4702 @@ -794,6 +804,7 @@ el0_svc_naked: // compat entry point
4703 b.ne __sys_trace
4704 cmp scno, sc_nr // check upper syscall limit
4705 b.hs ni_sys
4706 + mask_nospec64 scno, sc_nr, x19 // enforce bounds for syscall number
4707 ldr x16, [stbl, scno, lsl #3] // address in the syscall table
4708 blr x16 // call sys_* routine
4709 b ret_fast_syscall
4710 diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
4711 index 2e6e9e99977b..efe43c5f2dc1 100644
4712 --- a/arch/arm64/kvm/handle_exit.c
4713 +++ b/arch/arm64/kvm/handle_exit.c
4714 @@ -22,12 +22,15 @@
4715 #include <linux/kvm.h>
4716 #include <linux/kvm_host.h>
4717
4718 +#include <kvm/arm_psci.h>
4719 +
4720 #include <asm/esr.h>
4721 #include <asm/kvm_asm.h>
4722 #include <asm/kvm_coproc.h>
4723 #include <asm/kvm_emulate.h>
4724 #include <asm/kvm_mmu.h>
4725 -#include <asm/kvm_psci.h>
4726 +#include <asm/debug-monitors.h>
4727 +#include <asm/traps.h>
4728
4729 #define CREATE_TRACE_POINTS
4730 #include "trace.h"
4731 @@ -42,7 +45,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
4732 kvm_vcpu_hvc_get_imm(vcpu));
4733 vcpu->stat.hvc_exit_stat++;
4734
4735 - ret = kvm_psci_call(vcpu);
4736 + ret = kvm_hvc_call_handler(vcpu);
4737 if (ret < 0) {
4738 vcpu_set_reg(vcpu, 0, ~0UL);
4739 return 1;
4740 @@ -53,7 +56,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
4741
4742 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
4743 {
4744 + /*
4745 + * "If an SMC instruction executed at Non-secure EL1 is
4746 + * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
4747 + * Trap exception, not a Secure Monitor Call exception [...]"
4748 + *
4749 + * We need to advance the PC after the trap, as it would
4750 + * otherwise return to the same address...
4751 + */
4752 vcpu_set_reg(vcpu, 0, ~0UL);
4753 + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
4754 return 1;
4755 }
4756
4757 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
4758 index 4e92399f7105..4e9d50c3e658 100644
4759 --- a/arch/arm64/kvm/hyp/hyp-entry.S
4760 +++ b/arch/arm64/kvm/hyp/hyp-entry.S
4761 @@ -15,6 +15,7 @@
4762 * along with this program. If not, see <http://www.gnu.org/licenses/>.
4763 */
4764
4765 +#include <linux/arm-smccc.h>
4766 #include <linux/linkage.h>
4767
4768 #include <asm/alternative.h>
4769 @@ -79,10 +80,11 @@ alternative_endif
4770 lsr x0, x1, #ESR_ELx_EC_SHIFT
4771
4772 cmp x0, #ESR_ELx_EC_HVC64
4773 + ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
4774 b.ne el1_trap
4775
4776 - mrs x1, vttbr_el2 // If vttbr is valid, the 64bit guest
4777 - cbnz x1, el1_trap // called HVC
4778 + mrs x1, vttbr_el2 // If vttbr is valid, the guest
4779 + cbnz x1, el1_hvc_guest // called HVC
4780
4781 /* Here, we're pretty sure the host called HVC. */
4782 ldp x0, x1, [sp], #16
4783 @@ -101,6 +103,20 @@ alternative_endif
4784
4785 2: eret
4786
4787 +el1_hvc_guest:
4788 + /*
4789 + * Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1.
4790 + * The workaround has already been applied on the host,
4791 + * so let's quickly get back to the guest. We don't bother
4792 + * restoring x1, as it can be clobbered anyway.
4793 + */
4794 + ldr x1, [sp] // Guest's x0
4795 + eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1
4796 + cbnz w1, el1_trap
4797 + mov x0, x1
4798 + add sp, sp, #16
4799 + eret
4800 +
4801 el1_trap:
4802 /*
4803 * x0: ESR_EC
4804 diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
4805 index 9174ba917d65..c49d09387192 100644
4806 --- a/arch/arm64/kvm/hyp/switch.c
4807 +++ b/arch/arm64/kvm/hyp/switch.c
4808 @@ -17,6 +17,9 @@
4809
4810 #include <linux/types.h>
4811 #include <linux/jump_label.h>
4812 +#include <uapi/linux/psci.h>
4813 +
4814 +#include <kvm/arm_psci.h>
4815
4816 #include <asm/kvm_asm.h>
4817 #include <asm/kvm_emulate.h>
4818 @@ -50,7 +53,7 @@ static void __hyp_text __activate_traps_vhe(void)
4819 val &= ~CPACR_EL1_FPEN;
4820 write_sysreg(val, cpacr_el1);
4821
4822 - write_sysreg(__kvm_hyp_vector, vbar_el1);
4823 + write_sysreg(kvm_get_hyp_vector(), vbar_el1);
4824 }
4825
4826 static void __hyp_text __activate_traps_nvhe(void)
4827 diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
4828 index 5d1cad3ce6d6..efbf610eaf4e 100644
4829 --- a/arch/arm64/lib/clear_user.S
4830 +++ b/arch/arm64/lib/clear_user.S
4831 @@ -24,7 +24,7 @@
4832
4833 .text
4834
4835 -/* Prototype: int __clear_user(void *addr, size_t sz)
4836 +/* Prototype: int __arch_clear_user(void *addr, size_t sz)
4837 * Purpose : clear some user memory
4838 * Params : addr - user memory address to clear
4839 * : sz - number of bytes to clear
4840 @@ -32,7 +32,7 @@
4841 *
4842 * Alignment fixed up by hardware.
4843 */
4844 -ENTRY(__clear_user)
4845 +ENTRY(__arch_clear_user)
4846 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
4847 CONFIG_ARM64_PAN)
4848 mov x2, x1 // save the size for fixup return
4849 @@ -57,7 +57,7 @@ uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
4850 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
4851 CONFIG_ARM64_PAN)
4852 ret
4853 -ENDPROC(__clear_user)
4854 +ENDPROC(__arch_clear_user)
4855
4856 .section .fixup,"ax"
4857 .align 2
4858 diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
4859 index f7292dd08c84..841bf8f7fab7 100644
4860 --- a/arch/arm64/lib/copy_in_user.S
4861 +++ b/arch/arm64/lib/copy_in_user.S
4862 @@ -67,7 +67,7 @@
4863 .endm
4864
4865 end .req x5
4866 -ENTRY(__copy_in_user)
4867 +ENTRY(__arch_copy_in_user)
4868 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
4869 CONFIG_ARM64_PAN)
4870 add end, x0, x2
4871 @@ -76,7 +76,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
4872 CONFIG_ARM64_PAN)
4873 mov x0, #0
4874 ret
4875 -ENDPROC(__copy_in_user)
4876 +ENDPROC(__arch_copy_in_user)
4877
4878 .section .fixup,"ax"
4879 .align 2
4880 diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
4881 index f00f5eeb556f..62d976e843fc 100644
4882 --- a/arch/arm64/mm/context.c
4883 +++ b/arch/arm64/mm/context.c
4884 @@ -230,9 +230,21 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
4885 raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
4886
4887 switch_mm_fastpath:
4888 +
4889 + arm64_apply_bp_hardening();
4890 +
4891 cpu_switch_mm(mm->pgd, mm);
4892 }
4893
4894 +/* Errata workaround post TTBRx_EL1 update. */
4895 +asmlinkage void post_ttbr_update_workaround(void)
4896 +{
4897 + asm(ALTERNATIVE("nop; nop; nop",
4898 + "ic iallu; dsb nsh; isb",
4899 + ARM64_WORKAROUND_CAVIUM_27456,
4900 + CONFIG_CAVIUM_ERRATUM_27456));
4901 +}
4902 +
4903 static int asids_init(void)
4904 {
4905 asid_bits = get_cpu_asid_bits();
4906 diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
4907 index 403fe9e57135..ad49ae8f3967 100644
4908 --- a/arch/arm64/mm/fault.c
4909 +++ b/arch/arm64/mm/fault.c
4910 @@ -332,7 +332,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
4911 mm_flags |= FAULT_FLAG_WRITE;
4912 }
4913
4914 - if (is_permission_fault(esr) && (addr < USER_DS)) {
4915 + if (is_permission_fault(esr) && (addr < TASK_SIZE)) {
4916 /* regs->orig_addr_limit may be 0 if we entered from EL0 */
4917 if (regs->orig_addr_limit == KERNEL_DS)
4918 die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
4919 @@ -590,6 +590,29 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
4920 arm64_notify_die("", regs, &info, esr);
4921 }
4922
4923 +asmlinkage void __exception do_el0_irq_bp_hardening(void)
4924 +{
4925 + /* PC has already been checked in entry.S */
4926 + arm64_apply_bp_hardening();
4927 +}
4928 +
4929 +asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
4930 + unsigned int esr,
4931 + struct pt_regs *regs)
4932 +{
4933 + /*
4934 + * We've taken an instruction abort from userspace and not yet
4935 + * re-enabled IRQs. If the address is a kernel address, apply
4936 + * BP hardening prior to enabling IRQs and pre-emption.
4937 + */
4938 + if (addr > TASK_SIZE)
4939 + arm64_apply_bp_hardening();
4940 +
4941 + local_irq_enable();
4942 + do_mem_abort(addr, esr, regs);
4943 +}
4944 +
4945 +
4946 /*
4947 * Handle stack alignment exceptions.
4948 */
4949 @@ -600,6 +623,12 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
4950 struct siginfo info;
4951 struct task_struct *tsk = current;
4952
4953 + if (user_mode(regs)) {
4954 + if (instruction_pointer(regs) > TASK_SIZE)
4955 + arm64_apply_bp_hardening();
4956 + local_irq_enable();
4957 + }
4958 +
4959 if (show_unhandled_signals && unhandled_signal(tsk, SIGBUS))
4960 pr_info_ratelimited("%s[%d]: %s exception: pc=%p sp=%p\n",
4961 tsk->comm, task_pid_nr(tsk),
4962 @@ -659,6 +688,9 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
4963 if (interrupts_enabled(regs))
4964 trace_hardirqs_off();
4965
4966 + if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE)
4967 + arm64_apply_bp_hardening();
4968 +
4969 if (!inf->fn(addr, esr, regs)) {
4970 rv = 1;
4971 } else {
4972 diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
4973 index c07d9cc057e6..619da1cbd32b 100644
4974 --- a/arch/arm64/mm/proc.S
4975 +++ b/arch/arm64/mm/proc.S
4976 @@ -139,12 +139,7 @@ ENTRY(cpu_do_switch_mm)
4977 isb
4978 msr ttbr0_el1, x0 // now update TTBR0
4979 isb
4980 -alternative_if ARM64_WORKAROUND_CAVIUM_27456
4981 - ic iallu
4982 - dsb nsh
4983 - isb
4984 -alternative_else_nop_endif
4985 - ret
4986 + b post_ttbr_update_workaround // Back to C code...
4987 ENDPROC(cpu_do_switch_mm)
4988
4989 .pushsection ".idmap.text", "awx"
4990 diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
4991 index 700e2d2da096..2e68ca1fe0db 100644
4992 --- a/arch/parisc/kernel/drivers.c
4993 +++ b/arch/parisc/kernel/drivers.c
4994 @@ -648,6 +648,10 @@ static int match_pci_device(struct device *dev, int index,
4995 (modpath->mod == PCI_FUNC(devfn)));
4996 }
4997
4998 + /* index might be out of bounds for bc[] */
4999 + if (index >= 6)
5000 + return 0;
5001 +
5002 id = PCI_SLOT(pdev->devfn) | (PCI_FUNC(pdev->devfn) << 5);
5003 return (modpath->bc[index] == id);
5004 }
5005 diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
5006 index 295bfb7124bc..39127b691b78 100644
5007 --- a/arch/s390/kernel/ipl.c
5008 +++ b/arch/s390/kernel/ipl.c
5009 @@ -798,6 +798,7 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
5010 /* copy and convert to ebcdic */
5011 memcpy(ipb->hdr.loadparm, buf, lp_len);
5012 ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN);
5013 + ipb->hdr.flags |= DIAG308_FLAGS_LP_VALID;
5014 return len;
5015 }
5016
5017 diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
5018 index b1815b20a99c..37032545c58e 100644
5019 --- a/drivers/acpi/nfit/core.c
5020 +++ b/drivers/acpi/nfit/core.c
5021 @@ -2547,15 +2547,21 @@ static void acpi_nfit_scrub(struct work_struct *work)
5022 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
5023 {
5024 struct nfit_spa *nfit_spa;
5025 - int rc;
5026
5027 - list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
5028 - if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
5029 - /* BLK regions don't need to wait for ars results */
5030 - rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
5031 - if (rc)
5032 - return rc;
5033 - }
5034 + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
5035 + int rc, type = nfit_spa_type(nfit_spa->spa);
5036 +
5037 + /* PMEM and VMEM will be registered by the ARS workqueue */
5038 + if (type == NFIT_SPA_PM || type == NFIT_SPA_VOLATILE)
5039 + continue;
5040 + /* BLK apertures belong to BLK region registration below */
5041 + if (type == NFIT_SPA_BDW)
5042 + continue;
5043 + /* BLK regions don't need to wait for ARS results */
5044 + rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
5045 + if (rc)
5046 + return rc;
5047 + }
5048
5049 queue_work(nfit_wq, &acpi_desc->work);
5050 return 0;
5051 diff --git a/drivers/block/loop.c b/drivers/block/loop.c
5052 index dc318b9100c2..ff1c4d7aa025 100644
5053 --- a/drivers/block/loop.c
5054 +++ b/drivers/block/loop.c
5055 @@ -1110,11 +1110,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
5056 if (info->lo_encrypt_type) {
5057 unsigned int type = info->lo_encrypt_type;
5058
5059 - if (type >= MAX_LO_CRYPT)
5060 - return -EINVAL;
5061 + if (type >= MAX_LO_CRYPT) {
5062 + err = -EINVAL;
5063 + goto exit;
5064 + }
5065 xfer = xfer_funcs[type];
5066 - if (xfer == NULL)
5067 - return -EINVAL;
5068 + if (xfer == NULL) {
5069 + err = -EINVAL;
5070 + goto exit;
5071 + }
5072 } else
5073 xfer = NULL;
5074
5075 diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
5076 index 8263429e21b8..79a48c37fb35 100644
5077 --- a/drivers/firmware/psci.c
5078 +++ b/drivers/firmware/psci.c
5079 @@ -59,7 +59,10 @@ bool psci_tos_resident_on(int cpu)
5080 return cpu == resident_cpu;
5081 }
5082
5083 -struct psci_operations psci_ops;
5084 +struct psci_operations psci_ops = {
5085 + .conduit = PSCI_CONDUIT_NONE,
5086 + .smccc_version = SMCCC_VERSION_1_0,
5087 +};
5088
5089 typedef unsigned long (psci_fn)(unsigned long, unsigned long,
5090 unsigned long, unsigned long);
5091 @@ -210,6 +213,22 @@ static unsigned long psci_migrate_info_up_cpu(void)
5092 0, 0, 0);
5093 }
5094
5095 +static void set_conduit(enum psci_conduit conduit)
5096 +{
5097 + switch (conduit) {
5098 + case PSCI_CONDUIT_HVC:
5099 + invoke_psci_fn = __invoke_psci_fn_hvc;
5100 + break;
5101 + case PSCI_CONDUIT_SMC:
5102 + invoke_psci_fn = __invoke_psci_fn_smc;
5103 + break;
5104 + default:
5105 + WARN(1, "Unexpected PSCI conduit %d\n", conduit);
5106 + }
5107 +
5108 + psci_ops.conduit = conduit;
5109 +}
5110 +
5111 static int get_set_conduit_method(struct device_node *np)
5112 {
5113 const char *method;
5114 @@ -222,9 +241,9 @@ static int get_set_conduit_method(struct device_node *np)
5115 }
5116
5117 if (!strcmp("hvc", method)) {
5118 - invoke_psci_fn = __invoke_psci_fn_hvc;
5119 + set_conduit(PSCI_CONDUIT_HVC);
5120 } else if (!strcmp("smc", method)) {
5121 - invoke_psci_fn = __invoke_psci_fn_smc;
5122 + set_conduit(PSCI_CONDUIT_SMC);
5123 } else {
5124 pr_warn("invalid \"method\" property: %s\n", method);
5125 return -EINVAL;
5126 @@ -493,9 +512,36 @@ static void __init psci_init_migrate(void)
5127 pr_info("Trusted OS resident on physical CPU 0x%lx\n", cpuid);
5128 }
5129
5130 +static void __init psci_init_smccc(void)
5131 +{
5132 + u32 ver = ARM_SMCCC_VERSION_1_0;
5133 + int feature;
5134 +
5135 + feature = psci_features(ARM_SMCCC_VERSION_FUNC_ID);
5136 +
5137 + if (feature != PSCI_RET_NOT_SUPPORTED) {
5138 + u32 ret;
5139 + ret = invoke_psci_fn(ARM_SMCCC_VERSION_FUNC_ID, 0, 0, 0);
5140 + if (ret == ARM_SMCCC_VERSION_1_1) {
5141 + psci_ops.smccc_version = SMCCC_VERSION_1_1;
5142 + ver = ret;
5143 + }
5144 + }
5145 +
5146 + /*
5147 + * Conveniently, the SMCCC and PSCI versions are encoded the
5148 + * same way. No, this isn't accidental.
5149 + */
5150 + pr_info("SMC Calling Convention v%d.%d\n",
5151 + PSCI_VERSION_MAJOR(ver), PSCI_VERSION_MINOR(ver));
5152 +
5153 +}
5154 +
5155 static void __init psci_0_2_set_functions(void)
5156 {
5157 pr_info("Using standard PSCI v0.2 function IDs\n");
5158 + psci_ops.get_version = psci_get_version;
5159 +
5160 psci_function_id[PSCI_FN_CPU_SUSPEND] =
5161 PSCI_FN_NATIVE(0_2, CPU_SUSPEND);
5162 psci_ops.cpu_suspend = psci_cpu_suspend;
5163 @@ -539,6 +585,7 @@ static int __init psci_probe(void)
5164 psci_init_migrate();
5165
5166 if (PSCI_VERSION_MAJOR(ver) >= 1) {
5167 + psci_init_smccc();
5168 psci_init_cpu_suspend();
5169 psci_init_system_suspend();
5170 }
5171 @@ -652,9 +699,9 @@ int __init psci_acpi_init(void)
5172 pr_info("probing for conduit method from ACPI.\n");
5173
5174 if (acpi_psci_use_hvc())
5175 - invoke_psci_fn = __invoke_psci_fn_hvc;
5176 + set_conduit(PSCI_CONDUIT_HVC);
5177 else
5178 - invoke_psci_fn = __invoke_psci_fn_smc;
5179 + set_conduit(PSCI_CONDUIT_SMC);
5180
5181 return psci_probe();
5182 }
5183 diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
5184 index 41b72ce6613f..83e1345db9e2 100644
5185 --- a/drivers/gpu/drm/radeon/radeon_object.c
5186 +++ b/drivers/gpu/drm/radeon/radeon_object.c
5187 @@ -238,9 +238,10 @@ int radeon_bo_create(struct radeon_device *rdev,
5188 * may be slow
5189 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
5190 */
5191 -
5192 +#ifndef CONFIG_COMPILE_TEST
5193 #warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
5194 thanks to write-combining
5195 +#endif
5196
5197 if (bo->flags & RADEON_GEM_GTT_WC)
5198 DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
5199 diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
5200 index d8bc4b910192..9360cdce740e 100644
5201 --- a/drivers/hv/channel_mgmt.c
5202 +++ b/drivers/hv/channel_mgmt.c
5203 @@ -70,7 +70,7 @@ static const struct vmbus_device vmbus_devs[] = {
5204 /* PCIE */
5205 { .dev_type = HV_PCIE,
5206 HV_PCIE_GUID,
5207 - .perf_device = true,
5208 + .perf_device = false,
5209 },
5210
5211 /* Synthetic Frame Buffer */
5212 diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c
5213 index a629f7c130f0..ac63e562071f 100644
5214 --- a/drivers/hwmon/ina2xx.c
5215 +++ b/drivers/hwmon/ina2xx.c
5216 @@ -447,6 +447,7 @@ static int ina2xx_probe(struct i2c_client *client,
5217
5218 /* set the device type */
5219 data->config = &ina2xx_config[id->driver_data];
5220 + mutex_init(&data->config_lock);
5221
5222 if (of_property_read_u32(dev->of_node, "shunt-resistor", &val) < 0) {
5223 struct ina2xx_platform_data *pdata = dev_get_platdata(dev);
5224 @@ -473,8 +474,6 @@ static int ina2xx_probe(struct i2c_client *client,
5225 return -ENODEV;
5226 }
5227
5228 - mutex_init(&data->config_lock);
5229 -
5230 data->groups[group++] = &ina2xx_group;
5231 if (id->driver_data == ina226)
5232 data->groups[group++] = &ina226_group;
5233 diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
5234 index 48a39222fdf9..a9fc64557c53 100644
5235 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
5236 +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
5237 @@ -101,7 +101,7 @@ static int get_v4l2_window32(struct v4l2_window __user *kp,
5238 static int put_v4l2_window32(struct v4l2_window __user *kp,
5239 struct v4l2_window32 __user *up)
5240 {
5241 - struct v4l2_clip __user *kclips = kp->clips;
5242 + struct v4l2_clip __user *kclips;
5243 struct v4l2_clip32 __user *uclips;
5244 compat_caddr_t p;
5245 u32 clipcount;
5246 @@ -116,6 +116,8 @@ static int put_v4l2_window32(struct v4l2_window __user *kp,
5247 if (!clipcount)
5248 return 0;
5249
5250 + if (get_user(kclips, &kp->clips))
5251 + return -EFAULT;
5252 if (get_user(p, &up->clips))
5253 return -EFAULT;
5254 uclips = compat_ptr(p);
5255 diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
5256 index 4da73e2c37cf..2032a6de026b 100644
5257 --- a/drivers/net/phy/micrel.c
5258 +++ b/drivers/net/phy/micrel.c
5259 @@ -268,12 +268,23 @@ static int kszphy_nand_tree_disable(struct phy_device *phydev)
5260 return ret;
5261 }
5262
5263 -/* Some config bits need to be set again on resume, handle them here. */
5264 -static int kszphy_config_reset(struct phy_device *phydev)
5265 +static int kszphy_config_init(struct phy_device *phydev)
5266 {
5267 struct kszphy_priv *priv = phydev->priv;
5268 + const struct kszphy_type *type;
5269 int ret;
5270
5271 + if (!priv)
5272 + return 0;
5273 +
5274 + type = priv->type;
5275 +
5276 + if (type->has_broadcast_disable)
5277 + kszphy_broadcast_disable(phydev);
5278 +
5279 + if (type->has_nand_tree_disable)
5280 + kszphy_nand_tree_disable(phydev);
5281 +
5282 if (priv->rmii_ref_clk_sel) {
5283 ret = kszphy_rmii_clk_sel(phydev, priv->rmii_ref_clk_sel_val);
5284 if (ret) {
5285 @@ -284,7 +295,7 @@ static int kszphy_config_reset(struct phy_device *phydev)
5286 }
5287
5288 if (priv->led_mode >= 0)
5289 - kszphy_setup_led(phydev, priv->type->led_mode_reg, priv->led_mode);
5290 + kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode);
5291
5292 if (phy_interrupt_is_valid(phydev)) {
5293 int ctl = phy_read(phydev, MII_BMCR);
5294 @@ -300,25 +311,6 @@ static int kszphy_config_reset(struct phy_device *phydev)
5295 return 0;
5296 }
5297
5298 -static int kszphy_config_init(struct phy_device *phydev)
5299 -{
5300 - struct kszphy_priv *priv = phydev->priv;
5301 - const struct kszphy_type *type;
5302 -
5303 - if (!priv)
5304 - return 0;
5305 -
5306 - type = priv->type;
5307 -
5308 - if (type->has_broadcast_disable)
5309 - kszphy_broadcast_disable(phydev);
5310 -
5311 - if (type->has_nand_tree_disable)
5312 - kszphy_nand_tree_disable(phydev);
5313 -
5314 - return kszphy_config_reset(phydev);
5315 -}
5316 -
5317 static int ksz8041_config_init(struct phy_device *phydev)
5318 {
5319 struct device_node *of_node = phydev->mdio.dev.of_node;
5320 @@ -723,14 +715,8 @@ static int kszphy_suspend(struct phy_device *phydev)
5321
5322 static int kszphy_resume(struct phy_device *phydev)
5323 {
5324 - int ret;
5325 -
5326 genphy_resume(phydev);
5327
5328 - ret = kszphy_config_reset(phydev);
5329 - if (ret)
5330 - return ret;
5331 -
5332 /* Enable PHY Interrupts */
5333 if (phy_interrupt_is_valid(phydev)) {
5334 phydev->interrupts = PHY_INTERRUPT_ENABLED;
5335 diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c
5336 index 27ed25252aac..cfd81eb1b532 100644
5337 --- a/drivers/net/slip/slhc.c
5338 +++ b/drivers/net/slip/slhc.c
5339 @@ -509,6 +509,10 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize)
5340 if(x < 0 || x > comp->rslot_limit)
5341 goto bad;
5342
5343 + /* Check if the cstate is initialized */
5344 + if (!comp->rstate[x].initialized)
5345 + goto bad;
5346 +
5347 comp->flags &=~ SLF_TOSS;
5348 comp->recv_current = x;
5349 } else {
5350 @@ -673,6 +677,7 @@ slhc_remember(struct slcompress *comp, unsigned char *icp, int isize)
5351 if (cs->cs_tcp.doff > 5)
5352 memcpy(cs->cs_tcpopt, icp + ihl*4 + sizeof(struct tcphdr), (cs->cs_tcp.doff - 5) * 4);
5353 cs->cs_hsize = ihl*2 + cs->cs_tcp.doff*2;
5354 + cs->initialized = true;
5355 /* Put headers back on packet
5356 * Neither header checksum is recalculated
5357 */
5358 diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
5359 index 1fca0024f294..4fb468666b19 100644
5360 --- a/drivers/net/usb/cdc_ether.c
5361 +++ b/drivers/net/usb/cdc_ether.c
5362 @@ -773,6 +773,12 @@ static const struct usb_device_id products[] = {
5363 USB_CDC_SUBCLASS_ETHERNET,
5364 USB_CDC_PROTO_NONE),
5365 .driver_info = (unsigned long)&wwan_info,
5366 +}, {
5367 + /* Cinterion AHS3 modem by GEMALTO */
5368 + USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0055, USB_CLASS_COMM,
5369 + USB_CDC_SUBCLASS_ETHERNET,
5370 + USB_CDC_PROTO_NONE),
5371 + .driver_info = (unsigned long)&wwan_info,
5372 }, {
5373 /* Telit modules */
5374 USB_VENDOR_AND_INTERFACE_INFO(0x1bc7, USB_CLASS_COMM,
5375 diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
5376 index c53385a0052f..f5a96678494b 100644
5377 --- a/drivers/net/usb/lan78xx.c
5378 +++ b/drivers/net/usb/lan78xx.c
5379 @@ -873,7 +873,8 @@ static int lan78xx_read_otp(struct lan78xx_net *dev, u32 offset,
5380 offset += 0x100;
5381 else
5382 ret = -EINVAL;
5383 - ret = lan78xx_read_raw_otp(dev, offset, length, data);
5384 + if (!ret)
5385 + ret = lan78xx_read_raw_otp(dev, offset, length, data);
5386 }
5387
5388 return ret;
5389 diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
5390 index 231f84db9ab0..6113624ccec3 100644
5391 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
5392 +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
5393 @@ -1454,6 +1454,7 @@ static int rtl8187_probe(struct usb_interface *intf,
5394 goto err_free_dev;
5395 }
5396 mutex_init(&priv->io_mutex);
5397 + mutex_init(&priv->conf_mutex);
5398
5399 SET_IEEE80211_DEV(dev, &intf->dev);
5400 usb_set_intfdata(intf, dev);
5401 @@ -1627,7 +1628,6 @@ static int rtl8187_probe(struct usb_interface *intf,
5402 printk(KERN_ERR "rtl8187: Cannot register device\n");
5403 goto err_free_dmabuf;
5404 }
5405 - mutex_init(&priv->conf_mutex);
5406 skb_queue_head_init(&priv->b_tx_status.queue);
5407
5408 wiphy_info(dev->wiphy, "hwaddr %pM, %s V%d + %s, rfkill mask %d\n",
5409 diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
5410 index 71bf9bded485..66e9bb053629 100644
5411 --- a/drivers/s390/cio/qdio_main.c
5412 +++ b/drivers/s390/cio/qdio_main.c
5413 @@ -126,7 +126,7 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq)
5414 static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state,
5415 int start, int count, int auto_ack)
5416 {
5417 - int rc, tmp_count = count, tmp_start = start, nr = q->nr, retried = 0;
5418 + int rc, tmp_count = count, tmp_start = start, nr = q->nr;
5419 unsigned int ccq = 0;
5420
5421 qperf_inc(q, eqbs);
5422 @@ -149,14 +149,7 @@ static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state,
5423 qperf_inc(q, eqbs_partial);
5424 DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "EQBS part:%02x",
5425 tmp_count);
5426 - /*
5427 - * Retry once, if that fails bail out and process the
5428 - * extracted buffers before trying again.
5429 - */
5430 - if (!retried++)
5431 - goto again;
5432 - else
5433 - return count - tmp_count;
5434 + return count - tmp_count;
5435 }
5436
5437 DBF_ERROR("%4x EQBS ERROR", SCH_NO(q));
5438 @@ -212,7 +205,10 @@ static int qdio_do_sqbs(struct qdio_q *q, unsigned char state, int start,
5439 return 0;
5440 }
5441
5442 -/* returns number of examined buffers and their common state in *state */
5443 +/*
5444 + * Returns number of examined buffers and their common state in *state.
5445 + * Requested number of buffers-to-examine must be > 0.
5446 + */
5447 static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr,
5448 unsigned char *state, unsigned int count,
5449 int auto_ack, int merge_pending)
5450 @@ -223,17 +219,23 @@ static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr,
5451 if (is_qebsm(q))
5452 return qdio_do_eqbs(q, state, bufnr, count, auto_ack);
5453
5454 - for (i = 0; i < count; i++) {
5455 - if (!__state) {
5456 - __state = q->slsb.val[bufnr];
5457 - if (merge_pending && __state == SLSB_P_OUTPUT_PENDING)
5458 - __state = SLSB_P_OUTPUT_EMPTY;
5459 - } else if (merge_pending) {
5460 - if ((q->slsb.val[bufnr] & __state) != __state)
5461 - break;
5462 - } else if (q->slsb.val[bufnr] != __state)
5463 - break;
5464 + /* get initial state: */
5465 + __state = q->slsb.val[bufnr];
5466 + if (merge_pending && __state == SLSB_P_OUTPUT_PENDING)
5467 + __state = SLSB_P_OUTPUT_EMPTY;
5468 +
5469 + for (i = 1; i < count; i++) {
5470 bufnr = next_buf(bufnr);
5471 +
5472 + /* merge PENDING into EMPTY: */
5473 + if (merge_pending &&
5474 + q->slsb.val[bufnr] == SLSB_P_OUTPUT_PENDING &&
5475 + __state == SLSB_P_OUTPUT_EMPTY)
5476 + continue;
5477 +
5478 + /* stop if next state differs from initial state: */
5479 + if (q->slsb.val[bufnr] != __state)
5480 + break;
5481 }
5482 *state = __state;
5483 return i;
5484 diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
5485 index e2c37aeed45a..fce49ebc575d 100644
5486 --- a/drivers/vhost/vhost.c
5487 +++ b/drivers/vhost/vhost.c
5488 @@ -1175,10 +1175,12 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq,
5489 /* Caller should have vq mutex and device mutex */
5490 int vhost_vq_access_ok(struct vhost_virtqueue *vq)
5491 {
5492 - int ret = vq_log_access_ok(vq, vq->log_base);
5493 + if (!vq_log_access_ok(vq, vq->log_base))
5494 + return 0;
5495
5496 - if (ret || vq->iotlb)
5497 - return ret;
5498 + /* Access validation occurs at prefetch time with IOTLB */
5499 + if (vq->iotlb)
5500 + return 1;
5501
5502 return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
5503 }
5504 diff --git a/fs/namei.c b/fs/namei.c
5505 index 891670e0956b..85ac38b99065 100644
5506 --- a/fs/namei.c
5507 +++ b/fs/namei.c
5508 @@ -221,9 +221,10 @@ getname_kernel(const char * filename)
5509 if (len <= EMBEDDED_NAME_MAX) {
5510 result->name = (char *)result->iname;
5511 } else if (len <= PATH_MAX) {
5512 + const size_t size = offsetof(struct filename, iname[1]);
5513 struct filename *tmp;
5514
5515 - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
5516 + tmp = kmalloc(size, GFP_KERNEL);
5517 if (unlikely(!tmp)) {
5518 __putname(result);
5519 return ERR_PTR(-ENOMEM);
5520 diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h
5521 new file mode 100644
5522 index 000000000000..e518e4e3dfb5
5523 --- /dev/null
5524 +++ b/include/kvm/arm_psci.h
5525 @@ -0,0 +1,51 @@
5526 +/*
5527 + * Copyright (C) 2012,2013 - ARM Ltd
5528 + * Author: Marc Zyngier <marc.zyngier@arm.com>
5529 + *
5530 + * This program is free software; you can redistribute it and/or modify
5531 + * it under the terms of the GNU General Public License version 2 as
5532 + * published by the Free Software Foundation.
5533 + *
5534 + * This program is distributed in the hope that it will be useful,
5535 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5536 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5537 + * GNU General Public License for more details.
5538 + *
5539 + * You should have received a copy of the GNU General Public License
5540 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
5541 + */
5542 +
5543 +#ifndef __KVM_ARM_PSCI_H__
5544 +#define __KVM_ARM_PSCI_H__
5545 +
5546 +#include <linux/kvm_host.h>
5547 +#include <uapi/linux/psci.h>
5548 +
5549 +#define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1)
5550 +#define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2)
5551 +#define KVM_ARM_PSCI_1_0 PSCI_VERSION(1, 0)
5552 +
5553 +#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0
5554 +
5555 +/*
5556 + * We need the KVM pointer independently from the vcpu as we can call
5557 + * this from HYP, and need to apply kern_hyp_va on it...
5558 + */
5559 +static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm)
5560 +{
5561 + /*
5562 + * Our PSCI implementation stays the same across versions from
5563 + * v0.2 onward, only adding the few mandatory functions (such
5564 + * as FEATURES with 1.0) that are required by newer
5565 + * revisions. It is thus safe to return the latest.
5566 + */
5567 + if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
5568 + return KVM_ARM_PSCI_LATEST;
5569 +
5570 + return KVM_ARM_PSCI_0_1;
5571 +}
5572 +
5573 +
5574 +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
5575 +
5576 +#endif /* __KVM_ARM_PSCI_H__ */
5577 diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
5578 index 4c5bca38c653..a031897fca76 100644
5579 --- a/include/linux/arm-smccc.h
5580 +++ b/include/linux/arm-smccc.h
5581 @@ -14,14 +14,16 @@
5582 #ifndef __LINUX_ARM_SMCCC_H
5583 #define __LINUX_ARM_SMCCC_H
5584
5585 +#include <uapi/linux/const.h>
5586 +
5587 /*
5588 * This file provides common defines for ARM SMC Calling Convention as
5589 * specified in
5590 * http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html
5591 */
5592
5593 -#define ARM_SMCCC_STD_CALL 0
5594 -#define ARM_SMCCC_FAST_CALL 1
5595 +#define ARM_SMCCC_STD_CALL _AC(0,U)
5596 +#define ARM_SMCCC_FAST_CALL _AC(1,U)
5597 #define ARM_SMCCC_TYPE_SHIFT 31
5598
5599 #define ARM_SMCCC_SMC_32 0
5600 @@ -60,6 +62,24 @@
5601 #define ARM_SMCCC_QUIRK_NONE 0
5602 #define ARM_SMCCC_QUIRK_QCOM_A6 1 /* Save/restore register a6 */
5603
5604 +#define ARM_SMCCC_VERSION_1_0 0x10000
5605 +#define ARM_SMCCC_VERSION_1_1 0x10001
5606 +
5607 +#define ARM_SMCCC_VERSION_FUNC_ID \
5608 + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
5609 + ARM_SMCCC_SMC_32, \
5610 + 0, 0)
5611 +
5612 +#define ARM_SMCCC_ARCH_FEATURES_FUNC_ID \
5613 + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
5614 + ARM_SMCCC_SMC_32, \
5615 + 0, 1)
5616 +
5617 +#define ARM_SMCCC_ARCH_WORKAROUND_1 \
5618 + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
5619 + ARM_SMCCC_SMC_32, \
5620 + 0, 0x8000)
5621 +
5622 #ifndef __ASSEMBLY__
5623
5624 #include <linux/linkage.h>
5625 @@ -130,5 +150,146 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
5626
5627 #define arm_smccc_hvc_quirk(...) __arm_smccc_hvc(__VA_ARGS__)
5628
5629 +/* SMCCC v1.1 implementation madness follows */
5630 +#ifdef CONFIG_ARM64
5631 +
5632 +#define SMCCC_SMC_INST "smc #0"
5633 +#define SMCCC_HVC_INST "hvc #0"
5634 +
5635 +#elif defined(CONFIG_ARM)
5636 +#include <asm/opcodes-sec.h>
5637 +#include <asm/opcodes-virt.h>
5638 +
5639 +#define SMCCC_SMC_INST __SMC(0)
5640 +#define SMCCC_HVC_INST __HVC(0)
5641 +
5642 +#endif
5643 +
5644 +#define ___count_args(_0, _1, _2, _3, _4, _5, _6, _7, _8, x, ...) x
5645 +
5646 +#define __count_args(...) \
5647 + ___count_args(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0)
5648 +
5649 +#define __constraint_write_0 \
5650 + "+r" (r0), "=&r" (r1), "=&r" (r2), "=&r" (r3)
5651 +#define __constraint_write_1 \
5652 + "+r" (r0), "+r" (r1), "=&r" (r2), "=&r" (r3)
5653 +#define __constraint_write_2 \
5654 + "+r" (r0), "+r" (r1), "+r" (r2), "=&r" (r3)
5655 +#define __constraint_write_3 \
5656 + "+r" (r0), "+r" (r1), "+r" (r2), "+r" (r3)
5657 +#define __constraint_write_4 __constraint_write_3
5658 +#define __constraint_write_5 __constraint_write_4
5659 +#define __constraint_write_6 __constraint_write_5
5660 +#define __constraint_write_7 __constraint_write_6
5661 +
5662 +#define __constraint_read_0
5663 +#define __constraint_read_1
5664 +#define __constraint_read_2
5665 +#define __constraint_read_3
5666 +#define __constraint_read_4 "r" (r4)
5667 +#define __constraint_read_5 __constraint_read_4, "r" (r5)
5668 +#define __constraint_read_6 __constraint_read_5, "r" (r6)
5669 +#define __constraint_read_7 __constraint_read_6, "r" (r7)
5670 +
5671 +#define __declare_arg_0(a0, res) \
5672 + struct arm_smccc_res *___res = res; \
5673 + register u32 r0 asm("r0") = a0; \
5674 + register unsigned long r1 asm("r1"); \
5675 + register unsigned long r2 asm("r2"); \
5676 + register unsigned long r3 asm("r3")
5677 +
5678 +#define __declare_arg_1(a0, a1, res) \
5679 + struct arm_smccc_res *___res = res; \
5680 + register u32 r0 asm("r0") = a0; \
5681 + register typeof(a1) r1 asm("r1") = a1; \
5682 + register unsigned long r2 asm("r2"); \
5683 + register unsigned long r3 asm("r3")
5684 +
5685 +#define __declare_arg_2(a0, a1, a2, res) \
5686 + struct arm_smccc_res *___res = res; \
5687 + register u32 r0 asm("r0") = a0; \
5688 + register typeof(a1) r1 asm("r1") = a1; \
5689 + register typeof(a2) r2 asm("r2") = a2; \
5690 + register unsigned long r3 asm("r3")
5691 +
5692 +#define __declare_arg_3(a0, a1, a2, a3, res) \
5693 + struct arm_smccc_res *___res = res; \
5694 + register u32 r0 asm("r0") = a0; \
5695 + register typeof(a1) r1 asm("r1") = a1; \
5696 + register typeof(a2) r2 asm("r2") = a2; \
5697 + register typeof(a3) r3 asm("r3") = a3
5698 +
5699 +#define __declare_arg_4(a0, a1, a2, a3, a4, res) \
5700 + __declare_arg_3(a0, a1, a2, a3, res); \
5701 + register typeof(a4) r4 asm("r4") = a4
5702 +
5703 +#define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \
5704 + __declare_arg_4(a0, a1, a2, a3, a4, res); \
5705 + register typeof(a5) r5 asm("r5") = a5
5706 +
5707 +#define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \
5708 + __declare_arg_5(a0, a1, a2, a3, a4, a5, res); \
5709 + register typeof(a6) r6 asm("r6") = a6
5710 +
5711 +#define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \
5712 + __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \
5713 + register typeof(a7) r7 asm("r7") = a7
5714 +
5715 +#define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__)
5716 +#define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__)
5717 +
5718 +#define ___constraints(count) \
5719 + : __constraint_write_ ## count \
5720 + : __constraint_read_ ## count \
5721 + : "memory"
5722 +#define __constraints(count) ___constraints(count)
5723 +
5724 +/*
5725 + * We have an output list that is not necessarily used, and GCC feels
5726 + * entitled to optimise the whole sequence away. "volatile" is what
5727 + * makes it stick.
5728 + */
5729 +#define __arm_smccc_1_1(inst, ...) \
5730 + do { \
5731 + __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \
5732 + asm volatile(inst "\n" \
5733 + __constraints(__count_args(__VA_ARGS__))); \
5734 + if (___res) \
5735 + *___res = (typeof(*___res)){r0, r1, r2, r3}; \
5736 + } while (0)
5737 +
5738 +/*
5739 + * arm_smccc_1_1_smc() - make an SMCCC v1.1 compliant SMC call
5740 + *
5741 + * This is a variadic macro taking one to eight source arguments, and
5742 + * an optional return structure.
5743 + *
5744 + * @a0-a7: arguments passed in registers 0 to 7
5745 + * @res: result values from registers 0 to 3
5746 + *
5747 + * This macro is used to make SMC calls following SMC Calling Convention v1.1.
5748 + * The content of the supplied param are copied to registers 0 to 7 prior
5749 + * to the SMC instruction. The return values are updated with the content
5750 + * from register 0 to 3 on return from the SMC instruction if not NULL.
5751 + */
5752 +#define arm_smccc_1_1_smc(...) __arm_smccc_1_1(SMCCC_SMC_INST, __VA_ARGS__)
5753 +
5754 +/*
5755 + * arm_smccc_1_1_hvc() - make an SMCCC v1.1 compliant HVC call
5756 + *
5757 + * This is a variadic macro taking one to eight source arguments, and
5758 + * an optional return structure.
5759 + *
5760 + * @a0-a7: arguments passed in registers 0 to 7
5761 + * @res: result values from registers 0 to 3
5762 + *
5763 + * This macro is used to make HVC calls following SMC Calling Convention v1.1.
5764 + * The content of the supplied param are copied to registers 0 to 7 prior
5765 + * to the HVC instruction. The return values are updated with the content
5766 + * from register 0 to 3 on return from the HVC instruction if not NULL.
5767 + */
5768 +#define arm_smccc_1_1_hvc(...) __arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__)
5769 +
5770 #endif /*__ASSEMBLY__*/
5771 #endif /*__LINUX_ARM_SMCCC_H*/
5772 diff --git a/include/linux/mm.h b/include/linux/mm.h
5773 index 8e506783631b..4a07ff4f38e1 100644
5774 --- a/include/linux/mm.h
5775 +++ b/include/linux/mm.h
5776 @@ -76,6 +76,10 @@ extern int mmap_rnd_compat_bits __read_mostly;
5777 #define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x)))
5778 #endif
5779
5780 +#ifndef lm_alias
5781 +#define lm_alias(x) __va(__pa_symbol(x))
5782 +#endif
5783 +
5784 /*
5785 * To prevent common memory management code establishing
5786 * a zero page mapping on a read fault.
5787 diff --git a/include/linux/psci.h b/include/linux/psci.h
5788 index bdea1cb5e1db..347077cf19c6 100644
5789 --- a/include/linux/psci.h
5790 +++ b/include/linux/psci.h
5791 @@ -25,7 +25,19 @@ bool psci_tos_resident_on(int cpu);
5792 int psci_cpu_init_idle(unsigned int cpu);
5793 int psci_cpu_suspend_enter(unsigned long index);
5794
5795 +enum psci_conduit {
5796 + PSCI_CONDUIT_NONE,
5797 + PSCI_CONDUIT_SMC,
5798 + PSCI_CONDUIT_HVC,
5799 +};
5800 +
5801 +enum smccc_version {
5802 + SMCCC_VERSION_1_0,
5803 + SMCCC_VERSION_1_1,
5804 +};
5805 +
5806 struct psci_operations {
5807 + u32 (*get_version)(void);
5808 int (*cpu_suspend)(u32 state, unsigned long entry_point);
5809 int (*cpu_off)(u32 state);
5810 int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
5811 @@ -33,6 +45,8 @@ struct psci_operations {
5812 int (*affinity_info)(unsigned long target_affinity,
5813 unsigned long lowest_affinity_level);
5814 int (*migrate_info_type)(void);
5815 + enum psci_conduit conduit;
5816 + enum smccc_version smccc_version;
5817 };
5818
5819 extern struct psci_operations psci_ops;
5820 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
5821 index 554671c81f4a..4931787193c3 100644
5822 --- a/include/net/bluetooth/hci_core.h
5823 +++ b/include/net/bluetooth/hci_core.h
5824 @@ -893,7 +893,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
5825 u16 conn_timeout);
5826 struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
5827 u8 dst_type, u8 sec_level, u16 conn_timeout,
5828 - u8 role);
5829 + u8 role, bdaddr_t *direct_rpa);
5830 struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
5831 u8 sec_level, u8 auth_type);
5832 struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
5833 diff --git a/include/net/slhc_vj.h b/include/net/slhc_vj.h
5834 index 8716d5942b65..8fcf8908a694 100644
5835 --- a/include/net/slhc_vj.h
5836 +++ b/include/net/slhc_vj.h
5837 @@ -127,6 +127,7 @@ typedef __u32 int32;
5838 */
5839 struct cstate {
5840 byte_t cs_this; /* connection id number (xmit) */
5841 + bool initialized; /* true if initialized */
5842 struct cstate *next; /* next in ring (xmit) */
5843 struct iphdr cs_ip; /* ip/tcp hdr from most recent packet */
5844 struct tcphdr cs_tcp;
5845 diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
5846 index 3d7a0fc021a7..39930ca998cd 100644
5847 --- a/include/uapi/linux/psci.h
5848 +++ b/include/uapi/linux/psci.h
5849 @@ -87,6 +87,9 @@
5850 (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT)
5851 #define PSCI_VERSION_MINOR(ver) \
5852 ((ver) & PSCI_VERSION_MINOR_MASK)
5853 +#define PSCI_VERSION(maj, min) \
5854 + ((((maj) << PSCI_VERSION_MAJOR_SHIFT) & PSCI_VERSION_MAJOR_MASK) | \
5855 + ((min) & PSCI_VERSION_MINOR_MASK))
5856
5857 /* PSCI features decoding (>=1.0) */
5858 #define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT 1
5859 diff --git a/kernel/events/core.c b/kernel/events/core.c
5860 index c4100c38a467..74710fad35d5 100644
5861 --- a/kernel/events/core.c
5862 +++ b/kernel/events/core.c
5863 @@ -4091,6 +4091,9 @@ static void _free_event(struct perf_event *event)
5864 if (event->ctx)
5865 put_ctx(event->ctx);
5866
5867 + if (event->hw.target)
5868 + put_task_struct(event->hw.target);
5869 +
5870 exclusive_event_destroy(event);
5871 module_put(event->pmu->module);
5872
5873 @@ -9214,6 +9217,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
5874 * and we cannot use the ctx information because we need the
5875 * pmu before we get a ctx.
5876 */
5877 + get_task_struct(task);
5878 event->hw.target = task;
5879 }
5880
5881 @@ -9331,6 +9335,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
5882 perf_detach_cgroup(event);
5883 if (event->ns)
5884 put_pid_ns(event->ns);
5885 + if (event->hw.target)
5886 + put_task_struct(event->hw.target);
5887 kfree(event);
5888
5889 return ERR_PTR(err);
5890 diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
5891 index dc59eae54717..cc061495f653 100644
5892 --- a/net/bluetooth/hci_conn.c
5893 +++ b/net/bluetooth/hci_conn.c
5894 @@ -749,18 +749,31 @@ static bool conn_use_rpa(struct hci_conn *conn)
5895 }
5896
5897 static void hci_req_add_le_create_conn(struct hci_request *req,
5898 - struct hci_conn *conn)
5899 + struct hci_conn *conn,
5900 + bdaddr_t *direct_rpa)
5901 {
5902 struct hci_cp_le_create_conn cp;
5903 struct hci_dev *hdev = conn->hdev;
5904 u8 own_addr_type;
5905
5906 - /* Update random address, but set require_privacy to false so
5907 - * that we never connect with an non-resolvable address.
5908 + /* If direct address was provided we use it instead of current
5909 + * address.
5910 */
5911 - if (hci_update_random_address(req, false, conn_use_rpa(conn),
5912 - &own_addr_type))
5913 - return;
5914 + if (direct_rpa) {
5915 + if (bacmp(&req->hdev->random_addr, direct_rpa))
5916 + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6,
5917 + direct_rpa);
5918 +
5919 + /* direct address is always RPA */
5920 + own_addr_type = ADDR_LE_DEV_RANDOM;
5921 + } else {
5922 + /* Update random address, but set require_privacy to false so
5923 + * that we never connect with an non-resolvable address.
5924 + */
5925 + if (hci_update_random_address(req, false, conn_use_rpa(conn),
5926 + &own_addr_type))
5927 + return;
5928 + }
5929
5930 memset(&cp, 0, sizeof(cp));
5931
5932 @@ -825,7 +838,7 @@ static void hci_req_directed_advertising(struct hci_request *req,
5933
5934 struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
5935 u8 dst_type, u8 sec_level, u16 conn_timeout,
5936 - u8 role)
5937 + u8 role, bdaddr_t *direct_rpa)
5938 {
5939 struct hci_conn_params *params;
5940 struct hci_conn *conn;
5941 @@ -940,7 +953,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
5942 hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED);
5943 }
5944
5945 - hci_req_add_le_create_conn(&req, conn);
5946 + hci_req_add_le_create_conn(&req, conn, direct_rpa);
5947
5948 create_conn:
5949 err = hci_req_run(&req, create_le_conn_complete);
5950 diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
5951 index e17aacbc5630..d2f9eb169ba8 100644
5952 --- a/net/bluetooth/hci_event.c
5953 +++ b/net/bluetooth/hci_event.c
5954 @@ -4646,7 +4646,8 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev,
5955 /* This function requires the caller holds hdev->lock */
5956 static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
5957 bdaddr_t *addr,
5958 - u8 addr_type, u8 adv_type)
5959 + u8 addr_type, u8 adv_type,
5960 + bdaddr_t *direct_rpa)
5961 {
5962 struct hci_conn *conn;
5963 struct hci_conn_params *params;
5964 @@ -4697,7 +4698,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
5965 }
5966
5967 conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW,
5968 - HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER);
5969 + HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER,
5970 + direct_rpa);
5971 if (!IS_ERR(conn)) {
5972 /* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned
5973 * by higher layer that tried to connect, if no then
5974 @@ -4807,8 +4809,13 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
5975 bdaddr_type = irk->addr_type;
5976 }
5977
5978 - /* Check if we have been requested to connect to this device */
5979 - conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type);
5980 + /* Check if we have been requested to connect to this device.
5981 + *
5982 + * direct_addr is set only for directed advertising reports (it is NULL
5983 + * for advertising reports) and is already verified to be RPA above.
5984 + */
5985 + conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type,
5986 + direct_addr);
5987 if (conn && type == LE_ADV_IND) {
5988 /* Store report for later inclusion by
5989 * mgmt_device_connected
5990 diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
5991 index 2bbca23a9d05..1fc23cb4a3e0 100644
5992 --- a/net/bluetooth/l2cap_core.c
5993 +++ b/net/bluetooth/l2cap_core.c
5994 @@ -7148,7 +7148,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
5995 hcon = hci_connect_le(hdev, dst, dst_type,
5996 chan->sec_level,
5997 HCI_LE_CONN_TIMEOUT,
5998 - HCI_ROLE_SLAVE);
5999 + HCI_ROLE_SLAVE, NULL);
6000 else
6001 hcon = hci_connect_le_scan(hdev, dst, dst_type,
6002 chan->sec_level,
6003 diff --git a/net/rds/send.c b/net/rds/send.c
6004 index ef53d164e146..50241d30e16d 100644
6005 --- a/net/rds/send.c
6006 +++ b/net/rds/send.c
6007 @@ -1,5 +1,5 @@
6008 /*
6009 - * Copyright (c) 2006 Oracle. All rights reserved.
6010 + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
6011 *
6012 * This software is available to you under a choice of one of two
6013 * licenses. You may choose to be licensed under the terms of the GNU
6014 @@ -983,10 +983,15 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn)
6015 if (conn->c_npaths == 0 && hash != 0) {
6016 rds_send_ping(conn);
6017
6018 - if (conn->c_npaths == 0) {
6019 - wait_event_interruptible(conn->c_hs_waitq,
6020 - (conn->c_npaths != 0));
6021 - }
6022 + /* The underlying connection is not up yet. Need to wait
6023 + * until it is up to be sure that the non-zero c_path can be
6024 + * used. But if we are interrupted, we have to use the zero
6025 + * c_path in case the connection ends up being non-MP capable.
6026 + */
6027 + if (conn->c_npaths == 0)
6028 + if (wait_event_interruptible(conn->c_hs_waitq,
6029 + conn->c_npaths != 0))
6030 + hash = 0;
6031 if (conn->c_npaths == 1)
6032 hash = 0;
6033 }
6034 diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
6035 index 79aec90259cd..4afd4149a632 100644
6036 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
6037 +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
6038 @@ -237,9 +237,6 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
6039
6040 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
6041
6042 - err = crypto_ahash_init(req);
6043 - if (err)
6044 - goto out;
6045 err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
6046 if (err)
6047 goto out;
6048 diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
6049 index 150334064071..ff5bc6363a79 100644
6050 --- a/tools/perf/tests/code-reading.c
6051 +++ b/tools/perf/tests/code-reading.c
6052 @@ -224,8 +224,6 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
6053 unsigned char buf2[BUFSZ];
6054 size_t ret_len;
6055 u64 objdump_addr;
6056 - const char *objdump_name;
6057 - char decomp_name[KMOD_DECOMP_LEN];
6058 int ret;
6059
6060 pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
6061 @@ -286,25 +284,9 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
6062 state->done[state->done_cnt++] = al.map->start;
6063 }
6064
6065 - objdump_name = al.map->dso->long_name;
6066 - if (dso__needs_decompress(al.map->dso)) {
6067 - if (dso__decompress_kmodule_path(al.map->dso, objdump_name,
6068 - decomp_name,
6069 - sizeof(decomp_name)) < 0) {
6070 - pr_debug("decompression failed\n");
6071 - return -1;
6072 - }
6073 -
6074 - objdump_name = decomp_name;
6075 - }
6076 -
6077 /* Read the object code using objdump */
6078 objdump_addr = map__rip_2objdump(al.map, al.addr);
6079 - ret = read_via_objdump(objdump_name, objdump_addr, buf2, len);
6080 -
6081 - if (dso__needs_decompress(al.map->dso))
6082 - unlink(objdump_name);
6083 -
6084 + ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len);
6085 if (ret > 0) {
6086 /*
6087 * The kernel maps are inaccurate - assume objdump is right in
6088 diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
6089 index 7e27207d0f45..cac39532c057 100644
6090 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
6091 +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
6092 @@ -1300,6 +1300,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
6093 intel_pt_clear_tx_flags(decoder);
6094 decoder->have_tma = false;
6095 decoder->cbr = 0;
6096 + decoder->timestamp_insn_cnt = 0;
6097 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
6098 decoder->overflow = true;
6099 return -EOVERFLOW;
6100 @@ -1522,6 +1523,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
6101 case INTEL_PT_PSBEND:
6102 intel_pt_log("ERROR: Missing TIP after FUP\n");
6103 decoder->pkt_state = INTEL_PT_STATE_ERR3;
6104 + decoder->pkt_step = 0;
6105 return -ENOENT;
6106
6107 case INTEL_PT_OVF:
6108 @@ -2182,14 +2184,6 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
6109 return &decoder->state;
6110 }
6111
6112 -static bool intel_pt_at_psb(unsigned char *buf, size_t len)
6113 -{
6114 - if (len < INTEL_PT_PSB_LEN)
6115 - return false;
6116 - return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR,
6117 - INTEL_PT_PSB_LEN);
6118 -}
6119 -
6120 /**
6121 * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
6122 * @buf: pointer to buffer pointer
6123 @@ -2278,6 +2272,7 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
6124 * @buf: buffer
6125 * @len: size of buffer
6126 * @tsc: TSC value returned
6127 + * @rem: returns remaining size when TSC is found
6128 *
6129 * Find a TSC packet in @buf and return the TSC value. This function assumes
6130 * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
6131 @@ -2285,7 +2280,8 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
6132 *
6133 * Return: %true if TSC is found, false otherwise.
6134 */
6135 -static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
6136 +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
6137 + size_t *rem)
6138 {
6139 struct intel_pt_pkt packet;
6140 int ret;
6141 @@ -2296,6 +2292,7 @@ static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
6142 return false;
6143 if (packet.type == INTEL_PT_TSC) {
6144 *tsc = packet.payload;
6145 + *rem = len;
6146 return true;
6147 }
6148 if (packet.type == INTEL_PT_PSBEND)
6149 @@ -2346,6 +2343,8 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
6150 * @len_a: size of first buffer
6151 * @buf_b: second buffer
6152 * @len_b: size of second buffer
6153 + * @consecutive: returns true if there is data in buf_b that is consecutive
6154 + * to buf_a
6155 *
6156 * If the trace contains TSC we can look at the last TSC of @buf_a and the
6157 * first TSC of @buf_b in order to determine if the buffers overlap, and then
6158 @@ -2358,33 +2357,41 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
6159 static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
6160 size_t len_a,
6161 unsigned char *buf_b,
6162 - size_t len_b)
6163 + size_t len_b, bool *consecutive)
6164 {
6165 uint64_t tsc_a, tsc_b;
6166 unsigned char *p;
6167 - size_t len;
6168 + size_t len, rem_a, rem_b;
6169
6170 p = intel_pt_last_psb(buf_a, len_a);
6171 if (!p)
6172 return buf_b; /* No PSB in buf_a => no overlap */
6173
6174 len = len_a - (p - buf_a);
6175 - if (!intel_pt_next_tsc(p, len, &tsc_a)) {
6176 + if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) {
6177 /* The last PSB+ in buf_a is incomplete, so go back one more */
6178 len_a -= len;
6179 p = intel_pt_last_psb(buf_a, len_a);
6180 if (!p)
6181 return buf_b; /* No full PSB+ => assume no overlap */
6182 len = len_a - (p - buf_a);
6183 - if (!intel_pt_next_tsc(p, len, &tsc_a))
6184 + if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a))
6185 return buf_b; /* No TSC in buf_a => assume no overlap */
6186 }
6187
6188 while (1) {
6189 /* Ignore PSB+ with no TSC */
6190 - if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) &&
6191 - intel_pt_tsc_cmp(tsc_a, tsc_b) < 0)
6192 - return buf_b; /* tsc_a < tsc_b => no overlap */
6193 + if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) {
6194 + int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b);
6195 +
6196 + /* Same TSC, so buffers are consecutive */
6197 + if (!cmp && rem_b >= rem_a) {
6198 + *consecutive = true;
6199 + return buf_b + len_b - (rem_b - rem_a);
6200 + }
6201 + if (cmp < 0)
6202 + return buf_b; /* tsc_a < tsc_b => no overlap */
6203 + }
6204
6205 if (!intel_pt_step_psb(&buf_b, &len_b))
6206 return buf_b + len_b; /* No PSB in buf_b => no data */
6207 @@ -2398,6 +2405,8 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
6208 * @buf_b: second buffer
6209 * @len_b: size of second buffer
6210 * @have_tsc: can use TSC packets to detect overlap
6211 + * @consecutive: returns true if there is data in buf_b that is consecutive
6212 + * to buf_a
6213 *
6214 * When trace samples or snapshots are recorded there is the possibility that
6215 * the data overlaps. Note that, for the purposes of decoding, data is only
6216 @@ -2408,7 +2417,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
6217 */
6218 unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
6219 unsigned char *buf_b, size_t len_b,
6220 - bool have_tsc)
6221 + bool have_tsc, bool *consecutive)
6222 {
6223 unsigned char *found;
6224
6225 @@ -2420,7 +2429,8 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
6226 return buf_b; /* No overlap */
6227
6228 if (have_tsc) {
6229 - found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b);
6230 + found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b,
6231 + consecutive);
6232 if (found)
6233 return found;
6234 }
6235 @@ -2435,28 +2445,16 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
6236 }
6237
6238 /* Now len_b >= len_a */
6239 - if (len_b > len_a) {
6240 - /* The leftover buffer 'b' must start at a PSB */
6241 - while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
6242 - if (!intel_pt_step_psb(&buf_a, &len_a))
6243 - return buf_b; /* No overlap */
6244 - }
6245 - }
6246 -
6247 while (1) {
6248 /* Potential overlap so check the bytes */
6249 found = memmem(buf_a, len_a, buf_b, len_a);
6250 - if (found)
6251 + if (found) {
6252 + *consecutive = true;
6253 return buf_b + len_a;
6254 + }
6255
6256 /* Try again at next PSB in buffer 'a' */
6257 if (!intel_pt_step_psb(&buf_a, &len_a))
6258 return buf_b; /* No overlap */
6259 -
6260 - /* The leftover buffer 'b' must start at a PSB */
6261 - while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
6262 - if (!intel_pt_step_psb(&buf_a, &len_a))
6263 - return buf_b; /* No overlap */
6264 - }
6265 }
6266 }
6267 diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
6268 index 89399985fa4d..9ae4df1dcedc 100644
6269 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
6270 +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
6271 @@ -103,7 +103,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
6272
6273 unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
6274 unsigned char *buf_b, size_t len_b,
6275 - bool have_tsc);
6276 + bool have_tsc, bool *consecutive);
6277
6278 int intel_pt__strerror(int code, char *buf, size_t buflen);
6279
6280 diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
6281 index dc041d4368c8..b1161d725ce9 100644
6282 --- a/tools/perf/util/intel-pt.c
6283 +++ b/tools/perf/util/intel-pt.c
6284 @@ -131,6 +131,7 @@ struct intel_pt_queue {
6285 bool stop;
6286 bool step_through_buffers;
6287 bool use_buffer_pid_tid;
6288 + bool sync_switch;
6289 pid_t pid, tid;
6290 int cpu;
6291 int switch_state;
6292 @@ -194,14 +195,17 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
6293 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
6294 struct auxtrace_buffer *b)
6295 {
6296 + bool consecutive = false;
6297 void *start;
6298
6299 start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
6300 - pt->have_tsc);
6301 + pt->have_tsc, &consecutive);
6302 if (!start)
6303 return -EINVAL;
6304 b->use_size = b->data + b->size - start;
6305 b->use_data = start;
6306 + if (b->use_size && consecutive)
6307 + b->consecutive = true;
6308 return 0;
6309 }
6310
6311 @@ -928,10 +932,12 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
6312 if (pt->timeless_decoding || !pt->have_sched_switch)
6313 ptq->use_buffer_pid_tid = true;
6314 }
6315 +
6316 + ptq->sync_switch = pt->sync_switch;
6317 }
6318
6319 if (!ptq->on_heap &&
6320 - (!pt->sync_switch ||
6321 + (!ptq->sync_switch ||
6322 ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
6323 const struct intel_pt_state *state;
6324 int ret;
6325 @@ -1333,7 +1339,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
6326 if (pt->synth_opts.last_branch)
6327 intel_pt_update_last_branch_rb(ptq);
6328
6329 - if (!pt->sync_switch)
6330 + if (!ptq->sync_switch)
6331 return 0;
6332
6333 if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
6334 @@ -1414,6 +1420,21 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
6335 return switch_ip;
6336 }
6337
6338 +static void intel_pt_enable_sync_switch(struct intel_pt *pt)
6339 +{
6340 + unsigned int i;
6341 +
6342 + pt->sync_switch = true;
6343 +
6344 + for (i = 0; i < pt->queues.nr_queues; i++) {
6345 + struct auxtrace_queue *queue = &pt->queues.queue_array[i];
6346 + struct intel_pt_queue *ptq = queue->priv;
6347 +
6348 + if (ptq)
6349 + ptq->sync_switch = true;
6350 + }
6351 +}
6352 +
6353 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
6354 {
6355 const struct intel_pt_state *state = ptq->state;
6356 @@ -1430,7 +1451,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
6357 if (pt->switch_ip) {
6358 intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
6359 pt->switch_ip, pt->ptss_ip);
6360 - pt->sync_switch = true;
6361 + intel_pt_enable_sync_switch(pt);
6362 }
6363 }
6364 }
6365 @@ -1446,9 +1467,9 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
6366 if (state->err) {
6367 if (state->err == INTEL_PT_ERR_NODATA)
6368 return 1;
6369 - if (pt->sync_switch &&
6370 + if (ptq->sync_switch &&
6371 state->from_ip >= pt->kernel_start) {
6372 - pt->sync_switch = false;
6373 + ptq->sync_switch = false;
6374 intel_pt_next_tid(pt, ptq);
6375 }
6376 if (pt->synth_opts.errors) {
6377 @@ -1474,7 +1495,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
6378 state->timestamp, state->est_timestamp);
6379 ptq->timestamp = state->est_timestamp;
6380 /* Use estimated TSC in unknown switch state */
6381 - } else if (pt->sync_switch &&
6382 + } else if (ptq->sync_switch &&
6383 ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
6384 intel_pt_is_switch_ip(ptq, state->to_ip) &&
6385 ptq->next_tid == -1) {
6386 @@ -1621,7 +1642,7 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
6387 return 1;
6388
6389 ptq = intel_pt_cpu_to_ptq(pt, cpu);
6390 - if (!ptq)
6391 + if (!ptq || !ptq->sync_switch)
6392 return 1;
6393
6394 switch (ptq->switch_state) {