Magellan Linux

Contents of /trunk/kernel-magellan/patches-4.8/0109-4.8.10-all-fixes.patch

Parent Directory | Revision Log


Revision 2849 - (show annotations) (download)
Tue Nov 22 13:19:55 2016 UTC (7 years, 5 months ago) by niro
File size: 141674 byte(s)
-linux-4.8.10
1 diff --git a/Makefile b/Makefile
2 index c1519ab85258..7cf2b4985703 100644
3 --- a/Makefile
4 +++ b/Makefile
5 @@ -1,6 +1,6 @@
6 VERSION = 4
7 PATCHLEVEL = 8
8 -SUBLEVEL = 9
9 +SUBLEVEL = 10
10 EXTRAVERSION =
11 NAME = Psychotic Stoned Sheep
12
13 diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
14 index 37a315d0ddd4..a6847fc05a6d 100644
15 --- a/arch/sparc/include/asm/uaccess_64.h
16 +++ b/arch/sparc/include/asm/uaccess_64.h
17 @@ -98,7 +98,6 @@ struct exception_table_entry {
18 unsigned int insn, fixup;
19 };
20
21 -void __ret_efault(void);
22 void __retl_efault(void);
23
24 /* Uh, these should become the main single-value transfer routines..
25 @@ -205,55 +204,34 @@ int __get_user_bad(void);
26 unsigned long __must_check ___copy_from_user(void *to,
27 const void __user *from,
28 unsigned long size);
29 -unsigned long copy_from_user_fixup(void *to, const void __user *from,
30 - unsigned long size);
31 static inline unsigned long __must_check
32 copy_from_user(void *to, const void __user *from, unsigned long size)
33 {
34 - unsigned long ret;
35 -
36 check_object_size(to, size, false);
37
38 - ret = ___copy_from_user(to, from, size);
39 - if (unlikely(ret))
40 - ret = copy_from_user_fixup(to, from, size);
41 -
42 - return ret;
43 + return ___copy_from_user(to, from, size);
44 }
45 #define __copy_from_user copy_from_user
46
47 unsigned long __must_check ___copy_to_user(void __user *to,
48 const void *from,
49 unsigned long size);
50 -unsigned long copy_to_user_fixup(void __user *to, const void *from,
51 - unsigned long size);
52 static inline unsigned long __must_check
53 copy_to_user(void __user *to, const void *from, unsigned long size)
54 {
55 - unsigned long ret;
56 -
57 check_object_size(from, size, true);
58
59 - ret = ___copy_to_user(to, from, size);
60 - if (unlikely(ret))
61 - ret = copy_to_user_fixup(to, from, size);
62 - return ret;
63 + return ___copy_to_user(to, from, size);
64 }
65 #define __copy_to_user copy_to_user
66
67 unsigned long __must_check ___copy_in_user(void __user *to,
68 const void __user *from,
69 unsigned long size);
70 -unsigned long copy_in_user_fixup(void __user *to, void __user *from,
71 - unsigned long size);
72 static inline unsigned long __must_check
73 copy_in_user(void __user *to, void __user *from, unsigned long size)
74 {
75 - unsigned long ret = ___copy_in_user(to, from, size);
76 -
77 - if (unlikely(ret))
78 - ret = copy_in_user_fixup(to, from, size);
79 - return ret;
80 + return ___copy_in_user(to, from, size);
81 }
82 #define __copy_in_user copy_in_user
83
84 diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
85 index a076b4249e62..5f1f3ae21657 100644
86 --- a/arch/sparc/kernel/head_64.S
87 +++ b/arch/sparc/kernel/head_64.S
88 @@ -922,47 +922,11 @@ prom_tba: .xword 0
89 tlb_type: .word 0 /* Must NOT end up in BSS */
90 .section ".fixup",#alloc,#execinstr
91
92 - .globl __ret_efault, __retl_efault, __ret_one, __retl_one
93 -ENTRY(__ret_efault)
94 - ret
95 - restore %g0, -EFAULT, %o0
96 -ENDPROC(__ret_efault)
97 -
98 ENTRY(__retl_efault)
99 retl
100 mov -EFAULT, %o0
101 ENDPROC(__retl_efault)
102
103 -ENTRY(__retl_one)
104 - retl
105 - mov 1, %o0
106 -ENDPROC(__retl_one)
107 -
108 -ENTRY(__retl_one_fp)
109 - VISExitHalf
110 - retl
111 - mov 1, %o0
112 -ENDPROC(__retl_one_fp)
113 -
114 -ENTRY(__ret_one_asi)
115 - wr %g0, ASI_AIUS, %asi
116 - ret
117 - restore %g0, 1, %o0
118 -ENDPROC(__ret_one_asi)
119 -
120 -ENTRY(__retl_one_asi)
121 - wr %g0, ASI_AIUS, %asi
122 - retl
123 - mov 1, %o0
124 -ENDPROC(__retl_one_asi)
125 -
126 -ENTRY(__retl_one_asi_fp)
127 - wr %g0, ASI_AIUS, %asi
128 - VISExitHalf
129 - retl
130 - mov 1, %o0
131 -ENDPROC(__retl_one_asi_fp)
132 -
133 ENTRY(__retl_o1)
134 retl
135 mov %o1, %o0
136 diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
137 index 59bbeff55024..07933b9e9ce0 100644
138 --- a/arch/sparc/kernel/jump_label.c
139 +++ b/arch/sparc/kernel/jump_label.c
140 @@ -13,19 +13,30 @@
141 void arch_jump_label_transform(struct jump_entry *entry,
142 enum jump_label_type type)
143 {
144 - u32 val;
145 u32 *insn = (u32 *) (unsigned long) entry->code;
146 + u32 val;
147
148 if (type == JUMP_LABEL_JMP) {
149 s32 off = (s32)entry->target - (s32)entry->code;
150 + bool use_v9_branch = false;
151 +
152 + BUG_ON(off & 3);
153
154 #ifdef CONFIG_SPARC64
155 - /* ba,pt %xcc, . + (off << 2) */
156 - val = 0x10680000 | ((u32) off >> 2);
157 -#else
158 - /* ba . + (off << 2) */
159 - val = 0x10800000 | ((u32) off >> 2);
160 + if (off <= 0xfffff && off >= -0x100000)
161 + use_v9_branch = true;
162 #endif
163 + if (use_v9_branch) {
164 + /* WDISP19 - target is . + immed << 2 */
165 + /* ba,pt %xcc, . + off */
166 + val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
167 + } else {
168 + /* WDISP22 - target is . + immed << 2 */
169 + BUG_ON(off > 0x7fffff);
170 + BUG_ON(off < -0x800000);
171 + /* ba . + off */
172 + val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
173 + }
174 } else {
175 val = 0x01000000;
176 }
177 diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
178 index 9e034f29dcc5..20ffb052fe38 100644
179 --- a/arch/sparc/kernel/sparc_ksyms_64.c
180 +++ b/arch/sparc/kernel/sparc_ksyms_64.c
181 @@ -27,7 +27,6 @@ EXPORT_SYMBOL(__flushw_user);
182 EXPORT_SYMBOL_GPL(real_hard_smp_processor_id);
183
184 /* from head_64.S */
185 -EXPORT_SYMBOL(__ret_efault);
186 EXPORT_SYMBOL(tlb_type);
187 EXPORT_SYMBOL(sun4v_chip_type);
188 EXPORT_SYMBOL(prom_root_node);
189 diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S
190 index b7d0bd6b1406..69a439fa2fc1 100644
191 --- a/arch/sparc/lib/GENcopy_from_user.S
192 +++ b/arch/sparc/lib/GENcopy_from_user.S
193 @@ -3,11 +3,11 @@
194 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
195 */
196
197 -#define EX_LD(x) \
198 +#define EX_LD(x,y) \
199 98: x; \
200 .section __ex_table,"a";\
201 .align 4; \
202 - .word 98b, __retl_one; \
203 + .word 98b, y; \
204 .text; \
205 .align 4;
206
207 diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S
208 index 780550e1afc7..9947427ce354 100644
209 --- a/arch/sparc/lib/GENcopy_to_user.S
210 +++ b/arch/sparc/lib/GENcopy_to_user.S
211 @@ -3,11 +3,11 @@
212 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
213 */
214
215 -#define EX_ST(x) \
216 +#define EX_ST(x,y) \
217 98: x; \
218 .section __ex_table,"a";\
219 .align 4; \
220 - .word 98b, __retl_one; \
221 + .word 98b, y; \
222 .text; \
223 .align 4;
224
225 diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S
226 index 89358ee94851..059ea24ad73d 100644
227 --- a/arch/sparc/lib/GENmemcpy.S
228 +++ b/arch/sparc/lib/GENmemcpy.S
229 @@ -4,21 +4,18 @@
230 */
231
232 #ifdef __KERNEL__
233 +#include <linux/linkage.h>
234 #define GLOBAL_SPARE %g7
235 #else
236 #define GLOBAL_SPARE %g5
237 #endif
238
239 #ifndef EX_LD
240 -#define EX_LD(x) x
241 +#define EX_LD(x,y) x
242 #endif
243
244 #ifndef EX_ST
245 -#define EX_ST(x) x
246 -#endif
247 -
248 -#ifndef EX_RETVAL
249 -#define EX_RETVAL(x) x
250 +#define EX_ST(x,y) x
251 #endif
252
253 #ifndef LOAD
254 @@ -45,6 +42,29 @@
255 .register %g3,#scratch
256
257 .text
258 +
259 +#ifndef EX_RETVAL
260 +#define EX_RETVAL(x) x
261 +ENTRY(GEN_retl_o4_1)
262 + add %o4, %o2, %o4
263 + retl
264 + add %o4, 1, %o0
265 +ENDPROC(GEN_retl_o4_1)
266 +ENTRY(GEN_retl_g1_8)
267 + add %g1, %o2, %g1
268 + retl
269 + add %g1, 8, %o0
270 +ENDPROC(GEN_retl_g1_8)
271 +ENTRY(GEN_retl_o2_4)
272 + retl
273 + add %o2, 4, %o0
274 +ENDPROC(GEN_retl_o2_4)
275 +ENTRY(GEN_retl_o2_1)
276 + retl
277 + add %o2, 1, %o0
278 +ENDPROC(GEN_retl_o2_1)
279 +#endif
280 +
281 .align 64
282
283 .globl FUNC_NAME
284 @@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
285 sub %g0, %o4, %o4
286 sub %o2, %o4, %o2
287 1: subcc %o4, 1, %o4
288 - EX_LD(LOAD(ldub, %o1, %g1))
289 - EX_ST(STORE(stb, %g1, %o0))
290 + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
291 + EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
292 add %o1, 1, %o1
293 bne,pt %XCC, 1b
294 add %o0, 1, %o0
295 @@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
296 andn %o2, 0x7, %g1
297 sub %o2, %g1, %o2
298 1: subcc %g1, 0x8, %g1
299 - EX_LD(LOAD(ldx, %o1, %g2))
300 - EX_ST(STORE(stx, %g2, %o0))
301 + EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
302 + EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
303 add %o1, 0x8, %o1
304 bne,pt %XCC, 1b
305 add %o0, 0x8, %o0
306 @@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
307
308 1:
309 subcc %o2, 4, %o2
310 - EX_LD(LOAD(lduw, %o1, %g1))
311 - EX_ST(STORE(stw, %g1, %o1 + %o3))
312 + EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
313 + EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
314 bgu,pt %XCC, 1b
315 add %o1, 4, %o1
316
317 @@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
318 .align 32
319 90:
320 subcc %o2, 1, %o2
321 - EX_LD(LOAD(ldub, %o1, %g1))
322 - EX_ST(STORE(stb, %g1, %o1 + %o3))
323 + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
324 + EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
325 bgu,pt %XCC, 90b
326 add %o1, 1, %o1
327 retl
328 diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
329 index 3269b0234093..4f2384a4286a 100644
330 --- a/arch/sparc/lib/Makefile
331 +++ b/arch/sparc/lib/Makefile
332 @@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
333 lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
334 lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
335
336 -lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
337 +lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
338 lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
339
340 obj-$(CONFIG_SPARC64) += iomap.o
341 diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
342 index d5242b8c4f94..b79a6998d87c 100644
343 --- a/arch/sparc/lib/NG2copy_from_user.S
344 +++ b/arch/sparc/lib/NG2copy_from_user.S
345 @@ -3,19 +3,19 @@
346 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
347 */
348
349 -#define EX_LD(x) \
350 +#define EX_LD(x,y) \
351 98: x; \
352 .section __ex_table,"a";\
353 .align 4; \
354 - .word 98b, __retl_one_asi;\
355 + .word 98b, y; \
356 .text; \
357 .align 4;
358
359 -#define EX_LD_FP(x) \
360 +#define EX_LD_FP(x,y) \
361 98: x; \
362 .section __ex_table,"a";\
363 .align 4; \
364 - .word 98b, __retl_one_asi_fp;\
365 + .word 98b, y##_fp; \
366 .text; \
367 .align 4;
368
369 diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
370 index 4e962d993b10..dcec55f254ab 100644
371 --- a/arch/sparc/lib/NG2copy_to_user.S
372 +++ b/arch/sparc/lib/NG2copy_to_user.S
373 @@ -3,19 +3,19 @@
374 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
375 */
376
377 -#define EX_ST(x) \
378 +#define EX_ST(x,y) \
379 98: x; \
380 .section __ex_table,"a";\
381 .align 4; \
382 - .word 98b, __retl_one_asi;\
383 + .word 98b, y; \
384 .text; \
385 .align 4;
386
387 -#define EX_ST_FP(x) \
388 +#define EX_ST_FP(x,y) \
389 98: x; \
390 .section __ex_table,"a";\
391 .align 4; \
392 - .word 98b, __retl_one_asi_fp;\
393 + .word 98b, y##_fp; \
394 .text; \
395 .align 4;
396
397 diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
398 index d5f585df2f3f..c629dbd121b6 100644
399 --- a/arch/sparc/lib/NG2memcpy.S
400 +++ b/arch/sparc/lib/NG2memcpy.S
401 @@ -4,6 +4,7 @@
402 */
403
404 #ifdef __KERNEL__
405 +#include <linux/linkage.h>
406 #include <asm/visasm.h>
407 #include <asm/asi.h>
408 #define GLOBAL_SPARE %g7
409 @@ -32,21 +33,17 @@
410 #endif
411
412 #ifndef EX_LD
413 -#define EX_LD(x) x
414 +#define EX_LD(x,y) x
415 #endif
416 #ifndef EX_LD_FP
417 -#define EX_LD_FP(x) x
418 +#define EX_LD_FP(x,y) x
419 #endif
420
421 #ifndef EX_ST
422 -#define EX_ST(x) x
423 +#define EX_ST(x,y) x
424 #endif
425 #ifndef EX_ST_FP
426 -#define EX_ST_FP(x) x
427 -#endif
428 -
429 -#ifndef EX_RETVAL
430 -#define EX_RETVAL(x) x
431 +#define EX_ST_FP(x,y) x
432 #endif
433
434 #ifndef LOAD
435 @@ -140,45 +137,110 @@
436 fsrc2 %x6, %f12; \
437 fsrc2 %x7, %f14;
438 #define FREG_LOAD_1(base, x0) \
439 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
440 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
441 #define FREG_LOAD_2(base, x0, x1) \
442 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
443 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
444 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
445 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
446 #define FREG_LOAD_3(base, x0, x1, x2) \
447 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
448 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
449 - EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
450 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
451 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
452 + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
453 #define FREG_LOAD_4(base, x0, x1, x2, x3) \
454 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
455 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
456 - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
457 - EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
458 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
459 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
460 + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
461 + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
462 #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
463 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
464 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
465 - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
466 - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
467 - EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
468 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
469 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
470 + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
471 + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
472 + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
473 #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
474 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
475 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
476 - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
477 - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
478 - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
479 - EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
480 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
481 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
482 + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
483 + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
484 + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
485 + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
486 #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
487 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
488 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
489 - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
490 - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
491 - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
492 - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
493 - EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
494 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
495 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
496 + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
497 + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
498 + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
499 + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
500 + EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
501
502 .register %g2,#scratch
503 .register %g3,#scratch
504
505 .text
506 +#ifndef EX_RETVAL
507 +#define EX_RETVAL(x) x
508 +__restore_fp:
509 + VISExitHalf
510 +__restore_asi:
511 + retl
512 + wr %g0, ASI_AIUS, %asi
513 +ENTRY(NG2_retl_o2)
514 + ba,pt %xcc, __restore_asi
515 + mov %o2, %o0
516 +ENDPROC(NG2_retl_o2)
517 +ENTRY(NG2_retl_o2_plus_1)
518 + ba,pt %xcc, __restore_asi
519 + add %o2, 1, %o0
520 +ENDPROC(NG2_retl_o2_plus_1)
521 +ENTRY(NG2_retl_o2_plus_4)
522 + ba,pt %xcc, __restore_asi
523 + add %o2, 4, %o0
524 +ENDPROC(NG2_retl_o2_plus_4)
525 +ENTRY(NG2_retl_o2_plus_8)
526 + ba,pt %xcc, __restore_asi
527 + add %o2, 8, %o0
528 +ENDPROC(NG2_retl_o2_plus_8)
529 +ENTRY(NG2_retl_o2_plus_o4_plus_1)
530 + add %o4, 1, %o4
531 + ba,pt %xcc, __restore_asi
532 + add %o2, %o4, %o0
533 +ENDPROC(NG2_retl_o2_plus_o4_plus_1)
534 +ENTRY(NG2_retl_o2_plus_o4_plus_8)
535 + add %o4, 8, %o4
536 + ba,pt %xcc, __restore_asi
537 + add %o2, %o4, %o0
538 +ENDPROC(NG2_retl_o2_plus_o4_plus_8)
539 +ENTRY(NG2_retl_o2_plus_o4_plus_16)
540 + add %o4, 16, %o4
541 + ba,pt %xcc, __restore_asi
542 + add %o2, %o4, %o0
543 +ENDPROC(NG2_retl_o2_plus_o4_plus_16)
544 +ENTRY(NG2_retl_o2_plus_g1_fp)
545 + ba,pt %xcc, __restore_fp
546 + add %o2, %g1, %o0
547 +ENDPROC(NG2_retl_o2_plus_g1_fp)
548 +ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
549 + add %g1, 64, %g1
550 + ba,pt %xcc, __restore_fp
551 + add %o2, %g1, %o0
552 +ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
553 +ENTRY(NG2_retl_o2_plus_g1_plus_1)
554 + add %g1, 1, %g1
555 + ba,pt %xcc, __restore_asi
556 + add %o2, %g1, %o0
557 +ENDPROC(NG2_retl_o2_plus_g1_plus_1)
558 +ENTRY(NG2_retl_o2_and_7_plus_o4)
559 + and %o2, 7, %o2
560 + ba,pt %xcc, __restore_asi
561 + add %o2, %o4, %o0
562 +ENDPROC(NG2_retl_o2_and_7_plus_o4)
563 +ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
564 + and %o2, 7, %o2
565 + add %o4, 8, %o4
566 + ba,pt %xcc, __restore_asi
567 + add %o2, %o4, %o0
568 +ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
569 +#endif
570 +
571 .align 64
572
573 .globl FUNC_NAME
574 @@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
575 sub %g0, %o4, %o4 ! bytes to align dst
576 sub %o2, %o4, %o2
577 1: subcc %o4, 1, %o4
578 - EX_LD(LOAD(ldub, %o1, %g1))
579 - EX_ST(STORE(stb, %g1, %o0))
580 + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
581 + EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
582 add %o1, 1, %o1
583 bne,pt %XCC, 1b
584 add %o0, 1, %o0
585 @@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
586 nop
587 /* fall through for 0 < low bits < 8 */
588 110: sub %o4, 64, %g2
589 - EX_LD_FP(LOAD_BLK(%g2, %f0))
590 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
591 - EX_LD_FP(LOAD_BLK(%o4, %f16))
592 + EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
593 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
594 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
595 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
596 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
597 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
598 FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
599 subcc %g1, 64, %g1
600 add %o4, 64, %o4
601 @@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
602
603 120: sub %o4, 56, %g2
604 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
605 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
606 - EX_LD_FP(LOAD_BLK(%o4, %f16))
607 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
608 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
609 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
610 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
611 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
612 FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
613 subcc %g1, 64, %g1
614 add %o4, 64, %o4
615 @@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
616
617 130: sub %o4, 48, %g2
618 FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
619 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
620 - EX_LD_FP(LOAD_BLK(%o4, %f16))
621 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
622 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
623 FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
624 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
625 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
626 FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
627 subcc %g1, 64, %g1
628 add %o4, 64, %o4
629 @@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
630
631 140: sub %o4, 40, %g2
632 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
633 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
634 - EX_LD_FP(LOAD_BLK(%o4, %f16))
635 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
636 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
637 FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
638 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
639 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
640 FREG_MOVE_5(f22, f24, f26, f28, f30)
641 subcc %g1, 64, %g1
642 add %o4, 64, %o4
643 @@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
644
645 150: sub %o4, 32, %g2
646 FREG_LOAD_4(%g2, f0, f2, f4, f6)
647 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
648 - EX_LD_FP(LOAD_BLK(%o4, %f16))
649 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
650 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
651 FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
652 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
653 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
654 FREG_MOVE_4(f24, f26, f28, f30)
655 subcc %g1, 64, %g1
656 add %o4, 64, %o4
657 @@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
658
659 160: sub %o4, 24, %g2
660 FREG_LOAD_3(%g2, f0, f2, f4)
661 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
662 - EX_LD_FP(LOAD_BLK(%o4, %f16))
663 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
664 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
665 FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
666 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
667 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
668 FREG_MOVE_3(f26, f28, f30)
669 subcc %g1, 64, %g1
670 add %o4, 64, %o4
671 @@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
672
673 170: sub %o4, 16, %g2
674 FREG_LOAD_2(%g2, f0, f2)
675 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
676 - EX_LD_FP(LOAD_BLK(%o4, %f16))
677 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
678 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
679 FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
680 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
681 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
682 FREG_MOVE_2(f28, f30)
683 subcc %g1, 64, %g1
684 add %o4, 64, %o4
685 @@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
686
687 180: sub %o4, 8, %g2
688 FREG_LOAD_1(%g2, f0)
689 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
690 - EX_LD_FP(LOAD_BLK(%o4, %f16))
691 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
692 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
693 FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
694 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
695 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
696 FREG_MOVE_1(f30)
697 subcc %g1, 64, %g1
698 add %o4, 64, %o4
699 @@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
700 nop
701
702 190:
703 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
704 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
705 subcc %g1, 64, %g1
706 - EX_LD_FP(LOAD_BLK(%o4, %f0))
707 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
708 + EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
709 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
710 add %o4, 64, %o4
711 bne,pt %xcc, 1b
712 LOAD(prefetch, %o4 + 64, #one_read)
713 @@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
714 andn %o2, 0xf, %o4
715 and %o2, 0xf, %o2
716 1: subcc %o4, 0x10, %o4
717 - EX_LD(LOAD(ldx, %o1, %o5))
718 + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
719 add %o1, 0x08, %o1
720 - EX_LD(LOAD(ldx, %o1, %g1))
721 + EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
722 sub %o1, 0x08, %o1
723 - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
724 + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
725 add %o1, 0x8, %o1
726 - EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))
727 + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
728 bgu,pt %XCC, 1b
729 add %o1, 0x8, %o1
730 73: andcc %o2, 0x8, %g0
731 be,pt %XCC, 1f
732 nop
733 sub %o2, 0x8, %o2
734 - EX_LD(LOAD(ldx, %o1, %o5))
735 - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
736 + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
737 + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
738 add %o1, 0x8, %o1
739 1: andcc %o2, 0x4, %g0
740 be,pt %XCC, 1f
741 nop
742 sub %o2, 0x4, %o2
743 - EX_LD(LOAD(lduw, %o1, %o5))
744 - EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))
745 + EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
746 + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
747 add %o1, 0x4, %o1
748 1: cmp %o2, 0
749 be,pt %XCC, 85f
750 @@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
751 sub %o2, %g1, %o2
752
753 1: subcc %g1, 1, %g1
754 - EX_LD(LOAD(ldub, %o1, %o5))
755 - EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))
756 + EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
757 + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
758 bgu,pt %icc, 1b
759 add %o1, 1, %o1
760
761 @@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
762
763 8: mov 64, GLOBAL_SPARE
764 andn %o1, 0x7, %o1
765 - EX_LD(LOAD(ldx, %o1, %g2))
766 + EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
767 sub GLOBAL_SPARE, %g1, GLOBAL_SPARE
768 andn %o2, 0x7, %o4
769 sllx %g2, %g1, %g2
770 1: add %o1, 0x8, %o1
771 - EX_LD(LOAD(ldx, %o1, %g3))
772 + EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
773 subcc %o4, 0x8, %o4
774 srlx %g3, GLOBAL_SPARE, %o5
775 or %o5, %g2, %o5
776 - EX_ST(STORE(stx, %o5, %o0))
777 + EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
778 add %o0, 0x8, %o0
779 bgu,pt %icc, 1b
780 sllx %g3, %g1, %g2
781 @@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
782
783 1:
784 subcc %o2, 4, %o2
785 - EX_LD(LOAD(lduw, %o1, %g1))
786 - EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))
787 + EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
788 + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
789 bgu,pt %XCC, 1b
790 add %o1, 4, %o1
791
792 @@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
793 .align 32
794 90:
795 subcc %o2, 1, %o2
796 - EX_LD(LOAD(ldub, %o1, %g1))
797 - EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))
798 + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
799 + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
800 bgu,pt %XCC, 90b
801 add %o1, 1, %o1
802 retl
803 diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
804 index 2e8ee7ad07a9..16a286c1a528 100644
805 --- a/arch/sparc/lib/NG4copy_from_user.S
806 +++ b/arch/sparc/lib/NG4copy_from_user.S
807 @@ -3,19 +3,19 @@
808 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
809 */
810
811 -#define EX_LD(x) \
812 +#define EX_LD(x, y) \
813 98: x; \
814 .section __ex_table,"a";\
815 .align 4; \
816 - .word 98b, __retl_one_asi;\
817 + .word 98b, y; \
818 .text; \
819 .align 4;
820
821 -#define EX_LD_FP(x) \
822 +#define EX_LD_FP(x,y) \
823 98: x; \
824 .section __ex_table,"a";\
825 .align 4; \
826 - .word 98b, __retl_one_asi_fp;\
827 + .word 98b, y##_fp; \
828 .text; \
829 .align 4;
830
831 diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
832 index be0bf4590df8..6b0276ffc858 100644
833 --- a/arch/sparc/lib/NG4copy_to_user.S
834 +++ b/arch/sparc/lib/NG4copy_to_user.S
835 @@ -3,19 +3,19 @@
836 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
837 */
838
839 -#define EX_ST(x) \
840 +#define EX_ST(x,y) \
841 98: x; \
842 .section __ex_table,"a";\
843 .align 4; \
844 - .word 98b, __retl_one_asi;\
845 + .word 98b, y; \
846 .text; \
847 .align 4;
848
849 -#define EX_ST_FP(x) \
850 +#define EX_ST_FP(x,y) \
851 98: x; \
852 .section __ex_table,"a";\
853 .align 4; \
854 - .word 98b, __retl_one_asi_fp;\
855 + .word 98b, y##_fp; \
856 .text; \
857 .align 4;
858
859 diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
860 index 8e13ee1f4454..75bb93b1437f 100644
861 --- a/arch/sparc/lib/NG4memcpy.S
862 +++ b/arch/sparc/lib/NG4memcpy.S
863 @@ -4,6 +4,7 @@
864 */
865
866 #ifdef __KERNEL__
867 +#include <linux/linkage.h>
868 #include <asm/visasm.h>
869 #include <asm/asi.h>
870 #define GLOBAL_SPARE %g7
871 @@ -46,22 +47,19 @@
872 #endif
873
874 #ifndef EX_LD
875 -#define EX_LD(x) x
876 +#define EX_LD(x,y) x
877 #endif
878 #ifndef EX_LD_FP
879 -#define EX_LD_FP(x) x
880 +#define EX_LD_FP(x,y) x
881 #endif
882
883 #ifndef EX_ST
884 -#define EX_ST(x) x
885 +#define EX_ST(x,y) x
886 #endif
887 #ifndef EX_ST_FP
888 -#define EX_ST_FP(x) x
889 +#define EX_ST_FP(x,y) x
890 #endif
891
892 -#ifndef EX_RETVAL
893 -#define EX_RETVAL(x) x
894 -#endif
895
896 #ifndef LOAD
897 #define LOAD(type,addr,dest) type [addr], dest
898 @@ -94,6 +92,158 @@
899 .register %g3,#scratch
900
901 .text
902 +#ifndef EX_RETVAL
903 +#define EX_RETVAL(x) x
904 +__restore_asi_fp:
905 + VISExitHalf
906 +__restore_asi:
907 + retl
908 + wr %g0, ASI_AIUS, %asi
909 +
910 +ENTRY(NG4_retl_o2)
911 + ba,pt %xcc, __restore_asi
912 + mov %o2, %o0
913 +ENDPROC(NG4_retl_o2)
914 +ENTRY(NG4_retl_o2_plus_1)
915 + ba,pt %xcc, __restore_asi
916 + add %o2, 1, %o0
917 +ENDPROC(NG4_retl_o2_plus_1)
918 +ENTRY(NG4_retl_o2_plus_4)
919 + ba,pt %xcc, __restore_asi
920 + add %o2, 4, %o0
921 +ENDPROC(NG4_retl_o2_plus_4)
922 +ENTRY(NG4_retl_o2_plus_o5)
923 + ba,pt %xcc, __restore_asi
924 + add %o2, %o5, %o0
925 +ENDPROC(NG4_retl_o2_plus_o5)
926 +ENTRY(NG4_retl_o2_plus_o5_plus_4)
927 + add %o5, 4, %o5
928 + ba,pt %xcc, __restore_asi
929 + add %o2, %o5, %o0
930 +ENDPROC(NG4_retl_o2_plus_o5_plus_4)
931 +ENTRY(NG4_retl_o2_plus_o5_plus_8)
932 + add %o5, 8, %o5
933 + ba,pt %xcc, __restore_asi
934 + add %o2, %o5, %o0
935 +ENDPROC(NG4_retl_o2_plus_o5_plus_8)
936 +ENTRY(NG4_retl_o2_plus_o5_plus_16)
937 + add %o5, 16, %o5
938 + ba,pt %xcc, __restore_asi
939 + add %o2, %o5, %o0
940 +ENDPROC(NG4_retl_o2_plus_o5_plus_16)
941 +ENTRY(NG4_retl_o2_plus_o5_plus_24)
942 + add %o5, 24, %o5
943 + ba,pt %xcc, __restore_asi
944 + add %o2, %o5, %o0
945 +ENDPROC(NG4_retl_o2_plus_o5_plus_24)
946 +ENTRY(NG4_retl_o2_plus_o5_plus_32)
947 + add %o5, 32, %o5
948 + ba,pt %xcc, __restore_asi
949 + add %o2, %o5, %o0
950 +ENDPROC(NG4_retl_o2_plus_o5_plus_32)
951 +ENTRY(NG4_retl_o2_plus_g1)
952 + ba,pt %xcc, __restore_asi
953 + add %o2, %g1, %o0
954 +ENDPROC(NG4_retl_o2_plus_g1)
955 +ENTRY(NG4_retl_o2_plus_g1_plus_1)
956 + add %g1, 1, %g1
957 + ba,pt %xcc, __restore_asi
958 + add %o2, %g1, %o0
959 +ENDPROC(NG4_retl_o2_plus_g1_plus_1)
960 +ENTRY(NG4_retl_o2_plus_g1_plus_8)
961 + add %g1, 8, %g1
962 + ba,pt %xcc, __restore_asi
963 + add %o2, %g1, %o0
964 +ENDPROC(NG4_retl_o2_plus_g1_plus_8)
965 +ENTRY(NG4_retl_o2_plus_o4)
966 + ba,pt %xcc, __restore_asi
967 + add %o2, %o4, %o0
968 +ENDPROC(NG4_retl_o2_plus_o4)
969 +ENTRY(NG4_retl_o2_plus_o4_plus_8)
970 + add %o4, 8, %o4
971 + ba,pt %xcc, __restore_asi
972 + add %o2, %o4, %o0
973 +ENDPROC(NG4_retl_o2_plus_o4_plus_8)
974 +ENTRY(NG4_retl_o2_plus_o4_plus_16)
975 + add %o4, 16, %o4
976 + ba,pt %xcc, __restore_asi
977 + add %o2, %o4, %o0
978 +ENDPROC(NG4_retl_o2_plus_o4_plus_16)
979 +ENTRY(NG4_retl_o2_plus_o4_plus_24)
980 + add %o4, 24, %o4
981 + ba,pt %xcc, __restore_asi
982 + add %o2, %o4, %o0
983 +ENDPROC(NG4_retl_o2_plus_o4_plus_24)
984 +ENTRY(NG4_retl_o2_plus_o4_plus_32)
985 + add %o4, 32, %o4
986 + ba,pt %xcc, __restore_asi
987 + add %o2, %o4, %o0
988 +ENDPROC(NG4_retl_o2_plus_o4_plus_32)
989 +ENTRY(NG4_retl_o2_plus_o4_plus_40)
990 + add %o4, 40, %o4
991 + ba,pt %xcc, __restore_asi
992 + add %o2, %o4, %o0
993 +ENDPROC(NG4_retl_o2_plus_o4_plus_40)
994 +ENTRY(NG4_retl_o2_plus_o4_plus_48)
995 + add %o4, 48, %o4
996 + ba,pt %xcc, __restore_asi
997 + add %o2, %o4, %o0
998 +ENDPROC(NG4_retl_o2_plus_o4_plus_48)
999 +ENTRY(NG4_retl_o2_plus_o4_plus_56)
1000 + add %o4, 56, %o4
1001 + ba,pt %xcc, __restore_asi
1002 + add %o2, %o4, %o0
1003 +ENDPROC(NG4_retl_o2_plus_o4_plus_56)
1004 +ENTRY(NG4_retl_o2_plus_o4_plus_64)
1005 + add %o4, 64, %o4
1006 + ba,pt %xcc, __restore_asi
1007 + add %o2, %o4, %o0
1008 +ENDPROC(NG4_retl_o2_plus_o4_plus_64)
1009 +ENTRY(NG4_retl_o2_plus_o4_fp)
1010 + ba,pt %xcc, __restore_asi_fp
1011 + add %o2, %o4, %o0
1012 +ENDPROC(NG4_retl_o2_plus_o4_fp)
1013 +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
1014 + add %o4, 8, %o4
1015 + ba,pt %xcc, __restore_asi_fp
1016 + add %o2, %o4, %o0
1017 +ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
1018 +ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
1019 + add %o4, 16, %o4
1020 + ba,pt %xcc, __restore_asi_fp
1021 + add %o2, %o4, %o0
1022 +ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
1023 +ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
1024 + add %o4, 24, %o4
1025 + ba,pt %xcc, __restore_asi_fp
1026 + add %o2, %o4, %o0
1027 +ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
1028 +ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
1029 + add %o4, 32, %o4
1030 + ba,pt %xcc, __restore_asi_fp
1031 + add %o2, %o4, %o0
1032 +ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
1033 +ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
1034 + add %o4, 40, %o4
1035 + ba,pt %xcc, __restore_asi_fp
1036 + add %o2, %o4, %o0
1037 +ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
1038 +ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
1039 + add %o4, 48, %o4
1040 + ba,pt %xcc, __restore_asi_fp
1041 + add %o2, %o4, %o0
1042 +ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
1043 +ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
1044 + add %o4, 56, %o4
1045 + ba,pt %xcc, __restore_asi_fp
1046 + add %o2, %o4, %o0
1047 +ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
1048 +ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
1049 + add %o4, 64, %o4
1050 + ba,pt %xcc, __restore_asi_fp
1051 + add %o2, %o4, %o0
1052 +ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
1053 +#endif
1054 .align 64
1055
1056 .globl FUNC_NAME
1057 @@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1058 brz,pt %g1, 51f
1059 sub %o2, %g1, %o2
1060
1061 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
1062 +
1063 +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1064 add %o1, 1, %o1
1065 subcc %g1, 1, %g1
1066 add %o0, 1, %o0
1067 bne,pt %icc, 1b
1068 - EX_ST(STORE(stb, %g2, %o0 - 0x01))
1069 + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
1070
1071 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
1072 LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
1073 @@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1074 brz,pt %g1, .Llarge_aligned
1075 sub %o2, %g1, %o2
1076
1077 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
1078 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1079 add %o1, 8, %o1
1080 subcc %g1, 8, %g1
1081 add %o0, 8, %o0
1082 bne,pt %icc, 1b
1083 - EX_ST(STORE(stx, %g2, %o0 - 0x08))
1084 + EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
1085
1086 .Llarge_aligned:
1087 /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
1088 andn %o2, 0x3f, %o4
1089 sub %o2, %o4, %o2
1090
1091 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1092 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
1093 add %o1, 0x40, %o1
1094 - EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
1095 + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
1096 subcc %o4, 0x40, %o4
1097 - EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
1098 - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
1099 - EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
1100 - EX_ST(STORE_INIT(%g1, %o0))
1101 + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
1102 + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
1103 + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
1104 + EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
1105 add %o0, 0x08, %o0
1106 - EX_ST(STORE_INIT(%g2, %o0))
1107 + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
1108 add %o0, 0x08, %o0
1109 - EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
1110 - EX_ST(STORE_INIT(%g3, %o0))
1111 + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
1112 + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
1113 add %o0, 0x08, %o0
1114 - EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
1115 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
1116 + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
1117 + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
1118 add %o0, 0x08, %o0
1119 - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
1120 - EX_ST(STORE_INIT(%o5, %o0))
1121 + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
1122 + EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
1123 add %o0, 0x08, %o0
1124 - EX_ST(STORE_INIT(%g2, %o0))
1125 + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
1126 add %o0, 0x08, %o0
1127 - EX_ST(STORE_INIT(%g3, %o0))
1128 + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
1129 add %o0, 0x08, %o0
1130 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
1131 + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
1132 add %o0, 0x08, %o0
1133 bne,pt %icc, 1b
1134 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
1135 @@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1136 sub %o2, %o4, %o2
1137 alignaddr %o1, %g0, %g1
1138 add %o1, %o4, %o1
1139 - EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
1140 -1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
1141 + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
1142 +1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
1143 subcc %o4, 0x40, %o4
1144 - EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
1145 - EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
1146 - EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
1147 - EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
1148 - EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
1149 - EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
1150 + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
1151 + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
1152 + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
1153 + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
1154 + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
1155 + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
1156 faligndata %f0, %f2, %f16
1157 - EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
1158 + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
1159 faligndata %f2, %f4, %f18
1160 add %g1, 0x40, %g1
1161 faligndata %f4, %f6, %f20
1162 @@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1163 faligndata %f10, %f12, %f26
1164 faligndata %f12, %f14, %f28
1165 faligndata %f14, %f0, %f30
1166 - EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
1167 - EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
1168 - EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
1169 - EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
1170 - EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
1171 - EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
1172 - EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
1173 - EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
1174 + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
1175 + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
1176 + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
1177 + EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
1178 + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
1179 + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
1180 + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
1181 + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
1182 add %o0, 0x40, %o0
1183 bne,pt %icc, 1b
1184 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
1185 @@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1186 andncc %o2, 0x20 - 1, %o5
1187 be,pn %icc, 2f
1188 sub %o2, %o5, %o2
1189 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1190 - EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
1191 - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
1192 - EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
1193 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1194 + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
1195 + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
1196 + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
1197 add %o1, 0x20, %o1
1198 subcc %o5, 0x20, %o5
1199 - EX_ST(STORE(stx, %g1, %o0 + 0x00))
1200 - EX_ST(STORE(stx, %g2, %o0 + 0x08))
1201 - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
1202 - EX_ST(STORE(stx, %o4, %o0 + 0x18))
1203 + EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
1204 + EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
1205 + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_16) /* 16 of this chunk's 32 bytes are stored; %o5 is already reduced by 0x20 */
1206 + EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
1207 bne,pt %icc, 1b
1208 add %o0, 0x20, %o0
1209 2: andcc %o2, 0x18, %o5
1210 be,pt %icc, 3f
1211 sub %o2, %o5, %o2
1212 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1213 +
1214 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1215 add %o1, 0x08, %o1
1216 add %o0, 0x08, %o0
1217 subcc %o5, 0x08, %o5
1218 bne,pt %icc, 1b
1219 - EX_ST(STORE(stx, %g1, %o0 - 0x08))
1220 + EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
1221 3: brz,pt %o2, .Lexit
1222 cmp %o2, 0x04
1223 bl,pn %icc, .Ltiny
1224 nop
1225 - EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
1226 + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
1227 add %o1, 0x04, %o1
1228 add %o0, 0x04, %o0
1229 subcc %o2, 0x04, %o2
1230 bne,pn %icc, .Ltiny
1231 - EX_ST(STORE(stw, %g1, %o0 - 0x04))
1232 + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
1233 ba,a,pt %icc, .Lexit
1234 .Lmedium_unaligned:
1235 /* First get dest 8 byte aligned. */
1236 @@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1237 brz,pt %g1, 2f
1238 sub %o2, %g1, %o2
1239
1240 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
1241 +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1242 add %o1, 1, %o1
1243 subcc %g1, 1, %g1
1244 add %o0, 1, %o0
1245 bne,pt %icc, 1b
1246 - EX_ST(STORE(stb, %g2, %o0 - 0x01))
1247 + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
1248 2:
1249 and %o1, 0x7, %g1
1250 brz,pn %g1, .Lmedium_noprefetch
1251 @@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1252 mov 64, %g2
1253 sub %g2, %g1, %g2
1254 andn %o1, 0x7, %o1
1255 - EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
1256 + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
1257 sllx %o4, %g1, %o4
1258 andn %o2, 0x08 - 1, %o5
1259 sub %o2, %o5, %o2
1260 -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
1261 +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
1262 add %o1, 0x08, %o1
1263 subcc %o5, 0x08, %o5
1264 srlx %g3, %g2, GLOBAL_SPARE
1265 or GLOBAL_SPARE, %o4, GLOBAL_SPARE
1266 - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
1267 + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
1268 add %o0, 0x08, %o0
1269 bne,pt %icc, 1b
1270 sllx %g3, %g1, %o4
1271 @@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1272 ba,pt %icc, .Lsmall_unaligned
1273
1274 .Ltiny:
1275 - EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
1276 + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
1277 subcc %o2, 1, %o2
1278 be,pn %icc, .Lexit
1279 - EX_ST(STORE(stb, %g1, %o0 + 0x00))
1280 - EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
1281 + EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
1282 + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
1283 subcc %o2, 1, %o2
1284 be,pn %icc, .Lexit
1285 - EX_ST(STORE(stb, %g1, %o0 + 0x01))
1286 - EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
1287 + EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
1288 + EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
1289 ba,pt %icc, .Lexit
1290 - EX_ST(STORE(stb, %g1, %o0 + 0x02))
1291 + EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
1292
1293 .Lsmall:
1294 andcc %g2, 0x3, %g0
1295 @@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1296 andn %o2, 0x4 - 1, %o5
1297 sub %o2, %o5, %o2
1298 1:
1299 - EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
1300 + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1301 add %o1, 0x04, %o1
1302 subcc %o5, 0x04, %o5
1303 add %o0, 0x04, %o0
1304 bne,pt %icc, 1b
1305 - EX_ST(STORE(stw, %g1, %o0 - 0x04))
1306 + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
1307 brz,pt %o2, .Lexit
1308 nop
1309 ba,a,pt %icc, .Ltiny
1310
1311 .Lsmall_unaligned:
1312 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
1313 +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
1314 add %o1, 1, %o1
1315 add %o0, 1, %o0
1316 subcc %o2, 1, %o2
1317 bne,pt %icc, 1b
1318 - EX_ST(STORE(stb, %g1, %o0 - 0x01))
1319 + EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
1320 ba,a,pt %icc, .Lexit
1321 .size FUNC_NAME, .-FUNC_NAME
1322 diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S
1323 index 5d1e4d1ac21e..9cd42fcbc781 100644
1324 --- a/arch/sparc/lib/NGcopy_from_user.S
1325 +++ b/arch/sparc/lib/NGcopy_from_user.S
1326 @@ -3,11 +3,11 @@
1327 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
1328 */
1329
1330 -#define EX_LD(x) \
1331 +#define EX_LD(x,y) \
1332 98: x; \
1333 .section __ex_table,"a";\
1334 .align 4; \
1335 - .word 98b, __ret_one_asi;\
1336 + .word 98b, y; \
1337 .text; \
1338 .align 4;
1339
1340 diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S
1341 index ff630dcb273c..5c358afd464e 100644
1342 --- a/arch/sparc/lib/NGcopy_to_user.S
1343 +++ b/arch/sparc/lib/NGcopy_to_user.S
1344 @@ -3,11 +3,11 @@
1345 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
1346 */
1347
1348 -#define EX_ST(x) \
1349 +#define EX_ST(x,y) \
1350 98: x; \
1351 .section __ex_table,"a";\
1352 .align 4; \
1353 - .word 98b, __ret_one_asi;\
1354 + .word 98b, y; \
1355 .text; \
1356 .align 4;
1357
1358 diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S
1359 index 96a14caf6966..d88c4ed50a00 100644
1360 --- a/arch/sparc/lib/NGmemcpy.S
1361 +++ b/arch/sparc/lib/NGmemcpy.S
1362 @@ -4,6 +4,7 @@
1363 */
1364
1365 #ifdef __KERNEL__
1366 +#include <linux/linkage.h>
1367 #include <asm/asi.h>
1368 #include <asm/thread_info.h>
1369 #define GLOBAL_SPARE %g7
1370 @@ -27,15 +28,11 @@
1371 #endif
1372
1373 #ifndef EX_LD
1374 -#define EX_LD(x) x
1375 +#define EX_LD(x,y) x
1376 #endif
1377
1378 #ifndef EX_ST
1379 -#define EX_ST(x) x
1380 -#endif
1381 -
1382 -#ifndef EX_RETVAL
1383 -#define EX_RETVAL(x) x
1384 +#define EX_ST(x,y) x
1385 #endif
1386
1387 #ifndef LOAD
1388 @@ -79,6 +76,92 @@
1389 .register %g3,#scratch
1390
1391 .text
1392 +#ifndef EX_RETVAL
1393 +#define EX_RETVAL(x) x
1394 +__restore_asi:
1395 + wr %g0, ASI_AIUS, %asi /* set %asi to ASI_AIUS before returning from a fixup */
1396 + ret
1397 + restore /* delay slot of ret: pop the register window so the residue in %i0 becomes the caller's %o0 */
1398 +ENTRY(NG_ret_i2_plus_i4_plus_1)
1399 + add %i4, 1, %i4; ba,pt %xcc, __restore_asi /* two statements on one line so the hunk length is unchanged */
1400 + add %i2, %i4, %i0 /* delay slot: %i0 = %i2 + %i4 + 1 */
1401 +ENDPROC(NG_ret_i2_plus_i4_plus_1)
1402 +ENTRY(NG_ret_i2_plus_g1)
1403 + ba,pt %xcc, __restore_asi
1404 + add %i2, %g1, %i0
1405 +ENDPROC(NG_ret_i2_plus_g1)
1406 +ENTRY(NG_ret_i2_plus_g1_minus_8)
1407 + sub %g1, 8, %g1
1408 + ba,pt %xcc, __restore_asi
1409 + add %i2, %g1, %i0
1410 +ENDPROC(NG_ret_i2_plus_g1_minus_8)
1411 +ENTRY(NG_ret_i2_plus_g1_minus_16)
1412 + sub %g1, 16, %g1
1413 + ba,pt %xcc, __restore_asi
1414 + add %i2, %g1, %i0
1415 +ENDPROC(NG_ret_i2_plus_g1_minus_16)
1416 +ENTRY(NG_ret_i2_plus_g1_minus_24)
1417 + sub %g1, 24, %g1
1418 + ba,pt %xcc, __restore_asi
1419 + add %i2, %g1, %i0
1420 +ENDPROC(NG_ret_i2_plus_g1_minus_24)
1421 +ENTRY(NG_ret_i2_plus_g1_minus_32)
1422 + sub %g1, 32, %g1
1423 + ba,pt %xcc, __restore_asi
1424 + add %i2, %g1, %i0
1425 +ENDPROC(NG_ret_i2_plus_g1_minus_32)
1426 +ENTRY(NG_ret_i2_plus_g1_minus_40)
1427 + sub %g1, 40, %g1
1428 + ba,pt %xcc, __restore_asi
1429 + add %i2, %g1, %i0
1430 +ENDPROC(NG_ret_i2_plus_g1_minus_40)
1431 +ENTRY(NG_ret_i2_plus_g1_minus_48)
1432 + sub %g1, 48, %g1
1433 + ba,pt %xcc, __restore_asi
1434 + add %i2, %g1, %i0
1435 +ENDPROC(NG_ret_i2_plus_g1_minus_48)
1436 +ENTRY(NG_ret_i2_plus_g1_minus_56)
1437 + sub %g1, 56, %g1
1438 + ba,pt %xcc, __restore_asi
1439 + add %i2, %g1, %i0
1440 +ENDPROC(NG_ret_i2_plus_g1_minus_56)
1441 +ENTRY(NG_ret_i2_plus_i4)
1442 + ba,pt %xcc, __restore_asi
1443 + add %i2, %i4, %i0
1444 +ENDPROC(NG_ret_i2_plus_i4)
1445 +ENTRY(NG_ret_i2_plus_i4_minus_8)
1446 + sub %i4, 8, %i4
1447 + ba,pt %xcc, __restore_asi
1448 + add %i2, %i4, %i0
1449 +ENDPROC(NG_ret_i2_plus_i4_minus_8)
1450 +ENTRY(NG_ret_i2_plus_8)
1451 + ba,pt %xcc, __restore_asi
1452 + add %i2, 8, %i0
1453 +ENDPROC(NG_ret_i2_plus_8)
1454 +ENTRY(NG_ret_i2_plus_4)
1455 + ba,pt %xcc, __restore_asi
1456 + add %i2, 4, %i0
1457 +ENDPROC(NG_ret_i2_plus_4)
1458 +ENTRY(NG_ret_i2_plus_1)
1459 + ba,pt %xcc, __restore_asi
1460 + add %i2, 1, %i0
1461 +ENDPROC(NG_ret_i2_plus_1)
1462 +ENTRY(NG_ret_i2_plus_g1_plus_1)
1463 + add %g1, 1, %g1
1464 + ba,pt %xcc, __restore_asi
1465 + add %i2, %g1, %i0
1466 +ENDPROC(NG_ret_i2_plus_g1_plus_1)
1467 +ENTRY(NG_ret_i2)
1468 + ba,pt %xcc, __restore_asi
1469 + mov %i2, %i0
1470 +ENDPROC(NG_ret_i2)
1471 +ENTRY(NG_ret_i2_and_7_plus_i4)
1472 + and %i2, 7, %i2
1473 + ba,pt %xcc, __restore_asi
1474 + add %i2, %i4, %i0
1475 +ENDPROC(NG_ret_i2_and_7_plus_i4)
1476 +#endif
1477 +
1478 .align 64
1479
1480 .globl FUNC_NAME
1481 @@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1482 sub %g0, %i4, %i4 ! bytes to align dst
1483 sub %i2, %i4, %i2
1484 1: subcc %i4, 1, %i4
1485 - EX_LD(LOAD(ldub, %i1, %g1))
1486 - EX_ST(STORE(stb, %g1, %o0))
1487 + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
1488 + EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
1489 add %i1, 1, %i1
1490 bne,pt %XCC, 1b
1491 add %o0, 1, %o0
1492 @@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1493 and %i4, 0x7, GLOBAL_SPARE
1494 sll GLOBAL_SPARE, 3, GLOBAL_SPARE
1495 mov 64, %i5
1496 - EX_LD(LOAD_TWIN(%i1, %g2, %g3))
1497 + EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
1498 sub %i5, GLOBAL_SPARE, %i5
1499 mov 16, %o4
1500 mov 32, %o5
1501 @@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1502 srlx WORD3, PRE_SHIFT, TMP; \
1503 or WORD2, TMP, WORD2;
1504
1505 -8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
1506 +8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
1507 MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
1508 LOAD(prefetch, %i1 + %i3, #one_read)
1509
1510 - EX_ST(STORE_INIT(%g2, %o0 + 0x00))
1511 - EX_ST(STORE_INIT(%g3, %o0 + 0x08))
1512 + EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
1513 + EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1514
1515 - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
1516 + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
1517 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
1518
1519 - EX_ST(STORE_INIT(%o2, %o0 + 0x10))
1520 - EX_ST(STORE_INIT(%o3, %o0 + 0x18))
1521 + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1522 + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1523
1524 - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1525 + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1526 MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
1527
1528 - EX_ST(STORE_INIT(%g2, %o0 + 0x20))
1529 - EX_ST(STORE_INIT(%g3, %o0 + 0x28))
1530 + EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1531 + EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1532
1533 - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
1534 + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
1535 add %i1, 64, %i1
1536 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
1537
1538 - EX_ST(STORE_INIT(%o2, %o0 + 0x30))
1539 - EX_ST(STORE_INIT(%o3, %o0 + 0x38))
1540 + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1541 + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1542
1543 subcc %g1, 64, %g1
1544 bne,pt %XCC, 8b
1545 @@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1546 ba,pt %XCC, 60f
1547 add %i1, %i4, %i1
1548
1549 -9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
1550 +9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
1551 MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
1552 LOAD(prefetch, %i1 + %i3, #one_read)
1553
1554 - EX_ST(STORE_INIT(%g3, %o0 + 0x00))
1555 - EX_ST(STORE_INIT(%o2, %o0 + 0x08))
1556 + EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
1557 + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1558
1559 - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
1560 + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
1561 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
1562
1563 - EX_ST(STORE_INIT(%o3, %o0 + 0x10))
1564 - EX_ST(STORE_INIT(%g2, %o0 + 0x18))
1565 + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1566 + EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1567
1568 - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1569 + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1570 MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
1571
1572 - EX_ST(STORE_INIT(%g3, %o0 + 0x20))
1573 - EX_ST(STORE_INIT(%o2, %o0 + 0x28))
1574 + EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1575 + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1576
1577 - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
1578 + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
1579 add %i1, 64, %i1
1580 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
1581
1582 - EX_ST(STORE_INIT(%o3, %o0 + 0x30))
1583 - EX_ST(STORE_INIT(%g2, %o0 + 0x38))
1584 + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1585 + EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1586
1587 subcc %g1, 64, %g1
1588 bne,pt %XCC, 9b
1589 @@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1590 * one twin load ahead, then add 8 back into source when
1591 * we finish the loop.
1592 */
1593 - EX_LD(LOAD_TWIN(%i1, %o4, %o5))
1594 + EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
1595 mov 16, %o7
1596 mov 32, %g2
1597 mov 48, %g3
1598 mov 64, %o1
1599 -1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1600 +1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
1601 LOAD(prefetch, %i1 + %o1, #one_read)
1602 - EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line
1603 - EX_ST(STORE_INIT(%o2, %o0 + 0x08))
1604 - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
1605 - EX_ST(STORE_INIT(%o3, %o0 + 0x10))
1606 - EX_ST(STORE_INIT(%o4, %o0 + 0x18))
1607 - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
1608 - EX_ST(STORE_INIT(%o5, %o0 + 0x20))
1609 - EX_ST(STORE_INIT(%o2, %o0 + 0x28))
1610 - EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5))
1611 + EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
1612 + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1613 + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
1614 + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1615 + EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1616 + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1617 + EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1618 + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1619 + EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
1620 add %i1, 64, %i1
1621 - EX_ST(STORE_INIT(%o3, %o0 + 0x30))
1622 - EX_ST(STORE_INIT(%o4, %o0 + 0x38))
1623 + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1624 + EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1625 subcc %g1, 64, %g1
1626 bne,pt %XCC, 1b
1627 add %o0, 64, %o0
1628 @@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1629 mov 32, %g2
1630 mov 48, %g3
1631 mov 64, %o1
1632 -1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5))
1633 - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1634 +1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
1635 + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
1636 LOAD(prefetch, %i1 + %o1, #one_read)
1637 - EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line
1638 - EX_ST(STORE_INIT(%o5, %o0 + 0x08))
1639 - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
1640 - EX_ST(STORE_INIT(%o2, %o0 + 0x10))
1641 - EX_ST(STORE_INIT(%o3, %o0 + 0x18))
1642 - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
1643 + EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
1644 + EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1645 + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
1646 + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1647 + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1648 + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1649 add %i1, 64, %i1
1650 - EX_ST(STORE_INIT(%o4, %o0 + 0x20))
1651 - EX_ST(STORE_INIT(%o5, %o0 + 0x28))
1652 - EX_ST(STORE_INIT(%o2, %o0 + 0x30))
1653 - EX_ST(STORE_INIT(%o3, %o0 + 0x38))
1654 + EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1655 + EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1656 + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1657 + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1658 subcc %g1, 64, %g1
1659 bne,pt %XCC, 1b
1660 add %o0, 64, %o0
1661 @@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1662 andn %i2, 0xf, %i4
1663 and %i2, 0xf, %i2
1664 1: subcc %i4, 0x10, %i4
1665 - EX_LD(LOAD(ldx, %i1, %o4))
1666 + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
1667 add %i1, 0x08, %i1
1668 - EX_LD(LOAD(ldx, %i1, %g1))
1669 + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
1670 sub %i1, 0x08, %i1
1671 - EX_ST(STORE(stx, %o4, %i1 + %i3))
1672 + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
1673 add %i1, 0x8, %i1
1674 - EX_ST(STORE(stx, %g1, %i1 + %i3))
1675 + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
1676 bgu,pt %XCC, 1b
1677 add %i1, 0x8, %i1
1678 73: andcc %i2, 0x8, %g0
1679 be,pt %XCC, 1f
1680 nop
1681 sub %i2, 0x8, %i2
1682 - EX_LD(LOAD(ldx, %i1, %o4))
1683 - EX_ST(STORE(stx, %o4, %i1 + %i3))
1684 + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
1685 + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
1686 add %i1, 0x8, %i1
1687 1: andcc %i2, 0x4, %g0
1688 be,pt %XCC, 1f
1689 nop
1690 sub %i2, 0x4, %i2
1691 - EX_LD(LOAD(lduw, %i1, %i5))
1692 - EX_ST(STORE(stw, %i5, %i1 + %i3))
1693 + EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
1694 + EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
1695 add %i1, 0x4, %i1
1696 1: cmp %i2, 0
1697 be,pt %XCC, 85f
1698 @@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1699 sub %i2, %g1, %i2
1700
1701 1: subcc %g1, 1, %g1
1702 - EX_LD(LOAD(ldub, %i1, %i5))
1703 - EX_ST(STORE(stb, %i5, %i1 + %i3))
1704 + EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
1705 + EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
1706 bgu,pt %icc, 1b
1707 add %i1, 1, %i1
1708
1709 @@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1710
1711 8: mov 64, %i3
1712 andn %i1, 0x7, %i1
1713 - EX_LD(LOAD(ldx, %i1, %g2))
1714 + EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
1715 sub %i3, %g1, %i3
1716 andn %i2, 0x7, %i4
1717 sllx %g2, %g1, %g2
1718 1: add %i1, 0x8, %i1
1719 - EX_LD(LOAD(ldx, %i1, %g3))
1720 + EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
1721 subcc %i4, 0x8, %i4
1722 srlx %g3, %i3, %i5
1723 or %i5, %g2, %i5
1724 - EX_ST(STORE(stx, %i5, %o0))
1725 + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
1726 add %o0, 0x8, %o0
1727 bgu,pt %icc, 1b
1728 sllx %g3, %g1, %g2
1729 @@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1730
1731 1:
1732 subcc %i2, 4, %i2
1733 - EX_LD(LOAD(lduw, %i1, %g1))
1734 - EX_ST(STORE(stw, %g1, %i1 + %i3))
1735 + EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
1736 + EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
1737 bgu,pt %XCC, 1b
1738 add %i1, 4, %i1
1739
1740 @@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1741 .align 32
1742 90:
1743 subcc %i2, 1, %i2
1744 - EX_LD(LOAD(ldub, %i1, %g1))
1745 - EX_ST(STORE(stb, %g1, %i1 + %i3))
1746 + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
1747 + EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
1748 bgu,pt %XCC, 90b
1749 add %i1, 1, %i1
1750 ret
1751 diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S
1752 index ecc5692fa2b4..bb6ff73229e3 100644
1753 --- a/arch/sparc/lib/U1copy_from_user.S
1754 +++ b/arch/sparc/lib/U1copy_from_user.S
1755 @@ -3,19 +3,19 @@
1756 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
1757 */
1758
1759 -#define EX_LD(x) \
1760 +#define EX_LD(x,y) \
1761 98: x; \
1762 .section __ex_table,"a";\
1763 .align 4; \
1764 - .word 98b, __retl_one; \
1765 + .word 98b, y; \
1766 .text; \
1767 .align 4;
1768
1769 -#define EX_LD_FP(x) \
1770 +#define EX_LD_FP(x,y) \
1771 98: x; \
1772 .section __ex_table,"a";\
1773 .align 4; \
1774 - .word 98b, __retl_one_fp;\
1775 + .word 98b, y; \
1776 .text; \
1777 .align 4;
1778
1779 diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
1780 index 9eea392e44d4..ed92ce739558 100644
1781 --- a/arch/sparc/lib/U1copy_to_user.S
1782 +++ b/arch/sparc/lib/U1copy_to_user.S
1783 @@ -3,19 +3,19 @@
1784 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
1785 */
1786
1787 -#define EX_ST(x) \
1788 +#define EX_ST(x,y) \
1789 98: x; \
1790 .section __ex_table,"a";\
1791 .align 4; \
1792 - .word 98b, __retl_one; \
1793 + .word 98b, y; \
1794 .text; \
1795 .align 4;
1796
1797 -#define EX_ST_FP(x) \
1798 +#define EX_ST_FP(x,y) \
1799 98: x; \
1800 .section __ex_table,"a";\
1801 .align 4; \
1802 - .word 98b, __retl_one_fp;\
1803 + .word 98b, y; \
1804 .text; \
1805 .align 4;
1806
1807 diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
1808 index 3e6209ebb7d7..f30d2ab2c371 100644
1809 --- a/arch/sparc/lib/U1memcpy.S
1810 +++ b/arch/sparc/lib/U1memcpy.S
1811 @@ -5,6 +5,7 @@
1812 */
1813
1814 #ifdef __KERNEL__
1815 +#include <linux/linkage.h>
1816 #include <asm/visasm.h>
1817 #include <asm/asi.h>
1818 #define GLOBAL_SPARE g7
1819 @@ -23,21 +24,17 @@
1820 #endif
1821
1822 #ifndef EX_LD
1823 -#define EX_LD(x) x
1824 +#define EX_LD(x,y) x
1825 #endif
1826 #ifndef EX_LD_FP
1827 -#define EX_LD_FP(x) x
1828 +#define EX_LD_FP(x,y) x
1829 #endif
1830
1831 #ifndef EX_ST
1832 -#define EX_ST(x) x
1833 +#define EX_ST(x,y) x
1834 #endif
1835 #ifndef EX_ST_FP
1836 -#define EX_ST_FP(x) x
1837 -#endif
1838 -
1839 -#ifndef EX_RETVAL
1840 -#define EX_RETVAL(x) x
1841 +#define EX_ST_FP(x,y) x
1842 #endif
1843
1844 #ifndef LOAD
1845 @@ -78,53 +75,169 @@
1846 faligndata %f7, %f8, %f60; \
1847 faligndata %f8, %f9, %f62;
1848
1849 -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \
1850 - EX_LD_FP(LOAD_BLK(%src, %fdest)); \
1851 - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
1852 - add %src, 0x40, %src; \
1853 - subcc %len, 0x40, %len; \
1854 - be,pn %xcc, jmptgt; \
1855 - add %dest, 0x40, %dest; \
1856 -
1857 -#define LOOP_CHUNK1(src, dest, len, branch_dest) \
1858 - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest)
1859 -#define LOOP_CHUNK2(src, dest, len, branch_dest) \
1860 - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
1861 -#define LOOP_CHUNK3(src, dest, len, branch_dest) \
1862 - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
1863 +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \
1864 + EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \
1865 + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
1866 + add %src, 0x40, %src; \
1867 + subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
1868 + be,pn %xcc, jmptgt; \
1869 + add %dest, 0x40, %dest; \
1870 +
1871 +#define LOOP_CHUNK1(src, dest, branch_dest) \
1872 + MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest)
1873 +#define LOOP_CHUNK2(src, dest, branch_dest) \
1874 + MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
1875 +#define LOOP_CHUNK3(src, dest, branch_dest) \
1876 + MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
1877
1878 #define DO_SYNC membar #Sync;
1879 #define STORE_SYNC(dest, fsrc) \
1880 - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
1881 + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
1882 add %dest, 0x40, %dest; \
1883 DO_SYNC
1884
1885 #define STORE_JUMP(dest, fsrc, target) \
1886 - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
1887 + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
1888 add %dest, 0x40, %dest; \
1889 ba,pt %xcc, target; \
1890 nop;
1891
1892 -#define FINISH_VISCHUNK(dest, f0, f1, left) \
1893 - subcc %left, 8, %left;\
1894 - bl,pn %xcc, 95f; \
1895 - faligndata %f0, %f1, %f48; \
1896 - EX_ST_FP(STORE(std, %f48, %dest)); \
1897 +#define FINISH_VISCHUNK(dest, f0, f1) \
1898 + subcc %g3, 8, %g3; \
1899 + bl,pn %xcc, 95f; \
1900 + faligndata %f0, %f1, %f48; \
1901 + EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \
1902 add %dest, 8, %dest;
1903
1904 -#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
1905 - subcc %left, 8, %left; \
1906 - bl,pn %xcc, 95f; \
1907 +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
1908 + subcc %g3, 8, %g3; \
1909 + bl,pn %xcc, 95f; \
1910 fsrc2 %f0, %f1;
1911
1912 -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \
1913 - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
1914 +#define UNEVEN_VISCHUNK(dest, f0, f1) \
1915 + UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
1916 ba,a,pt %xcc, 93f;
1917
1918 .register %g2,#scratch
1919 .register %g3,#scratch
1920
1921 .text
1922 +#ifndef EX_RETVAL
1923 +#define EX_RETVAL(x) x
1924 +ENTRY(U1_g1_1_fp) /* fixup target named in EX_LD_FP/EX_ST_FP: %o0 = %g1 + 1 + %g2 + %o2 */
1925 + VISExitHalf /* macro defined outside this chunk; presumably leaves the VIS/FPU state entered by the copy - confirm */
1926 + add %g1, 1, %g1
1927 + add %g1, %g2, %g1
1928 + retl
1929 + add %g1, %o2, %o0 /* executes in the retl delay slot */
1930 +ENDPROC(U1_g1_1_fp)
1931 +ENTRY(U1_g2_0_fp) /* fixup target: %o0 = %g2 + %o2 */
1932 + VISExitHalf
1933 + retl
1934 + add %g2, %o2, %o0
1935 +ENDPROC(U1_g2_0_fp)
1936 +ENTRY(U1_g2_8_fp) /* fixup target: %o0 = %g2 + 8 + %o2 (used on stores issued after %g2 was decremented by 8) */
1937 + VISExitHalf
1938 + add %g2, 8, %g2
1939 + retl
1940 + add %g2, %o2, %o0
1941 +ENDPROC(U1_g2_8_fp)
1942 +ENTRY(U1_gs_0_fp) /* fixup target: %o0 = GLOBAL_SPARE + %g3 + %o2 */
1943 + VISExitHalf
1944 + add %GLOBAL_SPARE, %g3, %o0
1945 + retl
1946 + add %o0, %o2, %o0
1947 +ENDPROC(U1_gs_0_fp)
1948 +ENTRY(U1_gs_80_fp) /* fixup target: %o0 = GLOBAL_SPARE + 0x80 + %g3 + %o2 */
1949 + VISExitHalf
1950 + add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
1951 + add %GLOBAL_SPARE, %g3, %o0
1952 + retl
1953 + add %o0, %o2, %o0
1954 +ENDPROC(U1_gs_80_fp)
1955 +ENTRY(U1_gs_40_fp) /* fixup target: %o0 = GLOBAL_SPARE + 0x40 + %g3 + %o2 */
1956 + VISExitHalf
1957 + add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
1958 + add %GLOBAL_SPARE, %g3, %o0
1959 + retl
1960 + add %o0, %o2, %o0
1961 +ENDPROC(U1_gs_40_fp)
1962 +ENTRY(U1_g3_0_fp) /* fixup target: %o0 = %g3 + %o2 */
1963 + VISExitHalf
1964 + retl
1965 + add %g3, %o2, %o0
1966 +ENDPROC(U1_g3_0_fp)
1967 +ENTRY(U1_g3_8_fp) /* fixup target: %o0 = %g3 + 8 + %o2 (used on stores issued after %g3 was decremented by 8) */
1968 + VISExitHalf
1969 + add %g3, 8, %g3
1970 + retl
1971 + add %g3, %o2, %o0
1972 +ENDPROC(U1_g3_8_fp)
1973 +ENTRY(U1_o2_0_fp) /* fixup target: %o0 = %o2 */
1974 + VISExitHalf
1975 + retl
1976 + mov %o2, %o0
1977 +ENDPROC(U1_o2_0_fp)
1978 +ENTRY(U1_o2_1_fp) /* fixup target: %o0 = %o2 + 1 (used on the byte store issued after %o2 was decremented) */
1979 + VISExitHalf
1980 + retl
1981 + add %o2, 1, %o0
1982 +ENDPROC(U1_o2_1_fp)
1983 +ENTRY(U1_gs_0) /* fixup target named in EX_LD/EX_ST: %o0 = GLOBAL_SPARE + %o2 */
1984 + VISExitHalf /* NOTE(review): name has no _fp suffix yet it still runs VISExitHalf - confirm this path is entered with VIS state active */
1985 + retl
1986 + add %GLOBAL_SPARE, %o2, %o0 /* executes in the retl delay slot */
1987 +ENDPROC(U1_gs_0)
1988 +ENTRY(U1_gs_8) /* fixup target: %o0 = GLOBAL_SPARE + %o2 + 8 */
1989 + VISExitHalf
1990 + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
1991 + retl
1992 + add %GLOBAL_SPARE, 0x8, %o0
1993 +ENDPROC(U1_gs_8)
1994 +ENTRY(U1_gs_10) /* fixup target: %o0 = GLOBAL_SPARE + %o2 + 0x10 */
1995 + VISExitHalf
1996 + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
1997 + retl
1998 + add %GLOBAL_SPARE, 0x10, %o0
1999 +ENDPROC(U1_gs_10)
2000 +ENTRY(U1_o2_0) /* fixup target: %o0 = %o2 */
2001 + retl
2002 + mov %o2, %o0
2003 +ENDPROC(U1_o2_0)
2004 +ENTRY(U1_o2_8) /* fixup target: %o0 = %o2 + 8 */
2005 + retl
2006 + add %o2, 8, %o0
2007 +ENDPROC(U1_o2_8)
2008 +ENTRY(U1_o2_4) /* fixup target: %o0 = %o2 + 4 */
2009 + retl
2010 + add %o2, 4, %o0
2011 +ENDPROC(U1_o2_4)
2012 +ENTRY(U1_o2_1) /* fixup target: %o0 = %o2 + 1 */
2013 + retl
2014 + add %o2, 1, %o0
2015 +ENDPROC(U1_o2_1)
2016 +ENTRY(U1_g1_0) /* fixup target: %o0 = %g1 + %o2 */
2017 + retl
2018 + add %g1, %o2, %o0
2019 +ENDPROC(U1_g1_0)
2020 +ENTRY(U1_g1_1) /* fixup target: %o0 = %g1 + 1 + %o2 */
2021 + add %g1, 1, %g1
2022 + retl
2023 + add %g1, %o2, %o0
2024 +ENDPROC(U1_g1_1)
2025 +ENTRY(U1_gs_0_o2_adj) /* fixup target: %o0 = GLOBAL_SPARE + (%o2 & 7) */
2026 + and %o2, 7, %o2 /* keep only the sub-8-byte tail of the length */
2027 + retl
2028 + add %GLOBAL_SPARE, %o2, %o0
2029 +ENDPROC(U1_gs_0_o2_adj)
2030 +ENTRY(U1_gs_8_o2_adj) /* fixup target: %o0 = GLOBAL_SPARE + 8 + (%o2 & 7) */
2031 + and %o2, 7, %o2
2032 + add %GLOBAL_SPARE, 8, %GLOBAL_SPARE
2033 + retl
2034 + add %GLOBAL_SPARE, %o2, %o0
2035 +ENDPROC(U1_gs_8_o2_adj)
2036 +#endif
2037 +
2038 .align 64
2039
2040 .globl FUNC_NAME
2041 @@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2042 and %g2, 0x38, %g2
2043
2044 1: subcc %g1, 0x1, %g1
2045 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
2046 - EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
2047 + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
2048 + EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
2049 bgu,pt %XCC, 1b
2050 add %o1, 0x1, %o1
2051
2052 @@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2053 be,pt %icc, 3f
2054 alignaddr %o1, %g0, %o1
2055
2056 - EX_LD_FP(LOAD(ldd, %o1, %f4))
2057 -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
2058 + EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
2059 +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
2060 add %o1, 0x8, %o1
2061 subcc %g2, 0x8, %g2
2062 faligndata %f4, %f6, %f0
2063 - EX_ST_FP(STORE(std, %f0, %o0))
2064 + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
2065 be,pn %icc, 3f
2066 add %o0, 0x8, %o0
2067
2068 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
2069 + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
2070 add %o1, 0x8, %o1
2071 subcc %g2, 0x8, %g2
2072 faligndata %f6, %f4, %f0
2073 - EX_ST_FP(STORE(std, %f0, %o0))
2074 + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
2075 bne,pt %icc, 1b
2076 add %o0, 0x8, %o0
2077
2078 @@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2079 add %g1, %GLOBAL_SPARE, %g1
2080 subcc %o2, %g3, %o2
2081
2082 - EX_LD_FP(LOAD_BLK(%o1, %f0))
2083 + EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
2084 add %o1, 0x40, %o1
2085 add %g1, %g3, %g1
2086 - EX_LD_FP(LOAD_BLK(%o1, %f16))
2087 + EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
2088 add %o1, 0x40, %o1
2089 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
2090 - EX_LD_FP(LOAD_BLK(%o1, %f32))
2091 + EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
2092 add %o1, 0x40, %o1
2093
2094 /* There are 8 instances of the unrolled loop,
2095 @@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2096
2097 .align 64
2098 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
2099 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2100 + LOOP_CHUNK1(o1, o0, 1f)
2101 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
2102 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2103 + LOOP_CHUNK2(o1, o0, 2f)
2104 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
2105 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2106 + LOOP_CHUNK3(o1, o0, 3f)
2107 ba,pt %xcc, 1b+4
2108 faligndata %f0, %f2, %f48
2109 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
2110 @@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2111 STORE_JUMP(o0, f48, 56f)
2112
2113 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
2114 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2115 + LOOP_CHUNK1(o1, o0, 1f)
2116 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
2117 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2118 + LOOP_CHUNK2(o1, o0, 2f)
2119 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
2120 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2121 + LOOP_CHUNK3(o1, o0, 3f)
2122 ba,pt %xcc, 1b+4
2123 faligndata %f2, %f4, %f48
2124 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
2125 @@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2126 STORE_JUMP(o0, f48, 57f)
2127
2128 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
2129 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2130 + LOOP_CHUNK1(o1, o0, 1f)
2131 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
2132 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2133 + LOOP_CHUNK2(o1, o0, 2f)
2134 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
2135 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2136 + LOOP_CHUNK3(o1, o0, 3f)
2137 ba,pt %xcc, 1b+4
2138 faligndata %f4, %f6, %f48
2139 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
2140 @@ -303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2141 STORE_JUMP(o0, f48, 58f)
2142
2143 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
2144 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2145 + LOOP_CHUNK1(o1, o0, 1f)
2146 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
2147 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2148 + LOOP_CHUNK2(o1, o0, 2f)
2149 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
2150 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2151 + LOOP_CHUNK3(o1, o0, 3f)
2152 ba,pt %xcc, 1b+4
2153 faligndata %f6, %f8, %f48
2154 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
2155 @@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2156 STORE_JUMP(o0, f48, 59f)
2157
2158 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
2159 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2160 + LOOP_CHUNK1(o1, o0, 1f)
2161 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
2162 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2163 + LOOP_CHUNK2(o1, o0, 2f)
2164 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
2165 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2166 + LOOP_CHUNK3(o1, o0, 3f)
2167 ba,pt %xcc, 1b+4
2168 faligndata %f8, %f10, %f48
2169 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
2170 @@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2171 STORE_JUMP(o0, f48, 60f)
2172
2173 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
2174 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2175 + LOOP_CHUNK1(o1, o0, 1f)
2176 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
2177 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2178 + LOOP_CHUNK2(o1, o0, 2f)
2179 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
2180 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2181 + LOOP_CHUNK3(o1, o0, 3f)
2182 ba,pt %xcc, 1b+4
2183 faligndata %f10, %f12, %f48
2184 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
2185 @@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2186 STORE_JUMP(o0, f48, 61f)
2187
2188 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
2189 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2190 + LOOP_CHUNK1(o1, o0, 1f)
2191 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
2192 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2193 + LOOP_CHUNK2(o1, o0, 2f)
2194 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
2195 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2196 + LOOP_CHUNK3(o1, o0, 3f)
2197 ba,pt %xcc, 1b+4
2198 faligndata %f12, %f14, %f48
2199 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
2200 @@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2201 STORE_JUMP(o0, f48, 62f)
2202
2203 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
2204 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2205 + LOOP_CHUNK1(o1, o0, 1f)
2206 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2207 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2208 + LOOP_CHUNK2(o1, o0, 2f)
2209 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
2210 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2211 + LOOP_CHUNK3(o1, o0, 3f)
2212 ba,pt %xcc, 1b+4
2213 faligndata %f14, %f16, %f48
2214 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2215 @@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2216 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2217 STORE_JUMP(o0, f48, 63f)
2218
2219 -40: FINISH_VISCHUNK(o0, f0, f2, g3)
2220 -41: FINISH_VISCHUNK(o0, f2, f4, g3)
2221 -42: FINISH_VISCHUNK(o0, f4, f6, g3)
2222 -43: FINISH_VISCHUNK(o0, f6, f8, g3)
2223 -44: FINISH_VISCHUNK(o0, f8, f10, g3)
2224 -45: FINISH_VISCHUNK(o0, f10, f12, g3)
2225 -46: FINISH_VISCHUNK(o0, f12, f14, g3)
2226 -47: UNEVEN_VISCHUNK(o0, f14, f0, g3)
2227 -48: FINISH_VISCHUNK(o0, f16, f18, g3)
2228 -49: FINISH_VISCHUNK(o0, f18, f20, g3)
2229 -50: FINISH_VISCHUNK(o0, f20, f22, g3)
2230 -51: FINISH_VISCHUNK(o0, f22, f24, g3)
2231 -52: FINISH_VISCHUNK(o0, f24, f26, g3)
2232 -53: FINISH_VISCHUNK(o0, f26, f28, g3)
2233 -54: FINISH_VISCHUNK(o0, f28, f30, g3)
2234 -55: UNEVEN_VISCHUNK(o0, f30, f0, g3)
2235 -56: FINISH_VISCHUNK(o0, f32, f34, g3)
2236 -57: FINISH_VISCHUNK(o0, f34, f36, g3)
2237 -58: FINISH_VISCHUNK(o0, f36, f38, g3)
2238 -59: FINISH_VISCHUNK(o0, f38, f40, g3)
2239 -60: FINISH_VISCHUNK(o0, f40, f42, g3)
2240 -61: FINISH_VISCHUNK(o0, f42, f44, g3)
2241 -62: FINISH_VISCHUNK(o0, f44, f46, g3)
2242 -63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3)
2243 -
2244 -93: EX_LD_FP(LOAD(ldd, %o1, %f2))
2245 +40: FINISH_VISCHUNK(o0, f0, f2)
2246 +41: FINISH_VISCHUNK(o0, f2, f4)
2247 +42: FINISH_VISCHUNK(o0, f4, f6)
2248 +43: FINISH_VISCHUNK(o0, f6, f8)
2249 +44: FINISH_VISCHUNK(o0, f8, f10)
2250 +45: FINISH_VISCHUNK(o0, f10, f12)
2251 +46: FINISH_VISCHUNK(o0, f12, f14)
2252 +47: UNEVEN_VISCHUNK(o0, f14, f0)
2253 +48: FINISH_VISCHUNK(o0, f16, f18)
2254 +49: FINISH_VISCHUNK(o0, f18, f20)
2255 +50: FINISH_VISCHUNK(o0, f20, f22)
2256 +51: FINISH_VISCHUNK(o0, f22, f24)
2257 +52: FINISH_VISCHUNK(o0, f24, f26)
2258 +53: FINISH_VISCHUNK(o0, f26, f28)
2259 +54: FINISH_VISCHUNK(o0, f28, f30)
2260 +55: UNEVEN_VISCHUNK(o0, f30, f0)
2261 +56: FINISH_VISCHUNK(o0, f32, f34)
2262 +57: FINISH_VISCHUNK(o0, f34, f36)
2263 +58: FINISH_VISCHUNK(o0, f36, f38)
2264 +59: FINISH_VISCHUNK(o0, f38, f40)
2265 +60: FINISH_VISCHUNK(o0, f40, f42)
2266 +61: FINISH_VISCHUNK(o0, f42, f44)
2267 +62: FINISH_VISCHUNK(o0, f44, f46)
2268 +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0)
2269 +
2270 +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
2271 add %o1, 8, %o1
2272 subcc %g3, 8, %g3
2273 faligndata %f0, %f2, %f8
2274 - EX_ST_FP(STORE(std, %f8, %o0))
2275 + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
2276 bl,pn %xcc, 95f
2277 add %o0, 8, %o0
2278 - EX_LD_FP(LOAD(ldd, %o1, %f0))
2279 + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
2280 add %o1, 8, %o1
2281 subcc %g3, 8, %g3
2282 faligndata %f2, %f0, %f8
2283 - EX_ST_FP(STORE(std, %f8, %o0))
2284 + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
2285 bge,pt %xcc, 93b
2286 add %o0, 8, %o0
2287
2288 95: brz,pt %o2, 2f
2289 mov %g1, %o1
2290
2291 -1: EX_LD_FP(LOAD(ldub, %o1, %o3))
2292 +1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
2293 add %o1, 1, %o1
2294 subcc %o2, 1, %o2
2295 - EX_ST_FP(STORE(stb, %o3, %o0))
2296 + EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
2297 bne,pt %xcc, 1b
2298 add %o0, 1, %o0
2299
2300 @@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2301
2302 72: andn %o2, 0xf, %GLOBAL_SPARE
2303 and %o2, 0xf, %o2
2304 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
2305 - EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
2306 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
2307 + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
2308 subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
2309 - EX_ST(STORE(stx, %o5, %o1 + %o3))
2310 + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
2311 add %o1, 0x8, %o1
2312 - EX_ST(STORE(stx, %g1, %o1 + %o3))
2313 + EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
2314 bgu,pt %XCC, 1b
2315 add %o1, 0x8, %o1
2316 73: andcc %o2, 0x8, %g0
2317 be,pt %XCC, 1f
2318 nop
2319 - EX_LD(LOAD(ldx, %o1, %o5))
2320 + EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
2321 sub %o2, 0x8, %o2
2322 - EX_ST(STORE(stx, %o5, %o1 + %o3))
2323 + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
2324 add %o1, 0x8, %o1
2325 1: andcc %o2, 0x4, %g0
2326 be,pt %XCC, 1f
2327 nop
2328 - EX_LD(LOAD(lduw, %o1, %o5))
2329 + EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
2330 sub %o2, 0x4, %o2
2331 - EX_ST(STORE(stw, %o5, %o1 + %o3))
2332 + EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
2333 add %o1, 0x4, %o1
2334 1: cmp %o2, 0
2335 be,pt %XCC, 85f
2336 @@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2337 sub %g0, %g1, %g1
2338 sub %o2, %g1, %o2
2339
2340 -1: EX_LD(LOAD(ldub, %o1, %o5))
2341 +1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
2342 subcc %g1, 1, %g1
2343 - EX_ST(STORE(stb, %o5, %o1 + %o3))
2344 + EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
2345 bgu,pt %icc, 1b
2346 add %o1, 1, %o1
2347
2348 @@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2349
2350 8: mov 64, %o3
2351 andn %o1, 0x7, %o1
2352 - EX_LD(LOAD(ldx, %o1, %g2))
2353 + EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
2354 sub %o3, %g1, %o3
2355 andn %o2, 0x7, %GLOBAL_SPARE
2356 sllx %g2, %g1, %g2
2357 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
2358 +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
2359 subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
2360 add %o1, 0x8, %o1
2361 srlx %g3, %o3, %o5
2362 or %o5, %g2, %o5
2363 - EX_ST(STORE(stx, %o5, %o0))
2364 + EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
2365 add %o0, 0x8, %o0
2366 bgu,pt %icc, 1b
2367 sllx %g3, %g1, %g2
2368 @@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2369 bne,pn %XCC, 90f
2370 sub %o0, %o1, %o3
2371
2372 -1: EX_LD(LOAD(lduw, %o1, %g1))
2373 +1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
2374 subcc %o2, 4, %o2
2375 - EX_ST(STORE(stw, %g1, %o1 + %o3))
2376 + EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
2377 bgu,pt %XCC, 1b
2378 add %o1, 4, %o1
2379
2380 @@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2381 mov EX_RETVAL(%o4), %o0
2382
2383 .align 32
2384 -90: EX_LD(LOAD(ldub, %o1, %g1))
2385 +90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
2386 subcc %o2, 1, %o2
2387 - EX_ST(STORE(stb, %g1, %o1 + %o3))
2388 + EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
2389 bgu,pt %XCC, 90b
2390 add %o1, 1, %o1
2391 retl
2392 diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S
2393 index 88ad73d86fe4..db73010a1af8 100644
2394 --- a/arch/sparc/lib/U3copy_from_user.S
2395 +++ b/arch/sparc/lib/U3copy_from_user.S
2396 @@ -3,19 +3,19 @@
2397 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
2398 */
2399
2400 -#define EX_LD(x) \
2401 +#define EX_LD(x,y) \
2402 98: x; \
2403 .section __ex_table,"a";\
2404 .align 4; \
2405 - .word 98b, __retl_one; \
2406 + .word 98b, y; \
2407 .text; \
2408 .align 4;
2409
2410 -#define EX_LD_FP(x) \
2411 +#define EX_LD_FP(x,y) \
2412 98: x; \
2413 .section __ex_table,"a";\
2414 .align 4; \
2415 - .word 98b, __retl_one_fp;\
2416 + .word 98b, y##_fp; \
2417 .text; \
2418 .align 4;
2419
2420 diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
2421 index 845139d75537..c4ee858e352a 100644
2422 --- a/arch/sparc/lib/U3copy_to_user.S
2423 +++ b/arch/sparc/lib/U3copy_to_user.S
2424 @@ -3,19 +3,19 @@
2425 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
2426 */
2427
2428 -#define EX_ST(x) \
2429 +#define EX_ST(x,y) \
2430 98: x; \
2431 .section __ex_table,"a";\
2432 .align 4; \
2433 - .word 98b, __retl_one; \
2434 + .word 98b, y; \
2435 .text; \
2436 .align 4;
2437
2438 -#define EX_ST_FP(x) \
2439 +#define EX_ST_FP(x,y) \
2440 98: x; \
2441 .section __ex_table,"a";\
2442 .align 4; \
2443 - .word 98b, __retl_one_fp;\
2444 + .word 98b, y##_fp; \
2445 .text; \
2446 .align 4;
2447
2448 diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
2449 index 491ee69e4995..54f98706b03b 100644
2450 --- a/arch/sparc/lib/U3memcpy.S
2451 +++ b/arch/sparc/lib/U3memcpy.S
2452 @@ -4,6 +4,7 @@
2453 */
2454
2455 #ifdef __KERNEL__
2456 +#include <linux/linkage.h>
2457 #include <asm/visasm.h>
2458 #include <asm/asi.h>
2459 #define GLOBAL_SPARE %g7
2460 @@ -22,21 +23,17 @@
2461 #endif
2462
2463 #ifndef EX_LD
2464 -#define EX_LD(x) x
2465 +#define EX_LD(x,y) x
2466 #endif
2467 #ifndef EX_LD_FP
2468 -#define EX_LD_FP(x) x
2469 +#define EX_LD_FP(x,y) x
2470 #endif
2471
2472 #ifndef EX_ST
2473 -#define EX_ST(x) x
2474 +#define EX_ST(x,y) x
2475 #endif
2476 #ifndef EX_ST_FP
2477 -#define EX_ST_FP(x) x
2478 -#endif
2479 -
2480 -#ifndef EX_RETVAL
2481 -#define EX_RETVAL(x) x
2482 +#define EX_ST_FP(x,y) x
2483 #endif
2484
2485 #ifndef LOAD
2486 @@ -77,6 +74,87 @@
2487 */
2488
2489 .text
2490 +#ifndef EX_RETVAL
2491 +#define EX_RETVAL(x) x
2492 +__restore_fp: /* shared tail of the U3 *_fp stubs below: they branch here with %o0 already set in their branch delay slot */
2493 + VISExitHalf /* macro from asm/visasm.h; presumably leaves the VIS/FPU state entered by the copy - confirm */
2494 + retl
2495 + nop
2496 +ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) /* fixup target: %o0 = %o2 + %g2 + %g1 + 1 */
2497 + add %g1, 1, %g1
2498 + add %g2, %g1, %g2
2499 + ba,pt %xcc, __restore_fp
2500 + add %o2, %g2, %o0 /* executes in the branch delay slot */
2501 +ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
2502 +ENTRY(U3_retl_o2_plus_g2_fp) /* fixup target: %o0 = %o2 + %g2 */
2503 + ba,pt %xcc, __restore_fp
2504 + add %o2, %g2, %o0
2505 +ENDPROC(U3_retl_o2_plus_g2_fp)
2506 +ENTRY(U3_retl_o2_plus_g2_plus_8_fp) /* fixup target: %o0 = %o2 + %g2 + 8 */
2507 + add %g2, 8, %g2
2508 + ba,pt %xcc, __restore_fp
2509 + add %o2, %g2, %o0
2510 +ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
2511 +ENTRY(U3_retl_o2) /* fixup target: %o0 = %o2 */
2512 + retl
2513 + mov %o2, %o0
2514 +ENDPROC(U3_retl_o2)
2515 +ENTRY(U3_retl_o2_plus_1) /* fixup target: %o0 = %o2 + 1 */
2516 + retl
2517 + add %o2, 1, %o0
2518 +ENDPROC(U3_retl_o2_plus_1)
2519 +ENTRY(U3_retl_o2_plus_4) /* fixup target: %o0 = %o2 + 4 */
2520 + retl
2521 + add %o2, 4, %o0
2522 +ENDPROC(U3_retl_o2_plus_4)
2523 +ENTRY(U3_retl_o2_plus_8) /* fixup target: %o0 = %o2 + 8 */
2524 + retl
2525 + add %o2, 8, %o0
2526 +ENDPROC(U3_retl_o2_plus_8)
2527 +ENTRY(U3_retl_o2_plus_g1_plus_1) /* fixup target: %o0 = %o2 + %g1 + 1 */
2528 + add %g1, 1, %g1
2529 + retl
2530 + add %o2, %g1, %o0
2531 +ENDPROC(U3_retl_o2_plus_g1_plus_1)
2532 +ENTRY(U3_retl_o2_fp) /* fixup target: %o0 = %o2, then VIS exit via __restore_fp */
2533 + ba,pt %xcc, __restore_fp
2534 + mov %o2, %o0
2535 +ENDPROC(U3_retl_o2_fp)
2536 +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) /* fixup target: %o0 = %o2 + (%o3 << 6) + 0x80 */
2537 + sll %o3, 6, %o3 /* %o3 is scaled by 64 here; it appears to count 64-byte loop iterations - confirm */
2538 + add %o3, 0x80, %o3
2539 + ba,pt %xcc, __restore_fp
2540 + add %o2, %o3, %o0
2541 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
2542 +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) /* fixup target: %o0 = %o2 + (%o3 << 6) + 0x40 */
2543 + sll %o3, 6, %o3
2544 + add %o3, 0x40, %o3
2545 + ba,pt %xcc, __restore_fp
2546 + add %o2, %o3, %o0
2547 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
2548 +ENTRY(U3_retl_o2_plus_GS_plus_0x10) /* fixup target: %o0 = %o2 + GLOBAL_SPARE + 0x10 */
2549 + add GLOBAL_SPARE, 0x10, GLOBAL_SPARE
2550 + retl
2551 + add %o2, GLOBAL_SPARE, %o0
2552 +ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
2553 +ENTRY(U3_retl_o2_plus_GS_plus_0x08) /* fixup target: %o0 = %o2 + GLOBAL_SPARE + 0x08 */
2554 + add GLOBAL_SPARE, 0x08, GLOBAL_SPARE
2555 + retl
2556 + add %o2, GLOBAL_SPARE, %o0
2557 +ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
2558 +ENTRY(U3_retl_o2_and_7_plus_GS) /* fixup target for the 8-byte realign loop load: %o0 = (%o2 & 7) + GLOBAL_SPARE */
2559 + and %o2, 7, %o2 /* %o2 still holds the full length in that loop; keep only the sub-8-byte tail */
2560 + retl
2561 + add %o2, GLOBAL_SPARE, %o0 /* delay slot; result must land in %o0 like every other stub (it was written to %o2, leaving %o0 stale) */
2562 +ENDPROC(U3_retl_o2_and_7_plus_GS)
2563 +ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) /* fixup target for the 8-byte realign loop store: %o0 = (%o2 & 7) + GLOBAL_SPARE + 8 */
2564 + add GLOBAL_SPARE, 8, GLOBAL_SPARE /* undo the subcc that already ran before the faulting store */
2565 + and %o2, 7, %o2 /* %o2 still holds the full length in that loop; keep only the sub-8-byte tail */
2566 + retl
2567 + add %o2, GLOBAL_SPARE, %o0 /* delay slot; result must land in %o0 like every other stub (it was written to %o2, leaving %o0 stale) */
2568 +ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
2569 +#endif
2570 +
2571 .align 64
2572
2573 /* The cheetah's flexible spine, oversized liver, enlarged heart,
2574 @@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2575 and %g2, 0x38, %g2
2576
2577 1: subcc %g1, 0x1, %g1
2578 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
2579 - EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
2580 + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
2581 + EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
2582 bgu,pt %XCC, 1b
2583 add %o1, 0x1, %o1
2584
2585 @@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2586 be,pt %icc, 3f
2587 alignaddr %o1, %g0, %o1
2588
2589 - EX_LD_FP(LOAD(ldd, %o1, %f4))
2590 -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
2591 + EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
2592 +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
2593 add %o1, 0x8, %o1
2594 subcc %g2, 0x8, %g2
2595 faligndata %f4, %f6, %f0
2596 - EX_ST_FP(STORE(std, %f0, %o0))
2597 + EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
2598 be,pn %icc, 3f
2599 add %o0, 0x8, %o0
2600
2601 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
2602 + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
2603 add %o1, 0x8, %o1
2604 subcc %g2, 0x8, %g2
2605 faligndata %f6, %f4, %f2
2606 - EX_ST_FP(STORE(std, %f2, %o0))
2607 + EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
2608 bne,pt %icc, 1b
2609 add %o0, 0x8, %o0
2610
2611 @@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2612 LOAD(prefetch, %o1 + 0x080, #one_read)
2613 LOAD(prefetch, %o1 + 0x0c0, #one_read)
2614 LOAD(prefetch, %o1 + 0x100, #one_read)
2615 - EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
2616 + EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
2617 LOAD(prefetch, %o1 + 0x140, #one_read)
2618 - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2619 + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
2620 LOAD(prefetch, %o1 + 0x180, #one_read)
2621 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2622 + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
2623 LOAD(prefetch, %o1 + 0x1c0, #one_read)
2624 faligndata %f0, %f2, %f16
2625 - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2626 + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
2627 faligndata %f2, %f4, %f18
2628 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2629 + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
2630 faligndata %f4, %f6, %f20
2631 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2632 + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
2633 faligndata %f6, %f8, %f22
2634
2635 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2636 + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
2637 faligndata %f8, %f10, %f24
2638 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2639 + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
2640 faligndata %f10, %f12, %f26
2641 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2642 + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
2643
2644 subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
2645 add %o1, 0x40, %o1
2646 @@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2647
2648 .align 64
2649 1:
2650 - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2651 + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2652 faligndata %f12, %f14, %f28
2653 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2654 + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2655 faligndata %f14, %f0, %f30
2656 - EX_ST_FP(STORE_BLK(%f16, %o0))
2657 - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2658 + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2659 + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2660 faligndata %f0, %f2, %f16
2661 add %o0, 0x40, %o0
2662
2663 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2664 + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2665 faligndata %f2, %f4, %f18
2666 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2667 + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2668 faligndata %f4, %f6, %f20
2669 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2670 + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2671 subcc %o3, 0x01, %o3
2672 faligndata %f6, %f8, %f22
2673 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2674 + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2675
2676 faligndata %f8, %f10, %f24
2677 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2678 + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2679 LOAD(prefetch, %o1 + 0x1c0, #one_read)
2680 faligndata %f10, %f12, %f26
2681 bg,pt %XCC, 1b
2682 @@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2683
2684 /* Finally we copy the last full 64-byte block. */
2685 2:
2686 - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2687 + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2688 faligndata %f12, %f14, %f28
2689 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2690 + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2691 faligndata %f14, %f0, %f30
2692 - EX_ST_FP(STORE_BLK(%f16, %o0))
2693 - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2694 + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2695 + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2696 faligndata %f0, %f2, %f16
2697 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2698 + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2699 faligndata %f2, %f4, %f18
2700 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2701 + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2702 faligndata %f4, %f6, %f20
2703 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2704 + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2705 faligndata %f6, %f8, %f22
2706 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2707 + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2708 faligndata %f8, %f10, %f24
2709 cmp %g1, 0
2710 be,pt %XCC, 1f
2711 add %o0, 0x40, %o0
2712 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2713 + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2714 1: faligndata %f10, %f12, %f26
2715 faligndata %f12, %f14, %f28
2716 faligndata %f14, %f0, %f30
2717 - EX_ST_FP(STORE_BLK(%f16, %o0))
2718 + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2719 add %o0, 0x40, %o0
2720 add %o1, 0x40, %o1
2721 membar #Sync
2722 @@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2723
2724 sub %o2, %g2, %o2
2725 be,a,pt %XCC, 1f
2726 - EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
2727 + EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
2728
2729 -1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
2730 +1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
2731 add %o1, 0x8, %o1
2732 subcc %g2, 0x8, %g2
2733 faligndata %f0, %f2, %f8
2734 - EX_ST_FP(STORE(std, %f8, %o0))
2735 + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
2736 be,pn %XCC, 2f
2737 add %o0, 0x8, %o0
2738 - EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
2739 + EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
2740 add %o1, 0x8, %o1
2741 subcc %g2, 0x8, %g2
2742 faligndata %f2, %f0, %f8
2743 - EX_ST_FP(STORE(std, %f8, %o0))
2744 + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
2745 bne,pn %XCC, 1b
2746 add %o0, 0x8, %o0
2747
2748 @@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2749 andcc %o2, 0x8, %g0
2750 be,pt %icc, 1f
2751 nop
2752 - EX_LD(LOAD(ldx, %o1, %o5))
2753 - EX_ST(STORE(stx, %o5, %o1 + %o3))
2754 + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
2755 + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
2756 add %o1, 0x8, %o1
2757 + sub %o2, 8, %o2
2758
2759 1: andcc %o2, 0x4, %g0
2760 be,pt %icc, 1f
2761 nop
2762 - EX_LD(LOAD(lduw, %o1, %o5))
2763 - EX_ST(STORE(stw, %o5, %o1 + %o3))
2764 + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
2765 + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
2766 add %o1, 0x4, %o1
2767 + sub %o2, 4, %o2
2768
2769 1: andcc %o2, 0x2, %g0
2770 be,pt %icc, 1f
2771 nop
2772 - EX_LD(LOAD(lduh, %o1, %o5))
2773 - EX_ST(STORE(sth, %o5, %o1 + %o3))
2774 + EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
2775 + EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
2776 add %o1, 0x2, %o1
2777 + sub %o2, 2, %o2
2778
2779 1: andcc %o2, 0x1, %g0
2780 be,pt %icc, 85f
2781 nop
2782 - EX_LD(LOAD(ldub, %o1, %o5))
2783 + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
2784 ba,pt %xcc, 85f
2785 - EX_ST(STORE(stb, %o5, %o1 + %o3))
2786 + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
2787
2788 .align 64
2789 70: /* 16 < len <= 64 */
2790 @@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2791 andn %o2, 0xf, GLOBAL_SPARE
2792 and %o2, 0xf, %o2
2793 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
2794 - EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
2795 - EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
2796 - EX_ST(STORE(stx, %o5, %o1 + %o3))
2797 + EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
2798 + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
2799 + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
2800 add %o1, 0x8, %o1
2801 - EX_ST(STORE(stx, %g1, %o1 + %o3))
2802 + EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
2803 bgu,pt %XCC, 1b
2804 add %o1, 0x8, %o1
2805 73: andcc %o2, 0x8, %g0
2806 be,pt %XCC, 1f
2807 nop
2808 sub %o2, 0x8, %o2
2809 - EX_LD(LOAD(ldx, %o1, %o5))
2810 - EX_ST(STORE(stx, %o5, %o1 + %o3))
2811 + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
2812 + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
2813 add %o1, 0x8, %o1
2814 1: andcc %o2, 0x4, %g0
2815 be,pt %XCC, 1f
2816 nop
2817 sub %o2, 0x4, %o2
2818 - EX_LD(LOAD(lduw, %o1, %o5))
2819 - EX_ST(STORE(stw, %o5, %o1 + %o3))
2820 + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
2821 + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
2822 add %o1, 0x4, %o1
2823 1: cmp %o2, 0
2824 be,pt %XCC, 85f
2825 @@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2826 sub %o2, %g1, %o2
2827
2828 1: subcc %g1, 1, %g1
2829 - EX_LD(LOAD(ldub, %o1, %o5))
2830 - EX_ST(STORE(stb, %o5, %o1 + %o3))
2831 + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
2832 + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
2833 bgu,pt %icc, 1b
2834 add %o1, 1, %o1
2835
2836 @@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2837
2838 8: mov 64, %o3
2839 andn %o1, 0x7, %o1
2840 - EX_LD(LOAD(ldx, %o1, %g2))
2841 + EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
2842 sub %o3, %g1, %o3
2843 andn %o2, 0x7, GLOBAL_SPARE
2844 sllx %g2, %g1, %g2
2845 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
2846 +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
2847 subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE
2848 add %o1, 0x8, %o1
2849 srlx %g3, %o3, %o5
2850 or %o5, %g2, %o5
2851 - EX_ST(STORE(stx, %o5, %o0))
2852 + EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
2853 add %o0, 0x8, %o0
2854 bgu,pt %icc, 1b
2855 sllx %g3, %g1, %g2
2856 @@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2857
2858 1:
2859 subcc %o2, 4, %o2
2860 - EX_LD(LOAD(lduw, %o1, %g1))
2861 - EX_ST(STORE(stw, %g1, %o1 + %o3))
2862 + EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
2863 + EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
2864 bgu,pt %XCC, 1b
2865 add %o1, 4, %o1
2866
2867 @@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2868 .align 32
2869 90:
2870 subcc %o2, 1, %o2
2871 - EX_LD(LOAD(ldub, %o1, %g1))
2872 - EX_ST(STORE(stb, %g1, %o1 + %o3))
2873 + EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
2874 + EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
2875 bgu,pt %XCC, 90b
2876 add %o1, 1, %o1
2877 retl
2878 diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S
2879 index 302c0e60dc2c..4c89b486fa0d 100644
2880 --- a/arch/sparc/lib/copy_in_user.S
2881 +++ b/arch/sparc/lib/copy_in_user.S
2882 @@ -8,18 +8,33 @@
2883
2884 #define XCC xcc
2885
2886 -#define EX(x,y) \
2887 +#define EX(x,y,z) \
2888 98: x,y; \
2889 .section __ex_table,"a";\
2890 .align 4; \
2891 - .word 98b, __retl_one; \
2892 + .word 98b, z; \
2893 .text; \
2894 .align 4;
2895
2896 +#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
2897 +#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
2898 +#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
2899 +
2900 .register %g2,#scratch
2901 .register %g3,#scratch
2902
2903 .text
2904 +__retl_o4_plus_8:
2905 + add %o4, %o2, %o4
2906 + retl
2907 + add %o4, 8, %o0
2908 +__retl_o2_plus_4:
2909 + retl
2910 + add %o2, 4, %o0
2911 +__retl_o2_plus_1:
2912 + retl
2913 + add %o2, 1, %o0
2914 +
2915 .align 32
2916
2917 /* Don't try to get too fancy here, just nice and
2918 @@ -44,8 +59,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
2919 andn %o2, 0x7, %o4
2920 and %o2, 0x7, %o2
2921 1: subcc %o4, 0x8, %o4
2922 - EX(ldxa [%o1] %asi, %o5)
2923 - EX(stxa %o5, [%o0] %asi)
2924 + EX_O4(ldxa [%o1] %asi, %o5)
2925 + EX_O4(stxa %o5, [%o0] %asi)
2926 add %o1, 0x8, %o1
2927 bgu,pt %XCC, 1b
2928 add %o0, 0x8, %o0
2929 @@ -53,8 +68,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
2930 be,pt %XCC, 1f
2931 nop
2932 sub %o2, 0x4, %o2
2933 - EX(lduwa [%o1] %asi, %o5)
2934 - EX(stwa %o5, [%o0] %asi)
2935 + EX_O2_4(lduwa [%o1] %asi, %o5)
2936 + EX_O2_4(stwa %o5, [%o0] %asi)
2937 add %o1, 0x4, %o1
2938 add %o0, 0x4, %o0
2939 1: cmp %o2, 0
2940 @@ -70,8 +85,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
2941
2942 82:
2943 subcc %o2, 4, %o2
2944 - EX(lduwa [%o1] %asi, %g1)
2945 - EX(stwa %g1, [%o0] %asi)
2946 + EX_O2_4(lduwa [%o1] %asi, %g1)
2947 + EX_O2_4(stwa %g1, [%o0] %asi)
2948 add %o1, 4, %o1
2949 bgu,pt %XCC, 82b
2950 add %o0, 4, %o0
2951 @@ -82,8 +97,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
2952 .align 32
2953 90:
2954 subcc %o2, 1, %o2
2955 - EX(lduba [%o1] %asi, %g1)
2956 - EX(stba %g1, [%o0] %asi)
2957 + EX_O2_1(lduba [%o1] %asi, %g1)
2958 + EX_O2_1(stba %g1, [%o0] %asi)
2959 add %o1, 1, %o1
2960 bgu,pt %XCC, 90b
2961 add %o0, 1, %o0
2962 diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c
2963 deleted file mode 100644
2964 index ac96ae236709..000000000000
2965 --- a/arch/sparc/lib/user_fixup.c
2966 +++ /dev/null
2967 @@ -1,71 +0,0 @@
2968 -/* user_fixup.c: Fix up user copy faults.
2969 - *
2970 - * Copyright (C) 2004 David S. Miller <davem@redhat.com>
2971 - */
2972 -
2973 -#include <linux/compiler.h>
2974 -#include <linux/kernel.h>
2975 -#include <linux/string.h>
2976 -#include <linux/errno.h>
2977 -#include <linux/module.h>
2978 -
2979 -#include <asm/uaccess.h>
2980 -
2981 -/* Calculating the exact fault address when using
2982 - * block loads and stores can be very complicated.
2983 - *
2984 - * Instead of trying to be clever and handling all
2985 - * of the cases, just fix things up simply here.
2986 - */
2987 -
2988 -static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset)
2989 -{
2990 - unsigned long fault_addr = current_thread_info()->fault_address;
2991 - unsigned long end = start + size;
2992 -
2993 - if (fault_addr < start || fault_addr >= end) {
2994 - *offset = 0;
2995 - } else {
2996 - *offset = fault_addr - start;
2997 - size = end - fault_addr;
2998 - }
2999 - return size;
3000 -}
3001 -
3002 -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size)
3003 -{
3004 - unsigned long offset;
3005 -
3006 - size = compute_size((unsigned long) from, size, &offset);
3007 - if (likely(size))
3008 - memset(to + offset, 0, size);
3009 -
3010 - return size;
3011 -}
3012 -EXPORT_SYMBOL(copy_from_user_fixup);
3013 -
3014 -unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size)
3015 -{
3016 - unsigned long offset;
3017 -
3018 - return compute_size((unsigned long) to, size, &offset);
3019 -}
3020 -EXPORT_SYMBOL(copy_to_user_fixup);
3021 -
3022 -unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size)
3023 -{
3024 - unsigned long fault_addr = current_thread_info()->fault_address;
3025 - unsigned long start = (unsigned long) to;
3026 - unsigned long end = start + size;
3027 -
3028 - if (fault_addr >= start && fault_addr < end)
3029 - return end - fault_addr;
3030 -
3031 - start = (unsigned long) from;
3032 - end = start + size;
3033 - if (fault_addr >= start && fault_addr < end)
3034 - return end - fault_addr;
3035 -
3036 - return size;
3037 -}
3038 -EXPORT_SYMBOL(copy_in_user_fixup);
3039 diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
3040 index f2b77112e9d8..e20fbbafb0b0 100644
3041 --- a/arch/sparc/mm/tsb.c
3042 +++ b/arch/sparc/mm/tsb.c
3043 @@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr)
3044 return (tag == (vaddr >> 22));
3045 }
3046
3047 +static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
3048 +{
3049 + unsigned long idx;
3050 +
3051 + for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
3052 + struct tsb *ent = &swapper_tsb[idx];
3053 + unsigned long match = idx << 13;
3054 +
3055 + match |= (ent->tag << 22);
3056 + if (match >= start && match < end)
3057 + ent->tag = (1UL << TSB_TAG_INVALID_BIT);
3058 + }
3059 +}
3060 +
3061 /* TSB flushes need only occur on the processor initiating the address
3062 * space modification, not on each cpu the address space has run on.
3063 * Only the TLB flush needs that treatment.
3064 @@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
3065 {
3066 unsigned long v;
3067
3068 + if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
3069 + return flush_tsb_kernel_range_scan(start, end);
3070 +
3071 for (v = start; v < end; v += PAGE_SIZE) {
3072 unsigned long hash = tsb_hash(v, PAGE_SHIFT,
3073 KERNEL_TSB_NENTRIES);
3074 diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
3075 index b4f4733abc6e..5d2fd6cd3189 100644
3076 --- a/arch/sparc/mm/ultra.S
3077 +++ b/arch/sparc/mm/ultra.S
3078 @@ -30,7 +30,7 @@
3079 .text
3080 .align 32
3081 .globl __flush_tlb_mm
3082 -__flush_tlb_mm: /* 18 insns */
3083 +__flush_tlb_mm: /* 19 insns */
3084 /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
3085 ldxa [%o1] ASI_DMMU, %g2
3086 cmp %g2, %o0
3087 @@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */
3088
3089 .align 32
3090 .globl __flush_tlb_pending
3091 -__flush_tlb_pending: /* 26 insns */
3092 +__flush_tlb_pending: /* 27 insns */
3093 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
3094 rdpr %pstate, %g7
3095 sllx %o1, 3, %o1
3096 @@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */
3097
3098 .align 32
3099 .globl __flush_tlb_kernel_range
3100 -__flush_tlb_kernel_range: /* 16 insns */
3101 +__flush_tlb_kernel_range: /* 31 insns */
3102 /* %o0=start, %o1=end */
3103 cmp %o0, %o1
3104 be,pn %xcc, 2f
3105 + sub %o1, %o0, %o3
3106 + srlx %o3, 18, %o4
3107 + brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow
3108 sethi %hi(PAGE_SIZE), %o4
3109 - sub %o1, %o0, %o3
3110 sub %o3, %o4, %o3
3111 or %o0, 0x20, %o0 ! Nucleus
3112 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
3113 @@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */
3114 retl
3115 nop
3116 nop
3117 + nop
3118 + nop
3119 + nop
3120 + nop
3121 + nop
3122 + nop
3123 + nop
3124 + nop
3125 + nop
3126 + nop
3127 + nop
3128 + nop
3129 + nop
3130 +
3131 +__spitfire_flush_tlb_kernel_range_slow:
3132 + mov 63 * 8, %o4
3133 +1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3
3134 + andcc %o3, 0x40, %g0 /* _PAGE_L_4U */
3135 + bne,pn %xcc, 2f
3136 + mov TLB_TAG_ACCESS, %o3
3137 + stxa %g0, [%o3] ASI_IMMU
3138 + stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS
3139 + membar #Sync
3140 +2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3
3141 + andcc %o3, 0x40, %g0
3142 + bne,pn %xcc, 2f
3143 + mov TLB_TAG_ACCESS, %o3
3144 + stxa %g0, [%o3] ASI_DMMU
3145 + stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS
3146 + membar #Sync
3147 +2: sub %o4, 8, %o4
3148 + brgez,pt %o4, 1b
3149 + nop
3150 + retl
3151 + nop
3152
3153 __spitfire_flush_tlb_mm_slow:
3154 rdpr %pstate, %g1
3155 @@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */
3156 retl
3157 wrpr %g7, 0x0, %pstate
3158
3159 +__cheetah_flush_tlb_kernel_range: /* 31 insns */
3160 + /* %o0=start, %o1=end */
3161 + cmp %o0, %o1
3162 + be,pn %xcc, 2f
3163 + sub %o1, %o0, %o3
3164 + srlx %o3, 18, %o4
3165 + brnz,pn %o4, 3f
3166 + sethi %hi(PAGE_SIZE), %o4
3167 + sub %o3, %o4, %o3
3168 + or %o0, 0x20, %o0 ! Nucleus
3169 +1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
3170 + stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP
3171 + membar #Sync
3172 + brnz,pt %o3, 1b
3173 + sub %o3, %o4, %o3
3174 +2: sethi %hi(KERNBASE), %o3
3175 + flush %o3
3176 + retl
3177 + nop
3178 +3: mov 0x80, %o4
3179 + stxa %g0, [%o4] ASI_DMMU_DEMAP
3180 + membar #Sync
3181 + stxa %g0, [%o4] ASI_IMMU_DEMAP
3182 + membar #Sync
3183 + retl
3184 + nop
3185 + nop
3186 + nop
3187 + nop
3188 + nop
3189 + nop
3190 + nop
3191 + nop
3192 +
3193 #ifdef DCACHE_ALIASING_POSSIBLE
3194 __cheetah_flush_dcache_page: /* 11 insns */
3195 sethi %hi(PAGE_OFFSET), %g1
3196 @@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error:
3197 ret
3198 restore
3199
3200 -__hypervisor_flush_tlb_mm: /* 10 insns */
3201 +__hypervisor_flush_tlb_mm: /* 19 insns */
3202 mov %o0, %o2 /* ARG2: mmu context */
3203 mov 0, %o0 /* ARG0: CPU lists unimplemented */
3204 mov 0, %o1 /* ARG1: CPU lists unimplemented */
3205 mov HV_MMU_ALL, %o3 /* ARG3: flags */
3206 mov HV_FAST_MMU_DEMAP_CTX, %o5
3207 ta HV_FAST_TRAP
3208 - brnz,pn %o0, __hypervisor_tlb_tl0_error
3209 + brnz,pn %o0, 1f
3210 mov HV_FAST_MMU_DEMAP_CTX, %o1
3211 retl
3212 nop
3213 +1: sethi %hi(__hypervisor_tlb_tl0_error), %o5
3214 + jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0
3215 + nop
3216 + nop
3217 + nop
3218 + nop
3219 + nop
3220 + nop
3221 + nop
3222
3223 -__hypervisor_flush_tlb_page: /* 11 insns */
3224 +__hypervisor_flush_tlb_page: /* 22 insns */
3225 /* %o0 = context, %o1 = vaddr */
3226 mov %o0, %g2
3227 mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */
3228 @@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */
3229 srlx %o0, PAGE_SHIFT, %o0
3230 sllx %o0, PAGE_SHIFT, %o0
3231 ta HV_MMU_UNMAP_ADDR_TRAP
3232 - brnz,pn %o0, __hypervisor_tlb_tl0_error
3233 + brnz,pn %o0, 1f
3234 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3235 retl
3236 nop
3237 +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3238 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3239 + nop
3240 + nop
3241 + nop
3242 + nop
3243 + nop
3244 + nop
3245 + nop
3246 + nop
3247 + nop
3248
3249 -__hypervisor_flush_tlb_pending: /* 16 insns */
3250 +__hypervisor_flush_tlb_pending: /* 27 insns */
3251 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
3252 sllx %o1, 3, %g1
3253 mov %o2, %g2
3254 @@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */
3255 srlx %o0, PAGE_SHIFT, %o0
3256 sllx %o0, PAGE_SHIFT, %o0
3257 ta HV_MMU_UNMAP_ADDR_TRAP
3258 - brnz,pn %o0, __hypervisor_tlb_tl0_error
3259 + brnz,pn %o0, 1f
3260 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3261 brnz,pt %g1, 1b
3262 nop
3263 retl
3264 nop
3265 +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3266 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3267 + nop
3268 + nop
3269 + nop
3270 + nop
3271 + nop
3272 + nop
3273 + nop
3274 + nop
3275 + nop
3276
3277 -__hypervisor_flush_tlb_kernel_range: /* 16 insns */
3278 +__hypervisor_flush_tlb_kernel_range: /* 31 insns */
3279 /* %o0=start, %o1=end */
3280 cmp %o0, %o1
3281 be,pn %xcc, 2f
3282 - sethi %hi(PAGE_SIZE), %g3
3283 - mov %o0, %g1
3284 - sub %o1, %g1, %g2
3285 + sub %o1, %o0, %g2
3286 + srlx %g2, 18, %g3
3287 + brnz,pn %g3, 4f
3288 + mov %o0, %g1
3289 + sethi %hi(PAGE_SIZE), %g3
3290 sub %g2, %g3, %g2
3291 1: add %g1, %g2, %o0 /* ARG0: virtual address */
3292 mov 0, %o1 /* ARG1: mmu context */
3293 mov HV_MMU_ALL, %o2 /* ARG2: flags */
3294 ta HV_MMU_UNMAP_ADDR_TRAP
3295 - brnz,pn %o0, __hypervisor_tlb_tl0_error
3296 + brnz,pn %o0, 3f
3297 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3298 brnz,pt %g2, 1b
3299 sub %g2, %g3, %g2
3300 2: retl
3301 nop
3302 +3: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3303 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3304 + nop
3305 +4: mov 0, %o0 /* ARG0: CPU lists unimplemented */
3306 + mov 0, %o1 /* ARG1: CPU lists unimplemented */
3307 + mov 0, %o2 /* ARG2: mmu context == nucleus */
3308 + mov HV_MMU_ALL, %o3 /* ARG3: flags */
3309 + mov HV_FAST_MMU_DEMAP_CTX, %o5
3310 + ta HV_FAST_TRAP
3311 + brnz,pn %o0, 3b
3312 + mov HV_FAST_MMU_DEMAP_CTX, %o1
3313 + retl
3314 + nop
3315
3316 #ifdef DCACHE_ALIASING_POSSIBLE
3317 /* XXX Niagara and friends have an 8K cache, so no aliasing is
3318 @@ -394,43 +511,6 @@ tlb_patch_one:
3319 retl
3320 nop
3321
3322 - .globl cheetah_patch_cachetlbops
3323 -cheetah_patch_cachetlbops:
3324 - save %sp, -128, %sp
3325 -
3326 - sethi %hi(__flush_tlb_mm), %o0
3327 - or %o0, %lo(__flush_tlb_mm), %o0
3328 - sethi %hi(__cheetah_flush_tlb_mm), %o1
3329 - or %o1, %lo(__cheetah_flush_tlb_mm), %o1
3330 - call tlb_patch_one
3331 - mov 19, %o2
3332 -
3333 - sethi %hi(__flush_tlb_page), %o0
3334 - or %o0, %lo(__flush_tlb_page), %o0
3335 - sethi %hi(__cheetah_flush_tlb_page), %o1
3336 - or %o1, %lo(__cheetah_flush_tlb_page), %o1
3337 - call tlb_patch_one
3338 - mov 22, %o2
3339 -
3340 - sethi %hi(__flush_tlb_pending), %o0
3341 - or %o0, %lo(__flush_tlb_pending), %o0
3342 - sethi %hi(__cheetah_flush_tlb_pending), %o1
3343 - or %o1, %lo(__cheetah_flush_tlb_pending), %o1
3344 - call tlb_patch_one
3345 - mov 27, %o2
3346 -
3347 -#ifdef DCACHE_ALIASING_POSSIBLE
3348 - sethi %hi(__flush_dcache_page), %o0
3349 - or %o0, %lo(__flush_dcache_page), %o0
3350 - sethi %hi(__cheetah_flush_dcache_page), %o1
3351 - or %o1, %lo(__cheetah_flush_dcache_page), %o1
3352 - call tlb_patch_one
3353 - mov 11, %o2
3354 -#endif /* DCACHE_ALIASING_POSSIBLE */
3355 -
3356 - ret
3357 - restore
3358 -
3359 #ifdef CONFIG_SMP
3360 /* These are all called by the slaves of a cross call, at
3361 * trap level 1, with interrupts fully disabled.
3362 @@ -447,7 +527,7 @@ cheetah_patch_cachetlbops:
3363 */
3364 .align 32
3365 .globl xcall_flush_tlb_mm
3366 -xcall_flush_tlb_mm: /* 21 insns */
3367 +xcall_flush_tlb_mm: /* 24 insns */
3368 mov PRIMARY_CONTEXT, %g2
3369 ldxa [%g2] ASI_DMMU, %g3
3370 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
3371 @@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */
3372 nop
3373 nop
3374 nop
3375 + nop
3376 + nop
3377 + nop
3378
3379 .globl xcall_flush_tlb_page
3380 -xcall_flush_tlb_page: /* 17 insns */
3381 +xcall_flush_tlb_page: /* 20 insns */
3382 /* %g5=context, %g1=vaddr */
3383 mov PRIMARY_CONTEXT, %g4
3384 ldxa [%g4] ASI_DMMU, %g2
3385 @@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */
3386 retry
3387 nop
3388 nop
3389 + nop
3390 + nop
3391 + nop
3392
3393 .globl xcall_flush_tlb_kernel_range
3394 -xcall_flush_tlb_kernel_range: /* 25 insns */
3395 +xcall_flush_tlb_kernel_range: /* 44 insns */
3396 sethi %hi(PAGE_SIZE - 1), %g2
3397 or %g2, %lo(PAGE_SIZE - 1), %g2
3398 andn %g1, %g2, %g1
3399 andn %g7, %g2, %g7
3400 sub %g7, %g1, %g3
3401 - add %g2, 1, %g2
3402 + srlx %g3, 18, %g2 ! nonzero => large range, take slow path at 2f
3403 + brnz,pn %g2, 2f
3404 + sethi %hi(PAGE_SIZE), %g2 ! srlx clobbered %g2; reload the page stride
3405 sub %g3, %g2, %g3
3406 or %g1, 0x20, %g1 ! Nucleus
3407 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
3408 @@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */
3409 brnz,pt %g3, 1b
3410 sub %g3, %g2, %g3
3411 retry
3412 - nop
3413 - nop
3414 +2: mov 63 * 8, %g1
3415 +1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2
3416 + andcc %g2, 0x40, %g0 /* _PAGE_L_4U */
3417 + bne,pn %xcc, 2f
3418 + mov TLB_TAG_ACCESS, %g2
3419 + stxa %g0, [%g2] ASI_IMMU
3420 + stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS
3421 + membar #Sync
3422 +2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2
3423 + andcc %g2, 0x40, %g0
3424 + bne,pn %xcc, 2f
3425 + mov TLB_TAG_ACCESS, %g2
3426 + stxa %g0, [%g2] ASI_DMMU
3427 + stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS
3428 + membar #Sync
3429 +2: sub %g1, 8, %g1
3430 + brgez,pt %g1, 1b
3431 + nop
3432 + retry
3433 nop
3434 nop
3435 nop
3436 @@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4:
3437
3438 retry
3439
3440 +__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */
3441 + sethi %hi(PAGE_SIZE - 1), %g2
3442 + or %g2, %lo(PAGE_SIZE - 1), %g2
3443 + andn %g1, %g2, %g1
3444 + andn %g7, %g2, %g7
3445 + sub %g7, %g1, %g3
3446 + srlx %g3, 18, %g2 ! nonzero => large range, demap everything at 2f
3447 + brnz,pn %g2, 2f
3448 + sethi %hi(PAGE_SIZE), %g2 ! srlx clobbered %g2; reload the page stride
3449 + sub %g3, %g2, %g3
3450 + or %g1, 0x20, %g1 ! Nucleus
3451 +1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
3452 + stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
3453 + membar #Sync
3454 + brnz,pt %g3, 1b
3455 + sub %g3, %g2, %g3
3456 + retry
3457 +2: mov 0x80, %g2
3458 + stxa %g0, [%g2] ASI_DMMU_DEMAP
3459 + membar #Sync
3460 + stxa %g0, [%g2] ASI_IMMU_DEMAP
3461 + membar #Sync
3462 + retry
3463 + nop
3464 + nop
3465 + nop
3466 + nop
3467 + nop
3468 + nop
3469 + nop
3470 + nop
3471 + nop
3472 + nop
3473 + nop
3474 + nop
3475 + nop
3476 + nop
3477 + nop
3478 + nop
3479 + nop
3480 + nop
3481 + nop
3482 + nop
3483 + nop
3484 + nop
3485 +
3486 #ifdef DCACHE_ALIASING_POSSIBLE
3487 .align 32
3488 .globl xcall_flush_dcache_page_cheetah
3489 @@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error:
3490 ba,a,pt %xcc, rtrap
3491
3492 .globl __hypervisor_xcall_flush_tlb_mm
3493 -__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3494 +__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
3495 /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
3496 mov %o0, %g2
3497 mov %o1, %g3
3498 @@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3499 mov HV_FAST_MMU_DEMAP_CTX, %o5
3500 ta HV_FAST_TRAP
3501 mov HV_FAST_MMU_DEMAP_CTX, %g6
3502 - brnz,pn %o0, __hypervisor_tlb_xcall_error
3503 + brnz,pn %o0, 1f
3504 mov %o0, %g5
3505 mov %g2, %o0
3506 mov %g3, %o1
3507 @@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3508 mov %g7, %o5
3509 membar #Sync
3510 retry
3511 +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3512 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3513 + nop
3514
3515 .globl __hypervisor_xcall_flush_tlb_page
3516 -__hypervisor_xcall_flush_tlb_page: /* 17 insns */
3517 +__hypervisor_xcall_flush_tlb_page: /* 20 insns */
3518 /* %g5=ctx, %g1=vaddr */
3519 mov %o0, %g2
3520 mov %o1, %g3
3521 @@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */
3522 sllx %o0, PAGE_SHIFT, %o0
3523 ta HV_MMU_UNMAP_ADDR_TRAP
3524 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
3525 - brnz,a,pn %o0, __hypervisor_tlb_xcall_error
3526 + brnz,a,pn %o0, 1f
3527 mov %o0, %g5
3528 mov %g2, %o0
3529 mov %g3, %o1
3530 mov %g4, %o2
3531 membar #Sync
3532 retry
3533 +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3534 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3535 + nop
3536
3537 .globl __hypervisor_xcall_flush_tlb_kernel_range
3538 -__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
3539 +__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
3540 /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
3541 sethi %hi(PAGE_SIZE - 1), %g2
3542 or %g2, %lo(PAGE_SIZE - 1), %g2
3543 andn %g1, %g2, %g1
3544 andn %g7, %g2, %g7
3545 sub %g7, %g1, %g3
3546 + srlx %g3, 18, %g7
3547 add %g2, 1, %g2
3548 sub %g3, %g2, %g3
3549 mov %o0, %g2
3550 mov %o1, %g4
3551 - mov %o2, %g7
3552 + brnz,pn %g7, 2f
3553 + mov %o2, %g7
3554 1: add %g1, %g3, %o0 /* ARG0: virtual address */
3555 mov 0, %o1 /* ARG1: mmu context */
3556 mov HV_MMU_ALL, %o2 /* ARG2: flags */
3557 ta HV_MMU_UNMAP_ADDR_TRAP
3558 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
3559 - brnz,pn %o0, __hypervisor_tlb_xcall_error
3560 + brnz,pn %o0, 1f
3561 mov %o0, %g5
3562 sethi %hi(PAGE_SIZE), %o2
3563 brnz,pt %g3, 1b
3564 sub %g3, %o2, %g3
3565 - mov %g2, %o0
3566 +5: mov %g2, %o0
3567 mov %g4, %o1
3568 mov %g7, %o2
3569 membar #Sync
3570 retry
3571 +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3572 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3573 + nop
3574 +2: mov %o3, %g1
3575 + mov %o5, %g3
3576 + mov 0, %o0 /* ARG0: CPU lists unimplemented */
3577 + mov 0, %o1 /* ARG1: CPU lists unimplemented */
3578 + mov 0, %o2 /* ARG2: mmu context == nucleus */
3579 + mov HV_MMU_ALL, %o3 /* ARG3: flags */
3580 + mov HV_FAST_MMU_DEMAP_CTX, %o5
3581 + ta HV_FAST_TRAP
3582 + mov %g1, %o3
3583 + brz,pt %o0, 5b
3584 + mov %g3, %o5
3585 + mov HV_FAST_MMU_DEMAP_CTX, %g6
3586 + ba,pt %xcc, 1b
3587 + clr %g5
3588
3589 /* These just get rescheduled to PIL vectors. */
3590 .globl xcall_call_function
3591 @@ -809,6 +985,58 @@ xcall_kgdb_capture:
3592
3593 #endif /* CONFIG_SMP */
3594
3595 + .globl cheetah_patch_cachetlbops
3596 +cheetah_patch_cachetlbops:
3597 + save %sp, -128, %sp
3598 +
3599 + sethi %hi(__flush_tlb_mm), %o0
3600 + or %o0, %lo(__flush_tlb_mm), %o0
3601 + sethi %hi(__cheetah_flush_tlb_mm), %o1
3602 + or %o1, %lo(__cheetah_flush_tlb_mm), %o1
3603 + call tlb_patch_one
3604 + mov 19, %o2
3605 +
3606 + sethi %hi(__flush_tlb_page), %o0
3607 + or %o0, %lo(__flush_tlb_page), %o0
3608 + sethi %hi(__cheetah_flush_tlb_page), %o1
3609 + or %o1, %lo(__cheetah_flush_tlb_page), %o1
3610 + call tlb_patch_one
3611 + mov 22, %o2
3612 +
3613 + sethi %hi(__flush_tlb_pending), %o0
3614 + or %o0, %lo(__flush_tlb_pending), %o0
3615 + sethi %hi(__cheetah_flush_tlb_pending), %o1
3616 + or %o1, %lo(__cheetah_flush_tlb_pending), %o1
3617 + call tlb_patch_one
3618 + mov 27, %o2
3619 +
3620 + sethi %hi(__flush_tlb_kernel_range), %o0
3621 + or %o0, %lo(__flush_tlb_kernel_range), %o0
3622 + sethi %hi(__cheetah_flush_tlb_kernel_range), %o1
3623 + or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
3624 + call tlb_patch_one
3625 + mov 31, %o2
3626 +
3627 +#ifdef DCACHE_ALIASING_POSSIBLE
3628 + sethi %hi(__flush_dcache_page), %o0
3629 + or %o0, %lo(__flush_dcache_page), %o0
3630 + sethi %hi(__cheetah_flush_dcache_page), %o1
3631 + or %o1, %lo(__cheetah_flush_dcache_page), %o1
3632 + call tlb_patch_one
3633 + mov 11, %o2
3634 +#endif /* DCACHE_ALIASING_POSSIBLE */
3635 +
3636 +#ifdef CONFIG_SMP
3637 + sethi %hi(xcall_flush_tlb_kernel_range), %o0
3638 + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
3639 + sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
3640 + or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
3641 + call tlb_patch_one
3642 + mov 44, %o2
3643 +#endif /* CONFIG_SMP */
3644 +
3645 + ret
3646 + restore
3647
3648 .globl hypervisor_patch_cachetlbops
3649 hypervisor_patch_cachetlbops:
3650 @@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops:
3651 sethi %hi(__hypervisor_flush_tlb_mm), %o1
3652 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
3653 call tlb_patch_one
3654 - mov 10, %o2
3655 + mov 19, %o2
3656
3657 sethi %hi(__flush_tlb_page), %o0
3658 or %o0, %lo(__flush_tlb_page), %o0
3659 sethi %hi(__hypervisor_flush_tlb_page), %o1
3660 or %o1, %lo(__hypervisor_flush_tlb_page), %o1
3661 call tlb_patch_one
3662 - mov 11, %o2
3663 + mov 22, %o2
3664
3665 sethi %hi(__flush_tlb_pending), %o0
3666 or %o0, %lo(__flush_tlb_pending), %o0
3667 sethi %hi(__hypervisor_flush_tlb_pending), %o1
3668 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
3669 call tlb_patch_one
3670 - mov 16, %o2
3671 + mov 27, %o2
3672
3673 sethi %hi(__flush_tlb_kernel_range), %o0
3674 or %o0, %lo(__flush_tlb_kernel_range), %o0
3675 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
3676 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
3677 call tlb_patch_one
3678 - mov 16, %o2
3679 + mov 31, %o2
3680
3681 #ifdef DCACHE_ALIASING_POSSIBLE
3682 sethi %hi(__flush_dcache_page), %o0
3683 @@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops:
3684 sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
3685 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
3686 call tlb_patch_one
3687 - mov 21, %o2
3688 + mov 24, %o2
3689
3690 sethi %hi(xcall_flush_tlb_page), %o0
3691 or %o0, %lo(xcall_flush_tlb_page), %o0
3692 sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1
3693 or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
3694 call tlb_patch_one
3695 - mov 17, %o2
3696 + mov 20, %o2
3697
3698 sethi %hi(xcall_flush_tlb_kernel_range), %o0
3699 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
3700 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
3701 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
3702 call tlb_patch_one
3703 - mov 25, %o2
3704 + mov 44, %o2
3705 #endif /* CONFIG_SMP */
3706
3707 ret
3708 diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
3709 index c4751ece76f6..45e87c9cc828 100644
3710 --- a/drivers/net/ethernet/broadcom/bgmac.c
3711 +++ b/drivers/net/ethernet/broadcom/bgmac.c
3712 @@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
3713 u32 ctl;
3714
3715 ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
3716 +
3717 + /* preserve ONLY bits 16-17 from current hardware value */
3718 + ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
3719 +
3720 if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) {
3721 ctl &= ~BGMAC_DMA_RX_BL_MASK;
3722 ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
3723 @@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
3724 ctl &= ~BGMAC_DMA_RX_PT_MASK;
3725 ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT;
3726 }
3727 - ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
3728 ctl |= BGMAC_DMA_RX_ENABLE;
3729 ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
3730 ctl |= BGMAC_DMA_RX_OVERFLOW_CONT;
3731 diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
3732 index 505ceaf451e2..2c850a92ab15 100644
3733 --- a/drivers/net/ethernet/broadcom/bnx2.c
3734 +++ b/drivers/net/ethernet/broadcom/bnx2.c
3735 @@ -49,6 +49,7 @@
3736 #include <linux/firmware.h>
3737 #include <linux/log2.h>
3738 #include <linux/aer.h>
3739 +#include <linux/crash_dump.h>
3740
3741 #if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
3742 #define BCM_CNIC 1
3743 @@ -4759,15 +4760,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp)
3744 BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR);
3745 }
3746
3747 -static int
3748 -bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
3749 +static void
3750 +bnx2_wait_dma_complete(struct bnx2 *bp)
3751 {
3752 u32 val;
3753 - int i, rc = 0;
3754 - u8 old_port;
3755 + int i;
3756
3757 - /* Wait for the current PCI transaction to complete before
3758 - * issuing a reset. */
3759 + /*
3760 + * Wait for the current PCI transaction to complete before
3761 + * issuing a reset.
3762 + */
3763 if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) ||
3764 (BNX2_CHIP(bp) == BNX2_CHIP_5708)) {
3765 BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS,
3766 @@ -4791,6 +4793,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
3767 }
3768 }
3769
3770 + return;
3771 +}
3772 +
3773 +
3774 +static int
3775 +bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
3776 +{
3777 + u32 val;
3778 + int i, rc = 0;
3779 + u8 old_port;
3780 +
3781 + /* Wait for the current PCI transaction to complete before
3782 + * issuing a reset. */
3783 + bnx2_wait_dma_complete(bp);
3784 +
3785 /* Wait for the firmware to tell us it is ok to issue a reset. */
3786 bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1);
3787
3788 @@ -6356,6 +6373,10 @@ bnx2_open(struct net_device *dev)
3789 struct bnx2 *bp = netdev_priv(dev);
3790 int rc;
3791
3792 + rc = bnx2_request_firmware(bp);
3793 + if (rc < 0)
3794 + goto out;
3795 +
3796 netif_carrier_off(dev);
3797
3798 bnx2_disable_int(bp);
3799 @@ -6424,6 +6445,7 @@ open_err:
3800 bnx2_free_irq(bp);
3801 bnx2_free_mem(bp);
3802 bnx2_del_napi(bp);
3803 + bnx2_release_firmware(bp);
3804 goto out;
3805 }
3806
3807 @@ -8570,12 +8592,15 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
3808
3809 pci_set_drvdata(pdev, dev);
3810
3811 - rc = bnx2_request_firmware(bp);
3812 - if (rc < 0)
3813 - goto error;
3814 -
3815 + /*
3816 + * In-flight DMA from the 1st kernel may continue in the kdump kernel.
3817 + * A new io-page table is created before bnx2 resets at the open stage.
3818 + * We have to wait for the in-flight DMA to complete so that it does not
3819 + * look up the newly created io-page table.
3820 + */
3821 + if (is_kdump_kernel())
3822 + bnx2_wait_dma_complete(bp);
3823
3824 - bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
3825 memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
3826
3827 dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
3828 @@ -8608,7 +8633,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
3829 return 0;
3830
3831 error:
3832 - bnx2_release_firmware(bp);
3833 pci_iounmap(pdev, bp->regview);
3834 pci_release_regions(pdev);
3835 pci_disable_device(pdev);
3836 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3837 index d48873bcbddf..5cdc96bdd444 100644
3838 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3839 +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3840 @@ -231,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
3841
3842 span_entry->used = true;
3843 span_entry->id = index;
3844 - span_entry->ref_count = 0;
3845 + span_entry->ref_count = 1;
3846 span_entry->local_port = local_port;
3847 return span_entry;
3848 }
3849 @@ -268,6 +268,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
3850
3851 span_entry = mlxsw_sp_span_entry_find(port);
3852 if (span_entry) {
3853 + /* Already exists, just take a reference */
3854 span_entry->ref_count++;
3855 return span_entry;
3856 }
3857 @@ -278,6 +279,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
3858 static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
3859 struct mlxsw_sp_span_entry *span_entry)
3860 {
3861 + WARN_ON(!span_entry->ref_count);
3862 if (--span_entry->ref_count == 0)
3863 mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
3864 return 0;
3865 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3866 index 3f5c51da6d3e..62514b9bf988 100644
3867 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3868 +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3869 @@ -777,6 +777,26 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
3870 }
3871 }
3872
3873 +static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
3874 +{
3875 + u8 num_rec, last_rec_index, num_entries;
3876 +
3877 + num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
3878 + last_rec_index = num_rec - 1;
3879 +
3880 + if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
3881 + return false;
3882 + if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
3883 + MLXSW_REG_RAUHTD_TYPE_IPV6)
3884 + return true;
3885 +
3886 + num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
3887 + last_rec_index);
3888 + if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
3889 + return true;
3890 + return false;
3891 +}
3892 +
3893 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
3894 {
3895 char *rauhtd_pl;
3896 @@ -803,7 +823,7 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
3897 for (i = 0; i < num_rec; i++)
3898 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
3899 i);
3900 - } while (num_rec);
3901 + } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
3902 rtnl_unlock();
3903
3904 kfree(rauhtd_pl);
3905 diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3906 index 4c8c60af7985..fe9e7b1979b8 100644
3907 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3908 +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3909 @@ -871,6 +871,13 @@ static int stmmac_init_phy(struct net_device *dev)
3910 return -ENODEV;
3911 }
3912
3913 + /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid
3914 + * subsequent PHY polling, make sure we force a link transition if
3915 + * we have a UP/DOWN/UP transition
3916 + */
3917 + if (phydev->is_pseudo_fixed_link)
3918 + phydev->irq = PHY_POLL;
3919 +
3920 pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)"
3921 " Link = %d\n", dev->name, phydev->phy_id, phydev->link);
3922
3923 diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
3924 index 5c8429f23a89..3a5530d0511b 100644
3925 --- a/drivers/usb/gadget/function/f_fs.c
3926 +++ b/drivers/usb/gadget/function/f_fs.c
3927 @@ -133,8 +133,60 @@ struct ffs_epfile {
3928 /*
3929 * Buffer for holding data from partial reads which may happen since
3930 * we’re rounding user read requests to a multiple of a max packet size.
3931 + *
3932 + * The pointer is initialised with NULL value and may be set by
3933 + * __ffs_epfile_read_data function to point to a temporary buffer.
3934 + *
3935 + * In normal operation, calls to __ffs_epfile_read_buffered will consume
3936 + * data from said buffer and eventually free it. Importantly, while the
3937 + * function is using the buffer, it sets the pointer to NULL. This is
3938 + * all right since __ffs_epfile_read_data and __ffs_epfile_read_buffered
3939 + * can never run concurrently (they are synchronised by epfile->mutex)
3940 + * so the latter will not assign a new value to the pointer.
3941 + *
3942 + * Meanwhile ffs_func_eps_disable frees the buffer (if the pointer is
3943 + * valid) and sets the pointer to READ_BUFFER_DROP value. This special
3944 + * value is crux of the synchronisation between ffs_func_eps_disable and
3945 + * __ffs_epfile_read_data.
3946 + *
3947 + * Once __ffs_epfile_read_data is about to finish it will try to set the
3948 + * pointer back to its old value (as described above), but seeing as the
3949 + * pointer is not-NULL (namely READ_BUFFER_DROP) it will instead free
3950 + * the buffer.
3951 + *
3952 + * == State transitions ==
3953 + *
3954 + * • ptr == NULL: (initial state)
3955 + * ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP
3956 + * ◦ __ffs_epfile_read_buffered: nop
3957 + * ◦ __ffs_epfile_read_data allocates temp buffer: go to ptr == buf
3958 + * ◦ reading finishes: n/a, not in ‘and reading’ state
3959 + * • ptr == DROP:
3960 + * ◦ __ffs_epfile_read_buffer_free: nop
3961 + * ◦ __ffs_epfile_read_buffered: go to ptr == NULL
3962 + * ◦ __ffs_epfile_read_data allocates temp buffer: free buf, nop
3963 + * ◦ reading finishes: n/a, not in ‘and reading’ state
3964 + * • ptr == buf:
3965 + * ◦ __ffs_epfile_read_buffer_free: free buf, go to ptr == DROP
3966 + * ◦ __ffs_epfile_read_buffered: go to ptr == NULL and reading
3967 + * ◦ __ffs_epfile_read_data: n/a, __ffs_epfile_read_buffered
3968 + * is always called first
3969 + * ◦ reading finishes: n/a, not in ‘and reading’ state
3970 + * • ptr == NULL and reading:
3971 + * ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP and reading
3972 + * ◦ __ffs_epfile_read_buffered: n/a, mutex is held
3973 + * ◦ __ffs_epfile_read_data: n/a, mutex is held
3974 + * ◦ reading finishes and …
3975 + * … all data read: free buf, go to ptr == NULL
3976 + * … otherwise: go to ptr == buf and reading
3977 + * • ptr == DROP and reading:
3978 + * ◦ __ffs_epfile_read_buffer_free: nop
3979 + * ◦ __ffs_epfile_read_buffered: n/a, mutex is held
3980 + * ◦ __ffs_epfile_read_data: n/a, mutex is held
3981 + * ◦ reading finishes: free buf, go to ptr == DROP
3982 */
3983 - struct ffs_buffer *read_buffer; /* P: epfile->mutex */
3984 + struct ffs_buffer *read_buffer;
3985 +#define READ_BUFFER_DROP ((struct ffs_buffer *)ERR_PTR(-ESHUTDOWN))
3986
3987 char name[5];
3988
3989 @@ -733,25 +785,47 @@ static void ffs_epfile_async_io_complete(struct usb_ep *_ep,
3990 schedule_work(&io_data->work);
3991 }
3992
3993 +static void __ffs_epfile_read_buffer_free(struct ffs_epfile *epfile)
3994 +{
3995 + /*
3996 + * See comment in struct ffs_epfile for full read_buffer pointer
3997 + * synchronisation story.
3998 + */
3999 + struct ffs_buffer *buf = xchg(&epfile->read_buffer, READ_BUFFER_DROP);
4000 + if (buf && buf != READ_BUFFER_DROP)
4001 + kfree(buf);
4002 +}
4003 +
4004 /* Assumes epfile->mutex is held. */
4005 static ssize_t __ffs_epfile_read_buffered(struct ffs_epfile *epfile,
4006 struct iov_iter *iter)
4007 {
4008 - struct ffs_buffer *buf = epfile->read_buffer;
4009 + /*
4010 + * Null out epfile->read_buffer so ffs_func_eps_disable does not free
4011 + * the buffer while we are using it. See comment in struct ffs_epfile
4012 + * for full read_buffer pointer synchronisation story.
4013 + */
4014 + struct ffs_buffer *buf = xchg(&epfile->read_buffer, NULL);
4015 ssize_t ret;
4016 - if (!buf)
4017 + if (!buf || buf == READ_BUFFER_DROP)
4018 return 0;
4019
4020 ret = copy_to_iter(buf->data, buf->length, iter);
4021 if (buf->length == ret) {
4022 kfree(buf);
4023 - epfile->read_buffer = NULL;
4024 - } else if (unlikely(iov_iter_count(iter))) {
4025 + return ret;
4026 + }
4027 +
4028 + if (unlikely(iov_iter_count(iter))) {
4029 ret = -EFAULT;
4030 } else {
4031 buf->length -= ret;
4032 buf->data += ret;
4033 }
4034 +
4035 + if (cmpxchg(&epfile->read_buffer, NULL, buf))
4036 + kfree(buf);
4037 +
4038 return ret;
4039 }
4040
4041 @@ -780,7 +854,15 @@ static ssize_t __ffs_epfile_read_data(struct ffs_epfile *epfile,
4042 buf->length = data_len;
4043 buf->data = buf->storage;
4044 memcpy(buf->storage, data + ret, data_len);
4045 - epfile->read_buffer = buf;
4046 +
4047 + /*
4048 + * At this point read_buffer is NULL or READ_BUFFER_DROP (if
4049 + * ffs_func_eps_disable has been called in the meanwhile). See comment
4050 + * in struct ffs_epfile for full read_buffer pointer synchronisation
4051 + * story.
4052 + */
4053 + if (unlikely(cmpxchg(&epfile->read_buffer, NULL, buf)))
4054 + kfree(buf);
4055
4056 return ret;
4057 }
4058 @@ -1094,8 +1176,7 @@ ffs_epfile_release(struct inode *inode, struct file *file)
4059
4060 ENTER();
4061
4062 - kfree(epfile->read_buffer);
4063 - epfile->read_buffer = NULL;
4064 + __ffs_epfile_read_buffer_free(epfile);
4065 ffs_data_closed(epfile->ffs);
4066
4067 return 0;
4068 @@ -1721,24 +1802,20 @@ static void ffs_func_eps_disable(struct ffs_function *func)
4069 unsigned count = func->ffs->eps_count;
4070 unsigned long flags;
4071
4072 + spin_lock_irqsave(&func->ffs->eps_lock, flags);
4073 do {
4074 - if (epfile)
4075 - mutex_lock(&epfile->mutex);
4076 - spin_lock_irqsave(&func->ffs->eps_lock, flags);
4077 /* pending requests get nuked */
4078 if (likely(ep->ep))
4079 usb_ep_disable(ep->ep);
4080 ++ep;
4081 - spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
4082
4083 if (epfile) {
4084 epfile->ep = NULL;
4085 - kfree(epfile->read_buffer);
4086 - epfile->read_buffer = NULL;
4087 - mutex_unlock(&epfile->mutex);
4088 + __ffs_epfile_read_buffer_free(epfile);
4089 ++epfile;
4090 }
4091 } while (--count);
4092 + spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
4093 }
4094
4095 static int ffs_func_eps_enable(struct ffs_function *func)
4096 diff --git a/include/net/ip.h b/include/net/ip.h
4097 index 156b0c11b524..0ccf6daf6f56 100644
4098 --- a/include/net/ip.h
4099 +++ b/include/net/ip.h
4100 @@ -47,7 +47,6 @@ struct inet_skb_parm {
4101 #define IPSKB_REROUTED BIT(4)
4102 #define IPSKB_DOREDIRECT BIT(5)
4103 #define IPSKB_FRAG_PMTU BIT(6)
4104 -#define IPSKB_FRAG_SEGS BIT(7)
4105
4106 u16 frag_max_size;
4107 };
4108 diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
4109 index 43a5a0e4524c..b01d5d1d7439 100644
4110 --- a/include/net/ip6_tunnel.h
4111 +++ b/include/net/ip6_tunnel.h
4112 @@ -145,6 +145,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
4113 {
4114 int pkt_len, err;
4115
4116 + memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4117 pkt_len = skb->len - skb_inner_network_offset(skb);
4118 err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
4119 if (unlikely(net_xmit_eval(err)))
4120 diff --git a/include/net/sock.h b/include/net/sock.h
4121 index 8741988e6880..c26eab962ec7 100644
4122 --- a/include/net/sock.h
4123 +++ b/include/net/sock.h
4124 @@ -1587,11 +1587,11 @@ static inline void sock_put(struct sock *sk)
4125 void sock_gen_put(struct sock *sk);
4126
4127 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested,
4128 - unsigned int trim_cap);
4129 + unsigned int trim_cap, bool refcounted);
4130 static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
4131 const int nested)
4132 {
4133 - return __sk_receive_skb(sk, skb, nested, 1);
4134 + return __sk_receive_skb(sk, skb, nested, 1, true);
4135 }
4136
4137 static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
4138 diff --git a/include/net/tcp.h b/include/net/tcp.h
4139 index 7717302cab91..0de698940793 100644
4140 --- a/include/net/tcp.h
4141 +++ b/include/net/tcp.h
4142 @@ -1164,6 +1164,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
4143 }
4144
4145 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
4146 +int tcp_filter(struct sock *sk, struct sk_buff *skb);
4147
4148 #undef STATE_TRACE
4149
4150 diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h
4151 index 5cd4d4d2dd1d..9c9c6ad55f14 100644
4152 --- a/include/uapi/linux/atm_zatm.h
4153 +++ b/include/uapi/linux/atm_zatm.h
4154 @@ -14,7 +14,6 @@
4155
4156 #include <linux/atmapi.h>
4157 #include <linux/atmioc.h>
4158 -#include <linux/time.h>
4159
4160 #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc)
4161 /* get pool statistics */
4162 diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
4163 index 570eeca7bdfa..ad1bc67aff1b 100644
4164 --- a/kernel/bpf/hashtab.c
4165 +++ b/kernel/bpf/hashtab.c
4166 @@ -687,7 +687,8 @@ static void delete_all_elements(struct bpf_htab *htab)
4167
4168 hlist_for_each_entry_safe(l, n, head, hash_node) {
4169 hlist_del_rcu(&l->hash_node);
4170 - htab_elem_free(htab, l);
4171 + if (l->state != HTAB_EXTRA_ELEM_USED)
4172 + htab_elem_free(htab, l);
4173 }
4174 }
4175 }
4176 diff --git a/net/core/dev.c b/net/core/dev.c
4177 index 44b3ba462ba1..9ce9d7284ea7 100644
4178 --- a/net/core/dev.c
4179 +++ b/net/core/dev.c
4180 @@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *skb)
4181 goto out;
4182 }
4183
4184 - *(__sum16 *)(skb->data + offset) = csum_fold(csum);
4185 + *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
4186 out_set_summed:
4187 skb->ip_summed = CHECKSUM_NONE;
4188 out:
4189 diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
4190 index 52742a02814f..5550a86f7264 100644
4191 --- a/net/core/flow_dissector.c
4192 +++ b/net/core/flow_dissector.c
4193 @@ -118,7 +118,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
4194 struct flow_dissector_key_tags *key_tags;
4195 struct flow_dissector_key_keyid *key_keyid;
4196 u8 ip_proto = 0;
4197 - bool ret = false;
4198 + bool ret;
4199
4200 if (!data) {
4201 data = skb->data;
4202 @@ -481,12 +481,17 @@ ip_proto_again:
4203 out_good:
4204 ret = true;
4205
4206 -out_bad:
4207 + key_control->thoff = (u16)nhoff;
4208 +out:
4209 key_basic->n_proto = proto;
4210 key_basic->ip_proto = ip_proto;
4211 - key_control->thoff = (u16)nhoff;
4212
4213 return ret;
4214 +
4215 +out_bad:
4216 + ret = false;
4217 + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
4218 + goto out;
4219 }
4220 EXPORT_SYMBOL(__skb_flow_dissect);
4221
4222 diff --git a/net/core/sock.c b/net/core/sock.c
4223 index fd7b41edf1ce..10acaccca5c8 100644
4224 --- a/net/core/sock.c
4225 +++ b/net/core/sock.c
4226 @@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
4227 EXPORT_SYMBOL(sock_queue_rcv_skb);
4228
4229 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
4230 - const int nested, unsigned int trim_cap)
4231 + const int nested, unsigned int trim_cap, bool refcounted)
4232 {
4233 int rc = NET_RX_SUCCESS;
4234
4235 @@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
4236
4237 bh_unlock_sock(sk);
4238 out:
4239 - sock_put(sk);
4240 + if (refcounted)
4241 + sock_put(sk);
4242 return rc;
4243 discard_and_relse:
4244 kfree_skb(skb);
4245 @@ -1563,6 +1564,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
4246 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
4247
4248 newsk->sk_err = 0;
4249 + newsk->sk_err_soft = 0;
4250 newsk->sk_priority = 0;
4251 newsk->sk_incoming_cpu = raw_smp_processor_id();
4252 atomic64_set(&newsk->sk_cookie, 0);
4253 diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
4254 index 345a3aeb8c7e..b567c8725aea 100644
4255 --- a/net/dccp/ipv4.c
4256 +++ b/net/dccp/ipv4.c
4257 @@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
4258 {
4259 const struct iphdr *iph = (struct iphdr *)skb->data;
4260 const u8 offset = iph->ihl << 2;
4261 - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
4262 + const struct dccp_hdr *dh;
4263 struct dccp_sock *dp;
4264 struct inet_sock *inet;
4265 const int type = icmp_hdr(skb)->type;
4266 @@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
4267 int err;
4268 struct net *net = dev_net(skb->dev);
4269
4270 - if (skb->len < offset + sizeof(*dh) ||
4271 - skb->len < offset + __dccp_basic_hdr_len(dh)) {
4272 - __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
4273 - return;
4274 - }
4275 + /* Only need dccph_dport & dccph_sport which are the first
4276 + * 4 bytes in dccp header.
4277 + * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
4278 + */
4279 + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
4280 + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
4281 + dh = (struct dccp_hdr *)(skb->data + offset);
4282
4283 sk = __inet_lookup_established(net, &dccp_hashinfo,
4284 iph->daddr, dh->dccph_dport,
4285 @@ -868,7 +870,7 @@ lookup:
4286 goto discard_and_relse;
4287 nf_reset(skb);
4288
4289 - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4);
4290 + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
4291
4292 no_dccp_socket:
4293 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
4294 diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
4295 index 3828f94b234c..715e5d1dc107 100644
4296 --- a/net/dccp/ipv6.c
4297 +++ b/net/dccp/ipv6.c
4298 @@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
4299 u8 type, u8 code, int offset, __be32 info)
4300 {
4301 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
4302 - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
4303 + const struct dccp_hdr *dh;
4304 struct dccp_sock *dp;
4305 struct ipv6_pinfo *np;
4306 struct sock *sk;
4307 @@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
4308 __u64 seq;
4309 struct net *net = dev_net(skb->dev);
4310
4311 - if (skb->len < offset + sizeof(*dh) ||
4312 - skb->len < offset + __dccp_basic_hdr_len(dh)) {
4313 - __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
4314 - ICMP6_MIB_INERRORS);
4315 - return;
4316 - }
4317 + /* Only need dccph_dport & dccph_sport which are the first
4318 + * 4 bytes in dccp header.
4319 + * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
4320 + */
4321 + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
4322 + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
4323 + dh = (struct dccp_hdr *)(skb->data + offset);
4324
4325 sk = __inet6_lookup_established(net, &dccp_hashinfo,
4326 &hdr->daddr, dh->dccph_dport,
4327 @@ -738,7 +739,8 @@ lookup:
4328 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
4329 goto discard_and_relse;
4330
4331 - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0;
4332 + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
4333 + refcounted) ? -1 : 0;
4334
4335 no_dccp_socket:
4336 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
4337 @@ -956,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
4338 .getsockopt = ipv6_getsockopt,
4339 .addr2sockaddr = inet6_csk_addr2sockaddr,
4340 .sockaddr_len = sizeof(struct sockaddr_in6),
4341 + .bind_conflict = inet6_csk_bind_conflict,
4342 #ifdef CONFIG_COMPAT
4343 .compat_setsockopt = compat_ipv6_setsockopt,
4344 .compat_getsockopt = compat_ipv6_getsockopt,
4345 diff --git a/net/dccp/proto.c b/net/dccp/proto.c
4346 index 41e65804ddf5..9fe25bf63296 100644
4347 --- a/net/dccp/proto.c
4348 +++ b/net/dccp/proto.c
4349 @@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout)
4350 __kfree_skb(skb);
4351 }
4352
4353 + /* If socket has been already reset kill it. */
4354 + if (sk->sk_state == DCCP_CLOSED)
4355 + goto adjudge_to_death;
4356 +
4357 if (data_was_unread) {
4358 /* Unread data was tossed, send an appropriate Reset Code */
4359 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
4360 diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
4361 index e2ffc2a5c7db..7ef703102dca 100644
4362 --- a/net/ipv4/fib_trie.c
4363 +++ b/net/ipv4/fib_trie.c
4364 @@ -2455,22 +2455,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
4365 struct key_vector *l, **tp = &iter->tnode;
4366 t_key key;
4367
4368 - /* use cache location of next-to-find key */
4369 + /* use cached location of previously found key */
4370 if (iter->pos > 0 && pos >= iter->pos) {
4371 - pos -= iter->pos;
4372 key = iter->key;
4373 } else {
4374 - iter->pos = 0;
4375 + iter->pos = 1;
4376 key = 0;
4377 }
4378
4379 - while ((l = leaf_walk_rcu(tp, key)) != NULL) {
4380 + pos -= iter->pos;
4381 +
4382 + while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
4383 key = l->key + 1;
4384 iter->pos++;
4385 -
4386 - if (--pos <= 0)
4387 - break;
4388 -
4389 l = NULL;
4390
4391 /* handle unlikely case of a key wrap */
4392 @@ -2479,7 +2476,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
4393 }
4394
4395 if (l)
4396 - iter->key = key; /* remember it */
4397 + iter->key = l->key; /* remember it */
4398 else
4399 iter->pos = 0; /* forget it */
4400
4401 @@ -2507,7 +2504,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
4402 return fib_route_get_idx(iter, *pos);
4403
4404 iter->pos = 0;
4405 - iter->key = 0;
4406 + iter->key = KEY_MAX;
4407
4408 return SEQ_START_TOKEN;
4409 }
4410 @@ -2516,7 +2513,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4411 {
4412 struct fib_route_iter *iter = seq->private;
4413 struct key_vector *l = NULL;
4414 - t_key key = iter->key;
4415 + t_key key = iter->key + 1;
4416
4417 ++*pos;
4418
4419 @@ -2525,7 +2522,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4420 l = leaf_walk_rcu(&iter->tnode, key);
4421
4422 if (l) {
4423 - iter->key = l->key + 1;
4424 + iter->key = l->key;
4425 iter->pos++;
4426 } else {
4427 iter->pos = 0;
4428 diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
4429 index 38abe70e595f..48734ee6293f 100644
4430 --- a/net/ipv4/icmp.c
4431 +++ b/net/ipv4/icmp.c
4432 @@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
4433 fl4->flowi4_proto = IPPROTO_ICMP;
4434 fl4->fl4_icmp_type = type;
4435 fl4->fl4_icmp_code = code;
4436 - fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
4437 + fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
4438
4439 security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
4440 rt = __ip_route_output_key_hash(net, fl4,
4441 @@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
4442 if (err)
4443 goto relookup_failed;
4444
4445 - if (inet_addr_type_dev_table(net, skb_in->dev,
4446 + if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
4447 fl4_dec.saddr) == RTN_LOCAL) {
4448 rt2 = __ip_route_output_key(net, &fl4_dec);
4449 if (IS_ERR(rt2))
4450 diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
4451 index 8b4ffd216839..9f0a7b96646f 100644
4452 --- a/net/ipv4/ip_forward.c
4453 +++ b/net/ipv4/ip_forward.c
4454 @@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb)
4455 if (opt->is_strictroute && rt->rt_uses_gateway)
4456 goto sr_failed;
4457
4458 - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
4459 + IPCB(skb)->flags |= IPSKB_FORWARDED;
4460 mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
4461 if (ip_exceeds_mtu(skb, mtu)) {
4462 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
4463 diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
4464 index dde37fb340bf..307daed9a4b9 100644
4465 --- a/net/ipv4/ip_output.c
4466 +++ b/net/ipv4/ip_output.c
4467 @@ -223,11 +223,9 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
4468 struct sk_buff *segs;
4469 int ret = 0;
4470
4471 - /* common case: fragmentation of segments is not allowed,
4472 - * or seglen is <= mtu
4473 + /* common case: seglen is <= mtu
4474 */
4475 - if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
4476 - skb_gso_validate_mtu(skb, mtu))
4477 + if (skb_gso_validate_mtu(skb, mtu))
4478 return ip_finish_output2(net, sk, skb);
4479
4480 /* Slowpath - GSO segment length is exceeding the dst MTU.
4481 diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
4482 index 0f227db0e9ac..afd6b5968caf 100644
4483 --- a/net/ipv4/ip_tunnel_core.c
4484 +++ b/net/ipv4/ip_tunnel_core.c
4485 @@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
4486 int pkt_len = skb->len - skb_inner_network_offset(skb);
4487 struct net *net = dev_net(rt->dst.dev);
4488 struct net_device *dev = skb->dev;
4489 - int skb_iif = skb->skb_iif;
4490 struct iphdr *iph;
4491 int err;
4492
4493 @@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
4494 skb_dst_set(skb, &rt->dst);
4495 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
4496
4497 - if (skb_iif && !(df & htons(IP_DF))) {
4498 - /* Arrived from an ingress interface, got encapsulated, with
4499 - * fragmentation of encapulating frames allowed.
4500 - * If skb is gso, the resulting encapsulated network segments
4501 - * may exceed dst mtu.
4502 - * Allow IP Fragmentation of segments.
4503 - */
4504 - IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
4505 - }
4506 -
4507 /* Push down and install the IP header. */
4508 skb_push(skb, sizeof(struct iphdr));
4509 skb_reset_network_header(skb);
4510 diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
4511 index 5f006e13de56..27089f5ebbb1 100644
4512 --- a/net/ipv4/ipmr.c
4513 +++ b/net/ipv4/ipmr.c
4514 @@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
4515 vif->dev->stats.tx_bytes += skb->len;
4516 }
4517
4518 - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
4519 + IPCB(skb)->flags |= IPSKB_FORWARDED;
4520
4521 /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
4522 * not only before forwarding, but after forwarding on all output
4523 diff --git a/net/ipv4/route.c b/net/ipv4/route.c
4524 index 62c3ed0b7556..2f23ef1a8486 100644
4525 --- a/net/ipv4/route.c
4526 +++ b/net/ipv4/route.c
4527 @@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
4528 goto reject_redirect;
4529 }
4530
4531 - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
4532 + n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
4533 + if (!n)
4534 + n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
4535 if (!IS_ERR(n)) {
4536 if (!(n->nud_state & NUD_VALID)) {
4537 neigh_event_send(n, NULL);
4538 diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
4539 index ffbb218de520..c876f5ddc86c 100644
4540 --- a/net/ipv4/tcp.c
4541 +++ b/net/ipv4/tcp.c
4542 @@ -1145,7 +1145,7 @@ restart:
4543
4544 err = -EPIPE;
4545 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
4546 - goto out_err;
4547 + goto do_error;
4548
4549 sg = !!(sk->sk_route_caps & NETIF_F_SG);
4550
4551 @@ -1219,7 +1219,7 @@ new_segment:
4552
4553 if (!skb_can_coalesce(skb, i, pfrag->page,
4554 pfrag->offset)) {
4555 - if (i == sysctl_max_skb_frags || !sg) {
4556 + if (i >= sysctl_max_skb_frags || !sg) {
4557 tcp_mark_push(tp, skb);
4558 goto new_segment;
4559 }
4560 diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
4561 index 10d728b6804c..ab37c6775630 100644
4562 --- a/net/ipv4/tcp_dctcp.c
4563 +++ b/net/ipv4/tcp_dctcp.c
4564 @@ -56,6 +56,7 @@ struct dctcp {
4565 u32 next_seq;
4566 u32 ce_state;
4567 u32 delayed_ack_reserved;
4568 + u32 loss_cwnd;
4569 };
4570
4571 static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
4572 @@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
4573 ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
4574
4575 ca->delayed_ack_reserved = 0;
4576 + ca->loss_cwnd = 0;
4577 ca->ce_state = 0;
4578
4579 dctcp_reset(tp, ca);
4580 @@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
4581
4582 static u32 dctcp_ssthresh(struct sock *sk)
4583 {
4584 - const struct dctcp *ca = inet_csk_ca(sk);
4585 + struct dctcp *ca = inet_csk_ca(sk);
4586 struct tcp_sock *tp = tcp_sk(sk);
4587
4588 + ca->loss_cwnd = tp->snd_cwnd;
4589 return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
4590 }
4591
4592 @@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
4593 return 0;
4594 }
4595
4596 +static u32 dctcp_cwnd_undo(struct sock *sk)
4597 +{
4598 + const struct dctcp *ca = inet_csk_ca(sk);
4599 +
4600 + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
4601 +}
4602 +
4603 static struct tcp_congestion_ops dctcp __read_mostly = {
4604 .init = dctcp_init,
4605 .in_ack_event = dctcp_update_alpha,
4606 .cwnd_event = dctcp_cwnd_event,
4607 .ssthresh = dctcp_ssthresh,
4608 .cong_avoid = tcp_reno_cong_avoid,
4609 + .undo_cwnd = dctcp_cwnd_undo,
4610 .set_state = dctcp_state,
4611 .get_info = dctcp_get_info,
4612 .flags = TCP_CONG_NEEDS_ECN,
4613 diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
4614 index 7158d4f8dae4..7b235fa12903 100644
4615 --- a/net/ipv4/tcp_ipv4.c
4616 +++ b/net/ipv4/tcp_ipv4.c
4617 @@ -1537,6 +1537,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
4618 }
4619 EXPORT_SYMBOL(tcp_prequeue);
4620
4621 +int tcp_filter(struct sock *sk, struct sk_buff *skb)
4622 +{
4623 + struct tcphdr *th = (struct tcphdr *)skb->data;
4624 + unsigned int eaten = skb->len;
4625 + int err;
4626 +
4627 + err = sk_filter_trim_cap(sk, skb, th->doff * 4);
4628 + if (!err) {
4629 + eaten -= skb->len;
4630 + TCP_SKB_CB(skb)->end_seq -= eaten;
4631 + }
4632 + return err;
4633 +}
4634 +EXPORT_SYMBOL(tcp_filter);
4635 +
4636 /*
4637 * From tcp_input.c
4638 */
4639 @@ -1648,8 +1663,10 @@ process:
4640
4641 nf_reset(skb);
4642
4643 - if (sk_filter(sk, skb))
4644 + if (tcp_filter(sk, skb))
4645 goto discard_and_relse;
4646 + th = (const struct tcphdr *)skb->data;
4647 + iph = ip_hdr(skb);
4648
4649 skb->dev = NULL;
4650
4651 diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
4652 index bd59c343d35f..7370ad2e693a 100644
4653 --- a/net/ipv6/icmp.c
4654 +++ b/net/ipv6/icmp.c
4655 @@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
4656 if (__ipv6_addr_needs_scope_id(addr_type))
4657 iif = skb->dev->ifindex;
4658 else
4659 - iif = l3mdev_master_ifindex(skb->dev);
4660 + iif = l3mdev_master_ifindex(skb_dst(skb)->dev);
4661
4662 /*
4663 * Must not send error if the source does not uniquely
4664 diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
4665 index fc67822c42e0..af6a09efad5b 100644
4666 --- a/net/ipv6/tcp_ipv6.c
4667 +++ b/net/ipv6/tcp_ipv6.c
4668 @@ -1228,7 +1228,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
4669 if (skb->protocol == htons(ETH_P_IP))
4670 return tcp_v4_do_rcv(sk, skb);
4671
4672 - if (sk_filter(sk, skb))
4673 + if (tcp_filter(sk, skb))
4674 goto discard;
4675
4676 /*
4677 @@ -1455,8 +1455,10 @@ process:
4678 if (tcp_v6_inbound_md5_hash(sk, skb))
4679 goto discard_and_relse;
4680
4681 - if (sk_filter(sk, skb))
4682 + if (tcp_filter(sk, skb))
4683 goto discard_and_relse;
4684 + th = (const struct tcphdr *)skb->data;
4685 + hdr = ipv6_hdr(skb);
4686
4687 skb->dev = NULL;
4688
4689 diff --git a/net/sctp/socket.c b/net/sctp/socket.c
4690 index baccbf3c1c60..7b0e059bf13b 100644
4691 --- a/net/sctp/socket.c
4692 +++ b/net/sctp/socket.c
4693 @@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk,
4694
4695 timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
4696
4697 - err = sctp_wait_for_connect(asoc, &timeo);
4698 - if ((err == 0 || err == -EINPROGRESS) && assoc_id)
4699 + if (assoc_id)
4700 *assoc_id = asoc->assoc_id;
4701 + err = sctp_wait_for_connect(asoc, &timeo);
4702 + /* Note: the asoc may be freed after the return of
4703 + * sctp_wait_for_connect.
4704 + */
4705
4706 /* Don't free association on exit. */
4707 asoc = NULL;
4708 @@ -4278,19 +4281,18 @@ static void sctp_shutdown(struct sock *sk, int how)
4709 {
4710 struct net *net = sock_net(sk);
4711 struct sctp_endpoint *ep;
4712 - struct sctp_association *asoc;
4713
4714 if (!sctp_style(sk, TCP))
4715 return;
4716
4717 - if (how & SEND_SHUTDOWN) {
4718 + ep = sctp_sk(sk)->ep;
4719 + if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
4720 + struct sctp_association *asoc;
4721 +
4722 sk->sk_state = SCTP_SS_CLOSING;
4723 - ep = sctp_sk(sk)->ep;
4724 - if (!list_empty(&ep->asocs)) {
4725 - asoc = list_entry(ep->asocs.next,
4726 - struct sctp_association, asocs);
4727 - sctp_primitive_SHUTDOWN(net, asoc, NULL);
4728 - }
4729 + asoc = list_entry(ep->asocs.next,
4730 + struct sctp_association, asocs);
4731 + sctp_primitive_SHUTDOWN(net, asoc, NULL);
4732 }
4733 }
4734
4735 diff --git a/net/socket.c b/net/socket.c
4736 index a1bd16106625..03bc2c289c94 100644
4737 --- a/net/socket.c
4738 +++ b/net/socket.c
4739 @@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
4740 if (err)
4741 break;
4742 ++datagrams;
4743 + if (msg_data_left(&msg_sys))
4744 + break;
4745 cond_resched();
4746 }
4747
4748 diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
4749 index f3825b676e38..f046b77cfefe 100644
4750 --- a/tools/spi/spidev_test.c
4751 +++ b/tools/spi/spidev_test.c
4752 @@ -19,6 +19,7 @@
4753 #include <getopt.h>
4754 #include <fcntl.h>
4755 #include <sys/ioctl.h>
4756 +#include <linux/ioctl.h>
4757 #include <sys/stat.h>
4758 #include <linux/types.h>
4759 #include <linux/spi/spidev.h>