Magellan Linux

Annotation of /trunk/kernel-magellan/patches-4.8/0109-4.8.10-all-fixes.patch

Parent Directory | Revision Log


Revision 2849
Tue Nov 22 13:19:55 2016 UTC by niro
File size: 141674 bytes
-linux-4.8.10
1 niro 2849 diff --git a/Makefile b/Makefile
2     index c1519ab85258..7cf2b4985703 100644
3     --- a/Makefile
4     +++ b/Makefile
5     @@ -1,6 +1,6 @@
6     VERSION = 4
7     PATCHLEVEL = 8
8     -SUBLEVEL = 9
9     +SUBLEVEL = 10
10     EXTRAVERSION =
11     NAME = Psychotic Stoned Sheep
12    
13     diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
14     index 37a315d0ddd4..a6847fc05a6d 100644
15     --- a/arch/sparc/include/asm/uaccess_64.h
16     +++ b/arch/sparc/include/asm/uaccess_64.h
17     @@ -98,7 +98,6 @@ struct exception_table_entry {
18     unsigned int insn, fixup;
19     };
20    
21     -void __ret_efault(void);
22     void __retl_efault(void);
23    
24     /* Uh, these should become the main single-value transfer routines..
25     @@ -205,55 +204,34 @@ int __get_user_bad(void);
26     unsigned long __must_check ___copy_from_user(void *to,
27     const void __user *from,
28     unsigned long size);
29     -unsigned long copy_from_user_fixup(void *to, const void __user *from,
30     - unsigned long size);
31     static inline unsigned long __must_check
32     copy_from_user(void *to, const void __user *from, unsigned long size)
33     {
34     - unsigned long ret;
35     -
36     check_object_size(to, size, false);
37    
38     - ret = ___copy_from_user(to, from, size);
39     - if (unlikely(ret))
40     - ret = copy_from_user_fixup(to, from, size);
41     -
42     - return ret;
43     + return ___copy_from_user(to, from, size);
44     }
45     #define __copy_from_user copy_from_user
46    
47     unsigned long __must_check ___copy_to_user(void __user *to,
48     const void *from,
49     unsigned long size);
50     -unsigned long copy_to_user_fixup(void __user *to, const void *from,
51     - unsigned long size);
52     static inline unsigned long __must_check
53     copy_to_user(void __user *to, const void *from, unsigned long size)
54     {
55     - unsigned long ret;
56     -
57     check_object_size(from, size, true);
58    
59     - ret = ___copy_to_user(to, from, size);
60     - if (unlikely(ret))
61     - ret = copy_to_user_fixup(to, from, size);
62     - return ret;
63     + return ___copy_to_user(to, from, size);
64     }
65     #define __copy_to_user copy_to_user
66    
67     unsigned long __must_check ___copy_in_user(void __user *to,
68     const void __user *from,
69     unsigned long size);
70     -unsigned long copy_in_user_fixup(void __user *to, void __user *from,
71     - unsigned long size);
72     static inline unsigned long __must_check
73     copy_in_user(void __user *to, void __user *from, unsigned long size)
74     {
75     - unsigned long ret = ___copy_in_user(to, from, size);
76     -
77     - if (unlikely(ret))
78     - ret = copy_in_user_fixup(to, from, size);
79     - return ret;
80     + return ___copy_in_user(to, from, size);
81     }
82     #define __copy_in_user copy_in_user
83    
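The uaccess_64.h hunk above drops the old two-pass scheme: previously a failing ___copy_{from,to,in}_user() returned a flag and the copy_*_user_fixup() helpers (whose object, user_fixup.o, is removed from the lib/ Makefile further down) re-walked the range to work out the residue. With this series the assembly routines report the exact count of bytes left uncopied themselves, so each wrapper becomes a straight pass-through. A minimal C restatement of the resulting shape, mirroring the '+' lines above (illustrative only):

    static inline unsigned long __must_check
    copy_from_user(void *to, const void __user *from, unsigned long size)
    {
            check_object_size(to, size, false);
            /* ___copy_from_user() now returns 0 on success, or the exact
             * number of bytes that could not be copied. */
            return ___copy_from_user(to, from, size);
    }

copy_to_user() and copy_in_user() are simplified in exactly the same way.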
84     diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
85     index a076b4249e62..5f1f3ae21657 100644
86     --- a/arch/sparc/kernel/head_64.S
87     +++ b/arch/sparc/kernel/head_64.S
88     @@ -922,47 +922,11 @@ prom_tba: .xword 0
89     tlb_type: .word 0 /* Must NOT end up in BSS */
90     .section ".fixup",#alloc,#execinstr
91    
92     - .globl __ret_efault, __retl_efault, __ret_one, __retl_one
93     -ENTRY(__ret_efault)
94     - ret
95     - restore %g0, -EFAULT, %o0
96     -ENDPROC(__ret_efault)
97     -
98     ENTRY(__retl_efault)
99     retl
100     mov -EFAULT, %o0
101     ENDPROC(__retl_efault)
102    
103     -ENTRY(__retl_one)
104     - retl
105     - mov 1, %o0
106     -ENDPROC(__retl_one)
107     -
108     -ENTRY(__retl_one_fp)
109     - VISExitHalf
110     - retl
111     - mov 1, %o0
112     -ENDPROC(__retl_one_fp)
113     -
114     -ENTRY(__ret_one_asi)
115     - wr %g0, ASI_AIUS, %asi
116     - ret
117     - restore %g0, 1, %o0
118     -ENDPROC(__ret_one_asi)
119     -
120     -ENTRY(__retl_one_asi)
121     - wr %g0, ASI_AIUS, %asi
122     - retl
123     - mov 1, %o0
124     -ENDPROC(__retl_one_asi)
125     -
126     -ENTRY(__retl_one_asi_fp)
127     - wr %g0, ASI_AIUS, %asi
128     - VISExitHalf
129     - retl
130     - mov 1, %o0
131     -ENDPROC(__retl_one_asi_fp)
132     -
133     ENTRY(__retl_o1)
134     retl
135     mov %o1, %o0
136     diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
137     index 59bbeff55024..07933b9e9ce0 100644
138     --- a/arch/sparc/kernel/jump_label.c
139     +++ b/arch/sparc/kernel/jump_label.c
140     @@ -13,19 +13,30 @@
141     void arch_jump_label_transform(struct jump_entry *entry,
142     enum jump_label_type type)
143     {
144     - u32 val;
145     u32 *insn = (u32 *) (unsigned long) entry->code;
146     + u32 val;
147    
148     if (type == JUMP_LABEL_JMP) {
149     s32 off = (s32)entry->target - (s32)entry->code;
150     + bool use_v9_branch = false;
151     +
152     + BUG_ON(off & 3);
153    
154     #ifdef CONFIG_SPARC64
155     - /* ba,pt %xcc, . + (off << 2) */
156     - val = 0x10680000 | ((u32) off >> 2);
157     -#else
158     - /* ba . + (off << 2) */
159     - val = 0x10800000 | ((u32) off >> 2);
160     + if (off <= 0xfffff && off >= -0x100000)
161     + use_v9_branch = true;
162     #endif
163     + if (use_v9_branch) {
164     + /* WDISP19 - target is . + immed << 2 */
165     + /* ba,pt %xcc, . + off */
166     + val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
167     + } else {
168     + /* WDISP22 - target is . + immed << 2 */
169     + BUG_ON(off > 0x7fffff);
170     + BUG_ON(off < -0x800000);
171     + /* ba . + off */
172     + val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
173     + }
174     } else {
175     val = 0x01000000;
176     }
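The jump_label.c change above makes the branch-encoding ranges explicit. On sparc64 the ba,pt %xcc form carries a 19-bit word displacement (WDISP19, roughly +-1 MiB of reach), so it is now used only when the target is close enough; otherwise the code falls back to the plain ba form with a 22-bit displacement (WDISP22, roughly +-8 MiB), and BUG_ON() fires if the offset is unaligned or out of even that range. Restated as a small C helper (illustrative; the opcodes and masks are taken from the '+' lines above):

    /* off is the byte offset from the jump-label site to its target;
     * SPARC branches store off >> 2, i.e. a word displacement. */
    static unsigned int encode_branch(int off)
    {
            if (off >= -0x100000 && off <= 0xfffff)
                    /* ba,pt %xcc, . + off  (WDISP19) */
                    return 0x10680000 | (((unsigned int)off >> 2) & 0x7ffff);
            /* ba . + off  (WDISP22) */
            return 0x10800000 | (((unsigned int)off >> 2) & 0x3fffff);
    }

Note that a CONFIG_SPARC32 build never sets use_v9_branch, so it always takes the WDISP22 path, as before.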
177     diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
178     index 9e034f29dcc5..20ffb052fe38 100644
179     --- a/arch/sparc/kernel/sparc_ksyms_64.c
180     +++ b/arch/sparc/kernel/sparc_ksyms_64.c
181     @@ -27,7 +27,6 @@ EXPORT_SYMBOL(__flushw_user);
182     EXPORT_SYMBOL_GPL(real_hard_smp_processor_id);
183    
184     /* from head_64.S */
185     -EXPORT_SYMBOL(__ret_efault);
186     EXPORT_SYMBOL(tlb_type);
187     EXPORT_SYMBOL(sun4v_chip_type);
188     EXPORT_SYMBOL(prom_root_node);
189     diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S
190     index b7d0bd6b1406..69a439fa2fc1 100644
191     --- a/arch/sparc/lib/GENcopy_from_user.S
192     +++ b/arch/sparc/lib/GENcopy_from_user.S
193     @@ -3,11 +3,11 @@
194     * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
195     */
196    
197     -#define EX_LD(x) \
198     +#define EX_LD(x,y) \
199     98: x; \
200     .section __ex_table,"a";\
201     .align 4; \
202     - .word 98b, __retl_one; \
203     + .word 98b, y; \
204     .text; \
205     .align 4;
206    
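From here on, the EX_LD()/EX_ST() wrappers (and their _FP variants) in every copy routine gain a second argument: the name of a fixup routine specific to that call site, which becomes the fixup field of the generated __ex_table entry. The old scheme recorded the generic __retl_one, which could only report "something failed"; the new per-site handlers know which loop counters are live and return the exact residual byte count. A minimal sketch of the mechanism, using the exception_table_entry layout from uaccess_64.h above (the lookup helper name is hypothetical, standing in for the kernel's real exception-table search):

    struct exception_table_entry {
            unsigned int insn;      /* address of the faulting load/store */
            unsigned int fixup;     /* per-site handler: returns the exact
                                     * number of bytes left to copy */
    };

    /* hypothetical illustration of the fault-time lookup */
    static unsigned int find_fixup(unsigned int fault_pc,
                                   const struct exception_table_entry *tbl,
                                   unsigned int n)
    {
            for (unsigned int i = 0; i < n; i++)
                    if (tbl[i].insn == fault_pc)
                            return tbl[i].fixup;    /* resume execution here */
            return 0;                               /* no entry: real fault  */
    }

The _FP variants paste "_fp" onto the handler name (the y##_fp above), selecting a twin that also leaves the VIS/FPU section before returning.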
207     diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S
208     index 780550e1afc7..9947427ce354 100644
209     --- a/arch/sparc/lib/GENcopy_to_user.S
210     +++ b/arch/sparc/lib/GENcopy_to_user.S
211     @@ -3,11 +3,11 @@
212     * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
213     */
214    
215     -#define EX_ST(x) \
216     +#define EX_ST(x,y) \
217     98: x; \
218     .section __ex_table,"a";\
219     .align 4; \
220     - .word 98b, __retl_one; \
221     + .word 98b, y; \
222     .text; \
223     .align 4;
224    
225     diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S
226     index 89358ee94851..059ea24ad73d 100644
227     --- a/arch/sparc/lib/GENmemcpy.S
228     +++ b/arch/sparc/lib/GENmemcpy.S
229     @@ -4,21 +4,18 @@
230     */
231    
232     #ifdef __KERNEL__
233     +#include <linux/linkage.h>
234     #define GLOBAL_SPARE %g7
235     #else
236     #define GLOBAL_SPARE %g5
237     #endif
238    
239     #ifndef EX_LD
240     -#define EX_LD(x) x
241     +#define EX_LD(x,y) x
242     #endif
243    
244     #ifndef EX_ST
245     -#define EX_ST(x) x
246     -#endif
247     -
248     -#ifndef EX_RETVAL
249     -#define EX_RETVAL(x) x
250     +#define EX_ST(x,y) x
251     #endif
252    
253     #ifndef LOAD
254     @@ -45,6 +42,29 @@
255     .register %g3,#scratch
256    
257     .text
258     +
259     +#ifndef EX_RETVAL
260     +#define EX_RETVAL(x) x
261     +ENTRY(GEN_retl_o4_1)
262     + add %o4, %o2, %o4
263     + retl
264     + add %o4, 1, %o0
265     +ENDPROC(GEN_retl_o4_1)
266     +ENTRY(GEN_retl_g1_8)
267     + add %g1, %o2, %g1
268     + retl
269     + add %g1, 8, %o0
270     +ENDPROC(GEN_retl_g1_8)
271     +ENTRY(GEN_retl_o2_4)
272     + retl
273     + add %o2, 4, %o0
274     +ENDPROC(GEN_retl_o2_4)
275     +ENTRY(GEN_retl_o2_1)
276     + retl
277     + add %o2, 1, %o0
278     +ENDPROC(GEN_retl_o2_1)
279     +#endif
280     +
281     .align 64
282    
283     .globl FUNC_NAME
284     @@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
285     sub %g0, %o4, %o4
286     sub %o2, %o4, %o2
287     1: subcc %o4, 1, %o4
288     - EX_LD(LOAD(ldub, %o1, %g1))
289     - EX_ST(STORE(stb, %g1, %o0))
290     + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
291     + EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
292     add %o1, 1, %o1
293     bne,pt %XCC, 1b
294     add %o0, 1, %o0
295     @@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
296     andn %o2, 0x7, %g1
297     sub %o2, %g1, %o2
298     1: subcc %g1, 0x8, %g1
299     - EX_LD(LOAD(ldx, %o1, %g2))
300     - EX_ST(STORE(stx, %g2, %o0))
301     + EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
302     + EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
303     add %o1, 0x8, %o1
304     bne,pt %XCC, 1b
305     add %o0, 0x8, %o0
306     @@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
307    
308     1:
309     subcc %o2, 4, %o2
310     - EX_LD(LOAD(lduw, %o1, %g1))
311     - EX_ST(STORE(stw, %g1, %o1 + %o3))
312     + EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
313     + EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
314     bgu,pt %XCC, 1b
315     add %o1, 4, %o1
316    
317     @@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
318     .align 32
319     90:
320     subcc %o2, 1, %o2
321     - EX_LD(LOAD(ldub, %o1, %g1))
322     - EX_ST(STORE(stb, %g1, %o1 + %o3))
323     + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
324     + EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
325     bgu,pt %XCC, 90b
326     add %o1, 1, %o1
327     retl
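The GENmemcpy.S hunk shows the pattern used throughout this series: the copy loops pass a per-call-site GEN_retl_* label to EX_LD()/EX_ST(), and each such stub rebuilds the count of bytes not yet copied from whatever registers the loop keeps live at that point. Restated as plain arithmetic in C (register names kept as parameters; this is my reading of the loops above, not kernel code):

    /* byte-alignment loop: %o4 counts bytes left in this loop (already
     * decremented when the access faults), %o2 holds the bytes queued
     * for the following loops, and the faulting byte itself adds one. */
    static unsigned long gen_retl_o4_1(unsigned long o2, unsigned long o4)
    {
            return o4 + o2 + 1;
    }

    /* 8-byte loop: %g1 bytes remain in this loop plus the 8-byte chunk
     * in flight when the fault hit, plus the %o2 tail. */
    static unsigned long gen_retl_g1_8(unsigned long o2, unsigned long g1)
    {
            return g1 + o2 + 8;
    }

The remaining stubs (GEN_retl_o2_4, GEN_retl_o2_1) apply the same idea to the word and byte tail loops.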
328     diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
329     index 3269b0234093..4f2384a4286a 100644
330     --- a/arch/sparc/lib/Makefile
331     +++ b/arch/sparc/lib/Makefile
332     @@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
333     lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
334     lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
335    
336     -lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
337     +lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
338     lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
339    
340     obj-$(CONFIG_SPARC64) += iomap.o
341     diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
342     index d5242b8c4f94..b79a6998d87c 100644
343     --- a/arch/sparc/lib/NG2copy_from_user.S
344     +++ b/arch/sparc/lib/NG2copy_from_user.S
345     @@ -3,19 +3,19 @@
346     * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
347     */
348    
349     -#define EX_LD(x) \
350     +#define EX_LD(x,y) \
351     98: x; \
352     .section __ex_table,"a";\
353     .align 4; \
354     - .word 98b, __retl_one_asi;\
355     + .word 98b, y; \
356     .text; \
357     .align 4;
358    
359     -#define EX_LD_FP(x) \
360     +#define EX_LD_FP(x,y) \
361     98: x; \
362     .section __ex_table,"a";\
363     .align 4; \
364     - .word 98b, __retl_one_asi_fp;\
365     + .word 98b, y##_fp; \
366     .text; \
367     .align 4;
368    
369     diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
370     index 4e962d993b10..dcec55f254ab 100644
371     --- a/arch/sparc/lib/NG2copy_to_user.S
372     +++ b/arch/sparc/lib/NG2copy_to_user.S
373     @@ -3,19 +3,19 @@
374     * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
375     */
376    
377     -#define EX_ST(x) \
378     +#define EX_ST(x,y) \
379     98: x; \
380     .section __ex_table,"a";\
381     .align 4; \
382     - .word 98b, __retl_one_asi;\
383     + .word 98b, y; \
384     .text; \
385     .align 4;
386    
387     -#define EX_ST_FP(x) \
388     +#define EX_ST_FP(x,y) \
389     98: x; \
390     .section __ex_table,"a";\
391     .align 4; \
392     - .word 98b, __retl_one_asi_fp;\
393     + .word 98b, y##_fp; \
394     .text; \
395     .align 4;
396    
397     diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
398     index d5f585df2f3f..c629dbd121b6 100644
399     --- a/arch/sparc/lib/NG2memcpy.S
400     +++ b/arch/sparc/lib/NG2memcpy.S
401     @@ -4,6 +4,7 @@
402     */
403    
404     #ifdef __KERNEL__
405     +#include <linux/linkage.h>
406     #include <asm/visasm.h>
407     #include <asm/asi.h>
408     #define GLOBAL_SPARE %g7
409     @@ -32,21 +33,17 @@
410     #endif
411    
412     #ifndef EX_LD
413     -#define EX_LD(x) x
414     +#define EX_LD(x,y) x
415     #endif
416     #ifndef EX_LD_FP
417     -#define EX_LD_FP(x) x
418     +#define EX_LD_FP(x,y) x
419     #endif
420    
421     #ifndef EX_ST
422     -#define EX_ST(x) x
423     +#define EX_ST(x,y) x
424     #endif
425     #ifndef EX_ST_FP
426     -#define EX_ST_FP(x) x
427     -#endif
428     -
429     -#ifndef EX_RETVAL
430     -#define EX_RETVAL(x) x
431     +#define EX_ST_FP(x,y) x
432     #endif
433    
434     #ifndef LOAD
435     @@ -140,45 +137,110 @@
436     fsrc2 %x6, %f12; \
437     fsrc2 %x7, %f14;
438     #define FREG_LOAD_1(base, x0) \
439     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
440     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
441     #define FREG_LOAD_2(base, x0, x1) \
442     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
443     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
444     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
445     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
446     #define FREG_LOAD_3(base, x0, x1, x2) \
447     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
448     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
449     - EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
450     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
451     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
452     + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
453     #define FREG_LOAD_4(base, x0, x1, x2, x3) \
454     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
455     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
456     - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
457     - EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
458     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
459     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
460     + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
461     + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
462     #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
463     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
464     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
465     - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
466     - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
467     - EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
468     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
469     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
470     + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
471     + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
472     + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
473     #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
474     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
475     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
476     - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
477     - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
478     - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
479     - EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
480     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
481     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
482     + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
483     + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
484     + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
485     + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
486     #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
487     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
488     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
489     - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
490     - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
491     - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
492     - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
493     - EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
494     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
495     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
496     + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
497     + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
498     + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
499     + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
500     + EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
501    
502     .register %g2,#scratch
503     .register %g3,#scratch
504    
505     .text
506     +#ifndef EX_RETVAL
507     +#define EX_RETVAL(x) x
508     +__restore_fp:
509     + VISExitHalf
510     +__restore_asi:
511     + retl
512     + wr %g0, ASI_AIUS, %asi
513     +ENTRY(NG2_retl_o2)
514     + ba,pt %xcc, __restore_asi
515     + mov %o2, %o0
516     +ENDPROC(NG2_retl_o2)
517     +ENTRY(NG2_retl_o2_plus_1)
518     + ba,pt %xcc, __restore_asi
519     + add %o2, 1, %o0
520     +ENDPROC(NG2_retl_o2_plus_1)
521     +ENTRY(NG2_retl_o2_plus_4)
522     + ba,pt %xcc, __restore_asi
523     + add %o2, 4, %o0
524     +ENDPROC(NG2_retl_o2_plus_4)
525     +ENTRY(NG2_retl_o2_plus_8)
526     + ba,pt %xcc, __restore_asi
527     + add %o2, 8, %o0
528     +ENDPROC(NG2_retl_o2_plus_8)
529     +ENTRY(NG2_retl_o2_plus_o4_plus_1)
530     + add %o4, 1, %o4
531     + ba,pt %xcc, __restore_asi
532     + add %o2, %o4, %o0
533     +ENDPROC(NG2_retl_o2_plus_o4_plus_1)
534     +ENTRY(NG2_retl_o2_plus_o4_plus_8)
535     + add %o4, 8, %o4
536     + ba,pt %xcc, __restore_asi
537     + add %o2, %o4, %o0
538     +ENDPROC(NG2_retl_o2_plus_o4_plus_8)
539     +ENTRY(NG2_retl_o2_plus_o4_plus_16)
540     + add %o4, 16, %o4
541     + ba,pt %xcc, __restore_asi
542     + add %o2, %o4, %o0
543     +ENDPROC(NG2_retl_o2_plus_o4_plus_16)
544     +ENTRY(NG2_retl_o2_plus_g1_fp)
545     + ba,pt %xcc, __restore_fp
546     + add %o2, %g1, %o0
547     +ENDPROC(NG2_retl_o2_plus_g1_fp)
548     +ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
549     + add %g1, 64, %g1
550     + ba,pt %xcc, __restore_fp
551     + add %o2, %g1, %o0
552     +ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
553     +ENTRY(NG2_retl_o2_plus_g1_plus_1)
554     + add %g1, 1, %g1
555     + ba,pt %xcc, __restore_asi
556     + add %o2, %g1, %o0
557     +ENDPROC(NG2_retl_o2_plus_g1_plus_1)
558     +ENTRY(NG2_retl_o2_and_7_plus_o4)
559     + and %o2, 7, %o2
560     + ba,pt %xcc, __restore_asi
561     + add %o2, %o4, %o0
562     +ENDPROC(NG2_retl_o2_and_7_plus_o4)
563     +ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
564     + and %o2, 7, %o2
565     + add %o4, 8, %o4
566     + ba,pt %xcc, __restore_asi
567     + add %o2, %o4, %o0
568     +ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
569     +#endif
570     +
571     .align 64
572    
573     .globl FUNC_NAME
574     @@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
575     sub %g0, %o4, %o4 ! bytes to align dst
576     sub %o2, %o4, %o2
577     1: subcc %o4, 1, %o4
578     - EX_LD(LOAD(ldub, %o1, %g1))
579     - EX_ST(STORE(stb, %g1, %o0))
580     + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
581     + EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
582     add %o1, 1, %o1
583     bne,pt %XCC, 1b
584     add %o0, 1, %o0
585     @@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
586     nop
587     /* fall through for 0 < low bits < 8 */
588     110: sub %o4, 64, %g2
589     - EX_LD_FP(LOAD_BLK(%g2, %f0))
590     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
591     - EX_LD_FP(LOAD_BLK(%o4, %f16))
592     + EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
593     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
594     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
595     FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
596     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
597     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
598     FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
599     subcc %g1, 64, %g1
600     add %o4, 64, %o4
601     @@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
602    
603     120: sub %o4, 56, %g2
604     FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
605     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
606     - EX_LD_FP(LOAD_BLK(%o4, %f16))
607     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
608     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
609     FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
610     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
611     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
612     FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
613     subcc %g1, 64, %g1
614     add %o4, 64, %o4
615     @@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
616    
617     130: sub %o4, 48, %g2
618     FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
619     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
620     - EX_LD_FP(LOAD_BLK(%o4, %f16))
621     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
622     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
623     FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
624     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
625     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
626     FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
627     subcc %g1, 64, %g1
628     add %o4, 64, %o4
629     @@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
630    
631     140: sub %o4, 40, %g2
632     FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
633     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
634     - EX_LD_FP(LOAD_BLK(%o4, %f16))
635     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
636     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
637     FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
638     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
639     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
640     FREG_MOVE_5(f22, f24, f26, f28, f30)
641     subcc %g1, 64, %g1
642     add %o4, 64, %o4
643     @@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
644    
645     150: sub %o4, 32, %g2
646     FREG_LOAD_4(%g2, f0, f2, f4, f6)
647     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
648     - EX_LD_FP(LOAD_BLK(%o4, %f16))
649     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
650     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
651     FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
652     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
653     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
654     FREG_MOVE_4(f24, f26, f28, f30)
655     subcc %g1, 64, %g1
656     add %o4, 64, %o4
657     @@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
658    
659     160: sub %o4, 24, %g2
660     FREG_LOAD_3(%g2, f0, f2, f4)
661     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
662     - EX_LD_FP(LOAD_BLK(%o4, %f16))
663     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
664     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
665     FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
666     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
667     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
668     FREG_MOVE_3(f26, f28, f30)
669     subcc %g1, 64, %g1
670     add %o4, 64, %o4
671     @@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
672    
673     170: sub %o4, 16, %g2
674     FREG_LOAD_2(%g2, f0, f2)
675     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
676     - EX_LD_FP(LOAD_BLK(%o4, %f16))
677     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
678     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
679     FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
680     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
681     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
682     FREG_MOVE_2(f28, f30)
683     subcc %g1, 64, %g1
684     add %o4, 64, %o4
685     @@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
686    
687     180: sub %o4, 8, %g2
688     FREG_LOAD_1(%g2, f0)
689     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
690     - EX_LD_FP(LOAD_BLK(%o4, %f16))
691     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
692     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
693     FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
694     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
695     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
696     FREG_MOVE_1(f30)
697     subcc %g1, 64, %g1
698     add %o4, 64, %o4
699     @@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
700     nop
701    
702     190:
703     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
704     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
705     subcc %g1, 64, %g1
706     - EX_LD_FP(LOAD_BLK(%o4, %f0))
707     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
708     + EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
709     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
710     add %o4, 64, %o4
711     bne,pt %xcc, 1b
712     LOAD(prefetch, %o4 + 64, #one_read)
713     @@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
714     andn %o2, 0xf, %o4
715     and %o2, 0xf, %o2
716     1: subcc %o4, 0x10, %o4
717     - EX_LD(LOAD(ldx, %o1, %o5))
718     + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
719     add %o1, 0x08, %o1
720     - EX_LD(LOAD(ldx, %o1, %g1))
721     + EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
722     sub %o1, 0x08, %o1
723     - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
724     + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
725     add %o1, 0x8, %o1
726     - EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))
727     + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
728     bgu,pt %XCC, 1b
729     add %o1, 0x8, %o1
730     73: andcc %o2, 0x8, %g0
731     be,pt %XCC, 1f
732     nop
733     sub %o2, 0x8, %o2
734     - EX_LD(LOAD(ldx, %o1, %o5))
735     - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
736     + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
737     + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
738     add %o1, 0x8, %o1
739     1: andcc %o2, 0x4, %g0
740     be,pt %XCC, 1f
741     nop
742     sub %o2, 0x4, %o2
743     - EX_LD(LOAD(lduw, %o1, %o5))
744     - EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))
745     + EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
746     + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
747     add %o1, 0x4, %o1
748     1: cmp %o2, 0
749     be,pt %XCC, 85f
750     @@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
751     sub %o2, %g1, %o2
752    
753     1: subcc %g1, 1, %g1
754     - EX_LD(LOAD(ldub, %o1, %o5))
755     - EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))
756     + EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
757     + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
758     bgu,pt %icc, 1b
759     add %o1, 1, %o1
760    
761     @@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
762    
763     8: mov 64, GLOBAL_SPARE
764     andn %o1, 0x7, %o1
765     - EX_LD(LOAD(ldx, %o1, %g2))
766     + EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
767     sub GLOBAL_SPARE, %g1, GLOBAL_SPARE
768     andn %o2, 0x7, %o4
769     sllx %g2, %g1, %g2
770     1: add %o1, 0x8, %o1
771     - EX_LD(LOAD(ldx, %o1, %g3))
772     + EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
773     subcc %o4, 0x8, %o4
774     srlx %g3, GLOBAL_SPARE, %o5
775     or %o5, %g2, %o5
776     - EX_ST(STORE(stx, %o5, %o0))
777     + EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
778     add %o0, 0x8, %o0
779     bgu,pt %icc, 1b
780     sllx %g3, %g1, %g2
781     @@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
782    
783     1:
784     subcc %o2, 4, %o2
785     - EX_LD(LOAD(lduw, %o1, %g1))
786     - EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))
787     + EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
788     + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
789     bgu,pt %XCC, 1b
790     add %o1, 4, %o1
791    
792     @@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
793     .align 32
794     90:
795     subcc %o2, 1, %o2
796     - EX_LD(LOAD(ldub, %o1, %g1))
797     - EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))
798     + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
799     + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
800     bgu,pt %XCC, 90b
801     add %o1, 1, %o1
802     retl
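The NG2 (and, below, NG4 and NG) variants need one extra step that the generic GEN routine does not: before returning the residual count in %o0, each NG2_retl_* tail branches through __restore_asi, which resets %asi to ASI_AIUS, and the FPU-path tails enter via __restore_fp, which first executes VISExitHalf and then falls through into __restore_asi. A pseudo-C restatement (the helper names are hypothetical stand-ins for those two labels):

    static void exit_vis_half(void)    { /* VISExitHalf in the assembly   */ }
    static void restore_user_asi(void) { /* wr %g0, ASI_AIUS, %asi        */ }

    static unsigned long ng2_fixup_tail(unsigned long residual, int used_fpu)
    {
            if (used_fpu)
                    exit_vis_half();        /* __restore_fp falls through... */
            restore_user_asi();             /* ...into __restore_asi         */
            return residual;                /* exact bytes not copied        */
    }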
803     diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
804     index 2e8ee7ad07a9..16a286c1a528 100644
805     --- a/arch/sparc/lib/NG4copy_from_user.S
806     +++ b/arch/sparc/lib/NG4copy_from_user.S
807     @@ -3,19 +3,19 @@
808     * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
809     */
810    
811     -#define EX_LD(x) \
812     +#define EX_LD(x, y) \
813     98: x; \
814     .section __ex_table,"a";\
815     .align 4; \
816     - .word 98b, __retl_one_asi;\
817     + .word 98b, y; \
818     .text; \
819     .align 4;
820    
821     -#define EX_LD_FP(x) \
822     +#define EX_LD_FP(x,y) \
823     98: x; \
824     .section __ex_table,"a";\
825     .align 4; \
826     - .word 98b, __retl_one_asi_fp;\
827     + .word 98b, y##_fp; \
828     .text; \
829     .align 4;
830    
831     diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
832     index be0bf4590df8..6b0276ffc858 100644
833     --- a/arch/sparc/lib/NG4copy_to_user.S
834     +++ b/arch/sparc/lib/NG4copy_to_user.S
835     @@ -3,19 +3,19 @@
836     * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
837     */
838    
839     -#define EX_ST(x) \
840     +#define EX_ST(x,y) \
841     98: x; \
842     .section __ex_table,"a";\
843     .align 4; \
844     - .word 98b, __retl_one_asi;\
845     + .word 98b, y; \
846     .text; \
847     .align 4;
848    
849     -#define EX_ST_FP(x) \
850     +#define EX_ST_FP(x,y) \
851     98: x; \
852     .section __ex_table,"a";\
853     .align 4; \
854     - .word 98b, __retl_one_asi_fp;\
855     + .word 98b, y##_fp; \
856     .text; \
857     .align 4;
858    
859     diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
860     index 8e13ee1f4454..75bb93b1437f 100644
861     --- a/arch/sparc/lib/NG4memcpy.S
862     +++ b/arch/sparc/lib/NG4memcpy.S
863     @@ -4,6 +4,7 @@
864     */
865    
866     #ifdef __KERNEL__
867     +#include <linux/linkage.h>
868     #include <asm/visasm.h>
869     #include <asm/asi.h>
870     #define GLOBAL_SPARE %g7
871     @@ -46,22 +47,19 @@
872     #endif
873    
874     #ifndef EX_LD
875     -#define EX_LD(x) x
876     +#define EX_LD(x,y) x
877     #endif
878     #ifndef EX_LD_FP
879     -#define EX_LD_FP(x) x
880     +#define EX_LD_FP(x,y) x
881     #endif
882    
883     #ifndef EX_ST
884     -#define EX_ST(x) x
885     +#define EX_ST(x,y) x
886     #endif
887     #ifndef EX_ST_FP
888     -#define EX_ST_FP(x) x
889     +#define EX_ST_FP(x,y) x
890     #endif
891    
892     -#ifndef EX_RETVAL
893     -#define EX_RETVAL(x) x
894     -#endif
895    
896     #ifndef LOAD
897     #define LOAD(type,addr,dest) type [addr], dest
898     @@ -94,6 +92,158 @@
899     .register %g3,#scratch
900    
901     .text
902     +#ifndef EX_RETVAL
903     +#define EX_RETVAL(x) x
904     +__restore_asi_fp:
905     + VISExitHalf
906     +__restore_asi:
907     + retl
908     + wr %g0, ASI_AIUS, %asi
909     +
910     +ENTRY(NG4_retl_o2)
911     + ba,pt %xcc, __restore_asi
912     + mov %o2, %o0
913     +ENDPROC(NG4_retl_o2)
914     +ENTRY(NG4_retl_o2_plus_1)
915     + ba,pt %xcc, __restore_asi
916     + add %o2, 1, %o0
917     +ENDPROC(NG4_retl_o2_plus_1)
918     +ENTRY(NG4_retl_o2_plus_4)
919     + ba,pt %xcc, __restore_asi
920     + add %o2, 4, %o0
921     +ENDPROC(NG4_retl_o2_plus_4)
922     +ENTRY(NG4_retl_o2_plus_o5)
923     + ba,pt %xcc, __restore_asi
924     + add %o2, %o5, %o0
925     +ENDPROC(NG4_retl_o2_plus_o5)
926     +ENTRY(NG4_retl_o2_plus_o5_plus_4)
927     + add %o5, 4, %o5
928     + ba,pt %xcc, __restore_asi
929     + add %o2, %o5, %o0
930     +ENDPROC(NG4_retl_o2_plus_o5_plus_4)
931     +ENTRY(NG4_retl_o2_plus_o5_plus_8)
932     + add %o5, 8, %o5
933     + ba,pt %xcc, __restore_asi
934     + add %o2, %o5, %o0
935     +ENDPROC(NG4_retl_o2_plus_o5_plus_8)
936     +ENTRY(NG4_retl_o2_plus_o5_plus_16)
937     + add %o5, 16, %o5
938     + ba,pt %xcc, __restore_asi
939     + add %o2, %o5, %o0
940     +ENDPROC(NG4_retl_o2_plus_o5_plus_16)
941     +ENTRY(NG4_retl_o2_plus_o5_plus_24)
942     + add %o5, 24, %o5
943     + ba,pt %xcc, __restore_asi
944     + add %o2, %o5, %o0
945     +ENDPROC(NG4_retl_o2_plus_o5_plus_24)
946     +ENTRY(NG4_retl_o2_plus_o5_plus_32)
947     + add %o5, 32, %o5
948     + ba,pt %xcc, __restore_asi
949     + add %o2, %o5, %o0
950     +ENDPROC(NG4_retl_o2_plus_o5_plus_32)
951     +ENTRY(NG4_retl_o2_plus_g1)
952     + ba,pt %xcc, __restore_asi
953     + add %o2, %g1, %o0
954     +ENDPROC(NG4_retl_o2_plus_g1)
955     +ENTRY(NG4_retl_o2_plus_g1_plus_1)
956     + add %g1, 1, %g1
957     + ba,pt %xcc, __restore_asi
958     + add %o2, %g1, %o0
959     +ENDPROC(NG4_retl_o2_plus_g1_plus_1)
960     +ENTRY(NG4_retl_o2_plus_g1_plus_8)
961     + add %g1, 8, %g1
962     + ba,pt %xcc, __restore_asi
963     + add %o2, %g1, %o0
964     +ENDPROC(NG4_retl_o2_plus_g1_plus_8)
965     +ENTRY(NG4_retl_o2_plus_o4)
966     + ba,pt %xcc, __restore_asi
967     + add %o2, %o4, %o0
968     +ENDPROC(NG4_retl_o2_plus_o4)
969     +ENTRY(NG4_retl_o2_plus_o4_plus_8)
970     + add %o4, 8, %o4
971     + ba,pt %xcc, __restore_asi
972     + add %o2, %o4, %o0
973     +ENDPROC(NG4_retl_o2_plus_o4_plus_8)
974     +ENTRY(NG4_retl_o2_plus_o4_plus_16)
975     + add %o4, 16, %o4
976     + ba,pt %xcc, __restore_asi
977     + add %o2, %o4, %o0
978     +ENDPROC(NG4_retl_o2_plus_o4_plus_16)
979     +ENTRY(NG4_retl_o2_plus_o4_plus_24)
980     + add %o4, 24, %o4
981     + ba,pt %xcc, __restore_asi
982     + add %o2, %o4, %o0
983     +ENDPROC(NG4_retl_o2_plus_o4_plus_24)
984     +ENTRY(NG4_retl_o2_plus_o4_plus_32)
985     + add %o4, 32, %o4
986     + ba,pt %xcc, __restore_asi
987     + add %o2, %o4, %o0
988     +ENDPROC(NG4_retl_o2_plus_o4_plus_32)
989     +ENTRY(NG4_retl_o2_plus_o4_plus_40)
990     + add %o4, 40, %o4
991     + ba,pt %xcc, __restore_asi
992     + add %o2, %o4, %o0
993     +ENDPROC(NG4_retl_o2_plus_o4_plus_40)
994     +ENTRY(NG4_retl_o2_plus_o4_plus_48)
995     + add %o4, 48, %o4
996     + ba,pt %xcc, __restore_asi
997     + add %o2, %o4, %o0
998     +ENDPROC(NG4_retl_o2_plus_o4_plus_48)
999     +ENTRY(NG4_retl_o2_plus_o4_plus_56)
1000     + add %o4, 56, %o4
1001     + ba,pt %xcc, __restore_asi
1002     + add %o2, %o4, %o0
1003     +ENDPROC(NG4_retl_o2_plus_o4_plus_56)
1004     +ENTRY(NG4_retl_o2_plus_o4_plus_64)
1005     + add %o4, 64, %o4
1006     + ba,pt %xcc, __restore_asi
1007     + add %o2, %o4, %o0
1008     +ENDPROC(NG4_retl_o2_plus_o4_plus_64)
1009     +ENTRY(NG4_retl_o2_plus_o4_fp)
1010     + ba,pt %xcc, __restore_asi_fp
1011     + add %o2, %o4, %o0
1012     +ENDPROC(NG4_retl_o2_plus_o4_fp)
1013     +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
1014     + add %o4, 8, %o4
1015     + ba,pt %xcc, __restore_asi_fp
1016     + add %o2, %o4, %o0
1017     +ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
1018     +ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
1019     + add %o4, 16, %o4
1020     + ba,pt %xcc, __restore_asi_fp
1021     + add %o2, %o4, %o0
1022     +ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
1023     +ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
1024     + add %o4, 24, %o4
1025     + ba,pt %xcc, __restore_asi_fp
1026     + add %o2, %o4, %o0
1027     +ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
1028     +ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
1029     + add %o4, 32, %o4
1030     + ba,pt %xcc, __restore_asi_fp
1031     + add %o2, %o4, %o0
1032     +ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
1033     +ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
1034     + add %o4, 40, %o4
1035     + ba,pt %xcc, __restore_asi_fp
1036     + add %o2, %o4, %o0
1037     +ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
1038     +ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
1039     + add %o4, 48, %o4
1040     + ba,pt %xcc, __restore_asi_fp
1041     + add %o2, %o4, %o0
1042     +ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
1043     +ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
1044     + add %o4, 56, %o4
1045     + ba,pt %xcc, __restore_asi_fp
1046     + add %o2, %o4, %o0
1047     +ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
1048     +ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
1049     + add %o4, 64, %o4
1050     + ba,pt %xcc, __restore_asi_fp
1051     + add %o2, %o4, %o0
1052     +ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
1053     +#endif
1054     .align 64
1055    
1056     .globl FUNC_NAME
1057     @@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1058     brz,pt %g1, 51f
1059     sub %o2, %g1, %o2
1060    
1061     -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
1062     +
1063     +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1064     add %o1, 1, %o1
1065     subcc %g1, 1, %g1
1066     add %o0, 1, %o0
1067     bne,pt %icc, 1b
1068     - EX_ST(STORE(stb, %g2, %o0 - 0x01))
1069     + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
1070    
1071     51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
1072     LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
1073     @@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1074     brz,pt %g1, .Llarge_aligned
1075     sub %o2, %g1, %o2
1076    
1077     -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
1078     +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1079     add %o1, 8, %o1
1080     subcc %g1, 8, %g1
1081     add %o0, 8, %o0
1082     bne,pt %icc, 1b
1083     - EX_ST(STORE(stx, %g2, %o0 - 0x08))
1084     + EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
1085    
1086     .Llarge_aligned:
1087     /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
1088     andn %o2, 0x3f, %o4
1089     sub %o2, %o4, %o2
1090    
1091     -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1092     +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
1093     add %o1, 0x40, %o1
1094     - EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
1095     + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
1096     subcc %o4, 0x40, %o4
1097     - EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
1098     - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
1099     - EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
1100     - EX_ST(STORE_INIT(%g1, %o0))
1101     + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
1102     + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
1103     + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
1104     + EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
1105     add %o0, 0x08, %o0
1106     - EX_ST(STORE_INIT(%g2, %o0))
1107     + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
1108     add %o0, 0x08, %o0
1109     - EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
1110     - EX_ST(STORE_INIT(%g3, %o0))
1111     + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
1112     + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
1113     add %o0, 0x08, %o0
1114     - EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
1115     - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
1116     + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
1117     + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
1118     add %o0, 0x08, %o0
1119     - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
1120     - EX_ST(STORE_INIT(%o5, %o0))
1121     + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
1122     + EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
1123     add %o0, 0x08, %o0
1124     - EX_ST(STORE_INIT(%g2, %o0))
1125     + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
1126     add %o0, 0x08, %o0
1127     - EX_ST(STORE_INIT(%g3, %o0))
1128     + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
1129     add %o0, 0x08, %o0
1130     - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
1131     + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
1132     add %o0, 0x08, %o0
1133     bne,pt %icc, 1b
1134     LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
1135     @@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1136     sub %o2, %o4, %o2
1137     alignaddr %o1, %g0, %g1
1138     add %o1, %o4, %o1
1139     - EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
1140     -1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
1141     + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
1142     +1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
1143     subcc %o4, 0x40, %o4
1144     - EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
1145     - EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
1146     - EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
1147     - EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
1148     - EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
1149     - EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
1150     + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
1151     + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
1152     + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
1153     + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
1154     + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
1155     + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
1156     faligndata %f0, %f2, %f16
1157     - EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
1158     + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
1159     faligndata %f2, %f4, %f18
1160     add %g1, 0x40, %g1
1161     faligndata %f4, %f6, %f20
1162     @@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1163     faligndata %f10, %f12, %f26
1164     faligndata %f12, %f14, %f28
1165     faligndata %f14, %f0, %f30
1166     - EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
1167     - EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
1168     - EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
1169     - EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
1170     - EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
1171     - EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
1172     - EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
1173     - EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
1174     + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
1175     + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
1176     + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
1177     + EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
1178     + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
1179     + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
1180     + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
1181     + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
1182     add %o0, 0x40, %o0
1183     bne,pt %icc, 1b
1184     LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
1185     @@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1186     andncc %o2, 0x20 - 1, %o5
1187     be,pn %icc, 2f
1188     sub %o2, %o5, %o2
1189     -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1190     - EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
1191     - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
1192     - EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
1193     +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1194     + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
1195     + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
1196     + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
1197     add %o1, 0x20, %o1
1198     subcc %o5, 0x20, %o5
1199     - EX_ST(STORE(stx, %g1, %o0 + 0x00))
1200     - EX_ST(STORE(stx, %g2, %o0 + 0x08))
1201     - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
1202     - EX_ST(STORE(stx, %o4, %o0 + 0x18))
1203     + EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
1204     + EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
1205     + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24)
1206     + EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
1207     bne,pt %icc, 1b
1208     add %o0, 0x20, %o0
1209     2: andcc %o2, 0x18, %o5
1210     be,pt %icc, 3f
1211     sub %o2, %o5, %o2
1212     -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1213     +
1214     +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1215     add %o1, 0x08, %o1
1216     add %o0, 0x08, %o0
1217     subcc %o5, 0x08, %o5
1218     bne,pt %icc, 1b
1219     - EX_ST(STORE(stx, %g1, %o0 - 0x08))
1220     + EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
1221     3: brz,pt %o2, .Lexit
1222     cmp %o2, 0x04
1223     bl,pn %icc, .Ltiny
1224     nop
1225     - EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
1226     + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
1227     add %o1, 0x04, %o1
1228     add %o0, 0x04, %o0
1229     subcc %o2, 0x04, %o2
1230     bne,pn %icc, .Ltiny
1231     - EX_ST(STORE(stw, %g1, %o0 - 0x04))
1232     + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
1233     ba,a,pt %icc, .Lexit
1234     .Lmedium_unaligned:
1235     /* First get dest 8 byte aligned. */
1236     @@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1237     brz,pt %g1, 2f
1238     sub %o2, %g1, %o2
1239    
1240     -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
1241     +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1242     add %o1, 1, %o1
1243     subcc %g1, 1, %g1
1244     add %o0, 1, %o0
1245     bne,pt %icc, 1b
1246     - EX_ST(STORE(stb, %g2, %o0 - 0x01))
1247     + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
1248     2:
1249     and %o1, 0x7, %g1
1250     brz,pn %g1, .Lmedium_noprefetch
1251     @@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1252     mov 64, %g2
1253     sub %g2, %g1, %g2
1254     andn %o1, 0x7, %o1
1255     - EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
1256     + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
1257     sllx %o4, %g1, %o4
1258     andn %o2, 0x08 - 1, %o5
1259     sub %o2, %o5, %o2
1260     -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
1261     +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
1262     add %o1, 0x08, %o1
1263     subcc %o5, 0x08, %o5
1264     srlx %g3, %g2, GLOBAL_SPARE
1265     or GLOBAL_SPARE, %o4, GLOBAL_SPARE
1266     - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
1267     + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
1268     add %o0, 0x08, %o0
1269     bne,pt %icc, 1b
1270     sllx %g3, %g1, %o4
1271     @@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1272     ba,pt %icc, .Lsmall_unaligned
1273    
1274     .Ltiny:
1275     - EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
1276     + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
1277     subcc %o2, 1, %o2
1278     be,pn %icc, .Lexit
1279     - EX_ST(STORE(stb, %g1, %o0 + 0x00))
1280     - EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
1281     + EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
1282     + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
1283     subcc %o2, 1, %o2
1284     be,pn %icc, .Lexit
1285     - EX_ST(STORE(stb, %g1, %o0 + 0x01))
1286     - EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
1287     + EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
1288     + EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
1289     ba,pt %icc, .Lexit
1290     - EX_ST(STORE(stb, %g1, %o0 + 0x02))
1291     + EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
1292    
1293     .Lsmall:
1294     andcc %g2, 0x3, %g0
1295     @@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1296     andn %o2, 0x4 - 1, %o5
1297     sub %o2, %o5, %o2
1298     1:
1299     - EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
1300     + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1301     add %o1, 0x04, %o1
1302     subcc %o5, 0x04, %o5
1303     add %o0, 0x04, %o0
1304     bne,pt %icc, 1b
1305     - EX_ST(STORE(stw, %g1, %o0 - 0x04))
1306     + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
1307     brz,pt %o2, .Lexit
1308     nop
1309     ba,a,pt %icc, .Ltiny
1310    
1311     .Lsmall_unaligned:
1312     -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
1313     +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
1314     add %o1, 1, %o1
1315     add %o0, 1, %o0
1316     subcc %o2, 1, %o2
1317     bne,pt %icc, 1b
1318     - EX_ST(STORE(stb, %g1, %o0 - 0x01))
1319     + EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
1320     ba,a,pt %icc, .Lexit
1321     .size FUNC_NAME, .-FUNC_NAME
1322     diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S
1323     index 5d1e4d1ac21e..9cd42fcbc781 100644
1324     --- a/arch/sparc/lib/NGcopy_from_user.S
1325     +++ b/arch/sparc/lib/NGcopy_from_user.S
1326     @@ -3,11 +3,11 @@
1327     * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
1328     */
1329    
1330     -#define EX_LD(x) \
1331     +#define EX_LD(x,y) \
1332     98: x; \
1333     .section __ex_table,"a";\
1334     .align 4; \
1335     - .word 98b, __ret_one_asi;\
1336     + .word 98b, y; \
1337     .text; \
1338     .align 4;
1339    
1340     diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S
1341     index ff630dcb273c..5c358afd464e 100644
1342     --- a/arch/sparc/lib/NGcopy_to_user.S
1343     +++ b/arch/sparc/lib/NGcopy_to_user.S
1344     @@ -3,11 +3,11 @@
1345     * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
1346     */
1347    
1348     -#define EX_ST(x) \
1349     +#define EX_ST(x,y) \
1350     98: x; \
1351     .section __ex_table,"a";\
1352     .align 4; \
1353     - .word 98b, __ret_one_asi;\
1354     + .word 98b, y; \
1355     .text; \
1356     .align 4;
1357    
1358     diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S
1359     index 96a14caf6966..d88c4ed50a00 100644
1360     --- a/arch/sparc/lib/NGmemcpy.S
1361     +++ b/arch/sparc/lib/NGmemcpy.S
1362     @@ -4,6 +4,7 @@
1363     */
1364    
1365     #ifdef __KERNEL__
1366     +#include <linux/linkage.h>
1367     #include <asm/asi.h>
1368     #include <asm/thread_info.h>
1369     #define GLOBAL_SPARE %g7
1370     @@ -27,15 +28,11 @@
1371     #endif
1372    
1373     #ifndef EX_LD
1374     -#define EX_LD(x) x
1375     +#define EX_LD(x,y) x
1376     #endif
1377    
1378     #ifndef EX_ST
1379     -#define EX_ST(x) x
1380     -#endif
1381     -
1382     -#ifndef EX_RETVAL
1383     -#define EX_RETVAL(x) x
1384     +#define EX_ST(x,y) x
1385     #endif
1386    
1387     #ifndef LOAD
1388     @@ -79,6 +76,92 @@
1389     .register %g3,#scratch
1390    
1391     .text
1392     +#ifndef EX_RETVAL
1393     +#define EX_RETVAL(x) x
1394     +__restore_asi:
1395     + ret
1396     + wr %g0, ASI_AIUS, %asi
1397     + restore
1398     +ENTRY(NG_ret_i2_plus_i4_plus_1)
1399     + ba,pt %xcc, __restore_asi
1400     + add %i2, %i5, %i0
1401     +ENDPROC(NG_ret_i2_plus_i4_plus_1)
1402     +ENTRY(NG_ret_i2_plus_g1)
1403     + ba,pt %xcc, __restore_asi
1404     + add %i2, %g1, %i0
1405     +ENDPROC(NG_ret_i2_plus_g1)
1406     +ENTRY(NG_ret_i2_plus_g1_minus_8)
1407     + sub %g1, 8, %g1
1408     + ba,pt %xcc, __restore_asi
1409     + add %i2, %g1, %i0
1410     +ENDPROC(NG_ret_i2_plus_g1_minus_8)
1411     +ENTRY(NG_ret_i2_plus_g1_minus_16)
1412     + sub %g1, 16, %g1
1413     + ba,pt %xcc, __restore_asi
1414     + add %i2, %g1, %i0
1415     +ENDPROC(NG_ret_i2_plus_g1_minus_16)
1416     +ENTRY(NG_ret_i2_plus_g1_minus_24)
1417     + sub %g1, 24, %g1
1418     + ba,pt %xcc, __restore_asi
1419     + add %i2, %g1, %i0
1420     +ENDPROC(NG_ret_i2_plus_g1_minus_24)
1421     +ENTRY(NG_ret_i2_plus_g1_minus_32)
1422     + sub %g1, 32, %g1
1423     + ba,pt %xcc, __restore_asi
1424     + add %i2, %g1, %i0
1425     +ENDPROC(NG_ret_i2_plus_g1_minus_32)
1426     +ENTRY(NG_ret_i2_plus_g1_minus_40)
1427     + sub %g1, 40, %g1
1428     + ba,pt %xcc, __restore_asi
1429     + add %i2, %g1, %i0
1430     +ENDPROC(NG_ret_i2_plus_g1_minus_40)
1431     +ENTRY(NG_ret_i2_plus_g1_minus_48)
1432     + sub %g1, 48, %g1
1433     + ba,pt %xcc, __restore_asi
1434     + add %i2, %g1, %i0
1435     +ENDPROC(NG_ret_i2_plus_g1_minus_48)
1436     +ENTRY(NG_ret_i2_plus_g1_minus_56)
1437     + sub %g1, 56, %g1
1438     + ba,pt %xcc, __restore_asi
1439     + add %i2, %g1, %i0
1440     +ENDPROC(NG_ret_i2_plus_g1_minus_56)
1441     +ENTRY(NG_ret_i2_plus_i4)
1442     + ba,pt %xcc, __restore_asi
1443     + add %i2, %i4, %i0
1444     +ENDPROC(NG_ret_i2_plus_i4)
1445     +ENTRY(NG_ret_i2_plus_i4_minus_8)
1446     + sub %i4, 8, %i4
1447     + ba,pt %xcc, __restore_asi
1448     + add %i2, %i4, %i0
1449     +ENDPROC(NG_ret_i2_plus_i4_minus_8)
1450     +ENTRY(NG_ret_i2_plus_8)
1451     + ba,pt %xcc, __restore_asi
1452     + add %i2, 8, %i0
1453     +ENDPROC(NG_ret_i2_plus_8)
1454     +ENTRY(NG_ret_i2_plus_4)
1455     + ba,pt %xcc, __restore_asi
1456     + add %i2, 4, %i0
1457     +ENDPROC(NG_ret_i2_plus_4)
1458     +ENTRY(NG_ret_i2_plus_1)
1459     + ba,pt %xcc, __restore_asi
1460     + add %i2, 1, %i0
1461     +ENDPROC(NG_ret_i2_plus_1)
1462     +ENTRY(NG_ret_i2_plus_g1_plus_1)
1463     + add %g1, 1, %g1
1464     + ba,pt %xcc, __restore_asi
1465     + add %i2, %g1, %i0
1466     +ENDPROC(NG_ret_i2_plus_g1_plus_1)
1467     +ENTRY(NG_ret_i2)
1468     + ba,pt %xcc, __restore_asi
1469     + mov %i2, %i0
1470     +ENDPROC(NG_ret_i2)
1471     +ENTRY(NG_ret_i2_and_7_plus_i4)
1472     + and %i2, 7, %i2
1473     + ba,pt %xcc, __restore_asi
1474     + add %i2, %i4, %i0
1475     +ENDPROC(NG_ret_i2_and_7_plus_i4)
1476     +#endif
1477     +
1478     .align 64
1479    
1480     .globl FUNC_NAME
1481     @@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1482     sub %g0, %i4, %i4 ! bytes to align dst
1483     sub %i2, %i4, %i2
1484     1: subcc %i4, 1, %i4
1485     - EX_LD(LOAD(ldub, %i1, %g1))
1486     - EX_ST(STORE(stb, %g1, %o0))
1487     + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
1488     + EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
1489     add %i1, 1, %i1
1490     bne,pt %XCC, 1b
1491     add %o0, 1, %o0
1492     @@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1493     and %i4, 0x7, GLOBAL_SPARE
1494     sll GLOBAL_SPARE, 3, GLOBAL_SPARE
1495     mov 64, %i5
1496     - EX_LD(LOAD_TWIN(%i1, %g2, %g3))
1497     + EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
1498     sub %i5, GLOBAL_SPARE, %i5
1499     mov 16, %o4
1500     mov 32, %o5
1501     @@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1502     srlx WORD3, PRE_SHIFT, TMP; \
1503     or WORD2, TMP, WORD2;
1504    
1505     -8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
1506     +8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
1507     MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
1508     LOAD(prefetch, %i1 + %i3, #one_read)
1509    
1510     - EX_ST(STORE_INIT(%g2, %o0 + 0x00))
1511     - EX_ST(STORE_INIT(%g3, %o0 + 0x08))
1512     + EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
1513     + EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1514    
1515     - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
1516     + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
1517     MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
1518    
1519     - EX_ST(STORE_INIT(%o2, %o0 + 0x10))
1520     - EX_ST(STORE_INIT(%o3, %o0 + 0x18))
1521     + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1522     + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1523    
1524     - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1525     + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1526     MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
1527    
1528     - EX_ST(STORE_INIT(%g2, %o0 + 0x20))
1529     - EX_ST(STORE_INIT(%g3, %o0 + 0x28))
1530     + EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1531     + EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1532    
1533     - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
1534     + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
1535     add %i1, 64, %i1
1536     MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
1537    
1538     - EX_ST(STORE_INIT(%o2, %o0 + 0x30))
1539     - EX_ST(STORE_INIT(%o3, %o0 + 0x38))
1540     + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1541     + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1542    
1543     subcc %g1, 64, %g1
1544     bne,pt %XCC, 8b
1545     @@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1546     ba,pt %XCC, 60f
1547     add %i1, %i4, %i1
1548    
1549     -9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
1550     +9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
1551     MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
1552     LOAD(prefetch, %i1 + %i3, #one_read)
1553    
1554     - EX_ST(STORE_INIT(%g3, %o0 + 0x00))
1555     - EX_ST(STORE_INIT(%o2, %o0 + 0x08))
1556     + EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
1557     + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1558    
1559     - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
1560     + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
1561     MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
1562    
1563     - EX_ST(STORE_INIT(%o3, %o0 + 0x10))
1564     - EX_ST(STORE_INIT(%g2, %o0 + 0x18))
1565     + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1566     + EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1567    
1568     - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1569     + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1570     MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
1571    
1572     - EX_ST(STORE_INIT(%g3, %o0 + 0x20))
1573     - EX_ST(STORE_INIT(%o2, %o0 + 0x28))
1574     + EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1575     + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1576    
1577     - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
1578     + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
1579     add %i1, 64, %i1
1580     MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
1581    
1582     - EX_ST(STORE_INIT(%o3, %o0 + 0x30))
1583     - EX_ST(STORE_INIT(%g2, %o0 + 0x38))
1584     + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1585     + EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1586    
1587     subcc %g1, 64, %g1
1588     bne,pt %XCC, 9b
1589     @@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1590     * one twin load ahead, then add 8 back into source when
1591     * we finish the loop.
1592     */
1593     - EX_LD(LOAD_TWIN(%i1, %o4, %o5))
1594     + EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
1595     mov 16, %o7
1596     mov 32, %g2
1597     mov 48, %g3
1598     mov 64, %o1
1599     -1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1600     +1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
1601     LOAD(prefetch, %i1 + %o1, #one_read)
1602     - EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line
1603     - EX_ST(STORE_INIT(%o2, %o0 + 0x08))
1604     - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
1605     - EX_ST(STORE_INIT(%o3, %o0 + 0x10))
1606     - EX_ST(STORE_INIT(%o4, %o0 + 0x18))
1607     - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
1608     - EX_ST(STORE_INIT(%o5, %o0 + 0x20))
1609     - EX_ST(STORE_INIT(%o2, %o0 + 0x28))
1610     - EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5))
1611     + EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
1612     + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1613     + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
1614     + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1615     + EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1616     + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1617     + EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1618     + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1619     + EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
1620     add %i1, 64, %i1
1621     - EX_ST(STORE_INIT(%o3, %o0 + 0x30))
1622     - EX_ST(STORE_INIT(%o4, %o0 + 0x38))
1623     + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1624     + EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1625     subcc %g1, 64, %g1
1626     bne,pt %XCC, 1b
1627     add %o0, 64, %o0
1628     @@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1629     mov 32, %g2
1630     mov 48, %g3
1631     mov 64, %o1
1632     -1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5))
1633     - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1634     +1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
1635     + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
1636     LOAD(prefetch, %i1 + %o1, #one_read)
1637     - EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line
1638     - EX_ST(STORE_INIT(%o5, %o0 + 0x08))
1639     - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
1640     - EX_ST(STORE_INIT(%o2, %o0 + 0x10))
1641     - EX_ST(STORE_INIT(%o3, %o0 + 0x18))
1642     - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
1643     + EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
1644     + EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1645     + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
1646     + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1647     + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1648     + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1649     add %i1, 64, %i1
1650     - EX_ST(STORE_INIT(%o4, %o0 + 0x20))
1651     - EX_ST(STORE_INIT(%o5, %o0 + 0x28))
1652     - EX_ST(STORE_INIT(%o2, %o0 + 0x30))
1653     - EX_ST(STORE_INIT(%o3, %o0 + 0x38))
1654     + EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1655     + EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1656     + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1657     + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1658     subcc %g1, 64, %g1
1659     bne,pt %XCC, 1b
1660     add %o0, 64, %o0
1661     @@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1662     andn %i2, 0xf, %i4
1663     and %i2, 0xf, %i2
1664     1: subcc %i4, 0x10, %i4
1665     - EX_LD(LOAD(ldx, %i1, %o4))
1666     + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
1667     add %i1, 0x08, %i1
1668     - EX_LD(LOAD(ldx, %i1, %g1))
1669     + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
1670     sub %i1, 0x08, %i1
1671     - EX_ST(STORE(stx, %o4, %i1 + %i3))
1672     + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
1673     add %i1, 0x8, %i1
1674     - EX_ST(STORE(stx, %g1, %i1 + %i3))
1675     + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
1676     bgu,pt %XCC, 1b
1677     add %i1, 0x8, %i1
1678     73: andcc %i2, 0x8, %g0
1679     be,pt %XCC, 1f
1680     nop
1681     sub %i2, 0x8, %i2
1682     - EX_LD(LOAD(ldx, %i1, %o4))
1683     - EX_ST(STORE(stx, %o4, %i1 + %i3))
1684     + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
1685     + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
1686     add %i1, 0x8, %i1
1687     1: andcc %i2, 0x4, %g0
1688     be,pt %XCC, 1f
1689     nop
1690     sub %i2, 0x4, %i2
1691     - EX_LD(LOAD(lduw, %i1, %i5))
1692     - EX_ST(STORE(stw, %i5, %i1 + %i3))
1693     + EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
1694     + EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
1695     add %i1, 0x4, %i1
1696     1: cmp %i2, 0
1697     be,pt %XCC, 85f
1698     @@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1699     sub %i2, %g1, %i2
1700    
1701     1: subcc %g1, 1, %g1
1702     - EX_LD(LOAD(ldub, %i1, %i5))
1703     - EX_ST(STORE(stb, %i5, %i1 + %i3))
1704     + EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
1705     + EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
1706     bgu,pt %icc, 1b
1707     add %i1, 1, %i1
1708    
1709     @@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1710    
1711     8: mov 64, %i3
1712     andn %i1, 0x7, %i1
1713     - EX_LD(LOAD(ldx, %i1, %g2))
1714     + EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
1715     sub %i3, %g1, %i3
1716     andn %i2, 0x7, %i4
1717     sllx %g2, %g1, %g2
1718     1: add %i1, 0x8, %i1
1719     - EX_LD(LOAD(ldx, %i1, %g3))
1720     + EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
1721     subcc %i4, 0x8, %i4
1722     srlx %g3, %i3, %i5
1723     or %i5, %g2, %i5
1724     - EX_ST(STORE(stx, %i5, %o0))
1725     + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
1726     add %o0, 0x8, %o0
1727     bgu,pt %icc, 1b
1728     sllx %g3, %g1, %g2
1729     @@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1730    
1731     1:
1732     subcc %i2, 4, %i2
1733     - EX_LD(LOAD(lduw, %i1, %g1))
1734     - EX_ST(STORE(stw, %g1, %i1 + %i3))
1735     + EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
1736     + EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
1737     bgu,pt %XCC, 1b
1738     add %i1, 4, %i1
1739    
1740     @@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1741     .align 32
1742     90:
1743     subcc %i2, 1, %i2
1744     - EX_LD(LOAD(ldub, %i1, %g1))
1745     - EX_ST(STORE(stb, %g1, %i1 + %i3))
1746     + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
1747     + EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
1748     bgu,pt %XCC, 90b
1749     add %i1, 1, %i1
1750     ret
1751     diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S
1752     index ecc5692fa2b4..bb6ff73229e3 100644
1753     --- a/arch/sparc/lib/U1copy_from_user.S
1754     +++ b/arch/sparc/lib/U1copy_from_user.S
1755     @@ -3,19 +3,19 @@
1756     * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
1757     */
1758    
1759     -#define EX_LD(x) \
1760     +#define EX_LD(x,y) \
1761     98: x; \
1762     .section __ex_table,"a";\
1763     .align 4; \
1764     - .word 98b, __retl_one; \
1765     + .word 98b, y; \
1766     .text; \
1767     .align 4;
1768    
1769     -#define EX_LD_FP(x) \
1770     +#define EX_LD_FP(x,y) \
1771     98: x; \
1772     .section __ex_table,"a";\
1773     .align 4; \
1774     - .word 98b, __retl_one_fp;\
1775     + .word 98b, y; \
1776     .text; \
1777     .align 4;
1778    
1779     diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
1780     index 9eea392e44d4..ed92ce739558 100644
1781     --- a/arch/sparc/lib/U1copy_to_user.S
1782     +++ b/arch/sparc/lib/U1copy_to_user.S
1783     @@ -3,19 +3,19 @@
1784     * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
1785     */
1786    
1787     -#define EX_ST(x) \
1788     +#define EX_ST(x,y) \
1789     98: x; \
1790     .section __ex_table,"a";\
1791     .align 4; \
1792     - .word 98b, __retl_one; \
1793     + .word 98b, y; \
1794     .text; \
1795     .align 4;
1796    
1797     -#define EX_ST_FP(x) \
1798     +#define EX_ST_FP(x,y) \
1799     98: x; \
1800     .section __ex_table,"a";\
1801     .align 4; \
1802     - .word 98b, __retl_one_fp;\
1803     + .word 98b, y; \
1804     .text; \
1805     .align 4;
1806    
1807     diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
1808     index 3e6209ebb7d7..f30d2ab2c371 100644
1809     --- a/arch/sparc/lib/U1memcpy.S
1810     +++ b/arch/sparc/lib/U1memcpy.S
1811     @@ -5,6 +5,7 @@
1812     */
1813    
1814     #ifdef __KERNEL__
1815     +#include <linux/linkage.h>
1816     #include <asm/visasm.h>
1817     #include <asm/asi.h>
1818     #define GLOBAL_SPARE g7
1819     @@ -23,21 +24,17 @@
1820     #endif
1821    
1822     #ifndef EX_LD
1823     -#define EX_LD(x) x
1824     +#define EX_LD(x,y) x
1825     #endif
1826     #ifndef EX_LD_FP
1827     -#define EX_LD_FP(x) x
1828     +#define EX_LD_FP(x,y) x
1829     #endif
1830    
1831     #ifndef EX_ST
1832     -#define EX_ST(x) x
1833     +#define EX_ST(x,y) x
1834     #endif
1835     #ifndef EX_ST_FP
1836     -#define EX_ST_FP(x) x
1837     -#endif
1838     -
1839     -#ifndef EX_RETVAL
1840     -#define EX_RETVAL(x) x
1841     +#define EX_ST_FP(x,y) x
1842     #endif
1843    
1844     #ifndef LOAD
1845     @@ -78,53 +75,169 @@
1846     faligndata %f7, %f8, %f60; \
1847     faligndata %f8, %f9, %f62;
1848    
1849     -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \
1850     - EX_LD_FP(LOAD_BLK(%src, %fdest)); \
1851     - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
1852     - add %src, 0x40, %src; \
1853     - subcc %len, 0x40, %len; \
1854     - be,pn %xcc, jmptgt; \
1855     - add %dest, 0x40, %dest; \
1856     -
1857     -#define LOOP_CHUNK1(src, dest, len, branch_dest) \
1858     - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest)
1859     -#define LOOP_CHUNK2(src, dest, len, branch_dest) \
1860     - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
1861     -#define LOOP_CHUNK3(src, dest, len, branch_dest) \
1862     - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
1863     +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \
1864     + EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \
1865     + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
1866     + add %src, 0x40, %src; \
1867     + subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
1868     + be,pn %xcc, jmptgt; \
1869     + add %dest, 0x40, %dest; \
1870     +
1871     +#define LOOP_CHUNK1(src, dest, branch_dest) \
1872     + MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest)
1873     +#define LOOP_CHUNK2(src, dest, branch_dest) \
1874     + MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
1875     +#define LOOP_CHUNK3(src, dest, branch_dest) \
1876     + MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
1877    
1878     #define DO_SYNC membar #Sync;
1879     #define STORE_SYNC(dest, fsrc) \
1880     - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
1881     + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
1882     add %dest, 0x40, %dest; \
1883     DO_SYNC
1884    
1885     #define STORE_JUMP(dest, fsrc, target) \
1886     - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
1887     + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
1888     add %dest, 0x40, %dest; \
1889     ba,pt %xcc, target; \
1890     nop;
1891    
1892     -#define FINISH_VISCHUNK(dest, f0, f1, left) \
1893     - subcc %left, 8, %left;\
1894     - bl,pn %xcc, 95f; \
1895     - faligndata %f0, %f1, %f48; \
1896     - EX_ST_FP(STORE(std, %f48, %dest)); \
1897     +#define FINISH_VISCHUNK(dest, f0, f1) \
1898     + subcc %g3, 8, %g3; \
1899     + bl,pn %xcc, 95f; \
1900     + faligndata %f0, %f1, %f48; \
1901     + EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \
1902     add %dest, 8, %dest;
1903    
1904     -#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
1905     - subcc %left, 8, %left; \
1906     - bl,pn %xcc, 95f; \
1907     +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
1908     + subcc %g3, 8, %g3; \
1909     + bl,pn %xcc, 95f; \
1910     fsrc2 %f0, %f1;
1911    
1912     -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \
1913     - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
1914     +#define UNEVEN_VISCHUNK(dest, f0, f1) \
1915     + UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
1916     ba,a,pt %xcc, 93f;
1917    
1918     .register %g2,#scratch
1919     .register %g3,#scratch
1920    
1921     .text
1922     +#ifndef EX_RETVAL
1923     +#define EX_RETVAL(x) x
1924     +ENTRY(U1_g1_1_fp)
1925     + VISExitHalf
1926     + add %g1, 1, %g1
1927     + add %g1, %g2, %g1
1928     + retl
1929     + add %g1, %o2, %o0
1930     +ENDPROC(U1_g1_1_fp)
1931     +ENTRY(U1_g2_0_fp)
1932     + VISExitHalf
1933     + retl
1934     + add %g2, %o2, %o0
1935     +ENDPROC(U1_g2_0_fp)
1936     +ENTRY(U1_g2_8_fp)
1937     + VISExitHalf
1938     + add %g2, 8, %g2
1939     + retl
1940     + add %g2, %o2, %o0
1941     +ENDPROC(U1_g2_8_fp)
1942     +ENTRY(U1_gs_0_fp)
1943     + VISExitHalf
1944     + add %GLOBAL_SPARE, %g3, %o0
1945     + retl
1946     + add %o0, %o2, %o0
1947     +ENDPROC(U1_gs_0_fp)
1948     +ENTRY(U1_gs_80_fp)
1949     + VISExitHalf
1950     + add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
1951     + add %GLOBAL_SPARE, %g3, %o0
1952     + retl
1953     + add %o0, %o2, %o0
1954     +ENDPROC(U1_gs_80_fp)
1955     +ENTRY(U1_gs_40_fp)
1956     + VISExitHalf
1957     + add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
1958     + add %GLOBAL_SPARE, %g3, %o0
1959     + retl
1960     + add %o0, %o2, %o0
1961     +ENDPROC(U1_gs_40_fp)
1962     +ENTRY(U1_g3_0_fp)
1963     + VISExitHalf
1964     + retl
1965     + add %g3, %o2, %o0
1966     +ENDPROC(U1_g3_0_fp)
1967     +ENTRY(U1_g3_8_fp)
1968     + VISExitHalf
1969     + add %g3, 8, %g3
1970     + retl
1971     + add %g3, %o2, %o0
1972     +ENDPROC(U1_g3_8_fp)
1973     +ENTRY(U1_o2_0_fp)
1974     + VISExitHalf
1975     + retl
1976     + mov %o2, %o0
1977     +ENDPROC(U1_o2_0_fp)
1978     +ENTRY(U1_o2_1_fp)
1979     + VISExitHalf
1980     + retl
1981     + add %o2, 1, %o0
1982     +ENDPROC(U1_o2_1_fp)
1983     +ENTRY(U1_gs_0)
1984     + VISExitHalf
1985     + retl
1986     + add %GLOBAL_SPARE, %o2, %o0
1987     +ENDPROC(U1_gs_0)
1988     +ENTRY(U1_gs_8)
1989     + VISExitHalf
1990     + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
1991     + retl
1992     + add %GLOBAL_SPARE, 0x8, %o0
1993     +ENDPROC(U1_gs_8)
1994     +ENTRY(U1_gs_10)
1995     + VISExitHalf
1996     + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
1997     + retl
1998     + add %GLOBAL_SPARE, 0x10, %o0
1999     +ENDPROC(U1_gs_10)
2000     +ENTRY(U1_o2_0)
2001     + retl
2002     + mov %o2, %o0
2003     +ENDPROC(U1_o2_0)
2004     +ENTRY(U1_o2_8)
2005     + retl
2006     + add %o2, 8, %o0
2007     +ENDPROC(U1_o2_8)
2008     +ENTRY(U1_o2_4)
2009     + retl
2010     + add %o2, 4, %o0
2011     +ENDPROC(U1_o2_4)
2012     +ENTRY(U1_o2_1)
2013     + retl
2014     + add %o2, 1, %o0
2015     +ENDPROC(U1_o2_1)
2016     +ENTRY(U1_g1_0)
2017     + retl
2018     + add %g1, %o2, %o0
2019     +ENDPROC(U1_g1_0)
2020     +ENTRY(U1_g1_1)
2021     + add %g1, 1, %g1
2022     + retl
2023     + add %g1, %o2, %o0
2024     +ENDPROC(U1_g1_1)
2025     +ENTRY(U1_gs_0_o2_adj)
2026     + and %o2, 7, %o2
2027     + retl
2028     + add %GLOBAL_SPARE, %o2, %o0
2029     +ENDPROC(U1_gs_0_o2_adj)
2030     +ENTRY(U1_gs_8_o2_adj)
2031     + and %o2, 7, %o2
2032     + add %GLOBAL_SPARE, 8, %GLOBAL_SPARE
2033     + retl
2034     + add %GLOBAL_SPARE, %o2, %o0
2035     +ENDPROC(U1_gs_8_o2_adj)
2036     +#endif
2037     +
2038     .align 64
2039    
2040     .globl FUNC_NAME
2041     @@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2042     and %g2, 0x38, %g2
2043    
2044     1: subcc %g1, 0x1, %g1
2045     - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
2046     - EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
2047     + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
2048     + EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
2049     bgu,pt %XCC, 1b
2050     add %o1, 0x1, %o1
2051    
2052     @@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2053     be,pt %icc, 3f
2054     alignaddr %o1, %g0, %o1
2055    
2056     - EX_LD_FP(LOAD(ldd, %o1, %f4))
2057     -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
2058     + EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
2059     +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
2060     add %o1, 0x8, %o1
2061     subcc %g2, 0x8, %g2
2062     faligndata %f4, %f6, %f0
2063     - EX_ST_FP(STORE(std, %f0, %o0))
2064     + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
2065     be,pn %icc, 3f
2066     add %o0, 0x8, %o0
2067    
2068     - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
2069     + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
2070     add %o1, 0x8, %o1
2071     subcc %g2, 0x8, %g2
2072     faligndata %f6, %f4, %f0
2073     - EX_ST_FP(STORE(std, %f0, %o0))
2074     + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
2075     bne,pt %icc, 1b
2076     add %o0, 0x8, %o0
2077    
2078     @@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2079     add %g1, %GLOBAL_SPARE, %g1
2080     subcc %o2, %g3, %o2
2081    
2082     - EX_LD_FP(LOAD_BLK(%o1, %f0))
2083     + EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
2084     add %o1, 0x40, %o1
2085     add %g1, %g3, %g1
2086     - EX_LD_FP(LOAD_BLK(%o1, %f16))
2087     + EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
2088     add %o1, 0x40, %o1
2089     sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
2090     - EX_LD_FP(LOAD_BLK(%o1, %f32))
2091     + EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
2092     add %o1, 0x40, %o1
2093    
2094     /* There are 8 instances of the unrolled loop,
2095     @@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2096    
2097     .align 64
2098     1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
2099     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2100     + LOOP_CHUNK1(o1, o0, 1f)
2101     FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
2102     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2103     + LOOP_CHUNK2(o1, o0, 2f)
2104     FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
2105     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2106     + LOOP_CHUNK3(o1, o0, 3f)
2107     ba,pt %xcc, 1b+4
2108     faligndata %f0, %f2, %f48
2109     1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
2110     @@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2111     STORE_JUMP(o0, f48, 56f)
2112    
2113     1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
2114     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2115     + LOOP_CHUNK1(o1, o0, 1f)
2116     FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
2117     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2118     + LOOP_CHUNK2(o1, o0, 2f)
2119     FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
2120     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2121     + LOOP_CHUNK3(o1, o0, 3f)
2122     ba,pt %xcc, 1b+4
2123     faligndata %f2, %f4, %f48
2124     1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
2125     @@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2126     STORE_JUMP(o0, f48, 57f)
2127    
2128     1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
2129     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2130     + LOOP_CHUNK1(o1, o0, 1f)
2131     FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
2132     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2133     + LOOP_CHUNK2(o1, o0, 2f)
2134     FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
2135     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2136     + LOOP_CHUNK3(o1, o0, 3f)
2137     ba,pt %xcc, 1b+4
2138     faligndata %f4, %f6, %f48
2139     1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
2140     @@ -303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2141     STORE_JUMP(o0, f48, 58f)
2142    
2143     1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
2144     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2145     + LOOP_CHUNK1(o1, o0, 1f)
2146     FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
2147     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2148     + LOOP_CHUNK2(o1, o0, 2f)
2149     FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
2150     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2151     + LOOP_CHUNK3(o1, o0, 3f)
2152     ba,pt %xcc, 1b+4
2153     faligndata %f6, %f8, %f48
2154     1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
2155     @@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2156     STORE_JUMP(o0, f48, 59f)
2157    
2158     1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
2159     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2160     + LOOP_CHUNK1(o1, o0, 1f)
2161     FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
2162     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2163     + LOOP_CHUNK2(o1, o0, 2f)
2164     FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
2165     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2166     + LOOP_CHUNK3(o1, o0, 3f)
2167     ba,pt %xcc, 1b+4
2168     faligndata %f8, %f10, %f48
2169     1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
2170     @@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2171     STORE_JUMP(o0, f48, 60f)
2172    
2173     1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
2174     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2175     + LOOP_CHUNK1(o1, o0, 1f)
2176     FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
2177     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2178     + LOOP_CHUNK2(o1, o0, 2f)
2179     FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
2180     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2181     + LOOP_CHUNK3(o1, o0, 3f)
2182     ba,pt %xcc, 1b+4
2183     faligndata %f10, %f12, %f48
2184     1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
2185     @@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2186     STORE_JUMP(o0, f48, 61f)
2187    
2188     1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
2189     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2190     + LOOP_CHUNK1(o1, o0, 1f)
2191     FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
2192     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2193     + LOOP_CHUNK2(o1, o0, 2f)
2194     FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
2195     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2196     + LOOP_CHUNK3(o1, o0, 3f)
2197     ba,pt %xcc, 1b+4
2198     faligndata %f12, %f14, %f48
2199     1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
2200     @@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2201     STORE_JUMP(o0, f48, 62f)
2202    
2203     1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
2204     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2205     + LOOP_CHUNK1(o1, o0, 1f)
2206     FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2207     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2208     + LOOP_CHUNK2(o1, o0, 2f)
2209     FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
2210     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2211     + LOOP_CHUNK3(o1, o0, 3f)
2212     ba,pt %xcc, 1b+4
2213     faligndata %f14, %f16, %f48
2214     1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2215     @@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2216     FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2217     STORE_JUMP(o0, f48, 63f)
2218    
2219     -40: FINISH_VISCHUNK(o0, f0, f2, g3)
2220     -41: FINISH_VISCHUNK(o0, f2, f4, g3)
2221     -42: FINISH_VISCHUNK(o0, f4, f6, g3)
2222     -43: FINISH_VISCHUNK(o0, f6, f8, g3)
2223     -44: FINISH_VISCHUNK(o0, f8, f10, g3)
2224     -45: FINISH_VISCHUNK(o0, f10, f12, g3)
2225     -46: FINISH_VISCHUNK(o0, f12, f14, g3)
2226     -47: UNEVEN_VISCHUNK(o0, f14, f0, g3)
2227     -48: FINISH_VISCHUNK(o0, f16, f18, g3)
2228     -49: FINISH_VISCHUNK(o0, f18, f20, g3)
2229     -50: FINISH_VISCHUNK(o0, f20, f22, g3)
2230     -51: FINISH_VISCHUNK(o0, f22, f24, g3)
2231     -52: FINISH_VISCHUNK(o0, f24, f26, g3)
2232     -53: FINISH_VISCHUNK(o0, f26, f28, g3)
2233     -54: FINISH_VISCHUNK(o0, f28, f30, g3)
2234     -55: UNEVEN_VISCHUNK(o0, f30, f0, g3)
2235     -56: FINISH_VISCHUNK(o0, f32, f34, g3)
2236     -57: FINISH_VISCHUNK(o0, f34, f36, g3)
2237     -58: FINISH_VISCHUNK(o0, f36, f38, g3)
2238     -59: FINISH_VISCHUNK(o0, f38, f40, g3)
2239     -60: FINISH_VISCHUNK(o0, f40, f42, g3)
2240     -61: FINISH_VISCHUNK(o0, f42, f44, g3)
2241     -62: FINISH_VISCHUNK(o0, f44, f46, g3)
2242     -63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3)
2243     -
2244     -93: EX_LD_FP(LOAD(ldd, %o1, %f2))
2245     +40: FINISH_VISCHUNK(o0, f0, f2)
2246     +41: FINISH_VISCHUNK(o0, f2, f4)
2247     +42: FINISH_VISCHUNK(o0, f4, f6)
2248     +43: FINISH_VISCHUNK(o0, f6, f8)
2249     +44: FINISH_VISCHUNK(o0, f8, f10)
2250     +45: FINISH_VISCHUNK(o0, f10, f12)
2251     +46: FINISH_VISCHUNK(o0, f12, f14)
2252     +47: UNEVEN_VISCHUNK(o0, f14, f0)
2253     +48: FINISH_VISCHUNK(o0, f16, f18)
2254     +49: FINISH_VISCHUNK(o0, f18, f20)
2255     +50: FINISH_VISCHUNK(o0, f20, f22)
2256     +51: FINISH_VISCHUNK(o0, f22, f24)
2257     +52: FINISH_VISCHUNK(o0, f24, f26)
2258     +53: FINISH_VISCHUNK(o0, f26, f28)
2259     +54: FINISH_VISCHUNK(o0, f28, f30)
2260     +55: UNEVEN_VISCHUNK(o0, f30, f0)
2261     +56: FINISH_VISCHUNK(o0, f32, f34)
2262     +57: FINISH_VISCHUNK(o0, f34, f36)
2263     +58: FINISH_VISCHUNK(o0, f36, f38)
2264     +59: FINISH_VISCHUNK(o0, f38, f40)
2265     +60: FINISH_VISCHUNK(o0, f40, f42)
2266     +61: FINISH_VISCHUNK(o0, f42, f44)
2267     +62: FINISH_VISCHUNK(o0, f44, f46)
2268     +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0)
2269     +
2270     +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
2271     add %o1, 8, %o1
2272     subcc %g3, 8, %g3
2273     faligndata %f0, %f2, %f8
2274     - EX_ST_FP(STORE(std, %f8, %o0))
2275     + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
2276     bl,pn %xcc, 95f
2277     add %o0, 8, %o0
2278     - EX_LD_FP(LOAD(ldd, %o1, %f0))
2279     + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
2280     add %o1, 8, %o1
2281     subcc %g3, 8, %g3
2282     faligndata %f2, %f0, %f8
2283     - EX_ST_FP(STORE(std, %f8, %o0))
2284     + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
2285     bge,pt %xcc, 93b
2286     add %o0, 8, %o0
2287    
2288     95: brz,pt %o2, 2f
2289     mov %g1, %o1
2290    
2291     -1: EX_LD_FP(LOAD(ldub, %o1, %o3))
2292     +1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
2293     add %o1, 1, %o1
2294     subcc %o2, 1, %o2
2295     - EX_ST_FP(STORE(stb, %o3, %o0))
2296     + EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
2297     bne,pt %xcc, 1b
2298     add %o0, 1, %o0
2299    
2300     @@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2301    
2302     72: andn %o2, 0xf, %GLOBAL_SPARE
2303     and %o2, 0xf, %o2
2304     -1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
2305     - EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
2306     +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
2307     + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
2308     subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
2309     - EX_ST(STORE(stx, %o5, %o1 + %o3))
2310     + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
2311     add %o1, 0x8, %o1
2312     - EX_ST(STORE(stx, %g1, %o1 + %o3))
2313     + EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
2314     bgu,pt %XCC, 1b
2315     add %o1, 0x8, %o1
2316     73: andcc %o2, 0x8, %g0
2317     be,pt %XCC, 1f
2318     nop
2319     - EX_LD(LOAD(ldx, %o1, %o5))
2320     + EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
2321     sub %o2, 0x8, %o2
2322     - EX_ST(STORE(stx, %o5, %o1 + %o3))
2323     + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
2324     add %o1, 0x8, %o1
2325     1: andcc %o2, 0x4, %g0
2326     be,pt %XCC, 1f
2327     nop
2328     - EX_LD(LOAD(lduw, %o1, %o5))
2329     + EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
2330     sub %o2, 0x4, %o2
2331     - EX_ST(STORE(stw, %o5, %o1 + %o3))
2332     + EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
2333     add %o1, 0x4, %o1
2334     1: cmp %o2, 0
2335     be,pt %XCC, 85f
2336     @@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2337     sub %g0, %g1, %g1
2338     sub %o2, %g1, %o2
2339    
2340     -1: EX_LD(LOAD(ldub, %o1, %o5))
2341     +1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
2342     subcc %g1, 1, %g1
2343     - EX_ST(STORE(stb, %o5, %o1 + %o3))
2344     + EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
2345     bgu,pt %icc, 1b
2346     add %o1, 1, %o1
2347    
2348     @@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2349    
2350     8: mov 64, %o3
2351     andn %o1, 0x7, %o1
2352     - EX_LD(LOAD(ldx, %o1, %g2))
2353     + EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
2354     sub %o3, %g1, %o3
2355     andn %o2, 0x7, %GLOBAL_SPARE
2356     sllx %g2, %g1, %g2
2357     -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
2358     +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
2359     subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
2360     add %o1, 0x8, %o1
2361     srlx %g3, %o3, %o5
2362     or %o5, %g2, %o5
2363     - EX_ST(STORE(stx, %o5, %o0))
2364     + EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
2365     add %o0, 0x8, %o0
2366     bgu,pt %icc, 1b
2367     sllx %g3, %g1, %g2
2368     @@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2369     bne,pn %XCC, 90f
2370     sub %o0, %o1, %o3
2371    
2372     -1: EX_LD(LOAD(lduw, %o1, %g1))
2373     +1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
2374     subcc %o2, 4, %o2
2375     - EX_ST(STORE(stw, %g1, %o1 + %o3))
2376     + EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
2377     bgu,pt %XCC, 1b
2378     add %o1, 4, %o1
2379    
2380     @@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2381     mov EX_RETVAL(%o4), %o0
2382    
2383     .align 32
2384     -90: EX_LD(LOAD(ldub, %o1, %g1))
2385     +90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
2386     subcc %o2, 1, %o2
2387     - EX_ST(STORE(stb, %g1, %o1 + %o3))
2388     + EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
2389     bgu,pt %XCC, 90b
2390     add %o1, 1, %o1
2391     retl
2392     diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S
2393     index 88ad73d86fe4..db73010a1af8 100644
2394     --- a/arch/sparc/lib/U3copy_from_user.S
2395     +++ b/arch/sparc/lib/U3copy_from_user.S
2396     @@ -3,19 +3,19 @@
2397     * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
2398     */
2399    
2400     -#define EX_LD(x) \
2401     +#define EX_LD(x,y) \
2402     98: x; \
2403     .section __ex_table,"a";\
2404     .align 4; \
2405     - .word 98b, __retl_one; \
2406     + .word 98b, y; \
2407     .text; \
2408     .align 4;
2409    
2410     -#define EX_LD_FP(x) \
2411     +#define EX_LD_FP(x,y) \
2412     98: x; \
2413     .section __ex_table,"a";\
2414     .align 4; \
2415     - .word 98b, __retl_one_fp;\
2416     + .word 98b, y##_fp; \
2417     .text; \
2418     .align 4;
2419    
2420     diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
2421     index 845139d75537..c4ee858e352a 100644
2422     --- a/arch/sparc/lib/U3copy_to_user.S
2423     +++ b/arch/sparc/lib/U3copy_to_user.S
2424     @@ -3,19 +3,19 @@
2425     * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
2426     */
2427    
2428     -#define EX_ST(x) \
2429     +#define EX_ST(x,y) \
2430     98: x; \
2431     .section __ex_table,"a";\
2432     .align 4; \
2433     - .word 98b, __retl_one; \
2434     + .word 98b, y; \
2435     .text; \
2436     .align 4;
2437    
2438     -#define EX_ST_FP(x) \
2439     +#define EX_ST_FP(x,y) \
2440     98: x; \
2441     .section __ex_table,"a";\
2442     .align 4; \
2443     - .word 98b, __retl_one_fp;\
2444     + .word 98b, y##_fp; \
2445     .text; \
2446     .align 4;
2447    
2448     diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
2449     index 491ee69e4995..54f98706b03b 100644
2450     --- a/arch/sparc/lib/U3memcpy.S
2451     +++ b/arch/sparc/lib/U3memcpy.S
2452     @@ -4,6 +4,7 @@
2453     */
2454    
2455     #ifdef __KERNEL__
2456     +#include <linux/linkage.h>
2457     #include <asm/visasm.h>
2458     #include <asm/asi.h>
2459     #define GLOBAL_SPARE %g7
2460     @@ -22,21 +23,17 @@
2461     #endif
2462    
2463     #ifndef EX_LD
2464     -#define EX_LD(x) x
2465     +#define EX_LD(x,y) x
2466     #endif
2467     #ifndef EX_LD_FP
2468     -#define EX_LD_FP(x) x
2469     +#define EX_LD_FP(x,y) x
2470     #endif
2471    
2472     #ifndef EX_ST
2473     -#define EX_ST(x) x
2474     +#define EX_ST(x,y) x
2475     #endif
2476     #ifndef EX_ST_FP
2477     -#define EX_ST_FP(x) x
2478     -#endif
2479     -
2480     -#ifndef EX_RETVAL
2481     -#define EX_RETVAL(x) x
2482     +#define EX_ST_FP(x,y) x
2483     #endif
2484    
2485     #ifndef LOAD
2486     @@ -77,6 +74,87 @@
2487     */
2488    
2489     .text
2490     +#ifndef EX_RETVAL
2491     +#define EX_RETVAL(x) x
2492     +__restore_fp:
2493     + VISExitHalf
2494     + retl
2495     + nop
2496     +ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
2497     + add %g1, 1, %g1
2498     + add %g2, %g1, %g2
2499     + ba,pt %xcc, __restore_fp
2500     + add %o2, %g2, %o0
2501     +ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
2502     +ENTRY(U3_retl_o2_plus_g2_fp)
2503     + ba,pt %xcc, __restore_fp
2504     + add %o2, %g2, %o0
2505     +ENDPROC(U3_retl_o2_plus_g2_fp)
2506     +ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
2507     + add %g2, 8, %g2
2508     + ba,pt %xcc, __restore_fp
2509     + add %o2, %g2, %o0
2510     +ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
2511     +ENTRY(U3_retl_o2)
2512     + retl
2513     + mov %o2, %o0
2514     +ENDPROC(U3_retl_o2)
2515     +ENTRY(U3_retl_o2_plus_1)
2516     + retl
2517     + add %o2, 1, %o0
2518     +ENDPROC(U3_retl_o2_plus_1)
2519     +ENTRY(U3_retl_o2_plus_4)
2520     + retl
2521     + add %o2, 4, %o0
2522     +ENDPROC(U3_retl_o2_plus_4)
2523     +ENTRY(U3_retl_o2_plus_8)
2524     + retl
2525     + add %o2, 8, %o0
2526     +ENDPROC(U3_retl_o2_plus_8)
2527     +ENTRY(U3_retl_o2_plus_g1_plus_1)
2528     + add %g1, 1, %g1
2529     + retl
2530     + add %o2, %g1, %o0
2531     +ENDPROC(U3_retl_o2_plus_g1_plus_1)
2532     +ENTRY(U3_retl_o2_fp)
2533     + ba,pt %xcc, __restore_fp
2534     + mov %o2, %o0
2535     +ENDPROC(U3_retl_o2_fp)
2536     +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
2537     + sll %o3, 6, %o3
2538     + add %o3, 0x80, %o3
2539     + ba,pt %xcc, __restore_fp
2540     + add %o2, %o3, %o0
2541     +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
2542     +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
2543     + sll %o3, 6, %o3
2544     + add %o3, 0x40, %o3
2545     + ba,pt %xcc, __restore_fp
2546     + add %o2, %o3, %o0
2547     +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
2548     +ENTRY(U3_retl_o2_plus_GS_plus_0x10)
2549     + add GLOBAL_SPARE, 0x10, GLOBAL_SPARE
2550     + retl
2551     + add %o2, GLOBAL_SPARE, %o0
2552     +ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
2553     +ENTRY(U3_retl_o2_plus_GS_plus_0x08)
2554     + add GLOBAL_SPARE, 0x08, GLOBAL_SPARE
2555     + retl
2556     + add %o2, GLOBAL_SPARE, %o0
2557     +ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
2558     +ENTRY(U3_retl_o2_and_7_plus_GS)
2559     + and %o2, 7, %o2
2560     + retl
2561     + add %o2, GLOBAL_SPARE, %o2
2562     +ENDPROC(U3_retl_o2_and_7_plus_GS)
2563     +ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
2564     + add GLOBAL_SPARE, 8, GLOBAL_SPARE
2565     + and %o2, 7, %o2
2566     + retl
2567     + add %o2, GLOBAL_SPARE, %o2
2568     +ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
2569     +#endif
2570     +
2571     .align 64
2572    
2573     /* The cheetah's flexible spine, oversized liver, enlarged heart,
2574     @@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2575     and %g2, 0x38, %g2
2576    
2577     1: subcc %g1, 0x1, %g1
2578     - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
2579     - EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
2580     + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
2581     + EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
2582     bgu,pt %XCC, 1b
2583     add %o1, 0x1, %o1
2584    
2585     @@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2586     be,pt %icc, 3f
2587     alignaddr %o1, %g0, %o1
2588    
2589     - EX_LD_FP(LOAD(ldd, %o1, %f4))
2590     -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
2591     + EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
2592     +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
2593     add %o1, 0x8, %o1
2594     subcc %g2, 0x8, %g2
2595     faligndata %f4, %f6, %f0
2596     - EX_ST_FP(STORE(std, %f0, %o0))
2597     + EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
2598     be,pn %icc, 3f
2599     add %o0, 0x8, %o0
2600    
2601     - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
2602     + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
2603     add %o1, 0x8, %o1
2604     subcc %g2, 0x8, %g2
2605     faligndata %f6, %f4, %f2
2606     - EX_ST_FP(STORE(std, %f2, %o0))
2607     + EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
2608     bne,pt %icc, 1b
2609     add %o0, 0x8, %o0
2610    
2611     @@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2612     LOAD(prefetch, %o1 + 0x080, #one_read)
2613     LOAD(prefetch, %o1 + 0x0c0, #one_read)
2614     LOAD(prefetch, %o1 + 0x100, #one_read)
2615     - EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
2616     + EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
2617     LOAD(prefetch, %o1 + 0x140, #one_read)
2618     - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2619     + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
2620     LOAD(prefetch, %o1 + 0x180, #one_read)
2621     - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2622     + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
2623     LOAD(prefetch, %o1 + 0x1c0, #one_read)
2624     faligndata %f0, %f2, %f16
2625     - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2626     + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
2627     faligndata %f2, %f4, %f18
2628     - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2629     + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
2630     faligndata %f4, %f6, %f20
2631     - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2632     + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
2633     faligndata %f6, %f8, %f22
2634    
2635     - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2636     + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
2637     faligndata %f8, %f10, %f24
2638     - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2639     + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
2640     faligndata %f10, %f12, %f26
2641     - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2642     + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
2643    
2644     subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
2645     add %o1, 0x40, %o1
2646     @@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2647    
2648     .align 64
2649     1:
2650     - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2651     + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2652     faligndata %f12, %f14, %f28
2653     - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2654     + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2655     faligndata %f14, %f0, %f30
2656     - EX_ST_FP(STORE_BLK(%f16, %o0))
2657     - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2658     + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2659     + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2660     faligndata %f0, %f2, %f16
2661     add %o0, 0x40, %o0
2662    
2663     - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2664     + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2665     faligndata %f2, %f4, %f18
2666     - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2667     + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2668     faligndata %f4, %f6, %f20
2669     - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2670     + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2671     subcc %o3, 0x01, %o3
2672     faligndata %f6, %f8, %f22
2673     - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2674     + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2675    
2676     faligndata %f8, %f10, %f24
2677     - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2678     + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2679     LOAD(prefetch, %o1 + 0x1c0, #one_read)
2680     faligndata %f10, %f12, %f26
2681     bg,pt %XCC, 1b
2682     @@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2683    
2684     /* Finally we copy the last full 64-byte block. */
2685     2:
2686     - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2687     + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2688     faligndata %f12, %f14, %f28
2689     - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2690     + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2691     faligndata %f14, %f0, %f30
2692     - EX_ST_FP(STORE_BLK(%f16, %o0))
2693     - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2694     + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2695     + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2696     faligndata %f0, %f2, %f16
2697     - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2698     + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2699     faligndata %f2, %f4, %f18
2700     - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2701     + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2702     faligndata %f4, %f6, %f20
2703     - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2704     + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2705     faligndata %f6, %f8, %f22
2706     - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2707     + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2708     faligndata %f8, %f10, %f24
2709     cmp %g1, 0
2710     be,pt %XCC, 1f
2711     add %o0, 0x40, %o0
2712     - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2713     + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2714     1: faligndata %f10, %f12, %f26
2715     faligndata %f12, %f14, %f28
2716     faligndata %f14, %f0, %f30
2717     - EX_ST_FP(STORE_BLK(%f16, %o0))
2718     + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2719     add %o0, 0x40, %o0
2720     add %o1, 0x40, %o1
2721     membar #Sync
2722     @@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2723    
2724     sub %o2, %g2, %o2
2725     be,a,pt %XCC, 1f
2726     - EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
2727     + EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
2728    
2729     -1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
2730     +1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
2731     add %o1, 0x8, %o1
2732     subcc %g2, 0x8, %g2
2733     faligndata %f0, %f2, %f8
2734     - EX_ST_FP(STORE(std, %f8, %o0))
2735     + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
2736     be,pn %XCC, 2f
2737     add %o0, 0x8, %o0
2738     - EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
2739     + EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
2740     add %o1, 0x8, %o1
2741     subcc %g2, 0x8, %g2
2742     faligndata %f2, %f0, %f8
2743     - EX_ST_FP(STORE(std, %f8, %o0))
2744     + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
2745     bne,pn %XCC, 1b
2746     add %o0, 0x8, %o0
2747    
2748     @@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2749     andcc %o2, 0x8, %g0
2750     be,pt %icc, 1f
2751     nop
2752     - EX_LD(LOAD(ldx, %o1, %o5))
2753     - EX_ST(STORE(stx, %o5, %o1 + %o3))
2754     + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
2755     + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
2756     add %o1, 0x8, %o1
2757     + sub %o2, 8, %o2
2758    
2759     1: andcc %o2, 0x4, %g0
2760     be,pt %icc, 1f
2761     nop
2762     - EX_LD(LOAD(lduw, %o1, %o5))
2763     - EX_ST(STORE(stw, %o5, %o1 + %o3))
2764     + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
2765     + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
2766     add %o1, 0x4, %o1
2767     + sub %o2, 4, %o2
2768    
2769     1: andcc %o2, 0x2, %g0
2770     be,pt %icc, 1f
2771     nop
2772     - EX_LD(LOAD(lduh, %o1, %o5))
2773     - EX_ST(STORE(sth, %o5, %o1 + %o3))
2774     + EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
2775     + EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
2776     add %o1, 0x2, %o1
2777     + sub %o2, 2, %o2
2778    
2779     1: andcc %o2, 0x1, %g0
2780     be,pt %icc, 85f
2781     nop
2782     - EX_LD(LOAD(ldub, %o1, %o5))
2783     + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
2784     ba,pt %xcc, 85f
2785     - EX_ST(STORE(stb, %o5, %o1 + %o3))
2786     + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
2787    
2788     .align 64
2789     70: /* 16 < len <= 64 */
2790     @@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2791     andn %o2, 0xf, GLOBAL_SPARE
2792     and %o2, 0xf, %o2
2793     1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
2794     - EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
2795     - EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
2796     - EX_ST(STORE(stx, %o5, %o1 + %o3))
2797     + EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
2798     + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
2799     + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
2800     add %o1, 0x8, %o1
2801     - EX_ST(STORE(stx, %g1, %o1 + %o3))
2802     + EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
2803     bgu,pt %XCC, 1b
2804     add %o1, 0x8, %o1
2805     73: andcc %o2, 0x8, %g0
2806     be,pt %XCC, 1f
2807     nop
2808     sub %o2, 0x8, %o2
2809     - EX_LD(LOAD(ldx, %o1, %o5))
2810     - EX_ST(STORE(stx, %o5, %o1 + %o3))
2811     + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
2812     + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
2813     add %o1, 0x8, %o1
2814     1: andcc %o2, 0x4, %g0
2815     be,pt %XCC, 1f
2816     nop
2817     sub %o2, 0x4, %o2
2818     - EX_LD(LOAD(lduw, %o1, %o5))
2819     - EX_ST(STORE(stw, %o5, %o1 + %o3))
2820     + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
2821     + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
2822     add %o1, 0x4, %o1
2823     1: cmp %o2, 0
2824     be,pt %XCC, 85f
2825     @@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2826     sub %o2, %g1, %o2
2827    
2828     1: subcc %g1, 1, %g1
2829     - EX_LD(LOAD(ldub, %o1, %o5))
2830     - EX_ST(STORE(stb, %o5, %o1 + %o3))
2831     + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
2832     + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
2833     bgu,pt %icc, 1b
2834     add %o1, 1, %o1
2835    
2836     @@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2837    
2838     8: mov 64, %o3
2839     andn %o1, 0x7, %o1
2840     - EX_LD(LOAD(ldx, %o1, %g2))
2841     + EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
2842     sub %o3, %g1, %o3
2843     andn %o2, 0x7, GLOBAL_SPARE
2844     sllx %g2, %g1, %g2
2845     -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
2846     +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
2847     subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE
2848     add %o1, 0x8, %o1
2849     srlx %g3, %o3, %o5
2850     or %o5, %g2, %o5
2851     - EX_ST(STORE(stx, %o5, %o0))
2852     + EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
2853     add %o0, 0x8, %o0
2854     bgu,pt %icc, 1b
2855     sllx %g3, %g1, %g2
2856     @@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2857    
2858     1:
2859     subcc %o2, 4, %o2
2860     - EX_LD(LOAD(lduw, %o1, %g1))
2861     - EX_ST(STORE(stw, %g1, %o1 + %o3))
2862     + EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
2863     + EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
2864     bgu,pt %XCC, 1b
2865     add %o1, 4, %o1
2866    
2867     @@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2868     .align 32
2869     90:
2870     subcc %o2, 1, %o2
2871     - EX_LD(LOAD(ldub, %o1, %g1))
2872     - EX_ST(STORE(stb, %g1, %o1 + %o3))
2873     + EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
2874     + EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
2875     bgu,pt %XCC, 90b
2876     add %o1, 1, %o1
2877     retl
2878     diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S
2879     index 302c0e60dc2c..4c89b486fa0d 100644
2880     --- a/arch/sparc/lib/copy_in_user.S
2881     +++ b/arch/sparc/lib/copy_in_user.S
2882     @@ -8,18 +8,33 @@
2883    
2884     #define XCC xcc
2885    
2886     -#define EX(x,y) \
2887     +#define EX(x,y,z) \
2888     98: x,y; \
2889     .section __ex_table,"a";\
2890     .align 4; \
2891     - .word 98b, __retl_one; \
2892     + .word 98b, z; \
2893     .text; \
2894     .align 4;
2895    
2896     +#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
2897     +#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
2898     +#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
2899     +
2900     .register %g2,#scratch
2901     .register %g3,#scratch
2902    
2903     .text
2904     +__retl_o4_plus_8:
2905     + add %o4, %o2, %o4
2906     + retl
2907     + add %o4, 8, %o0
2908     +__retl_o2_plus_4:
2909     + retl
2910     + add %o2, 4, %o0
2911     +__retl_o2_plus_1:
2912     + retl
2913     + add %o2, 1, %o0
2914     +
2915     .align 32
2916    
2917     /* Don't try to get too fancy here, just nice and
2918     @@ -44,8 +59,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
2919     andn %o2, 0x7, %o4
2920     and %o2, 0x7, %o2
2921     1: subcc %o4, 0x8, %o4
2922     - EX(ldxa [%o1] %asi, %o5)
2923     - EX(stxa %o5, [%o0] %asi)
2924     + EX_O4(ldxa [%o1] %asi, %o5)
2925     + EX_O4(stxa %o5, [%o0] %asi)
2926     add %o1, 0x8, %o1
2927     bgu,pt %XCC, 1b
2928     add %o0, 0x8, %o0
2929     @@ -53,8 +68,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
2930     be,pt %XCC, 1f
2931     nop
2932     sub %o2, 0x4, %o2
2933     - EX(lduwa [%o1] %asi, %o5)
2934     - EX(stwa %o5, [%o0] %asi)
2935     + EX_O2_4(lduwa [%o1] %asi, %o5)
2936     + EX_O2_4(stwa %o5, [%o0] %asi)
2937     add %o1, 0x4, %o1
2938     add %o0, 0x4, %o0
2939     1: cmp %o2, 0
2940     @@ -70,8 +85,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
2941    
2942     82:
2943     subcc %o2, 4, %o2
2944     - EX(lduwa [%o1] %asi, %g1)
2945     - EX(stwa %g1, [%o0] %asi)
2946     + EX_O2_4(lduwa [%o1] %asi, %g1)
2947     + EX_O2_4(stwa %g1, [%o0] %asi)
2948     add %o1, 4, %o1
2949     bgu,pt %XCC, 82b
2950     add %o0, 4, %o0
2951     @@ -82,8 +97,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
2952     .align 32
2953     90:
2954     subcc %o2, 1, %o2
2955     - EX(lduba [%o1] %asi, %g1)
2956     - EX(stba %g1, [%o0] %asi)
2957     + EX_O2_1(lduba [%o1] %asi, %g1)
2958     + EX_O2_1(stba %g1, [%o0] %asi)
2959     add %o1, 1, %o1
2960     bgu,pt %XCC, 90b
2961     add %o0, 1, %o0
2962     diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c
2963     deleted file mode 100644
2964     index ac96ae236709..000000000000
2965     --- a/arch/sparc/lib/user_fixup.c
2966     +++ /dev/null
2967     @@ -1,71 +0,0 @@
2968     -/* user_fixup.c: Fix up user copy faults.
2969     - *
2970     - * Copyright (C) 2004 David S. Miller <davem@redhat.com>
2971     - */
2972     -
2973     -#include <linux/compiler.h>
2974     -#include <linux/kernel.h>
2975     -#include <linux/string.h>
2976     -#include <linux/errno.h>
2977     -#include <linux/module.h>
2978     -
2979     -#include <asm/uaccess.h>
2980     -
2981     -/* Calculating the exact fault address when using
2982     - * block loads and stores can be very complicated.
2983     - *
2984     - * Instead of trying to be clever and handling all
2985     - * of the cases, just fix things up simply here.
2986     - */
2987     -
2988     -static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset)
2989     -{
2990     - unsigned long fault_addr = current_thread_info()->fault_address;
2991     - unsigned long end = start + size;
2992     -
2993     - if (fault_addr < start || fault_addr >= end) {
2994     - *offset = 0;
2995     - } else {
2996     - *offset = fault_addr - start;
2997     - size = end - fault_addr;
2998     - }
2999     - return size;
3000     -}
3001     -
3002     -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size)
3003     -{
3004     - unsigned long offset;
3005     -
3006     - size = compute_size((unsigned long) from, size, &offset);
3007     - if (likely(size))
3008     - memset(to + offset, 0, size);
3009     -
3010     - return size;
3011     -}
3012     -EXPORT_SYMBOL(copy_from_user_fixup);
3013     -
3014     -unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size)
3015     -{
3016     - unsigned long offset;
3017     -
3018     - return compute_size((unsigned long) to, size, &offset);
3019     -}
3020     -EXPORT_SYMBOL(copy_to_user_fixup);
3021     -
3022     -unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size)
3023     -{
3024     - unsigned long fault_addr = current_thread_info()->fault_address;
3025     - unsigned long start = (unsigned long) to;
3026     - unsigned long end = start + size;
3027     -
3028     - if (fault_addr >= start && fault_addr < end)
3029     - return end - fault_addr;
3030     -
3031     - start = (unsigned long) from;
3032     - end = start + size;
3033     - if (fault_addr >= start && fault_addr < end)
3034     - return end - fault_addr;
3035     -
3036     - return size;
3037     -}
3038     -EXPORT_SYMBOL(copy_in_user_fixup);
3039     diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
3040     index f2b77112e9d8..e20fbbafb0b0 100644
3041     --- a/arch/sparc/mm/tsb.c
3042     +++ b/arch/sparc/mm/tsb.c
3043     @@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr)
3044     return (tag == (vaddr >> 22));
3045     }
3046    
3047     +static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
3048     +{
3049     + unsigned long idx;
3050     +
3051     + for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
3052     + struct tsb *ent = &swapper_tsb[idx];
3053     + unsigned long match = idx << 13;
3054     +
3055     + match |= (ent->tag << 22);
3056     + if (match >= start && match < end)
3057     + ent->tag = (1UL << TSB_TAG_INVALID_BIT);
3058     + }
3059     +}
3060     +
3061     /* TSB flushes need only occur on the processor initiating the address
3062     * space modification, not on each cpu the address space has run on.
3063     * Only the TLB flush needs that treatment.
3064     @@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
3065     {
3066     unsigned long v;
3067    
3068     + if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
3069     + return flush_tsb_kernel_range_scan(start, end);
3070     +
3071     for (v = start; v < end; v += PAGE_SIZE) {
3072     unsigned long hash = tsb_hash(v, PAGE_SHIFT,
3073     KERNEL_TSB_NENTRIES);
3074     diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
3075     index b4f4733abc6e..5d2fd6cd3189 100644
3076     --- a/arch/sparc/mm/ultra.S
3077     +++ b/arch/sparc/mm/ultra.S
3078     @@ -30,7 +30,7 @@
3079     .text
3080     .align 32
3081     .globl __flush_tlb_mm
3082     -__flush_tlb_mm: /* 18 insns */
3083     +__flush_tlb_mm: /* 19 insns */
3084     /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
3085     ldxa [%o1] ASI_DMMU, %g2
3086     cmp %g2, %o0
3087     @@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */
3088    
3089     .align 32
3090     .globl __flush_tlb_pending
3091     -__flush_tlb_pending: /* 26 insns */
3092     +__flush_tlb_pending: /* 27 insns */
3093     /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
3094     rdpr %pstate, %g7
3095     sllx %o1, 3, %o1
3096     @@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */
3097    
3098     .align 32
3099     .globl __flush_tlb_kernel_range
3100     -__flush_tlb_kernel_range: /* 16 insns */
3101     +__flush_tlb_kernel_range: /* 31 insns */
3102     /* %o0=start, %o1=end */
3103     cmp %o0, %o1
3104     be,pn %xcc, 2f
3105     + sub %o1, %o0, %o3
3106     + srlx %o3, 18, %o4
3107     + brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow
3108     sethi %hi(PAGE_SIZE), %o4
3109     - sub %o1, %o0, %o3
3110     sub %o3, %o4, %o3
3111     or %o0, 0x20, %o0 ! Nucleus
3112     1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
3113     @@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */
3114     retl
3115     nop
3116     nop
3117     + nop
3118     + nop
3119     + nop
3120     + nop
3121     + nop
3122     + nop
3123     + nop
3124     + nop
3125     + nop
3126     + nop
3127     + nop
3128     + nop
3129     + nop
3130     +
3131     +__spitfire_flush_tlb_kernel_range_slow:
3132     + mov 63 * 8, %o4
3133     +1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3
3134     + andcc %o3, 0x40, %g0 /* _PAGE_L_4U */
3135     + bne,pn %xcc, 2f
3136     + mov TLB_TAG_ACCESS, %o3
3137     + stxa %g0, [%o3] ASI_IMMU
3138     + stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS
3139     + membar #Sync
3140     +2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3
3141     + andcc %o3, 0x40, %g0
3142     + bne,pn %xcc, 2f
3143     + mov TLB_TAG_ACCESS, %o3
3144     + stxa %g0, [%o3] ASI_DMMU
3145     + stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS
3146     + membar #Sync
3147     +2: sub %o4, 8, %o4
3148     + brgez,pt %o4, 1b
3149     + nop
3150     + retl
3151     + nop
3152    
3153     __spitfire_flush_tlb_mm_slow:
3154     rdpr %pstate, %g1
3155     @@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */
3156     retl
3157     wrpr %g7, 0x0, %pstate
3158    
3159     +__cheetah_flush_tlb_kernel_range: /* 31 insns */
3160     + /* %o0=start, %o1=end */
3161     + cmp %o0, %o1
3162     + be,pn %xcc, 2f
3163     + sub %o1, %o0, %o3
3164     + srlx %o3, 18, %o4
3165     + brnz,pn %o4, 3f
3166     + sethi %hi(PAGE_SIZE), %o4
3167     + sub %o3, %o4, %o3
3168     + or %o0, 0x20, %o0 ! Nucleus
3169     +1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
3170     + stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP
3171     + membar #Sync
3172     + brnz,pt %o3, 1b
3173     + sub %o3, %o4, %o3
3174     +2: sethi %hi(KERNBASE), %o3
3175     + flush %o3
3176     + retl
3177     + nop
3178     +3: mov 0x80, %o4
3179     + stxa %g0, [%o4] ASI_DMMU_DEMAP
3180     + membar #Sync
3181     + stxa %g0, [%o4] ASI_IMMU_DEMAP
3182     + membar #Sync
3183     + retl
3184     + nop
3185     + nop
3186     + nop
3187     + nop
3188     + nop
3189     + nop
3190     + nop
3191     + nop
3192     +
3193     #ifdef DCACHE_ALIASING_POSSIBLE
3194     __cheetah_flush_dcache_page: /* 11 insns */
3195     sethi %hi(PAGE_OFFSET), %g1
3196     @@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error:
3197     ret
3198     restore
3199    
3200     -__hypervisor_flush_tlb_mm: /* 10 insns */
3201     +__hypervisor_flush_tlb_mm: /* 19 insns */
3202     mov %o0, %o2 /* ARG2: mmu context */
3203     mov 0, %o0 /* ARG0: CPU lists unimplemented */
3204     mov 0, %o1 /* ARG1: CPU lists unimplemented */
3205     mov HV_MMU_ALL, %o3 /* ARG3: flags */
3206     mov HV_FAST_MMU_DEMAP_CTX, %o5
3207     ta HV_FAST_TRAP
3208     - brnz,pn %o0, __hypervisor_tlb_tl0_error
3209     + brnz,pn %o0, 1f
3210     mov HV_FAST_MMU_DEMAP_CTX, %o1
3211     retl
3212     nop
3213     +1: sethi %hi(__hypervisor_tlb_tl0_error), %o5
3214     + jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0
3215     + nop
3216     + nop
3217     + nop
3218     + nop
3219     + nop
3220     + nop
3221     + nop
3222    
3223     -__hypervisor_flush_tlb_page: /* 11 insns */
3224     +__hypervisor_flush_tlb_page: /* 22 insns */
3225     /* %o0 = context, %o1 = vaddr */
3226     mov %o0, %g2
3227     mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */
3228     @@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */
3229     srlx %o0, PAGE_SHIFT, %o0
3230     sllx %o0, PAGE_SHIFT, %o0
3231     ta HV_MMU_UNMAP_ADDR_TRAP
3232     - brnz,pn %o0, __hypervisor_tlb_tl0_error
3233     + brnz,pn %o0, 1f
3234     mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3235     retl
3236     nop
3237     +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3238     + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3239     + nop
3240     + nop
3241     + nop
3242     + nop
3243     + nop
3244     + nop
3245     + nop
3246     + nop
3247     + nop
3248    
3249     -__hypervisor_flush_tlb_pending: /* 16 insns */
3250     +__hypervisor_flush_tlb_pending: /* 27 insns */
3251     /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
3252     sllx %o1, 3, %g1
3253     mov %o2, %g2
3254     @@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */
3255     srlx %o0, PAGE_SHIFT, %o0
3256     sllx %o0, PAGE_SHIFT, %o0
3257     ta HV_MMU_UNMAP_ADDR_TRAP
3258     - brnz,pn %o0, __hypervisor_tlb_tl0_error
3259     + brnz,pn %o0, 1f
3260     mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3261     brnz,pt %g1, 1b
3262     nop
3263     retl
3264     nop
3265     +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3266     + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3267     + nop
3268     + nop
3269     + nop
3270     + nop
3271     + nop
3272     + nop
3273     + nop
3274     + nop
3275     + nop
3276    
3277     -__hypervisor_flush_tlb_kernel_range: /* 16 insns */
3278     +__hypervisor_flush_tlb_kernel_range: /* 31 insns */
3279     /* %o0=start, %o1=end */
3280     cmp %o0, %o1
3281     be,pn %xcc, 2f
3282     - sethi %hi(PAGE_SIZE), %g3
3283     - mov %o0, %g1
3284     - sub %o1, %g1, %g2
3285     + sub %o1, %o0, %g2
3286     + srlx %g2, 18, %g3
3287     + brnz,pn %g3, 4f
3288     + mov %o0, %g1
3289     + sethi %hi(PAGE_SIZE), %g3
3290     sub %g2, %g3, %g2
3291     1: add %g1, %g2, %o0 /* ARG0: virtual address */
3292     mov 0, %o1 /* ARG1: mmu context */
3293     mov HV_MMU_ALL, %o2 /* ARG2: flags */
3294     ta HV_MMU_UNMAP_ADDR_TRAP
3295     - brnz,pn %o0, __hypervisor_tlb_tl0_error
3296     + brnz,pn %o0, 3f
3297     mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3298     brnz,pt %g2, 1b
3299     sub %g2, %g3, %g2
3300     2: retl
3301     nop
3302     +3: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3303     + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3304     + nop
3305     +4: mov 0, %o0 /* ARG0: CPU lists unimplemented */
3306     + mov 0, %o1 /* ARG1: CPU lists unimplemented */
3307     + mov 0, %o2 /* ARG2: mmu context == nucleus */
3308     + mov HV_MMU_ALL, %o3 /* ARG3: flags */
3309     + mov HV_FAST_MMU_DEMAP_CTX, %o5
3310     + ta HV_FAST_TRAP
3311     + brnz,pn %o0, 3b
3312     + mov HV_FAST_MMU_DEMAP_CTX, %o1
3313     + retl
3314     + nop
3315    
3316     #ifdef DCACHE_ALIASING_POSSIBLE
3317     /* XXX Niagara and friends have an 8K cache, so no aliasing is
3318     @@ -394,43 +511,6 @@ tlb_patch_one:
3319     retl
3320     nop
3321    
3322     - .globl cheetah_patch_cachetlbops
3323     -cheetah_patch_cachetlbops:
3324     - save %sp, -128, %sp
3325     -
3326     - sethi %hi(__flush_tlb_mm), %o0
3327     - or %o0, %lo(__flush_tlb_mm), %o0
3328     - sethi %hi(__cheetah_flush_tlb_mm), %o1
3329     - or %o1, %lo(__cheetah_flush_tlb_mm), %o1
3330     - call tlb_patch_one
3331     - mov 19, %o2
3332     -
3333     - sethi %hi(__flush_tlb_page), %o0
3334     - or %o0, %lo(__flush_tlb_page), %o0
3335     - sethi %hi(__cheetah_flush_tlb_page), %o1
3336     - or %o1, %lo(__cheetah_flush_tlb_page), %o1
3337     - call tlb_patch_one
3338     - mov 22, %o2
3339     -
3340     - sethi %hi(__flush_tlb_pending), %o0
3341     - or %o0, %lo(__flush_tlb_pending), %o0
3342     - sethi %hi(__cheetah_flush_tlb_pending), %o1
3343     - or %o1, %lo(__cheetah_flush_tlb_pending), %o1
3344     - call tlb_patch_one
3345     - mov 27, %o2
3346     -
3347     -#ifdef DCACHE_ALIASING_POSSIBLE
3348     - sethi %hi(__flush_dcache_page), %o0
3349     - or %o0, %lo(__flush_dcache_page), %o0
3350     - sethi %hi(__cheetah_flush_dcache_page), %o1
3351     - or %o1, %lo(__cheetah_flush_dcache_page), %o1
3352     - call tlb_patch_one
3353     - mov 11, %o2
3354     -#endif /* DCACHE_ALIASING_POSSIBLE */
3355     -
3356     - ret
3357     - restore
3358     -
3359     #ifdef CONFIG_SMP
3360     /* These are all called by the slaves of a cross call, at
3361     * trap level 1, with interrupts fully disabled.
3362     @@ -447,7 +527,7 @@ cheetah_patch_cachetlbops:
3363     */
3364     .align 32
3365     .globl xcall_flush_tlb_mm
3366     -xcall_flush_tlb_mm: /* 21 insns */
3367     +xcall_flush_tlb_mm: /* 24 insns */
3368     mov PRIMARY_CONTEXT, %g2
3369     ldxa [%g2] ASI_DMMU, %g3
3370     srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
3371     @@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */
3372     nop
3373     nop
3374     nop
3375     + nop
3376     + nop
3377     + nop
3378    
3379     .globl xcall_flush_tlb_page
3380     -xcall_flush_tlb_page: /* 17 insns */
3381     +xcall_flush_tlb_page: /* 20 insns */
3382     /* %g5=context, %g1=vaddr */
3383     mov PRIMARY_CONTEXT, %g4
3384     ldxa [%g4] ASI_DMMU, %g2
3385     @@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */
3386     retry
3387     nop
3388     nop
3389     + nop
3390     + nop
3391     + nop
3392    
3393     .globl xcall_flush_tlb_kernel_range
3394     -xcall_flush_tlb_kernel_range: /* 25 insns */
3395     +xcall_flush_tlb_kernel_range: /* 44 insns */
3396     sethi %hi(PAGE_SIZE - 1), %g2
3397     or %g2, %lo(PAGE_SIZE - 1), %g2
3398     andn %g1, %g2, %g1
3399     andn %g7, %g2, %g7
3400     sub %g7, %g1, %g3
3401     - add %g2, 1, %g2
3402     + srlx %g3, 18, %g2
3403     + brnz,pn %g2, 2f
3404     + add %g2, 1, %g2
3405     sub %g3, %g2, %g3
3406     or %g1, 0x20, %g1 ! Nucleus
3407     1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
3408     @@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */
3409     brnz,pt %g3, 1b
3410     sub %g3, %g2, %g3
3411     retry
3412     - nop
3413     - nop
3414     +2: mov 63 * 8, %g1
3415     +1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2
3416     + andcc %g2, 0x40, %g0 /* _PAGE_L_4U */
3417     + bne,pn %xcc, 2f
3418     + mov TLB_TAG_ACCESS, %g2
3419     + stxa %g0, [%g2] ASI_IMMU
3420     + stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS
3421     + membar #Sync
3422     +2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2
3423     + andcc %g2, 0x40, %g0
3424     + bne,pn %xcc, 2f
3425     + mov TLB_TAG_ACCESS, %g2
3426     + stxa %g0, [%g2] ASI_DMMU
3427     + stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS
3428     + membar #Sync
3429     +2: sub %g1, 8, %g1
3430     + brgez,pt %g1, 1b
3431     + nop
3432     + retry
3433     nop
3434     nop
3435     nop
3436     @@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4:
3437    
3438     retry
3439    
3440     +__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */
3441     + sethi %hi(PAGE_SIZE - 1), %g2
3442     + or %g2, %lo(PAGE_SIZE - 1), %g2
3443     + andn %g1, %g2, %g1
3444     + andn %g7, %g2, %g7
3445     + sub %g7, %g1, %g3
3446     + srlx %g3, 18, %g2
3447     + brnz,pn %g2, 2f
3448     + add %g2, 1, %g2
3449     + sub %g3, %g2, %g3
3450     + or %g1, 0x20, %g1 ! Nucleus
3451     +1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
3452     + stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
3453     + membar #Sync
3454     + brnz,pt %g3, 1b
3455     + sub %g3, %g2, %g3
3456     + retry
3457     +2: mov 0x80, %g2
3458     + stxa %g0, [%g2] ASI_DMMU_DEMAP
3459     + membar #Sync
3460     + stxa %g0, [%g2] ASI_IMMU_DEMAP
3461     + membar #Sync
3462     + retry
3463     + nop
3464     + nop
3465     + nop
3466     + nop
3467     + nop
3468     + nop
3469     + nop
3470     + nop
3471     + nop
3472     + nop
3473     + nop
3474     + nop
3475     + nop
3476     + nop
3477     + nop
3478     + nop
3479     + nop
3480     + nop
3481     + nop
3482     + nop
3483     + nop
3484     + nop
3485     +
3486     #ifdef DCACHE_ALIASING_POSSIBLE
3487     .align 32
3488     .globl xcall_flush_dcache_page_cheetah
3489     @@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error:
3490     ba,a,pt %xcc, rtrap
3491    
3492     .globl __hypervisor_xcall_flush_tlb_mm
3493     -__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3494     +__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
3495     /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
3496     mov %o0, %g2
3497     mov %o1, %g3
3498     @@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3499     mov HV_FAST_MMU_DEMAP_CTX, %o5
3500     ta HV_FAST_TRAP
3501     mov HV_FAST_MMU_DEMAP_CTX, %g6
3502     - brnz,pn %o0, __hypervisor_tlb_xcall_error
3503     + brnz,pn %o0, 1f
3504     mov %o0, %g5
3505     mov %g2, %o0
3506     mov %g3, %o1
3507     @@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3508     mov %g7, %o5
3509     membar #Sync
3510     retry
3511     +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3512     + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3513     + nop
3514    
3515     .globl __hypervisor_xcall_flush_tlb_page
3516     -__hypervisor_xcall_flush_tlb_page: /* 17 insns */
3517     +__hypervisor_xcall_flush_tlb_page: /* 20 insns */
3518     /* %g5=ctx, %g1=vaddr */
3519     mov %o0, %g2
3520     mov %o1, %g3
3521     @@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */
3522     sllx %o0, PAGE_SHIFT, %o0
3523     ta HV_MMU_UNMAP_ADDR_TRAP
3524     mov HV_MMU_UNMAP_ADDR_TRAP, %g6
3525     - brnz,a,pn %o0, __hypervisor_tlb_xcall_error
3526     + brnz,a,pn %o0, 1f
3527     mov %o0, %g5
3528     mov %g2, %o0
3529     mov %g3, %o1
3530     mov %g4, %o2
3531     membar #Sync
3532     retry
3533     +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3534     + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3535     + nop
3536    
3537     .globl __hypervisor_xcall_flush_tlb_kernel_range
3538     -__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
3539     +__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
3540     /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
3541     sethi %hi(PAGE_SIZE - 1), %g2
3542     or %g2, %lo(PAGE_SIZE - 1), %g2
3543     andn %g1, %g2, %g1
3544     andn %g7, %g2, %g7
3545     sub %g7, %g1, %g3
3546     + srlx %g3, 18, %g7
3547     add %g2, 1, %g2
3548     sub %g3, %g2, %g3
3549     mov %o0, %g2
3550     mov %o1, %g4
3551     - mov %o2, %g7
3552     + brnz,pn %g7, 2f
3553     + mov %o2, %g7
3554     1: add %g1, %g3, %o0 /* ARG0: virtual address */
3555     mov 0, %o1 /* ARG1: mmu context */
3556     mov HV_MMU_ALL, %o2 /* ARG2: flags */
3557     ta HV_MMU_UNMAP_ADDR_TRAP
3558     mov HV_MMU_UNMAP_ADDR_TRAP, %g6
3559     - brnz,pn %o0, __hypervisor_tlb_xcall_error
3560     + brnz,pn %o0, 1f
3561     mov %o0, %g5
3562     sethi %hi(PAGE_SIZE), %o2
3563     brnz,pt %g3, 1b
3564     sub %g3, %o2, %g3
3565     - mov %g2, %o0
3566     +5: mov %g2, %o0
3567     mov %g4, %o1
3568     mov %g7, %o2
3569     membar #Sync
3570     retry
3571     +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3572     + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3573     + nop
3574     +2: mov %o3, %g1
3575     + mov %o5, %g3
3576     + mov 0, %o0 /* ARG0: CPU lists unimplemented */
3577     + mov 0, %o1 /* ARG1: CPU lists unimplemented */
3578     + mov 0, %o2 /* ARG2: mmu context == nucleus */
3579     + mov HV_MMU_ALL, %o3 /* ARG3: flags */
3580     + mov HV_FAST_MMU_DEMAP_CTX, %o5
3581     + ta HV_FAST_TRAP
3582     + mov %g1, %o3
3583     + brz,pt %o0, 5b
3584     + mov %g3, %o5
3585     + mov HV_FAST_MMU_DEMAP_CTX, %g6
3586     + ba,pt %xcc, 1b
3587     + clr %g5
3588    
3589     /* These just get rescheduled to PIL vectors. */
3590     .globl xcall_call_function
3591     @@ -809,6 +985,58 @@ xcall_kgdb_capture:
3592    
3593     #endif /* CONFIG_SMP */
3594    
3595     + .globl cheetah_patch_cachetlbops
3596     +cheetah_patch_cachetlbops:
3597     + save %sp, -128, %sp
3598     +
3599     + sethi %hi(__flush_tlb_mm), %o0
3600     + or %o0, %lo(__flush_tlb_mm), %o0
3601     + sethi %hi(__cheetah_flush_tlb_mm), %o1
3602     + or %o1, %lo(__cheetah_flush_tlb_mm), %o1
3603     + call tlb_patch_one
3604     + mov 19, %o2
3605     +
3606     + sethi %hi(__flush_tlb_page), %o0
3607     + or %o0, %lo(__flush_tlb_page), %o0
3608     + sethi %hi(__cheetah_flush_tlb_page), %o1
3609     + or %o1, %lo(__cheetah_flush_tlb_page), %o1
3610     + call tlb_patch_one
3611     + mov 22, %o2
3612     +
3613     + sethi %hi(__flush_tlb_pending), %o0
3614     + or %o0, %lo(__flush_tlb_pending), %o0
3615     + sethi %hi(__cheetah_flush_tlb_pending), %o1
3616     + or %o1, %lo(__cheetah_flush_tlb_pending), %o1
3617     + call tlb_patch_one
3618     + mov 27, %o2
3619     +
3620     + sethi %hi(__flush_tlb_kernel_range), %o0
3621     + or %o0, %lo(__flush_tlb_kernel_range), %o0
3622     + sethi %hi(__cheetah_flush_tlb_kernel_range), %o1
3623     + or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
3624     + call tlb_patch_one
3625     + mov 31, %o2
3626     +
3627     +#ifdef DCACHE_ALIASING_POSSIBLE
3628     + sethi %hi(__flush_dcache_page), %o0
3629     + or %o0, %lo(__flush_dcache_page), %o0
3630     + sethi %hi(__cheetah_flush_dcache_page), %o1
3631     + or %o1, %lo(__cheetah_flush_dcache_page), %o1
3632     + call tlb_patch_one
3633     + mov 11, %o2
3634     +#endif /* DCACHE_ALIASING_POSSIBLE */
3635     +
3636     +#ifdef CONFIG_SMP
3637     + sethi %hi(xcall_flush_tlb_kernel_range), %o0
3638     + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
3639     + sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
3640     + or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
3641     + call tlb_patch_one
3642     + mov 44, %o2
3643     +#endif /* CONFIG_SMP */
3644     +
3645     + ret
3646     + restore
3647    
3648     .globl hypervisor_patch_cachetlbops
3649     hypervisor_patch_cachetlbops:
3650     @@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops:
3651     sethi %hi(__hypervisor_flush_tlb_mm), %o1
3652     or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
3653     call tlb_patch_one
3654     - mov 10, %o2
3655     + mov 19, %o2
3656    
3657     sethi %hi(__flush_tlb_page), %o0
3658     or %o0, %lo(__flush_tlb_page), %o0
3659     sethi %hi(__hypervisor_flush_tlb_page), %o1
3660     or %o1, %lo(__hypervisor_flush_tlb_page), %o1
3661     call tlb_patch_one
3662     - mov 11, %o2
3663     + mov 22, %o2
3664    
3665     sethi %hi(__flush_tlb_pending), %o0
3666     or %o0, %lo(__flush_tlb_pending), %o0
3667     sethi %hi(__hypervisor_flush_tlb_pending), %o1
3668     or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
3669     call tlb_patch_one
3670     - mov 16, %o2
3671     + mov 27, %o2
3672    
3673     sethi %hi(__flush_tlb_kernel_range), %o0
3674     or %o0, %lo(__flush_tlb_kernel_range), %o0
3675     sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
3676     or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
3677     call tlb_patch_one
3678     - mov 16, %o2
3679     + mov 31, %o2
3680    
3681     #ifdef DCACHE_ALIASING_POSSIBLE
3682     sethi %hi(__flush_dcache_page), %o0
3683     @@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops:
3684     sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
3685     or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
3686     call tlb_patch_one
3687     - mov 21, %o2
3688     + mov 24, %o2
3689    
3690     sethi %hi(xcall_flush_tlb_page), %o0
3691     or %o0, %lo(xcall_flush_tlb_page), %o0
3692     sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1
3693     or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
3694     call tlb_patch_one
3695     - mov 17, %o2
3696     + mov 20, %o2
3697    
3698     sethi %hi(xcall_flush_tlb_kernel_range), %o0
3699     or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
3700     sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
3701     or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
3702     call tlb_patch_one
3703     - mov 25, %o2
3704     + mov 44, %o2
3705     #endif /* CONFIG_SMP */
3706    
3707     ret
3708     diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
3709     index c4751ece76f6..45e87c9cc828 100644
3710     --- a/drivers/net/ethernet/broadcom/bgmac.c
3711     +++ b/drivers/net/ethernet/broadcom/bgmac.c
3712     @@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
3713     u32 ctl;
3714    
3715     ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
3716     +
3717     + /* preserve ONLY bits 16-17 from current hardware value */
3718     + ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
3719     +
3720     if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) {
3721     ctl &= ~BGMAC_DMA_RX_BL_MASK;
3722     ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
3723     @@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
3724     ctl &= ~BGMAC_DMA_RX_PT_MASK;
3725     ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT;
3726     }
3727     - ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
3728     ctl |= BGMAC_DMA_RX_ENABLE;
3729     ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
3730     ctl |= BGMAC_DMA_RX_OVERFLOW_CONT;
3731     diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
3732     index 505ceaf451e2..2c850a92ab15 100644
3733     --- a/drivers/net/ethernet/broadcom/bnx2.c
3734     +++ b/drivers/net/ethernet/broadcom/bnx2.c
3735     @@ -49,6 +49,7 @@
3736     #include <linux/firmware.h>
3737     #include <linux/log2.h>
3738     #include <linux/aer.h>
3739     +#include <linux/crash_dump.h>
3740    
3741     #if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
3742     #define BCM_CNIC 1
3743     @@ -4759,15 +4760,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp)
3744     BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR);
3745     }
3746    
3747     -static int
3748     -bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
3749     +static void
3750     +bnx2_wait_dma_complete(struct bnx2 *bp)
3751     {
3752     u32 val;
3753     - int i, rc = 0;
3754     - u8 old_port;
3755     + int i;
3756    
3757     - /* Wait for the current PCI transaction to complete before
3758     - * issuing a reset. */
3759     + /*
3760     + * Wait for the current PCI transaction to complete before
3761     + * issuing a reset.
3762     + */
3763     if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) ||
3764     (BNX2_CHIP(bp) == BNX2_CHIP_5708)) {
3765     BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS,
3766     @@ -4791,6 +4793,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
3767     }
3768     }
3769    
3770     + return;
3771     +}
3772     +
3773     +
3774     +static int
3775     +bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
3776     +{
3777     + u32 val;
3778     + int i, rc = 0;
3779     + u8 old_port;
3780     +
3781     + /* Wait for the current PCI transaction to complete before
3782     + * issuing a reset. */
3783     + bnx2_wait_dma_complete(bp);
3784     +
3785     /* Wait for the firmware to tell us it is ok to issue a reset. */
3786     bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1);
3787    
3788     @@ -6356,6 +6373,10 @@ bnx2_open(struct net_device *dev)
3789     struct bnx2 *bp = netdev_priv(dev);
3790     int rc;
3791    
3792     + rc = bnx2_request_firmware(bp);
3793     + if (rc < 0)
3794     + goto out;
3795     +
3796     netif_carrier_off(dev);
3797    
3798     bnx2_disable_int(bp);
3799     @@ -6424,6 +6445,7 @@ open_err:
3800     bnx2_free_irq(bp);
3801     bnx2_free_mem(bp);
3802     bnx2_del_napi(bp);
3803     + bnx2_release_firmware(bp);
3804     goto out;
3805     }
3806    
3807     @@ -8570,12 +8592,15 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
3808    
3809     pci_set_drvdata(pdev, dev);
3810    
3811     - rc = bnx2_request_firmware(bp);
3812     - if (rc < 0)
3813     - goto error;
3814     -
3815     + /*
3816     + * In-flight DMA from 1st kernel could continue going in kdump kernel.
3817     + * New io-page table has been created before bnx2 does reset at open stage.
3818     + * We have to wait for the in-flight DMA to complete to avoid it look up
3819     + * into the newly created io-page table.
3820     + */
3821     + if (is_kdump_kernel())
3822     + bnx2_wait_dma_complete(bp);
3823    
3824     - bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
3825     memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
3826    
3827     dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
3828     @@ -8608,7 +8633,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
3829     return 0;
3830    
3831     error:
3832     - bnx2_release_firmware(bp);
3833     pci_iounmap(pdev, bp->regview);
3834     pci_release_regions(pdev);
3835     pci_disable_device(pdev);
3836     diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3837     index d48873bcbddf..5cdc96bdd444 100644
3838     --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3839     +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3840     @@ -231,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
3841    
3842     span_entry->used = true;
3843     span_entry->id = index;
3844     - span_entry->ref_count = 0;
3845     + span_entry->ref_count = 1;
3846     span_entry->local_port = local_port;
3847     return span_entry;
3848     }
3849     @@ -268,6 +268,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
3850    
3851     span_entry = mlxsw_sp_span_entry_find(port);
3852     if (span_entry) {
3853     + /* Already exists, just take a reference */
3854     span_entry->ref_count++;
3855     return span_entry;
3856     }
3857     @@ -278,6 +279,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
3858     static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
3859     struct mlxsw_sp_span_entry *span_entry)
3860     {
3861     + WARN_ON(!span_entry->ref_count);
3862     if (--span_entry->ref_count == 0)
3863     mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
3864     return 0;
3865     diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3866     index 3f5c51da6d3e..62514b9bf988 100644
3867     --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3868     +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3869     @@ -777,6 +777,26 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
3870     }
3871     }
3872    
3873     +static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
3874     +{
3875     + u8 num_rec, last_rec_index, num_entries;
3876     +
3877     + num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
3878     + last_rec_index = num_rec - 1;
3879     +
3880     + if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
3881     + return false;
3882     + if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
3883     + MLXSW_REG_RAUHTD_TYPE_IPV6)
3884     + return true;
3885     +
3886     + num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
3887     + last_rec_index);
3888     + if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
3889     + return true;
3890     + return false;
3891     +}
3892     +
3893     static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
3894     {
3895     char *rauhtd_pl;
3896     @@ -803,7 +823,7 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
3897     for (i = 0; i < num_rec; i++)
3898     mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
3899     i);
3900     - } while (num_rec);
3901     + } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
3902     rtnl_unlock();
3903    
3904     kfree(rauhtd_pl);
3905     diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3906     index 4c8c60af7985..fe9e7b1979b8 100644
3907     --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3908     +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3909     @@ -871,6 +871,13 @@ static int stmmac_init_phy(struct net_device *dev)
3910     return -ENODEV;
3911     }
3912    
3913     + /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid
3914     + * subsequent PHY polling, make sure we force a link transition if
3915     + * we have a UP/DOWN/UP transition
3916     + */
3917     + if (phydev->is_pseudo_fixed_link)
3918     + phydev->irq = PHY_POLL;
3919     +
3920     pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)"
3921     " Link = %d\n", dev->name, phydev->phy_id, phydev->link);
3922    
3923     diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
3924     index 5c8429f23a89..3a5530d0511b 100644
3925     --- a/drivers/usb/gadget/function/f_fs.c
3926     +++ b/drivers/usb/gadget/function/f_fs.c
3927     @@ -133,8 +133,60 @@ struct ffs_epfile {
3928     /*
3929     * Buffer for holding data from partial reads which may happen since
3930     * we’re rounding user read requests to a multiple of a max packet size.
3931     + *
3932     + * The pointer is initialised with NULL value and may be set by
3933     + * __ffs_epfile_read_data function to point to a temporary buffer.
3934     + *
3935     + * In normal operation, calls to __ffs_epfile_read_buffered will consume
3936     + * data from said buffer and eventually free it. Importantly, while the
3937     + * function is using the buffer, it sets the pointer to NULL. This is
3938     + * all right since __ffs_epfile_read_data and __ffs_epfile_read_buffered
3939     + * can never run concurrently (they are synchronised by epfile->mutex)
3940     + * so the latter will not assign a new value to the pointer.
3941     + *
3942     + * Meanwhile ffs_func_eps_disable frees the buffer (if the pointer is
3943     + * valid) and sets the pointer to READ_BUFFER_DROP value. This special
3944     + * value is crux of the synchronisation between ffs_func_eps_disable and
3945     + * __ffs_epfile_read_data.
3946     + *
3947     + * Once __ffs_epfile_read_data is about to finish it will try to set the
3948     + * pointer back to its old value (as described above), but seeing as the
3949     + * pointer is not-NULL (namely READ_BUFFER_DROP) it will instead free
3950     + * the buffer.
3951     + *
3952     + * == State transitions ==
3953     + *
3954     + * • ptr == NULL: (initial state)
3955     + * ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP
3956     + * ◦ __ffs_epfile_read_buffered: nop
3957     + * ◦ __ffs_epfile_read_data allocates temp buffer: go to ptr == buf
3958     + * ◦ reading finishes: n/a, not in ‘and reading’ state
3959     + * • ptr == DROP:
3960     + * ◦ __ffs_epfile_read_buffer_free: nop
3961     + * ◦ __ffs_epfile_read_buffered: go to ptr == NULL
3962     + * ◦ __ffs_epfile_read_data allocates temp buffer: free buf, nop
3963     + * ◦ reading finishes: n/a, not in ‘and reading’ state
3964     + * • ptr == buf:
3965     + * ◦ __ffs_epfile_read_buffer_free: free buf, go to ptr == DROP
3966     + * ◦ __ffs_epfile_read_buffered: go to ptr == NULL and reading
3967     + * ◦ __ffs_epfile_read_data: n/a, __ffs_epfile_read_buffered
3968     + * is always called first
3969     + * ◦ reading finishes: n/a, not in ‘and reading’ state
3970     + * • ptr == NULL and reading:
3971     + * ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP and reading
3972     + * ◦ __ffs_epfile_read_buffered: n/a, mutex is held
3973     + * ◦ __ffs_epfile_read_data: n/a, mutex is held
3974     + * ◦ reading finishes and …
3975     + * … all data read: free buf, go to ptr == NULL
3976     + * … otherwise: go to ptr == buf and reading
3977     + * • ptr == DROP and reading:
3978     + * ◦ __ffs_epfile_read_buffer_free: nop
3979     + * ◦ __ffs_epfile_read_buffered: n/a, mutex is held
3980     + * ◦ __ffs_epfile_read_data: n/a, mutex is held
3981     + * ◦ reading finishes: free buf, go to ptr == DROP
3982     */
3983     - struct ffs_buffer *read_buffer; /* P: epfile->mutex */
3984     + struct ffs_buffer *read_buffer;
3985     +#define READ_BUFFER_DROP ((struct ffs_buffer *)ERR_PTR(-ESHUTDOWN))
3986    
3987     char name[5];
3988    
3989     @@ -733,25 +785,47 @@ static void ffs_epfile_async_io_complete(struct usb_ep *_ep,
3990     schedule_work(&io_data->work);
3991     }
3992    
3993     +static void __ffs_epfile_read_buffer_free(struct ffs_epfile *epfile)
3994     +{
3995     + /*
3996     + * See comment in struct ffs_epfile for full read_buffer pointer
3997     + * synchronisation story.
3998     + */
3999     + struct ffs_buffer *buf = xchg(&epfile->read_buffer, READ_BUFFER_DROP);
4000     + if (buf && buf != READ_BUFFER_DROP)
4001     + kfree(buf);
4002     +}
4003     +
4004     /* Assumes epfile->mutex is held. */
4005     static ssize_t __ffs_epfile_read_buffered(struct ffs_epfile *epfile,
4006     struct iov_iter *iter)
4007     {
4008     - struct ffs_buffer *buf = epfile->read_buffer;
4009     + /*
4010     + * Null out epfile->read_buffer so ffs_func_eps_disable does not free
4011     + * the buffer while we are using it. See comment in struct ffs_epfile
4012     + * for full read_buffer pointer synchronisation story.
4013     + */
4014     + struct ffs_buffer *buf = xchg(&epfile->read_buffer, NULL);
4015     ssize_t ret;
4016     - if (!buf)
4017     + if (!buf || buf == READ_BUFFER_DROP)
4018     return 0;
4019    
4020     ret = copy_to_iter(buf->data, buf->length, iter);
4021     if (buf->length == ret) {
4022     kfree(buf);
4023     - epfile->read_buffer = NULL;
4024     - } else if (unlikely(iov_iter_count(iter))) {
4025     + return ret;
4026     + }
4027     +
4028     + if (unlikely(iov_iter_count(iter))) {
4029     ret = -EFAULT;
4030     } else {
4031     buf->length -= ret;
4032     buf->data += ret;
4033     }
4034     +
4035     + if (cmpxchg(&epfile->read_buffer, NULL, buf))
4036     + kfree(buf);
4037     +
4038     return ret;
4039     }
4040    
4041     @@ -780,7 +854,15 @@ static ssize_t __ffs_epfile_read_data(struct ffs_epfile *epfile,
4042     buf->length = data_len;
4043     buf->data = buf->storage;
4044     memcpy(buf->storage, data + ret, data_len);
4045     - epfile->read_buffer = buf;
4046     +
4047     + /*
4048     + * At this point read_buffer is NULL or READ_BUFFER_DROP (if
4049     + * ffs_func_eps_disable has been called in the meanwhile). See comment
4050     + * in struct ffs_epfile for full read_buffer pointer synchronisation
4051     + * story.
4052     + */
4053     + if (unlikely(cmpxchg(&epfile->read_buffer, NULL, buf)))
4054     + kfree(buf);
4055    
4056     return ret;
4057     }
4058     @@ -1094,8 +1176,7 @@ ffs_epfile_release(struct inode *inode, struct file *file)
4059    
4060     ENTER();
4061    
4062     - kfree(epfile->read_buffer);
4063     - epfile->read_buffer = NULL;
4064     + __ffs_epfile_read_buffer_free(epfile);
4065     ffs_data_closed(epfile->ffs);
4066    
4067     return 0;
4068     @@ -1721,24 +1802,20 @@ static void ffs_func_eps_disable(struct ffs_function *func)
4069     unsigned count = func->ffs->eps_count;
4070     unsigned long flags;
4071    
4072     + spin_lock_irqsave(&func->ffs->eps_lock, flags);
4073     do {
4074     - if (epfile)
4075     - mutex_lock(&epfile->mutex);
4076     - spin_lock_irqsave(&func->ffs->eps_lock, flags);
4077     /* pending requests get nuked */
4078     if (likely(ep->ep))
4079     usb_ep_disable(ep->ep);
4080     ++ep;
4081     - spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
4082    
4083     if (epfile) {
4084     epfile->ep = NULL;
4085     - kfree(epfile->read_buffer);
4086     - epfile->read_buffer = NULL;
4087     - mutex_unlock(&epfile->mutex);
4088     + __ffs_epfile_read_buffer_free(epfile);
4089     ++epfile;
4090     }
4091     } while (--count);
4092     + spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
4093     }
4094    
4095     static int ffs_func_eps_enable(struct ffs_function *func)
4096     diff --git a/include/net/ip.h b/include/net/ip.h
4097     index 156b0c11b524..0ccf6daf6f56 100644
4098     --- a/include/net/ip.h
4099     +++ b/include/net/ip.h
4100     @@ -47,7 +47,6 @@ struct inet_skb_parm {
4101     #define IPSKB_REROUTED BIT(4)
4102     #define IPSKB_DOREDIRECT BIT(5)
4103     #define IPSKB_FRAG_PMTU BIT(6)
4104     -#define IPSKB_FRAG_SEGS BIT(7)
4105    
4106     u16 frag_max_size;
4107     };
4108     diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
4109     index 43a5a0e4524c..b01d5d1d7439 100644
4110     --- a/include/net/ip6_tunnel.h
4111     +++ b/include/net/ip6_tunnel.h
4112     @@ -145,6 +145,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
4113     {
4114     int pkt_len, err;
4115    
4116     + memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4117     pkt_len = skb->len - skb_inner_network_offset(skb);
4118     err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
4119     if (unlikely(net_xmit_eval(err)))
4120     diff --git a/include/net/sock.h b/include/net/sock.h
4121     index 8741988e6880..c26eab962ec7 100644
4122     --- a/include/net/sock.h
4123     +++ b/include/net/sock.h
4124     @@ -1587,11 +1587,11 @@ static inline void sock_put(struct sock *sk)
4125     void sock_gen_put(struct sock *sk);
4126    
4127     int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested,
4128     - unsigned int trim_cap);
4129     + unsigned int trim_cap, bool refcounted);
4130     static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
4131     const int nested)
4132     {
4133     - return __sk_receive_skb(sk, skb, nested, 1);
4134     + return __sk_receive_skb(sk, skb, nested, 1, true);
4135     }
4136    
4137     static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
4138     diff --git a/include/net/tcp.h b/include/net/tcp.h
4139     index 7717302cab91..0de698940793 100644
4140     --- a/include/net/tcp.h
4141     +++ b/include/net/tcp.h
4142     @@ -1164,6 +1164,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
4143     }
4144    
4145     bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
4146     +int tcp_filter(struct sock *sk, struct sk_buff *skb);
4147    
4148     #undef STATE_TRACE
4149    
4150     diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h
4151     index 5cd4d4d2dd1d..9c9c6ad55f14 100644
4152     --- a/include/uapi/linux/atm_zatm.h
4153     +++ b/include/uapi/linux/atm_zatm.h
4154     @@ -14,7 +14,6 @@
4155    
4156     #include <linux/atmapi.h>
4157     #include <linux/atmioc.h>
4158     -#include <linux/time.h>
4159    
4160     #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc)
4161     /* get pool statistics */
4162     diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
4163     index 570eeca7bdfa..ad1bc67aff1b 100644
4164     --- a/kernel/bpf/hashtab.c
4165     +++ b/kernel/bpf/hashtab.c
4166     @@ -687,7 +687,8 @@ static void delete_all_elements(struct bpf_htab *htab)
4167    
4168     hlist_for_each_entry_safe(l, n, head, hash_node) {
4169     hlist_del_rcu(&l->hash_node);
4170     - htab_elem_free(htab, l);
4171     + if (l->state != HTAB_EXTRA_ELEM_USED)
4172     + htab_elem_free(htab, l);
4173     }
4174     }
4175     }
4176     diff --git a/net/core/dev.c b/net/core/dev.c
4177     index 44b3ba462ba1..9ce9d7284ea7 100644
4178     --- a/net/core/dev.c
4179     +++ b/net/core/dev.c
4180     @@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *skb)
4181     goto out;
4182     }
4183    
4184     - *(__sum16 *)(skb->data + offset) = csum_fold(csum);
4185     + *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
4186     out_set_summed:
4187     skb->ip_summed = CHECKSUM_NONE;
4188     out:
4189     diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
4190     index 52742a02814f..5550a86f7264 100644
4191     --- a/net/core/flow_dissector.c
4192     +++ b/net/core/flow_dissector.c
4193     @@ -118,7 +118,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
4194     struct flow_dissector_key_tags *key_tags;
4195     struct flow_dissector_key_keyid *key_keyid;
4196     u8 ip_proto = 0;
4197     - bool ret = false;
4198     + bool ret;
4199    
4200     if (!data) {
4201     data = skb->data;
4202     @@ -481,12 +481,17 @@ ip_proto_again:
4203     out_good:
4204     ret = true;
4205    
4206     -out_bad:
4207     + key_control->thoff = (u16)nhoff;
4208     +out:
4209     key_basic->n_proto = proto;
4210     key_basic->ip_proto = ip_proto;
4211     - key_control->thoff = (u16)nhoff;
4212    
4213     return ret;
4214     +
4215     +out_bad:
4216     + ret = false;
4217     + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
4218     + goto out;
4219     }
4220     EXPORT_SYMBOL(__skb_flow_dissect);
4221    
4222     diff --git a/net/core/sock.c b/net/core/sock.c
4223     index fd7b41edf1ce..10acaccca5c8 100644
4224     --- a/net/core/sock.c
4225     +++ b/net/core/sock.c
4226     @@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
4227     EXPORT_SYMBOL(sock_queue_rcv_skb);
4228    
4229     int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
4230     - const int nested, unsigned int trim_cap)
4231     + const int nested, unsigned int trim_cap, bool refcounted)
4232     {
4233     int rc = NET_RX_SUCCESS;
4234    
4235     @@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
4236    
4237     bh_unlock_sock(sk);
4238     out:
4239     - sock_put(sk);
4240     + if (refcounted)
4241     + sock_put(sk);
4242     return rc;
4243     discard_and_relse:
4244     kfree_skb(skb);
4245     @@ -1563,6 +1564,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
4246     RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
4247    
4248     newsk->sk_err = 0;
4249     + newsk->sk_err_soft = 0;
4250     newsk->sk_priority = 0;
4251     newsk->sk_incoming_cpu = raw_smp_processor_id();
4252     atomic64_set(&newsk->sk_cookie, 0);
4253     diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
4254     index 345a3aeb8c7e..b567c8725aea 100644
4255     --- a/net/dccp/ipv4.c
4256     +++ b/net/dccp/ipv4.c
4257     @@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
4258     {
4259     const struct iphdr *iph = (struct iphdr *)skb->data;
4260     const u8 offset = iph->ihl << 2;
4261     - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
4262     + const struct dccp_hdr *dh;
4263     struct dccp_sock *dp;
4264     struct inet_sock *inet;
4265     const int type = icmp_hdr(skb)->type;
4266     @@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
4267     int err;
4268     struct net *net = dev_net(skb->dev);
4269    
4270     - if (skb->len < offset + sizeof(*dh) ||
4271     - skb->len < offset + __dccp_basic_hdr_len(dh)) {
4272     - __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
4273     - return;
4274     - }
4275     + /* Only need dccph_dport & dccph_sport which are the first
4276     + * 4 bytes in dccp header.
4277     + * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
4278     + */
4279     + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
4280     + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
4281     + dh = (struct dccp_hdr *)(skb->data + offset);
4282    
4283     sk = __inet_lookup_established(net, &dccp_hashinfo,
4284     iph->daddr, dh->dccph_dport,
4285     @@ -868,7 +870,7 @@ lookup:
4286     goto discard_and_relse;
4287     nf_reset(skb);
4288    
4289     - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4);
4290     + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
4291    
4292     no_dccp_socket:
4293     if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
4294     diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
4295     index 3828f94b234c..715e5d1dc107 100644
4296     --- a/net/dccp/ipv6.c
4297     +++ b/net/dccp/ipv6.c
4298     @@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
4299     u8 type, u8 code, int offset, __be32 info)
4300     {
4301     const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
4302     - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
4303     + const struct dccp_hdr *dh;
4304     struct dccp_sock *dp;
4305     struct ipv6_pinfo *np;
4306     struct sock *sk;
4307     @@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
4308     __u64 seq;
4309     struct net *net = dev_net(skb->dev);
4310    
4311     - if (skb->len < offset + sizeof(*dh) ||
4312     - skb->len < offset + __dccp_basic_hdr_len(dh)) {
4313     - __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
4314     - ICMP6_MIB_INERRORS);
4315     - return;
4316     - }
4317     + /* Only need dccph_dport & dccph_sport which are the first
4318     + * 4 bytes in dccp header.
4319     + * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
4320     + */
4321     + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
4322     + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
4323     + dh = (struct dccp_hdr *)(skb->data + offset);
4324    
4325     sk = __inet6_lookup_established(net, &dccp_hashinfo,
4326     &hdr->daddr, dh->dccph_dport,
4327     @@ -738,7 +739,8 @@ lookup:
4328     if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
4329     goto discard_and_relse;
4330    
4331     - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0;
4332     + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
4333     + refcounted) ? -1 : 0;
4334    
4335     no_dccp_socket:
4336     if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
4337     @@ -956,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
4338     .getsockopt = ipv6_getsockopt,
4339     .addr2sockaddr = inet6_csk_addr2sockaddr,
4340     .sockaddr_len = sizeof(struct sockaddr_in6),
4341     + .bind_conflict = inet6_csk_bind_conflict,
4342     #ifdef CONFIG_COMPAT
4343     .compat_setsockopt = compat_ipv6_setsockopt,
4344     .compat_getsockopt = compat_ipv6_getsockopt,
4345     diff --git a/net/dccp/proto.c b/net/dccp/proto.c
4346     index 41e65804ddf5..9fe25bf63296 100644
4347     --- a/net/dccp/proto.c
4348     +++ b/net/dccp/proto.c
4349     @@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout)
4350     __kfree_skb(skb);
4351     }
4352    
4353     + /* If socket has been already reset kill it. */
4354     + if (sk->sk_state == DCCP_CLOSED)
4355     + goto adjudge_to_death;
4356     +
4357     if (data_was_unread) {
4358     /* Unread data was tossed, send an appropriate Reset Code */
4359     DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
4360     diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
4361     index e2ffc2a5c7db..7ef703102dca 100644
4362     --- a/net/ipv4/fib_trie.c
4363     +++ b/net/ipv4/fib_trie.c
4364     @@ -2455,22 +2455,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
4365     struct key_vector *l, **tp = &iter->tnode;
4366     t_key key;
4367    
4368     - /* use cache location of next-to-find key */
4369     + /* use cached location of previously found key */
4370     if (iter->pos > 0 && pos >= iter->pos) {
4371     - pos -= iter->pos;
4372     key = iter->key;
4373     } else {
4374     - iter->pos = 0;
4375     + iter->pos = 1;
4376     key = 0;
4377     }
4378    
4379     - while ((l = leaf_walk_rcu(tp, key)) != NULL) {
4380     + pos -= iter->pos;
4381     +
4382     + while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
4383     key = l->key + 1;
4384     iter->pos++;
4385     -
4386     - if (--pos <= 0)
4387     - break;
4388     -
4389     l = NULL;
4390    
4391     /* handle unlikely case of a key wrap */
4392     @@ -2479,7 +2476,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
4393     }
4394    
4395     if (l)
4396     - iter->key = key; /* remember it */
4397     + iter->key = l->key; /* remember it */
4398     else
4399     iter->pos = 0; /* forget it */
4400    
4401     @@ -2507,7 +2504,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
4402     return fib_route_get_idx(iter, *pos);
4403    
4404     iter->pos = 0;
4405     - iter->key = 0;
4406     + iter->key = KEY_MAX;
4407    
4408     return SEQ_START_TOKEN;
4409     }
4410     @@ -2516,7 +2513,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4411     {
4412     struct fib_route_iter *iter = seq->private;
4413     struct key_vector *l = NULL;
4414     - t_key key = iter->key;
4415     + t_key key = iter->key + 1;
4416    
4417     ++*pos;
4418    
4419     @@ -2525,7 +2522,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4420     l = leaf_walk_rcu(&iter->tnode, key);
4421    
4422     if (l) {
4423     - iter->key = l->key + 1;
4424     + iter->key = l->key;
4425     iter->pos++;
4426     } else {
4427     iter->pos = 0;
4428     diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
4429     index 38abe70e595f..48734ee6293f 100644
4430     --- a/net/ipv4/icmp.c
4431     +++ b/net/ipv4/icmp.c
4432     @@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
4433     fl4->flowi4_proto = IPPROTO_ICMP;
4434     fl4->fl4_icmp_type = type;
4435     fl4->fl4_icmp_code = code;
4436     - fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
4437     + fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
4438    
4439     security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
4440     rt = __ip_route_output_key_hash(net, fl4,
4441     @@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
4442     if (err)
4443     goto relookup_failed;
4444    
4445     - if (inet_addr_type_dev_table(net, skb_in->dev,
4446     + if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
4447     fl4_dec.saddr) == RTN_LOCAL) {
4448     rt2 = __ip_route_output_key(net, &fl4_dec);
4449     if (IS_ERR(rt2))
4450     diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
4451     index 8b4ffd216839..9f0a7b96646f 100644
4452     --- a/net/ipv4/ip_forward.c
4453     +++ b/net/ipv4/ip_forward.c
4454     @@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb)
4455     if (opt->is_strictroute && rt->rt_uses_gateway)
4456     goto sr_failed;
4457    
4458     - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
4459     + IPCB(skb)->flags |= IPSKB_FORWARDED;
4460     mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
4461     if (ip_exceeds_mtu(skb, mtu)) {
4462     IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
4463     diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
4464     index dde37fb340bf..307daed9a4b9 100644
4465     --- a/net/ipv4/ip_output.c
4466     +++ b/net/ipv4/ip_output.c
4467     @@ -223,11 +223,9 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
4468     struct sk_buff *segs;
4469     int ret = 0;
4470    
4471     - /* common case: fragmentation of segments is not allowed,
4472     - * or seglen is <= mtu
4473     + /* common case: seglen is <= mtu
4474     */
4475     - if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
4476     - skb_gso_validate_mtu(skb, mtu))
4477     + if (skb_gso_validate_mtu(skb, mtu))
4478     return ip_finish_output2(net, sk, skb);
4479    
4480     /* Slowpath - GSO segment length is exceeding the dst MTU.
4481     diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
4482     index 0f227db0e9ac..afd6b5968caf 100644
4483     --- a/net/ipv4/ip_tunnel_core.c
4484     +++ b/net/ipv4/ip_tunnel_core.c
4485     @@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
4486     int pkt_len = skb->len - skb_inner_network_offset(skb);
4487     struct net *net = dev_net(rt->dst.dev);
4488     struct net_device *dev = skb->dev;
4489     - int skb_iif = skb->skb_iif;
4490     struct iphdr *iph;
4491     int err;
4492    
4493     @@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
4494     skb_dst_set(skb, &rt->dst);
4495     memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
4496    
4497     - if (skb_iif && !(df & htons(IP_DF))) {
4498     - /* Arrived from an ingress interface, got encapsulated, with
4499     - * fragmentation of encapulating frames allowed.
4500     - * If skb is gso, the resulting encapsulated network segments
4501     - * may exceed dst mtu.
4502     - * Allow IP Fragmentation of segments.
4503     - */
4504     - IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
4505     - }
4506     -
4507     /* Push down and install the IP header. */
4508     skb_push(skb, sizeof(struct iphdr));
4509     skb_reset_network_header(skb);
4510     diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
4511     index 5f006e13de56..27089f5ebbb1 100644
4512     --- a/net/ipv4/ipmr.c
4513     +++ b/net/ipv4/ipmr.c
4514     @@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
4515     vif->dev->stats.tx_bytes += skb->len;
4516     }
4517    
4518     - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
4519     + IPCB(skb)->flags |= IPSKB_FORWARDED;
4520    
4521     /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
4522     * not only before forwarding, but after forwarding on all output
4523     diff --git a/net/ipv4/route.c b/net/ipv4/route.c
4524     index 62c3ed0b7556..2f23ef1a8486 100644
4525     --- a/net/ipv4/route.c
4526     +++ b/net/ipv4/route.c
4527     @@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
4528     goto reject_redirect;
4529     }
4530    
4531     - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
4532     + n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
4533     + if (!n)
4534     + n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
4535     if (!IS_ERR(n)) {
4536     if (!(n->nud_state & NUD_VALID)) {
4537     neigh_event_send(n, NULL);
4538     diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
4539     index ffbb218de520..c876f5ddc86c 100644
4540     --- a/net/ipv4/tcp.c
4541     +++ b/net/ipv4/tcp.c
4542     @@ -1145,7 +1145,7 @@ restart:
4543    
4544     err = -EPIPE;
4545     if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
4546     - goto out_err;
4547     + goto do_error;
4548    
4549     sg = !!(sk->sk_route_caps & NETIF_F_SG);
4550    
4551     @@ -1219,7 +1219,7 @@ new_segment:
4552    
4553     if (!skb_can_coalesce(skb, i, pfrag->page,
4554     pfrag->offset)) {
4555     - if (i == sysctl_max_skb_frags || !sg) {
4556     + if (i >= sysctl_max_skb_frags || !sg) {
4557     tcp_mark_push(tp, skb);
4558     goto new_segment;
4559     }
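
The two net/ipv4/tcp.c hunks adjust do_tcp_sendpages(): a socket that already has an error or a send shutdown now leaves through do_error, so the pending socket error is reported to the caller, and the page-fragment limit is tested with ">=" instead of "==" because net.core.max_skb_frags is a writable sysctl and may have been lowered below the number of fragments an in-flight skb already holds. A hypothetical standalone illustration (not kernel code) of why ">=" is the safe form once the limit can shrink underneath the counter:

/* Hypothetical illustration: when a limit is lowered below a counter that
 * has already passed it, an equality test never fires again, while a ">="
 * test still does.
 */
#include <stdio.h>

static int max_frags = 17;              /* stand-in for net.core.max_skb_frags */

static int need_new_segment(int nr_frags)
{
    return nr_frags >= max_frags;       /* the form used after this fix */
}

int main(void)
{
    int nr_frags = 12;                  /* fragments already in the skb */

    max_frags = 8;                      /* sysctl lowered at runtime    */

    printf("'==' fires: %d\n", nr_frags == max_frags);      /* 0 - missed */
    printf("'>=' fires: %d\n", need_new_segment(nr_frags)); /* 1 - caught */
    return 0;
}
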
4560     diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
4561     index 10d728b6804c..ab37c6775630 100644
4562     --- a/net/ipv4/tcp_dctcp.c
4563     +++ b/net/ipv4/tcp_dctcp.c
4564     @@ -56,6 +56,7 @@ struct dctcp {
4565     u32 next_seq;
4566     u32 ce_state;
4567     u32 delayed_ack_reserved;
4568     + u32 loss_cwnd;
4569     };
4570    
4571     static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
4572     @@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
4573     ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
4574    
4575     ca->delayed_ack_reserved = 0;
4576     + ca->loss_cwnd = 0;
4577     ca->ce_state = 0;
4578    
4579     dctcp_reset(tp, ca);
4580     @@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
4581    
4582     static u32 dctcp_ssthresh(struct sock *sk)
4583     {
4584     - const struct dctcp *ca = inet_csk_ca(sk);
4585     + struct dctcp *ca = inet_csk_ca(sk);
4586     struct tcp_sock *tp = tcp_sk(sk);
4587    
4588     + ca->loss_cwnd = tp->snd_cwnd;
4589     return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
4590     }
4591    
4592     @@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
4593     return 0;
4594     }
4595    
4596     +static u32 dctcp_cwnd_undo(struct sock *sk)
4597     +{
4598     + const struct dctcp *ca = inet_csk_ca(sk);
4599     +
4600     + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
4601     +}
4602     +
4603     static struct tcp_congestion_ops dctcp __read_mostly = {
4604     .init = dctcp_init,
4605     .in_ack_event = dctcp_update_alpha,
4606     .cwnd_event = dctcp_cwnd_event,
4607     .ssthresh = dctcp_ssthresh,
4608     .cong_avoid = tcp_reno_cong_avoid,
4609     + .undo_cwnd = dctcp_cwnd_undo,
4610     .set_state = dctcp_state,
4611     .get_info = dctcp_get_info,
4612     .flags = TCP_CONG_NEEDS_ECN,
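
The tcp_dctcp.c hunks record snd_cwnd in the new loss_cwnd field each time dctcp_ssthresh() backs the window off, and register dctcp_cwnd_undo() so that a spuriously detected loss restores at least that saved window. A minimal userspace sketch of the same arithmetic follows; it assumes, as in the hunk, that alpha is scaled so 1024 means every packet was CE-marked (the ">> 11" is then a division by 2048, i.e. a backoff of alpha/2), and the struct and function names below are hypothetical.

/* Hypothetical userspace mirror of the dctcp arithmetic above. */
#include <stdio.h>

struct dctcp_sketch {
    unsigned int snd_cwnd;
    unsigned int dctcp_alpha;   /* 0..1024; 1024 == every packet CE-marked */
    unsigned int loss_cwnd;     /* cwnd remembered at the last backoff     */
};

static unsigned int dctcp_sketch_ssthresh(struct dctcp_sketch *ca)
{
    unsigned int reduced;

    ca->loss_cwnd = ca->snd_cwnd;                 /* saved for a later undo */
    reduced = ca->snd_cwnd - ((ca->snd_cwnd * ca->dctcp_alpha) >> 11U);
    return reduced > 2U ? reduced : 2U;           /* never below 2 segments */
}

static unsigned int dctcp_sketch_cwnd_undo(const struct dctcp_sketch *ca)
{
    return ca->snd_cwnd > ca->loss_cwnd ? ca->snd_cwnd : ca->loss_cwnd;
}

int main(void)
{
    struct dctcp_sketch ca = { .snd_cwnd = 100, .dctcp_alpha = 512 };

    ca.snd_cwnd = dctcp_sketch_ssthresh(&ca);     /* 100 - 100*512/2048 = 75 */
    printf("after backoff: %u, after undo: %u\n",
           ca.snd_cwnd, dctcp_sketch_cwnd_undo(&ca)); /* 75, 100 */
    return 0;
}
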
4613     diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
4614     index 7158d4f8dae4..7b235fa12903 100644
4615     --- a/net/ipv4/tcp_ipv4.c
4616     +++ b/net/ipv4/tcp_ipv4.c
4617     @@ -1537,6 +1537,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
4618     }
4619     EXPORT_SYMBOL(tcp_prequeue);
4620    
4621     +int tcp_filter(struct sock *sk, struct sk_buff *skb)
4622     +{
4623     + struct tcphdr *th = (struct tcphdr *)skb->data;
4624     + unsigned int eaten = skb->len;
4625     + int err;
4626     +
4627     + err = sk_filter_trim_cap(sk, skb, th->doff * 4);
4628     + if (!err) {
4629     + eaten -= skb->len;
4630     + TCP_SKB_CB(skb)->end_seq -= eaten;
4631     + }
4632     + return err;
4633     +}
4634     +EXPORT_SYMBOL(tcp_filter);
4635     +
4636     /*
4637     * From tcp_input.c
4638     */
4639     @@ -1648,8 +1663,10 @@ process:
4640    
4641     nf_reset(skb);
4642    
4643     - if (sk_filter(sk, skb))
4644     + if (tcp_filter(sk, skb))
4645     goto discard_and_relse;
4646     + th = (const struct tcphdr *)skb->data;
4647     + iph = ip_hdr(skb);
4648    
4649     skb->dev = NULL;
4650    
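The tcp_ipv4.c hunk above introduces tcp_filter(), which runs the socket's attached filter through sk_filter_trim_cap() with a floor of th->doff * 4: a filter may shorten a queued segment but can never cut into the TCP header, and end_seq is reduced by exactly the number of bytes trimmed. The receive path then reloads th and iph because the filter may have reallocated the skb; the tcp_ipv6.c hunk further down makes the same switch. As a hedged userspace illustration of the kind of truncating classic-BPF socket filter this path has to tolerate, here is a minimal program attaching a "keep at most 64 bytes" filter with SO_ATTACH_FILTER (the filter program and the fallback #define are illustrative only):

/* Hypothetical userspace sketch: attach a truncating classic-BPF filter to
 * a TCP socket.  The filter accepts every packet but asks the kernel to
 * keep at most 64 bytes of it; tcp_filter() caps such trimming at the TCP
 * header length.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <linux/filter.h>
#include <unistd.h>

#ifndef SO_ATTACH_FILTER
#define SO_ATTACH_FILTER 26     /* asm-generic value; fallback only */
#endif

int main(void)
{
    struct sock_filter code[] = {
        BPF_STMT(BPF_RET | BPF_K, 64),      /* accept, keep at most 64 bytes */
    };
    struct sock_fprog prog = {
        .len    = sizeof(code) / sizeof(code[0]),
        .filter = code,
    };

    int fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd < 0) {
        perror("socket");
        return 1;
    }
    if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0)
        perror("SO_ATTACH_FILTER");

    close(fd);
    return 0;
}
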
4651     diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
4652     index bd59c343d35f..7370ad2e693a 100644
4653     --- a/net/ipv6/icmp.c
4654     +++ b/net/ipv6/icmp.c
4655     @@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
4656     if (__ipv6_addr_needs_scope_id(addr_type))
4657     iif = skb->dev->ifindex;
4658     else
4659     - iif = l3mdev_master_ifindex(skb->dev);
4660     + iif = l3mdev_master_ifindex(skb_dst(skb)->dev);
4661    
4662     /*
4663     * Must not send error if the source does not uniquely
4664     diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
4665     index fc67822c42e0..af6a09efad5b 100644
4666     --- a/net/ipv6/tcp_ipv6.c
4667     +++ b/net/ipv6/tcp_ipv6.c
4668     @@ -1228,7 +1228,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
4669     if (skb->protocol == htons(ETH_P_IP))
4670     return tcp_v4_do_rcv(sk, skb);
4671    
4672     - if (sk_filter(sk, skb))
4673     + if (tcp_filter(sk, skb))
4674     goto discard;
4675    
4676     /*
4677     @@ -1455,8 +1455,10 @@ process:
4678     if (tcp_v6_inbound_md5_hash(sk, skb))
4679     goto discard_and_relse;
4680    
4681     - if (sk_filter(sk, skb))
4682     + if (tcp_filter(sk, skb))
4683     goto discard_and_relse;
4684     + th = (const struct tcphdr *)skb->data;
4685     + hdr = ipv6_hdr(skb);
4686    
4687     skb->dev = NULL;
4688    
4689     diff --git a/net/sctp/socket.c b/net/sctp/socket.c
4690     index baccbf3c1c60..7b0e059bf13b 100644
4691     --- a/net/sctp/socket.c
4692     +++ b/net/sctp/socket.c
4693     @@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk,
4694    
4695     timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
4696    
4697     - err = sctp_wait_for_connect(asoc, &timeo);
4698     - if ((err == 0 || err == -EINPROGRESS) && assoc_id)
4699     + if (assoc_id)
4700     *assoc_id = asoc->assoc_id;
4701     + err = sctp_wait_for_connect(asoc, &timeo);
4702     + /* Note: the asoc may be freed after the return of
4703     + * sctp_wait_for_connect.
4704     + */
4705    
4706     /* Don't free association on exit. */
4707     asoc = NULL;
4708     @@ -4278,19 +4281,18 @@ static void sctp_shutdown(struct sock *sk, int how)
4709     {
4710     struct net *net = sock_net(sk);
4711     struct sctp_endpoint *ep;
4712     - struct sctp_association *asoc;
4713    
4714     if (!sctp_style(sk, TCP))
4715     return;
4716    
4717     - if (how & SEND_SHUTDOWN) {
4718     + ep = sctp_sk(sk)->ep;
4719     + if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
4720     + struct sctp_association *asoc;
4721     +
4722     sk->sk_state = SCTP_SS_CLOSING;
4723     - ep = sctp_sk(sk)->ep;
4724     - if (!list_empty(&ep->asocs)) {
4725     - asoc = list_entry(ep->asocs.next,
4726     - struct sctp_association, asocs);
4727     - sctp_primitive_SHUTDOWN(net, asoc, NULL);
4728     - }
4729     + asoc = list_entry(ep->asocs.next,
4730     + struct sctp_association, asocs);
4731     + sctp_primitive_SHUTDOWN(net, asoc, NULL);
4732     }
4733     }
4734    
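In the net/sctp/socket.c hunks, __sctp_connect() now copies the association id to the caller before sleeping in sctp_wait_for_connect(), because the association may already have been freed by the time the wait returns, and sctp_shutdown() only moves the socket to SCTP_SS_CLOSING when an association actually exists. Seen from userspace, the id in question is the one sctp_connectx() reports through its last argument; the minimal sketch below shows that call on a non-blocking socket. It assumes lksctp-tools (<netinet/sctp.h>, link with -lsctp), and the address and port are illustrative.

/* Hypothetical userspace sketch: non-blocking sctp_connectx() still hands
 * back an association id even when it returns -1/EINPROGRESS.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <unistd.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
    if (fd < 0) {
        perror("socket");
        return 1;
    }
    fcntl(fd, F_SETFL, O_NONBLOCK);

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(5000);                  /* illustrative port */
    inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);

    sctp_assoc_t id = 0;
    if (sctp_connectx(fd, (struct sockaddr *)&addr, 1, &id) < 0 &&
        errno != EINPROGRESS) {
        perror("sctp_connectx");
        close(fd);
        return 1;
    }
    printf("association id: %d\n", (int)id);

    close(fd);
    return 0;
}
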
4735     diff --git a/net/socket.c b/net/socket.c
4736     index a1bd16106625..03bc2c289c94 100644
4737     --- a/net/socket.c
4738     +++ b/net/socket.c
4739     @@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
4740     if (err)
4741     break;
4742     ++datagrams;
4743     + if (msg_data_left(&msg_sys))
4744     + break;
4745     cond_resched();
4746     }
4747    
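The net/socket.c hunk makes __sys_sendmmsg() stop iterating as soon as one message was only partially transmitted (msg_data_left() is non-zero) instead of pushing on with the rest of the vector. The userspace contract is unchanged: sendmmsg() returns how many messages were fully handed over, which may be fewer than vlen, so callers resubmit the remainder. A minimal UDP sketch of that calling pattern (destination address, port and payloads are illustrative):

/* Hypothetical userspace sketch of the sendmmsg() resubmit loop. */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>

#define NMSG 4

int main(void)
{
    int fd = socket(AF_INET, SOCK_DGRAM, 0);
    if (fd < 0) {
        perror("socket");
        return 1;
    }

    struct sockaddr_in dst = { .sin_family = AF_INET, .sin_port = htons(9) };
    inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);   /* illustrative target */

    char payload[NMSG][16];
    struct iovec iov[NMSG];
    struct mmsghdr msgs[NMSG];
    memset(msgs, 0, sizeof(msgs));

    for (int i = 0; i < NMSG; i++) {
        snprintf(payload[i], sizeof(payload[i]), "datagram %d", i);
        iov[i].iov_base = payload[i];
        iov[i].iov_len  = strlen(payload[i]);
        msgs[i].msg_hdr.msg_name    = &dst;
        msgs[i].msg_hdr.msg_namelen = sizeof(dst);
        msgs[i].msg_hdr.msg_iov     = &iov[i];
        msgs[i].msg_hdr.msg_iovlen  = 1;
    }

    int sent = 0;
    while (sent < NMSG) {
        int n = sendmmsg(fd, msgs + sent, NMSG - sent, 0);
        if (n < 0) {
            perror("sendmmsg");
            break;
        }
        sent += n;      /* n may be smaller than requested; resubmit the rest */
    }

    close(fd);
    return 0;
}
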
4748     diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
4749     index f3825b676e38..f046b77cfefe 100644
4750     --- a/tools/spi/spidev_test.c
4751     +++ b/tools/spi/spidev_test.c
4752     @@ -19,6 +19,7 @@
4753     #include <getopt.h>
4754     #include <fcntl.h>
4755     #include <sys/ioctl.h>
4756     +#include <linux/ioctl.h>
4757     #include <sys/stat.h>
4758     #include <linux/types.h>
4759     #include <linux/spi/spidev.h>
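
The tools/spi/spidev_test.c hunk only adds <linux/ioctl.h>, so the _IOC_* helpers that the SPI_IOC_* request macros expand to are pulled in explicitly instead of relying on indirect includes. A minimal sketch of the spidev ioctl usage the tool is built around (the device node path is illustrative):

/* Hypothetical userspace sketch: query mode and max speed from a spidev node. */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>
#include <linux/types.h>
#include <linux/spi/spidev.h>

int main(void)
{
    int fd = open("/dev/spidev0.0", O_RDWR);    /* illustrative device node */
    if (fd < 0) {
        perror("open");
        return 1;
    }

    uint8_t mode = 0;
    uint32_t speed = 0;
    if (ioctl(fd, SPI_IOC_RD_MODE, &mode) < 0)
        perror("SPI_IOC_RD_MODE");
    if (ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed) < 0)
        perror("SPI_IOC_RD_MAX_SPEED_HZ");

    printf("mode=0x%02x max_speed=%u Hz\n", mode, speed);
    close(fd);
    return 0;
}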