Magellan Linux

Annotation of /trunk/kernel-alx/patches-4.4/0133-4.4.34-all-fixes.patch

Parent Directory | Revision Log


Revision 2869 - (hide annotations) (download)
Mon Mar 27 13:49:12 2017 UTC (7 years, 1 month ago) by niro
File size: 133458 byte(s)
linux-4.4.34
1 niro 2869 diff --git a/Makefile b/Makefile
2     index a513c045c8de..30924aabf1b4 100644
3     --- a/Makefile
4     +++ b/Makefile
5     @@ -1,6 +1,6 @@
6     VERSION = 4
7     PATCHLEVEL = 4
8     -SUBLEVEL = 33
9     +SUBLEVEL = 34
10     EXTRAVERSION =
11     NAME = Blurry Fish Butt
12    
13     diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
14     index 70067ce184b1..f7de0dbc38af 100644
15     --- a/arch/sparc/include/asm/mmu_64.h
16     +++ b/arch/sparc/include/asm/mmu_64.h
17     @@ -92,7 +92,8 @@ struct tsb_config {
18     typedef struct {
19     spinlock_t lock;
20     unsigned long sparc64_ctx_val;
21     - unsigned long huge_pte_count;
22     + unsigned long hugetlb_pte_count;
23     + unsigned long thp_pte_count;
24     struct tsb_config tsb_block[MM_NUM_TSBS];
25     struct hv_tsb_descr tsb_descr[MM_NUM_TSBS];
26     } mm_context_t;
27     diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
28     index ea6e9a20f3ff..f428512481f9 100644
29     --- a/arch/sparc/include/asm/uaccess_64.h
30     +++ b/arch/sparc/include/asm/uaccess_64.h
31     @@ -98,7 +98,6 @@ struct exception_table_entry {
32     unsigned int insn, fixup;
33     };
34    
35     -void __ret_efault(void);
36     void __retl_efault(void);
37    
38     /* Uh, these should become the main single-value transfer routines..
39     @@ -179,20 +178,6 @@ int __put_user_bad(void);
40     __gu_ret; \
41     })
42    
43     -#define __get_user_nocheck_ret(data, addr, size, type, retval) ({ \
44     - register unsigned long __gu_val __asm__ ("l1"); \
45     - switch (size) { \
46     - case 1: __get_user_asm_ret(__gu_val, ub, addr, retval); break; \
47     - case 2: __get_user_asm_ret(__gu_val, uh, addr, retval); break; \
48     - case 4: __get_user_asm_ret(__gu_val, uw, addr, retval); break; \
49     - case 8: __get_user_asm_ret(__gu_val, x, addr, retval); break; \
50     - default: \
51     - if (__get_user_bad()) \
52     - return retval; \
53     - } \
54     - data = (__force type) __gu_val; \
55     -})
56     -
57     #define __get_user_asm(x, size, addr, ret) \
58     __asm__ __volatile__( \
59     "/* Get user asm, inline. */\n" \
60     @@ -214,80 +199,35 @@ __asm__ __volatile__( \
61     : "=r" (ret), "=r" (x) : "r" (__m(addr)), \
62     "i" (-EFAULT))
63    
64     -#define __get_user_asm_ret(x, size, addr, retval) \
65     -if (__builtin_constant_p(retval) && retval == -EFAULT) \
66     - __asm__ __volatile__( \
67     - "/* Get user asm ret, inline. */\n" \
68     - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \
69     - ".section __ex_table,\"a\"\n\t" \
70     - ".align 4\n\t" \
71     - ".word 1b,__ret_efault\n\n\t" \
72     - ".previous\n\t" \
73     - : "=r" (x) : "r" (__m(addr))); \
74     -else \
75     - __asm__ __volatile__( \
76     - "/* Get user asm ret, inline. */\n" \
77     - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \
78     - ".section .fixup,#alloc,#execinstr\n\t" \
79     - ".align 4\n" \
80     - "3:\n\t" \
81     - "ret\n\t" \
82     - " restore %%g0, %2, %%o0\n\n\t" \
83     - ".previous\n\t" \
84     - ".section __ex_table,\"a\"\n\t" \
85     - ".align 4\n\t" \
86     - ".word 1b, 3b\n\n\t" \
87     - ".previous\n\t" \
88     - : "=r" (x) : "r" (__m(addr)), "i" (retval))
89     -
90     int __get_user_bad(void);
91    
92     unsigned long __must_check ___copy_from_user(void *to,
93     const void __user *from,
94     unsigned long size);
95     -unsigned long copy_from_user_fixup(void *to, const void __user *from,
96     - unsigned long size);
97     static inline unsigned long __must_check
98     copy_from_user(void *to, const void __user *from, unsigned long size)
99     {
100     - unsigned long ret = ___copy_from_user(to, from, size);
101     -
102     - if (unlikely(ret))
103     - ret = copy_from_user_fixup(to, from, size);
104     -
105     - return ret;
106     + return ___copy_from_user(to, from, size);
107     }
108     #define __copy_from_user copy_from_user
109    
110     unsigned long __must_check ___copy_to_user(void __user *to,
111     const void *from,
112     unsigned long size);
113     -unsigned long copy_to_user_fixup(void __user *to, const void *from,
114     - unsigned long size);
115     static inline unsigned long __must_check
116     copy_to_user(void __user *to, const void *from, unsigned long size)
117     {
118     - unsigned long ret = ___copy_to_user(to, from, size);
119     -
120     - if (unlikely(ret))
121     - ret = copy_to_user_fixup(to, from, size);
122     - return ret;
123     + return ___copy_to_user(to, from, size);
124     }
125     #define __copy_to_user copy_to_user
126    
127     unsigned long __must_check ___copy_in_user(void __user *to,
128     const void __user *from,
129     unsigned long size);
130     -unsigned long copy_in_user_fixup(void __user *to, void __user *from,
131     - unsigned long size);
132     static inline unsigned long __must_check
133     copy_in_user(void __user *to, void __user *from, unsigned long size)
134     {
135     - unsigned long ret = ___copy_in_user(to, from, size);
136     -
137     - if (unlikely(ret))
138     - ret = copy_in_user_fixup(to, from, size);
139     - return ret;
140     + return ___copy_in_user(to, from, size);
141     }
142     #define __copy_in_user copy_in_user
143    
144     diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S
145     index d668ca149e64..4087a62f96b0 100644
146     --- a/arch/sparc/kernel/dtlb_prot.S
147     +++ b/arch/sparc/kernel/dtlb_prot.S
148     @@ -25,13 +25,13 @@
149    
150     /* PROT ** ICACHE line 2: More real fault processing */
151     ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5
152     + srlx %g5, PAGE_SHIFT, %g5
153     + sllx %g5, PAGE_SHIFT, %g5 ! Clear context ID bits
154     bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup
155     mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4
156     ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault
157     nop
158     nop
159     - nop
160     - nop
161    
162     /* PROT ** ICACHE line 3: Unused... */
163     nop
164     diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
165     index 51faf92ace00..7eeeb1d5a410 100644
166     --- a/arch/sparc/kernel/head_64.S
167     +++ b/arch/sparc/kernel/head_64.S
168     @@ -922,47 +922,11 @@ prom_tba: .xword 0
169     tlb_type: .word 0 /* Must NOT end up in BSS */
170     .section ".fixup",#alloc,#execinstr
171    
172     - .globl __ret_efault, __retl_efault, __ret_one, __retl_one
173     -ENTRY(__ret_efault)
174     - ret
175     - restore %g0, -EFAULT, %o0
176     -ENDPROC(__ret_efault)
177     -
178     ENTRY(__retl_efault)
179     retl
180     mov -EFAULT, %o0
181     ENDPROC(__retl_efault)
182    
183     -ENTRY(__retl_one)
184     - retl
185     - mov 1, %o0
186     -ENDPROC(__retl_one)
187     -
188     -ENTRY(__retl_one_fp)
189     - VISExitHalf
190     - retl
191     - mov 1, %o0
192     -ENDPROC(__retl_one_fp)
193     -
194     -ENTRY(__ret_one_asi)
195     - wr %g0, ASI_AIUS, %asi
196     - ret
197     - restore %g0, 1, %o0
198     -ENDPROC(__ret_one_asi)
199     -
200     -ENTRY(__retl_one_asi)
201     - wr %g0, ASI_AIUS, %asi
202     - retl
203     - mov 1, %o0
204     -ENDPROC(__retl_one_asi)
205     -
206     -ENTRY(__retl_one_asi_fp)
207     - wr %g0, ASI_AIUS, %asi
208     - VISExitHalf
209     - retl
210     - mov 1, %o0
211     -ENDPROC(__retl_one_asi_fp)
212     -
213     ENTRY(__retl_o1)
214     retl
215     mov %o1, %o0
216     diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
217     index 59bbeff55024..07933b9e9ce0 100644
218     --- a/arch/sparc/kernel/jump_label.c
219     +++ b/arch/sparc/kernel/jump_label.c
220     @@ -13,19 +13,30 @@
221     void arch_jump_label_transform(struct jump_entry *entry,
222     enum jump_label_type type)
223     {
224     - u32 val;
225     u32 *insn = (u32 *) (unsigned long) entry->code;
226     + u32 val;
227    
228     if (type == JUMP_LABEL_JMP) {
229     s32 off = (s32)entry->target - (s32)entry->code;
230     + bool use_v9_branch = false;
231     +
232     + BUG_ON(off & 3);
233    
234     #ifdef CONFIG_SPARC64
235     - /* ba,pt %xcc, . + (off << 2) */
236     - val = 0x10680000 | ((u32) off >> 2);
237     -#else
238     - /* ba . + (off << 2) */
239     - val = 0x10800000 | ((u32) off >> 2);
240     + if (off <= 0xfffff && off >= -0x100000)
241     + use_v9_branch = true;
242     #endif
243     + if (use_v9_branch) {
244     + /* WDISP19 - target is . + immed << 2 */
245     + /* ba,pt %xcc, . + off */
246     + val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
247     + } else {
248     + /* WDISP22 - target is . + immed << 2 */
249     + BUG_ON(off > 0x7fffff);
250     + BUG_ON(off < -0x800000);
251     + /* ba . + off */
252     + val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
253     + }
254     } else {
255     val = 0x01000000;
256     }
257     diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
258     index ef0d8e9e1210..f22bec0db645 100644
259     --- a/arch/sparc/kernel/ktlb.S
260     +++ b/arch/sparc/kernel/ktlb.S
261     @@ -20,6 +20,10 @@ kvmap_itlb:
262     mov TLB_TAG_ACCESS, %g4
263     ldxa [%g4] ASI_IMMU, %g4
264    
265     + /* The kernel executes in context zero, therefore we do not
266     + * need to clear the context ID bits out of %g4 here.
267     + */
268     +
269     /* sun4v_itlb_miss branches here with the missing virtual
270     * address already loaded into %g4
271     */
272     @@ -128,6 +132,10 @@ kvmap_dtlb:
273     mov TLB_TAG_ACCESS, %g4
274     ldxa [%g4] ASI_DMMU, %g4
275    
276     + /* The kernel executes in context zero, therefore we do not
277     + * need to clear the context ID bits out of %g4 here.
278     + */
279     +
280     /* sun4v_dtlb_miss branches here with the missing virtual
281     * address already loaded into %g4
282     */
283     @@ -251,6 +259,10 @@ kvmap_dtlb_longpath:
284     nop
285     .previous
286    
287     + /* The kernel executes in context zero, therefore we do not
288     + * need to clear the context ID bits out of %g5 here.
289     + */
290     +
291     be,pt %xcc, sparc64_realfault_common
292     mov FAULT_CODE_DTLB, %g4
293     ba,pt %xcc, winfix_trampoline
294     diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
295     index a92d5d2c46a3..51b25325a961 100644
296     --- a/arch/sparc/kernel/sparc_ksyms_64.c
297     +++ b/arch/sparc/kernel/sparc_ksyms_64.c
298     @@ -27,7 +27,6 @@ EXPORT_SYMBOL(__flushw_user);
299     EXPORT_SYMBOL_GPL(real_hard_smp_processor_id);
300    
301     /* from head_64.S */
302     -EXPORT_SYMBOL(__ret_efault);
303     EXPORT_SYMBOL(tlb_type);
304     EXPORT_SYMBOL(sun4v_chip_type);
305     EXPORT_SYMBOL(prom_root_node);
306     diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
307     index be98685c14c6..d568c8207af7 100644
308     --- a/arch/sparc/kernel/tsb.S
309     +++ b/arch/sparc/kernel/tsb.S
310     @@ -29,13 +29,17 @@
311     */
312     tsb_miss_dtlb:
313     mov TLB_TAG_ACCESS, %g4
314     + ldxa [%g4] ASI_DMMU, %g4
315     + srlx %g4, PAGE_SHIFT, %g4
316     ba,pt %xcc, tsb_miss_page_table_walk
317     - ldxa [%g4] ASI_DMMU, %g4
318     + sllx %g4, PAGE_SHIFT, %g4
319    
320     tsb_miss_itlb:
321     mov TLB_TAG_ACCESS, %g4
322     + ldxa [%g4] ASI_IMMU, %g4
323     + srlx %g4, PAGE_SHIFT, %g4
324     ba,pt %xcc, tsb_miss_page_table_walk
325     - ldxa [%g4] ASI_IMMU, %g4
326     + sllx %g4, PAGE_SHIFT, %g4
327    
328     /* At this point we have:
329     * %g1 -- PAGE_SIZE TSB entry address
330     @@ -284,6 +288,10 @@ tsb_do_dtlb_fault:
331     nop
332     .previous
333    
334     + /* Clear context ID bits. */
335     + srlx %g5, PAGE_SHIFT, %g5
336     + sllx %g5, PAGE_SHIFT, %g5
337     +
338     be,pt %xcc, sparc64_realfault_common
339     mov FAULT_CODE_DTLB, %g4
340     ba,pt %xcc, winfix_trampoline
341     diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S
342     index b7d0bd6b1406..69a439fa2fc1 100644
343     --- a/arch/sparc/lib/GENcopy_from_user.S
344     +++ b/arch/sparc/lib/GENcopy_from_user.S
345     @@ -3,11 +3,11 @@
346     * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
347     */
348    
349     -#define EX_LD(x) \
350     +#define EX_LD(x,y) \
351     98: x; \
352     .section __ex_table,"a";\
353     .align 4; \
354     - .word 98b, __retl_one; \
355     + .word 98b, y; \
356     .text; \
357     .align 4;
358    
359     diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S
360     index 780550e1afc7..9947427ce354 100644
361     --- a/arch/sparc/lib/GENcopy_to_user.S
362     +++ b/arch/sparc/lib/GENcopy_to_user.S
363     @@ -3,11 +3,11 @@
364     * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
365     */
366    
367     -#define EX_ST(x) \
368     +#define EX_ST(x,y) \
369     98: x; \
370     .section __ex_table,"a";\
371     .align 4; \
372     - .word 98b, __retl_one; \
373     + .word 98b, y; \
374     .text; \
375     .align 4;
376    
377     diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S
378     index 89358ee94851..059ea24ad73d 100644
379     --- a/arch/sparc/lib/GENmemcpy.S
380     +++ b/arch/sparc/lib/GENmemcpy.S
381     @@ -4,21 +4,18 @@
382     */
383    
384     #ifdef __KERNEL__
385     +#include <linux/linkage.h>
386     #define GLOBAL_SPARE %g7
387     #else
388     #define GLOBAL_SPARE %g5
389     #endif
390    
391     #ifndef EX_LD
392     -#define EX_LD(x) x
393     +#define EX_LD(x,y) x
394     #endif
395    
396     #ifndef EX_ST
397     -#define EX_ST(x) x
398     -#endif
399     -
400     -#ifndef EX_RETVAL
401     -#define EX_RETVAL(x) x
402     +#define EX_ST(x,y) x
403     #endif
404    
405     #ifndef LOAD
406     @@ -45,6 +42,29 @@
407     .register %g3,#scratch
408    
409     .text
410     +
411     +#ifndef EX_RETVAL
412     +#define EX_RETVAL(x) x
413     +ENTRY(GEN_retl_o4_1)
414     + add %o4, %o2, %o4
415     + retl
416     + add %o4, 1, %o0
417     +ENDPROC(GEN_retl_o4_1)
418     +ENTRY(GEN_retl_g1_8)
419     + add %g1, %o2, %g1
420     + retl
421     + add %g1, 8, %o0
422     +ENDPROC(GEN_retl_g1_8)
423     +ENTRY(GEN_retl_o2_4)
424     + retl
425     + add %o2, 4, %o0
426     +ENDPROC(GEN_retl_o2_4)
427     +ENTRY(GEN_retl_o2_1)
428     + retl
429     + add %o2, 1, %o0
430     +ENDPROC(GEN_retl_o2_1)
431     +#endif
432     +
433     .align 64
434    
435     .globl FUNC_NAME
436     @@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
437     sub %g0, %o4, %o4
438     sub %o2, %o4, %o2
439     1: subcc %o4, 1, %o4
440     - EX_LD(LOAD(ldub, %o1, %g1))
441     - EX_ST(STORE(stb, %g1, %o0))
442     + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
443     + EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
444     add %o1, 1, %o1
445     bne,pt %XCC, 1b
446     add %o0, 1, %o0
447     @@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
448     andn %o2, 0x7, %g1
449     sub %o2, %g1, %o2
450     1: subcc %g1, 0x8, %g1
451     - EX_LD(LOAD(ldx, %o1, %g2))
452     - EX_ST(STORE(stx, %g2, %o0))
453     + EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
454     + EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
455     add %o1, 0x8, %o1
456     bne,pt %XCC, 1b
457     add %o0, 0x8, %o0
458     @@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
459    
460     1:
461     subcc %o2, 4, %o2
462     - EX_LD(LOAD(lduw, %o1, %g1))
463     - EX_ST(STORE(stw, %g1, %o1 + %o3))
464     + EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
465     + EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
466     bgu,pt %XCC, 1b
467     add %o1, 4, %o1
468    
469     @@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
470     .align 32
471     90:
472     subcc %o2, 1, %o2
473     - EX_LD(LOAD(ldub, %o1, %g1))
474     - EX_ST(STORE(stb, %g1, %o1 + %o3))
475     + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
476     + EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
477     bgu,pt %XCC, 90b
478     add %o1, 1, %o1
479     retl
480     diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
481     index 3269b0234093..4f2384a4286a 100644
482     --- a/arch/sparc/lib/Makefile
483     +++ b/arch/sparc/lib/Makefile
484     @@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
485     lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
486     lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
487    
488     -lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
489     +lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
490     lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
491    
492     obj-$(CONFIG_SPARC64) += iomap.o
493     diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
494     index d5242b8c4f94..b79a6998d87c 100644
495     --- a/arch/sparc/lib/NG2copy_from_user.S
496     +++ b/arch/sparc/lib/NG2copy_from_user.S
497     @@ -3,19 +3,19 @@
498     * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
499     */
500    
501     -#define EX_LD(x) \
502     +#define EX_LD(x,y) \
503     98: x; \
504     .section __ex_table,"a";\
505     .align 4; \
506     - .word 98b, __retl_one_asi;\
507     + .word 98b, y; \
508     .text; \
509     .align 4;
510    
511     -#define EX_LD_FP(x) \
512     +#define EX_LD_FP(x,y) \
513     98: x; \
514     .section __ex_table,"a";\
515     .align 4; \
516     - .word 98b, __retl_one_asi_fp;\
517     + .word 98b, y##_fp; \
518     .text; \
519     .align 4;
520    
521     diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
522     index 4e962d993b10..dcec55f254ab 100644
523     --- a/arch/sparc/lib/NG2copy_to_user.S
524     +++ b/arch/sparc/lib/NG2copy_to_user.S
525     @@ -3,19 +3,19 @@
526     * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
527     */
528    
529     -#define EX_ST(x) \
530     +#define EX_ST(x,y) \
531     98: x; \
532     .section __ex_table,"a";\
533     .align 4; \
534     - .word 98b, __retl_one_asi;\
535     + .word 98b, y; \
536     .text; \
537     .align 4;
538    
539     -#define EX_ST_FP(x) \
540     +#define EX_ST_FP(x,y) \
541     98: x; \
542     .section __ex_table,"a";\
543     .align 4; \
544     - .word 98b, __retl_one_asi_fp;\
545     + .word 98b, y##_fp; \
546     .text; \
547     .align 4;
548    
549     diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
550     index d5f585df2f3f..c629dbd121b6 100644
551     --- a/arch/sparc/lib/NG2memcpy.S
552     +++ b/arch/sparc/lib/NG2memcpy.S
553     @@ -4,6 +4,7 @@
554     */
555    
556     #ifdef __KERNEL__
557     +#include <linux/linkage.h>
558     #include <asm/visasm.h>
559     #include <asm/asi.h>
560     #define GLOBAL_SPARE %g7
561     @@ -32,21 +33,17 @@
562     #endif
563    
564     #ifndef EX_LD
565     -#define EX_LD(x) x
566     +#define EX_LD(x,y) x
567     #endif
568     #ifndef EX_LD_FP
569     -#define EX_LD_FP(x) x
570     +#define EX_LD_FP(x,y) x
571     #endif
572    
573     #ifndef EX_ST
574     -#define EX_ST(x) x
575     +#define EX_ST(x,y) x
576     #endif
577     #ifndef EX_ST_FP
578     -#define EX_ST_FP(x) x
579     -#endif
580     -
581     -#ifndef EX_RETVAL
582     -#define EX_RETVAL(x) x
583     +#define EX_ST_FP(x,y) x
584     #endif
585    
586     #ifndef LOAD
587     @@ -140,45 +137,110 @@
588     fsrc2 %x6, %f12; \
589     fsrc2 %x7, %f14;
590     #define FREG_LOAD_1(base, x0) \
591     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
592     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
593     #define FREG_LOAD_2(base, x0, x1) \
594     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
595     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
596     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
597     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
598     #define FREG_LOAD_3(base, x0, x1, x2) \
599     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
600     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
601     - EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
602     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
603     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
604     + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
605     #define FREG_LOAD_4(base, x0, x1, x2, x3) \
606     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
607     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
608     - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
609     - EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
610     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
611     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
612     + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
613     + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
614     #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
615     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
616     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
617     - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
618     - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
619     - EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
620     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
621     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
622     + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
623     + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
624     + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
625     #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
626     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
627     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
628     - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
629     - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
630     - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
631     - EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
632     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
633     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
634     + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
635     + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
636     + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
637     + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
638     #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
639     - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
640     - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
641     - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
642     - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
643     - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
644     - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
645     - EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
646     + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
647     + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
648     + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
649     + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
650     + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
651     + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
652     + EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
653    
654     .register %g2,#scratch
655     .register %g3,#scratch
656    
657     .text
658     +#ifndef EX_RETVAL
659     +#define EX_RETVAL(x) x
660     +__restore_fp:
661     + VISExitHalf
662     +__restore_asi:
663     + retl
664     + wr %g0, ASI_AIUS, %asi
665     +ENTRY(NG2_retl_o2)
666     + ba,pt %xcc, __restore_asi
667     + mov %o2, %o0
668     +ENDPROC(NG2_retl_o2)
669     +ENTRY(NG2_retl_o2_plus_1)
670     + ba,pt %xcc, __restore_asi
671     + add %o2, 1, %o0
672     +ENDPROC(NG2_retl_o2_plus_1)
673     +ENTRY(NG2_retl_o2_plus_4)
674     + ba,pt %xcc, __restore_asi
675     + add %o2, 4, %o0
676     +ENDPROC(NG2_retl_o2_plus_4)
677     +ENTRY(NG2_retl_o2_plus_8)
678     + ba,pt %xcc, __restore_asi
679     + add %o2, 8, %o0
680     +ENDPROC(NG2_retl_o2_plus_8)
681     +ENTRY(NG2_retl_o2_plus_o4_plus_1)
682     + add %o4, 1, %o4
683     + ba,pt %xcc, __restore_asi
684     + add %o2, %o4, %o0
685     +ENDPROC(NG2_retl_o2_plus_o4_plus_1)
686     +ENTRY(NG2_retl_o2_plus_o4_plus_8)
687     + add %o4, 8, %o4
688     + ba,pt %xcc, __restore_asi
689     + add %o2, %o4, %o0
690     +ENDPROC(NG2_retl_o2_plus_o4_plus_8)
691     +ENTRY(NG2_retl_o2_plus_o4_plus_16)
692     + add %o4, 16, %o4
693     + ba,pt %xcc, __restore_asi
694     + add %o2, %o4, %o0
695     +ENDPROC(NG2_retl_o2_plus_o4_plus_16)
696     +ENTRY(NG2_retl_o2_plus_g1_fp)
697     + ba,pt %xcc, __restore_fp
698     + add %o2, %g1, %o0
699     +ENDPROC(NG2_retl_o2_plus_g1_fp)
700     +ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
701     + add %g1, 64, %g1
702     + ba,pt %xcc, __restore_fp
703     + add %o2, %g1, %o0
704     +ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
705     +ENTRY(NG2_retl_o2_plus_g1_plus_1)
706     + add %g1, 1, %g1
707     + ba,pt %xcc, __restore_asi
708     + add %o2, %g1, %o0
709     +ENDPROC(NG2_retl_o2_plus_g1_plus_1)
710     +ENTRY(NG2_retl_o2_and_7_plus_o4)
711     + and %o2, 7, %o2
712     + ba,pt %xcc, __restore_asi
713     + add %o2, %o4, %o0
714     +ENDPROC(NG2_retl_o2_and_7_plus_o4)
715     +ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
716     + and %o2, 7, %o2
717     + add %o4, 8, %o4
718     + ba,pt %xcc, __restore_asi
719     + add %o2, %o4, %o0
720     +ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
721     +#endif
722     +
723     .align 64
724    
725     .globl FUNC_NAME
726     @@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
727     sub %g0, %o4, %o4 ! bytes to align dst
728     sub %o2, %o4, %o2
729     1: subcc %o4, 1, %o4
730     - EX_LD(LOAD(ldub, %o1, %g1))
731     - EX_ST(STORE(stb, %g1, %o0))
732     + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
733     + EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
734     add %o1, 1, %o1
735     bne,pt %XCC, 1b
736     add %o0, 1, %o0
737     @@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
738     nop
739     /* fall through for 0 < low bits < 8 */
740     110: sub %o4, 64, %g2
741     - EX_LD_FP(LOAD_BLK(%g2, %f0))
742     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
743     - EX_LD_FP(LOAD_BLK(%o4, %f16))
744     + EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
745     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
746     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
747     FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
748     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
749     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
750     FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
751     subcc %g1, 64, %g1
752     add %o4, 64, %o4
753     @@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
754    
755     120: sub %o4, 56, %g2
756     FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
757     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
758     - EX_LD_FP(LOAD_BLK(%o4, %f16))
759     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
760     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
761     FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
762     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
763     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
764     FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
765     subcc %g1, 64, %g1
766     add %o4, 64, %o4
767     @@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
768    
769     130: sub %o4, 48, %g2
770     FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
771     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
772     - EX_LD_FP(LOAD_BLK(%o4, %f16))
773     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
774     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
775     FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
776     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
777     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
778     FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
779     subcc %g1, 64, %g1
780     add %o4, 64, %o4
781     @@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
782    
783     140: sub %o4, 40, %g2
784     FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
785     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
786     - EX_LD_FP(LOAD_BLK(%o4, %f16))
787     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
788     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
789     FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
790     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
791     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
792     FREG_MOVE_5(f22, f24, f26, f28, f30)
793     subcc %g1, 64, %g1
794     add %o4, 64, %o4
795     @@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
796    
797     150: sub %o4, 32, %g2
798     FREG_LOAD_4(%g2, f0, f2, f4, f6)
799     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
800     - EX_LD_FP(LOAD_BLK(%o4, %f16))
801     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
802     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
803     FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
804     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
805     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
806     FREG_MOVE_4(f24, f26, f28, f30)
807     subcc %g1, 64, %g1
808     add %o4, 64, %o4
809     @@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
810    
811     160: sub %o4, 24, %g2
812     FREG_LOAD_3(%g2, f0, f2, f4)
813     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
814     - EX_LD_FP(LOAD_BLK(%o4, %f16))
815     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
816     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
817     FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
818     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
819     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
820     FREG_MOVE_3(f26, f28, f30)
821     subcc %g1, 64, %g1
822     add %o4, 64, %o4
823     @@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
824    
825     170: sub %o4, 16, %g2
826     FREG_LOAD_2(%g2, f0, f2)
827     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
828     - EX_LD_FP(LOAD_BLK(%o4, %f16))
829     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
830     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
831     FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
832     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
833     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
834     FREG_MOVE_2(f28, f30)
835     subcc %g1, 64, %g1
836     add %o4, 64, %o4
837     @@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
838    
839     180: sub %o4, 8, %g2
840     FREG_LOAD_1(%g2, f0)
841     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
842     - EX_LD_FP(LOAD_BLK(%o4, %f16))
843     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
844     + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
845     FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
846     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
847     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
848     FREG_MOVE_1(f30)
849     subcc %g1, 64, %g1
850     add %o4, 64, %o4
851     @@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
852     nop
853    
854     190:
855     -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
856     +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
857     subcc %g1, 64, %g1
858     - EX_LD_FP(LOAD_BLK(%o4, %f0))
859     - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
860     + EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
861     + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
862     add %o4, 64, %o4
863     bne,pt %xcc, 1b
864     LOAD(prefetch, %o4 + 64, #one_read)
865     @@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
866     andn %o2, 0xf, %o4
867     and %o2, 0xf, %o2
868     1: subcc %o4, 0x10, %o4
869     - EX_LD(LOAD(ldx, %o1, %o5))
870     + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
871     add %o1, 0x08, %o1
872     - EX_LD(LOAD(ldx, %o1, %g1))
873     + EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
874     sub %o1, 0x08, %o1
875     - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
876     + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
877     add %o1, 0x8, %o1
878     - EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))
879     + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
880     bgu,pt %XCC, 1b
881     add %o1, 0x8, %o1
882     73: andcc %o2, 0x8, %g0
883     be,pt %XCC, 1f
884     nop
885     sub %o2, 0x8, %o2
886     - EX_LD(LOAD(ldx, %o1, %o5))
887     - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
888     + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
889     + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
890     add %o1, 0x8, %o1
891     1: andcc %o2, 0x4, %g0
892     be,pt %XCC, 1f
893     nop
894     sub %o2, 0x4, %o2
895     - EX_LD(LOAD(lduw, %o1, %o5))
896     - EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))
897     + EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
898     + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
899     add %o1, 0x4, %o1
900     1: cmp %o2, 0
901     be,pt %XCC, 85f
902     @@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
903     sub %o2, %g1, %o2
904    
905     1: subcc %g1, 1, %g1
906     - EX_LD(LOAD(ldub, %o1, %o5))
907     - EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))
908     + EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
909     + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
910     bgu,pt %icc, 1b
911     add %o1, 1, %o1
912    
913     @@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
914    
915     8: mov 64, GLOBAL_SPARE
916     andn %o1, 0x7, %o1
917     - EX_LD(LOAD(ldx, %o1, %g2))
918     + EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
919     sub GLOBAL_SPARE, %g1, GLOBAL_SPARE
920     andn %o2, 0x7, %o4
921     sllx %g2, %g1, %g2
922     1: add %o1, 0x8, %o1
923     - EX_LD(LOAD(ldx, %o1, %g3))
924     + EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
925     subcc %o4, 0x8, %o4
926     srlx %g3, GLOBAL_SPARE, %o5
927     or %o5, %g2, %o5
928     - EX_ST(STORE(stx, %o5, %o0))
929     + EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
930     add %o0, 0x8, %o0
931     bgu,pt %icc, 1b
932     sllx %g3, %g1, %g2
933     @@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
934    
935     1:
936     subcc %o2, 4, %o2
937     - EX_LD(LOAD(lduw, %o1, %g1))
938     - EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))
939     + EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
940     + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
941     bgu,pt %XCC, 1b
942     add %o1, 4, %o1
943    
944     @@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
945     .align 32
946     90:
947     subcc %o2, 1, %o2
948     - EX_LD(LOAD(ldub, %o1, %g1))
949     - EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))
950     + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
951     + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
952     bgu,pt %XCC, 90b
953     add %o1, 1, %o1
954     retl
955     diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
956     index 2e8ee7ad07a9..16a286c1a528 100644
957     --- a/arch/sparc/lib/NG4copy_from_user.S
958     +++ b/arch/sparc/lib/NG4copy_from_user.S
959     @@ -3,19 +3,19 @@
960     * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
961     */
962    
963     -#define EX_LD(x) \
964     +#define EX_LD(x, y) \
965     98: x; \
966     .section __ex_table,"a";\
967     .align 4; \
968     - .word 98b, __retl_one_asi;\
969     + .word 98b, y; \
970     .text; \
971     .align 4;
972    
973     -#define EX_LD_FP(x) \
974     +#define EX_LD_FP(x,y) \
975     98: x; \
976     .section __ex_table,"a";\
977     .align 4; \
978     - .word 98b, __retl_one_asi_fp;\
979     + .word 98b, y##_fp; \
980     .text; \
981     .align 4;
982    
983     diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
984     index be0bf4590df8..6b0276ffc858 100644
985     --- a/arch/sparc/lib/NG4copy_to_user.S
986     +++ b/arch/sparc/lib/NG4copy_to_user.S
987     @@ -3,19 +3,19 @@
988     * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
989     */
990    
991     -#define EX_ST(x) \
992     +#define EX_ST(x,y) \
993     98: x; \
994     .section __ex_table,"a";\
995     .align 4; \
996     - .word 98b, __retl_one_asi;\
997     + .word 98b, y; \
998     .text; \
999     .align 4;
1000    
1001     -#define EX_ST_FP(x) \
1002     +#define EX_ST_FP(x,y) \
1003     98: x; \
1004     .section __ex_table,"a";\
1005     .align 4; \
1006     - .word 98b, __retl_one_asi_fp;\
1007     + .word 98b, y##_fp; \
1008     .text; \
1009     .align 4;
1010    
1011     diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
1012     index 8e13ee1f4454..75bb93b1437f 100644
1013     --- a/arch/sparc/lib/NG4memcpy.S
1014     +++ b/arch/sparc/lib/NG4memcpy.S
1015     @@ -4,6 +4,7 @@
1016     */
1017    
1018     #ifdef __KERNEL__
1019     +#include <linux/linkage.h>
1020     #include <asm/visasm.h>
1021     #include <asm/asi.h>
1022     #define GLOBAL_SPARE %g7
1023     @@ -46,22 +47,19 @@
1024     #endif
1025    
1026     #ifndef EX_LD
1027     -#define EX_LD(x) x
1028     +#define EX_LD(x,y) x
1029     #endif
1030     #ifndef EX_LD_FP
1031     -#define EX_LD_FP(x) x
1032     +#define EX_LD_FP(x,y) x
1033     #endif
1034    
1035     #ifndef EX_ST
1036     -#define EX_ST(x) x
1037     +#define EX_ST(x,y) x
1038     #endif
1039     #ifndef EX_ST_FP
1040     -#define EX_ST_FP(x) x
1041     +#define EX_ST_FP(x,y) x
1042     #endif
1043    
1044     -#ifndef EX_RETVAL
1045     -#define EX_RETVAL(x) x
1046     -#endif
1047    
1048     #ifndef LOAD
1049     #define LOAD(type,addr,dest) type [addr], dest
1050     @@ -94,6 +92,158 @@
1051     .register %g3,#scratch
1052    
1053     .text
1054     +#ifndef EX_RETVAL
1055     +#define EX_RETVAL(x) x
1056     +__restore_asi_fp:
1057     + VISExitHalf
1058     +__restore_asi:
1059     + retl
1060     + wr %g0, ASI_AIUS, %asi
1061     +
1062     +ENTRY(NG4_retl_o2)
1063     + ba,pt %xcc, __restore_asi
1064     + mov %o2, %o0
1065     +ENDPROC(NG4_retl_o2)
1066     +ENTRY(NG4_retl_o2_plus_1)
1067     + ba,pt %xcc, __restore_asi
1068     + add %o2, 1, %o0
1069     +ENDPROC(NG4_retl_o2_plus_1)
1070     +ENTRY(NG4_retl_o2_plus_4)
1071     + ba,pt %xcc, __restore_asi
1072     + add %o2, 4, %o0
1073     +ENDPROC(NG4_retl_o2_plus_4)
1074     +ENTRY(NG4_retl_o2_plus_o5)
1075     + ba,pt %xcc, __restore_asi
1076     + add %o2, %o5, %o0
1077     +ENDPROC(NG4_retl_o2_plus_o5)
1078     +ENTRY(NG4_retl_o2_plus_o5_plus_4)
1079     + add %o5, 4, %o5
1080     + ba,pt %xcc, __restore_asi
1081     + add %o2, %o5, %o0
1082     +ENDPROC(NG4_retl_o2_plus_o5_plus_4)
1083     +ENTRY(NG4_retl_o2_plus_o5_plus_8)
1084     + add %o5, 8, %o5
1085     + ba,pt %xcc, __restore_asi
1086     + add %o2, %o5, %o0
1087     +ENDPROC(NG4_retl_o2_plus_o5_plus_8)
1088     +ENTRY(NG4_retl_o2_plus_o5_plus_16)
1089     + add %o5, 16, %o5
1090     + ba,pt %xcc, __restore_asi
1091     + add %o2, %o5, %o0
1092     +ENDPROC(NG4_retl_o2_plus_o5_plus_16)
1093     +ENTRY(NG4_retl_o2_plus_o5_plus_24)
1094     + add %o5, 24, %o5
1095     + ba,pt %xcc, __restore_asi
1096     + add %o2, %o5, %o0
1097     +ENDPROC(NG4_retl_o2_plus_o5_plus_24)
1098     +ENTRY(NG4_retl_o2_plus_o5_plus_32)
1099     + add %o5, 32, %o5
1100     + ba,pt %xcc, __restore_asi
1101     + add %o2, %o5, %o0
1102     +ENDPROC(NG4_retl_o2_plus_o5_plus_32)
1103     +ENTRY(NG4_retl_o2_plus_g1)
1104     + ba,pt %xcc, __restore_asi
1105     + add %o2, %g1, %o0
1106     +ENDPROC(NG4_retl_o2_plus_g1)
1107     +ENTRY(NG4_retl_o2_plus_g1_plus_1)
1108     + add %g1, 1, %g1
1109     + ba,pt %xcc, __restore_asi
1110     + add %o2, %g1, %o0
1111     +ENDPROC(NG4_retl_o2_plus_g1_plus_1)
1112     +ENTRY(NG4_retl_o2_plus_g1_plus_8)
1113     + add %g1, 8, %g1
1114     + ba,pt %xcc, __restore_asi
1115     + add %o2, %g1, %o0
1116     +ENDPROC(NG4_retl_o2_plus_g1_plus_8)
1117     +ENTRY(NG4_retl_o2_plus_o4)
1118     + ba,pt %xcc, __restore_asi
1119     + add %o2, %o4, %o0
1120     +ENDPROC(NG4_retl_o2_plus_o4)
1121     +ENTRY(NG4_retl_o2_plus_o4_plus_8)
1122     + add %o4, 8, %o4
1123     + ba,pt %xcc, __restore_asi
1124     + add %o2, %o4, %o0
1125     +ENDPROC(NG4_retl_o2_plus_o4_plus_8)
1126     +ENTRY(NG4_retl_o2_plus_o4_plus_16)
1127     + add %o4, 16, %o4
1128     + ba,pt %xcc, __restore_asi
1129     + add %o2, %o4, %o0
1130     +ENDPROC(NG4_retl_o2_plus_o4_plus_16)
1131     +ENTRY(NG4_retl_o2_plus_o4_plus_24)
1132     + add %o4, 24, %o4
1133     + ba,pt %xcc, __restore_asi
1134     + add %o2, %o4, %o0
1135     +ENDPROC(NG4_retl_o2_plus_o4_plus_24)
1136     +ENTRY(NG4_retl_o2_plus_o4_plus_32)
1137     + add %o4, 32, %o4
1138     + ba,pt %xcc, __restore_asi
1139     + add %o2, %o4, %o0
1140     +ENDPROC(NG4_retl_o2_plus_o4_plus_32)
1141     +ENTRY(NG4_retl_o2_plus_o4_plus_40)
1142     + add %o4, 40, %o4
1143     + ba,pt %xcc, __restore_asi
1144     + add %o2, %o4, %o0
1145     +ENDPROC(NG4_retl_o2_plus_o4_plus_40)
1146     +ENTRY(NG4_retl_o2_plus_o4_plus_48)
1147     + add %o4, 48, %o4
1148     + ba,pt %xcc, __restore_asi
1149     + add %o2, %o4, %o0
1150     +ENDPROC(NG4_retl_o2_plus_o4_plus_48)
1151     +ENTRY(NG4_retl_o2_plus_o4_plus_56)
1152     + add %o4, 56, %o4
1153     + ba,pt %xcc, __restore_asi
1154     + add %o2, %o4, %o0
1155     +ENDPROC(NG4_retl_o2_plus_o4_plus_56)
1156     +ENTRY(NG4_retl_o2_plus_o4_plus_64)
1157     + add %o4, 64, %o4
1158     + ba,pt %xcc, __restore_asi
1159     + add %o2, %o4, %o0
1160     +ENDPROC(NG4_retl_o2_plus_o4_plus_64)
1161     +ENTRY(NG4_retl_o2_plus_o4_fp)
1162     + ba,pt %xcc, __restore_asi_fp
1163     + add %o2, %o4, %o0
1164     +ENDPROC(NG4_retl_o2_plus_o4_fp)
1165     +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
1166     + add %o4, 8, %o4
1167     + ba,pt %xcc, __restore_asi_fp
1168     + add %o2, %o4, %o0
1169     +ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
1170     +ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
1171     + add %o4, 16, %o4
1172     + ba,pt %xcc, __restore_asi_fp
1173     + add %o2, %o4, %o0
1174     +ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
1175     +ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
1176     + add %o4, 24, %o4
1177     + ba,pt %xcc, __restore_asi_fp
1178     + add %o2, %o4, %o0
1179     +ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
1180     +ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
1181     + add %o4, 32, %o4
1182     + ba,pt %xcc, __restore_asi_fp
1183     + add %o2, %o4, %o0
1184     +ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
1185     +ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
1186     + add %o4, 40, %o4
1187     + ba,pt %xcc, __restore_asi_fp
1188     + add %o2, %o4, %o0
1189     +ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
1190     +ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
1191     + add %o4, 48, %o4
1192     + ba,pt %xcc, __restore_asi_fp
1193     + add %o2, %o4, %o0
1194     +ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
1195     +ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
1196     + add %o4, 56, %o4
1197     + ba,pt %xcc, __restore_asi_fp
1198     + add %o2, %o4, %o0
1199     +ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
1200     +ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
1201     + add %o4, 64, %o4
1202     + ba,pt %xcc, __restore_asi_fp
1203     + add %o2, %o4, %o0
1204     +ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
1205     +#endif
1206     .align 64
1207    
1208     .globl FUNC_NAME
1209     @@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1210     brz,pt %g1, 51f
1211     sub %o2, %g1, %o2
1212    
1213     -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
1214     +
1215     +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1216     add %o1, 1, %o1
1217     subcc %g1, 1, %g1
1218     add %o0, 1, %o0
1219     bne,pt %icc, 1b
1220     - EX_ST(STORE(stb, %g2, %o0 - 0x01))
1221     + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
1222    
1223     51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
1224     LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
1225     @@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1226     brz,pt %g1, .Llarge_aligned
1227     sub %o2, %g1, %o2
1228    
1229     -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
1230     +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1231     add %o1, 8, %o1
1232     subcc %g1, 8, %g1
1233     add %o0, 8, %o0
1234     bne,pt %icc, 1b
1235     - EX_ST(STORE(stx, %g2, %o0 - 0x08))
1236     + EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
1237    
1238     .Llarge_aligned:
1239     /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
1240     andn %o2, 0x3f, %o4
1241     sub %o2, %o4, %o2
1242    
1243     -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1244     +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
1245     add %o1, 0x40, %o1
1246     - EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
1247     + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
1248     subcc %o4, 0x40, %o4
1249     - EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
1250     - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
1251     - EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
1252     - EX_ST(STORE_INIT(%g1, %o0))
1253     + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
1254     + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
1255     + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
1256     + EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
1257     add %o0, 0x08, %o0
1258     - EX_ST(STORE_INIT(%g2, %o0))
1259     + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
1260     add %o0, 0x08, %o0
1261     - EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
1262     - EX_ST(STORE_INIT(%g3, %o0))
1263     + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
1264     + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
1265     add %o0, 0x08, %o0
1266     - EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
1267     - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
1268     + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
1269     + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
1270     add %o0, 0x08, %o0
1271     - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
1272     - EX_ST(STORE_INIT(%o5, %o0))
1273     + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
1274     + EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
1275     add %o0, 0x08, %o0
1276     - EX_ST(STORE_INIT(%g2, %o0))
1277     + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
1278     add %o0, 0x08, %o0
1279     - EX_ST(STORE_INIT(%g3, %o0))
1280     + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
1281     add %o0, 0x08, %o0
1282     - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
1283     + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
1284     add %o0, 0x08, %o0
1285     bne,pt %icc, 1b
1286     LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
1287     @@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1288     sub %o2, %o4, %o2
1289     alignaddr %o1, %g0, %g1
1290     add %o1, %o4, %o1
1291     - EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
1292     -1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
1293     + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
1294     +1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
1295     subcc %o4, 0x40, %o4
1296     - EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
1297     - EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
1298     - EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
1299     - EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
1300     - EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
1301     - EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
1302     + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
1303     + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
1304     + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
1305     + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
1306     + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
1307     + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
1308     faligndata %f0, %f2, %f16
1309     - EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
1310     + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
1311     faligndata %f2, %f4, %f18
1312     add %g1, 0x40, %g1
1313     faligndata %f4, %f6, %f20
1314     @@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1315     faligndata %f10, %f12, %f26
1316     faligndata %f12, %f14, %f28
1317     faligndata %f14, %f0, %f30
1318     - EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
1319     - EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
1320     - EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
1321     - EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
1322     - EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
1323     - EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
1324     - EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
1325     - EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
1326     + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
1327     + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
1328     + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
1329     + EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
1330     + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
1331     + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
1332     + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
1333     + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
1334     add %o0, 0x40, %o0
1335     bne,pt %icc, 1b
1336     LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
1337     @@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1338     andncc %o2, 0x20 - 1, %o5
1339     be,pn %icc, 2f
1340     sub %o2, %o5, %o2
1341     -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1342     - EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
1343     - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
1344     - EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
1345     +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1346     + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
1347     + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
1348     + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
1349     add %o1, 0x20, %o1
1350     subcc %o5, 0x20, %o5
1351     - EX_ST(STORE(stx, %g1, %o0 + 0x00))
1352     - EX_ST(STORE(stx, %g2, %o0 + 0x08))
1353     - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
1354     - EX_ST(STORE(stx, %o4, %o0 + 0x18))
1355     + EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
1356     + EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
1357     + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24)
1358     + EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
1359     bne,pt %icc, 1b
1360     add %o0, 0x20, %o0
1361     2: andcc %o2, 0x18, %o5
1362     be,pt %icc, 3f
1363     sub %o2, %o5, %o2
1364     -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1365     +
1366     +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1367     add %o1, 0x08, %o1
1368     add %o0, 0x08, %o0
1369     subcc %o5, 0x08, %o5
1370     bne,pt %icc, 1b
1371     - EX_ST(STORE(stx, %g1, %o0 - 0x08))
1372     + EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
1373     3: brz,pt %o2, .Lexit
1374     cmp %o2, 0x04
1375     bl,pn %icc, .Ltiny
1376     nop
1377     - EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
1378     + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
1379     add %o1, 0x04, %o1
1380     add %o0, 0x04, %o0
1381     subcc %o2, 0x04, %o2
1382     bne,pn %icc, .Ltiny
1383     - EX_ST(STORE(stw, %g1, %o0 - 0x04))
1384     + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
1385     ba,a,pt %icc, .Lexit
1386     .Lmedium_unaligned:
1387     /* First get dest 8 byte aligned. */
1388     @@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1389     brz,pt %g1, 2f
1390     sub %o2, %g1, %o2
1391    
1392     -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
1393     +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1394     add %o1, 1, %o1
1395     subcc %g1, 1, %g1
1396     add %o0, 1, %o0
1397     bne,pt %icc, 1b
1398     - EX_ST(STORE(stb, %g2, %o0 - 0x01))
1399     + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
1400     2:
1401     and %o1, 0x7, %g1
1402     brz,pn %g1, .Lmedium_noprefetch
1403     @@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1404     mov 64, %g2
1405     sub %g2, %g1, %g2
1406     andn %o1, 0x7, %o1
1407     - EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
1408     + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
1409     sllx %o4, %g1, %o4
1410     andn %o2, 0x08 - 1, %o5
1411     sub %o2, %o5, %o2
1412     -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
1413     +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
1414     add %o1, 0x08, %o1
1415     subcc %o5, 0x08, %o5
1416     srlx %g3, %g2, GLOBAL_SPARE
1417     or GLOBAL_SPARE, %o4, GLOBAL_SPARE
1418     - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
1419     + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
1420     add %o0, 0x08, %o0
1421     bne,pt %icc, 1b
1422     sllx %g3, %g1, %o4
1423     @@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1424     ba,pt %icc, .Lsmall_unaligned
1425    
1426     .Ltiny:
1427     - EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
1428     + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
1429     subcc %o2, 1, %o2
1430     be,pn %icc, .Lexit
1431     - EX_ST(STORE(stb, %g1, %o0 + 0x00))
1432     - EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
1433     + EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
1434     + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
1435     subcc %o2, 1, %o2
1436     be,pn %icc, .Lexit
1437     - EX_ST(STORE(stb, %g1, %o0 + 0x01))
1438     - EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
1439     + EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
1440     + EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
1441     ba,pt %icc, .Lexit
1442     - EX_ST(STORE(stb, %g1, %o0 + 0x02))
1443     + EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
1444    
1445     .Lsmall:
1446     andcc %g2, 0x3, %g0
1447     @@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1448     andn %o2, 0x4 - 1, %o5
1449     sub %o2, %o5, %o2
1450     1:
1451     - EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
1452     + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1453     add %o1, 0x04, %o1
1454     subcc %o5, 0x04, %o5
1455     add %o0, 0x04, %o0
1456     bne,pt %icc, 1b
1457     - EX_ST(STORE(stw, %g1, %o0 - 0x04))
1458     + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
1459     brz,pt %o2, .Lexit
1460     nop
1461     ba,a,pt %icc, .Ltiny
1462    
1463     .Lsmall_unaligned:
1464     -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
1465     +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
1466     add %o1, 1, %o1
1467     add %o0, 1, %o0
1468     subcc %o2, 1, %o2
1469     bne,pt %icc, 1b
1470     - EX_ST(STORE(stb, %g1, %o0 - 0x01))
1471     + EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
1472     ba,a,pt %icc, .Lexit
1473     .size FUNC_NAME, .-FUNC_NAME
1474     diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S
1475     index 5d1e4d1ac21e..9cd42fcbc781 100644
1476     --- a/arch/sparc/lib/NGcopy_from_user.S
1477     +++ b/arch/sparc/lib/NGcopy_from_user.S
1478     @@ -3,11 +3,11 @@
1479     * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
1480     */
1481    
1482     -#define EX_LD(x) \
1483     +#define EX_LD(x,y) \
1484     98: x; \
1485     .section __ex_table,"a";\
1486     .align 4; \
1487     - .word 98b, __ret_one_asi;\
1488     + .word 98b, y; \
1489     .text; \
1490     .align 4;
1491    
1492     diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S
1493     index ff630dcb273c..5c358afd464e 100644
1494     --- a/arch/sparc/lib/NGcopy_to_user.S
1495     +++ b/arch/sparc/lib/NGcopy_to_user.S
1496     @@ -3,11 +3,11 @@
1497     * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
1498     */
1499    
1500     -#define EX_ST(x) \
1501     +#define EX_ST(x,y) \
1502     98: x; \
1503     .section __ex_table,"a";\
1504     .align 4; \
1505     - .word 98b, __ret_one_asi;\
1506     + .word 98b, y; \
1507     .text; \
1508     .align 4;
1509    
1510     diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S
1511     index 96a14caf6966..d88c4ed50a00 100644
1512     --- a/arch/sparc/lib/NGmemcpy.S
1513     +++ b/arch/sparc/lib/NGmemcpy.S
1514     @@ -4,6 +4,7 @@
1515     */
1516    
1517     #ifdef __KERNEL__
1518     +#include <linux/linkage.h>
1519     #include <asm/asi.h>
1520     #include <asm/thread_info.h>
1521     #define GLOBAL_SPARE %g7
1522     @@ -27,15 +28,11 @@
1523     #endif
1524    
1525     #ifndef EX_LD
1526     -#define EX_LD(x) x
1527     +#define EX_LD(x,y) x
1528     #endif
1529    
1530     #ifndef EX_ST
1531     -#define EX_ST(x) x
1532     -#endif
1533     -
1534     -#ifndef EX_RETVAL
1535     -#define EX_RETVAL(x) x
1536     +#define EX_ST(x,y) x
1537     #endif
1538    
1539     #ifndef LOAD
1540     @@ -79,6 +76,92 @@
1541     .register %g3,#scratch
1542    
1543     .text
1544     +#ifndef EX_RETVAL
1545     +#define EX_RETVAL(x) x
1546     +__restore_asi:
1547     + ret
1548     + wr %g0, ASI_AIUS, %asi
1549     + restore
1550     +ENTRY(NG_ret_i2_plus_i4_plus_1)
1551     + ba,pt %xcc, __restore_asi
1552     + add %i2, %i5, %i0
1553     +ENDPROC(NG_ret_i2_plus_i4_plus_1)
1554     +ENTRY(NG_ret_i2_plus_g1)
1555     + ba,pt %xcc, __restore_asi
1556     + add %i2, %g1, %i0
1557     +ENDPROC(NG_ret_i2_plus_g1)
1558     +ENTRY(NG_ret_i2_plus_g1_minus_8)
1559     + sub %g1, 8, %g1
1560     + ba,pt %xcc, __restore_asi
1561     + add %i2, %g1, %i0
1562     +ENDPROC(NG_ret_i2_plus_g1_minus_8)
1563     +ENTRY(NG_ret_i2_plus_g1_minus_16)
1564     + sub %g1, 16, %g1
1565     + ba,pt %xcc, __restore_asi
1566     + add %i2, %g1, %i0
1567     +ENDPROC(NG_ret_i2_plus_g1_minus_16)
1568     +ENTRY(NG_ret_i2_plus_g1_minus_24)
1569     + sub %g1, 24, %g1
1570     + ba,pt %xcc, __restore_asi
1571     + add %i2, %g1, %i0
1572     +ENDPROC(NG_ret_i2_plus_g1_minus_24)
1573     +ENTRY(NG_ret_i2_plus_g1_minus_32)
1574     + sub %g1, 32, %g1
1575     + ba,pt %xcc, __restore_asi
1576     + add %i2, %g1, %i0
1577     +ENDPROC(NG_ret_i2_plus_g1_minus_32)
1578     +ENTRY(NG_ret_i2_plus_g1_minus_40)
1579     + sub %g1, 40, %g1
1580     + ba,pt %xcc, __restore_asi
1581     + add %i2, %g1, %i0
1582     +ENDPROC(NG_ret_i2_plus_g1_minus_40)
1583     +ENTRY(NG_ret_i2_plus_g1_minus_48)
1584     + sub %g1, 48, %g1
1585     + ba,pt %xcc, __restore_asi
1586     + add %i2, %g1, %i0
1587     +ENDPROC(NG_ret_i2_plus_g1_minus_48)
1588     +ENTRY(NG_ret_i2_plus_g1_minus_56)
1589     + sub %g1, 56, %g1
1590     + ba,pt %xcc, __restore_asi
1591     + add %i2, %g1, %i0
1592     +ENDPROC(NG_ret_i2_plus_g1_minus_56)
1593     +ENTRY(NG_ret_i2_plus_i4)
1594     + ba,pt %xcc, __restore_asi
1595     + add %i2, %i4, %i0
1596     +ENDPROC(NG_ret_i2_plus_i4)
1597     +ENTRY(NG_ret_i2_plus_i4_minus_8)
1598     + sub %i4, 8, %i4
1599     + ba,pt %xcc, __restore_asi
1600     + add %i2, %i4, %i0
1601     +ENDPROC(NG_ret_i2_plus_i4_minus_8)
1602     +ENTRY(NG_ret_i2_plus_8)
1603     + ba,pt %xcc, __restore_asi
1604     + add %i2, 8, %i0
1605     +ENDPROC(NG_ret_i2_plus_8)
1606     +ENTRY(NG_ret_i2_plus_4)
1607     + ba,pt %xcc, __restore_asi
1608     + add %i2, 4, %i0
1609     +ENDPROC(NG_ret_i2_plus_4)
1610     +ENTRY(NG_ret_i2_plus_1)
1611     + ba,pt %xcc, __restore_asi
1612     + add %i2, 1, %i0
1613     +ENDPROC(NG_ret_i2_plus_1)
1614     +ENTRY(NG_ret_i2_plus_g1_plus_1)
1615     + add %g1, 1, %g1
1616     + ba,pt %xcc, __restore_asi
1617     + add %i2, %g1, %i0
1618     +ENDPROC(NG_ret_i2_plus_g1_plus_1)
1619     +ENTRY(NG_ret_i2)
1620     + ba,pt %xcc, __restore_asi
1621     + mov %i2, %i0
1622     +ENDPROC(NG_ret_i2)
1623     +ENTRY(NG_ret_i2_and_7_plus_i4)
1624     + and %i2, 7, %i2
1625     + ba,pt %xcc, __restore_asi
1626     + add %i2, %i4, %i0
1627     +ENDPROC(NG_ret_i2_and_7_plus_i4)
1628     +#endif
1629     +
1630     .align 64
1631    
1632     .globl FUNC_NAME
1633     @@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1634     sub %g0, %i4, %i4 ! bytes to align dst
1635     sub %i2, %i4, %i2
1636     1: subcc %i4, 1, %i4
1637     - EX_LD(LOAD(ldub, %i1, %g1))
1638     - EX_ST(STORE(stb, %g1, %o0))
1639     + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
1640     + EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
1641     add %i1, 1, %i1
1642     bne,pt %XCC, 1b
1643     add %o0, 1, %o0
1644     @@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1645     and %i4, 0x7, GLOBAL_SPARE
1646     sll GLOBAL_SPARE, 3, GLOBAL_SPARE
1647     mov 64, %i5
1648     - EX_LD(LOAD_TWIN(%i1, %g2, %g3))
1649     + EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
1650     sub %i5, GLOBAL_SPARE, %i5
1651     mov 16, %o4
1652     mov 32, %o5
1653     @@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1654     srlx WORD3, PRE_SHIFT, TMP; \
1655     or WORD2, TMP, WORD2;
1656    
1657     -8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
1658     +8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
1659     MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
1660     LOAD(prefetch, %i1 + %i3, #one_read)
1661    
1662     - EX_ST(STORE_INIT(%g2, %o0 + 0x00))
1663     - EX_ST(STORE_INIT(%g3, %o0 + 0x08))
1664     + EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
1665     + EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1666    
1667     - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
1668     + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
1669     MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
1670    
1671     - EX_ST(STORE_INIT(%o2, %o0 + 0x10))
1672     - EX_ST(STORE_INIT(%o3, %o0 + 0x18))
1673     + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1674     + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1675    
1676     - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1677     + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1678     MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
1679    
1680     - EX_ST(STORE_INIT(%g2, %o0 + 0x20))
1681     - EX_ST(STORE_INIT(%g3, %o0 + 0x28))
1682     + EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1683     + EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1684    
1685     - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
1686     + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
1687     add %i1, 64, %i1
1688     MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
1689    
1690     - EX_ST(STORE_INIT(%o2, %o0 + 0x30))
1691     - EX_ST(STORE_INIT(%o3, %o0 + 0x38))
1692     + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1693     + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1694    
1695     subcc %g1, 64, %g1
1696     bne,pt %XCC, 8b
1697     @@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1698     ba,pt %XCC, 60f
1699     add %i1, %i4, %i1
1700    
1701     -9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
1702     +9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
1703     MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
1704     LOAD(prefetch, %i1 + %i3, #one_read)
1705    
1706     - EX_ST(STORE_INIT(%g3, %o0 + 0x00))
1707     - EX_ST(STORE_INIT(%o2, %o0 + 0x08))
1708     + EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
1709     + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1710    
1711     - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
1712     + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
1713     MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
1714    
1715     - EX_ST(STORE_INIT(%o3, %o0 + 0x10))
1716     - EX_ST(STORE_INIT(%g2, %o0 + 0x18))
1717     + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1718     + EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1719    
1720     - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1721     + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1722     MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
1723    
1724     - EX_ST(STORE_INIT(%g3, %o0 + 0x20))
1725     - EX_ST(STORE_INIT(%o2, %o0 + 0x28))
1726     + EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1727     + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1728    
1729     - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
1730     + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
1731     add %i1, 64, %i1
1732     MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
1733    
1734     - EX_ST(STORE_INIT(%o3, %o0 + 0x30))
1735     - EX_ST(STORE_INIT(%g2, %o0 + 0x38))
1736     + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1737     + EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1738    
1739     subcc %g1, 64, %g1
1740     bne,pt %XCC, 9b
1741     @@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1742     * one twin load ahead, then add 8 back into source when
1743     * we finish the loop.
1744     */
1745     - EX_LD(LOAD_TWIN(%i1, %o4, %o5))
1746     + EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
1747     mov 16, %o7
1748     mov 32, %g2
1749     mov 48, %g3
1750     mov 64, %o1
1751     -1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1752     +1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
1753     LOAD(prefetch, %i1 + %o1, #one_read)
1754     - EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line
1755     - EX_ST(STORE_INIT(%o2, %o0 + 0x08))
1756     - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
1757     - EX_ST(STORE_INIT(%o3, %o0 + 0x10))
1758     - EX_ST(STORE_INIT(%o4, %o0 + 0x18))
1759     - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
1760     - EX_ST(STORE_INIT(%o5, %o0 + 0x20))
1761     - EX_ST(STORE_INIT(%o2, %o0 + 0x28))
1762     - EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5))
1763     + EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
1764     + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1765     + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
1766     + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1767     + EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1768     + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1769     + EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1770     + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1771     + EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
1772     add %i1, 64, %i1
1773     - EX_ST(STORE_INIT(%o3, %o0 + 0x30))
1774     - EX_ST(STORE_INIT(%o4, %o0 + 0x38))
1775     + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1776     + EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1777     subcc %g1, 64, %g1
1778     bne,pt %XCC, 1b
1779     add %o0, 64, %o0
1780     @@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1781     mov 32, %g2
1782     mov 48, %g3
1783     mov 64, %o1
1784     -1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5))
1785     - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1786     +1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
1787     + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
1788     LOAD(prefetch, %i1 + %o1, #one_read)
1789     - EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line
1790     - EX_ST(STORE_INIT(%o5, %o0 + 0x08))
1791     - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
1792     - EX_ST(STORE_INIT(%o2, %o0 + 0x10))
1793     - EX_ST(STORE_INIT(%o3, %o0 + 0x18))
1794     - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
1795     + EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
1796     + EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1797     + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
1798     + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1799     + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1800     + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1801     add %i1, 64, %i1
1802     - EX_ST(STORE_INIT(%o4, %o0 + 0x20))
1803     - EX_ST(STORE_INIT(%o5, %o0 + 0x28))
1804     - EX_ST(STORE_INIT(%o2, %o0 + 0x30))
1805     - EX_ST(STORE_INIT(%o3, %o0 + 0x38))
1806     + EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1807     + EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1808     + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1809     + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1810     subcc %g1, 64, %g1
1811     bne,pt %XCC, 1b
1812     add %o0, 64, %o0
1813     @@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1814     andn %i2, 0xf, %i4
1815     and %i2, 0xf, %i2
1816     1: subcc %i4, 0x10, %i4
1817     - EX_LD(LOAD(ldx, %i1, %o4))
1818     + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
1819     add %i1, 0x08, %i1
1820     - EX_LD(LOAD(ldx, %i1, %g1))
1821     + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
1822     sub %i1, 0x08, %i1
1823     - EX_ST(STORE(stx, %o4, %i1 + %i3))
1824     + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
1825     add %i1, 0x8, %i1
1826     - EX_ST(STORE(stx, %g1, %i1 + %i3))
1827     + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
1828     bgu,pt %XCC, 1b
1829     add %i1, 0x8, %i1
1830     73: andcc %i2, 0x8, %g0
1831     be,pt %XCC, 1f
1832     nop
1833     sub %i2, 0x8, %i2
1834     - EX_LD(LOAD(ldx, %i1, %o4))
1835     - EX_ST(STORE(stx, %o4, %i1 + %i3))
1836     + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
1837     + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
1838     add %i1, 0x8, %i1
1839     1: andcc %i2, 0x4, %g0
1840     be,pt %XCC, 1f
1841     nop
1842     sub %i2, 0x4, %i2
1843     - EX_LD(LOAD(lduw, %i1, %i5))
1844     - EX_ST(STORE(stw, %i5, %i1 + %i3))
1845     + EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
1846     + EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
1847     add %i1, 0x4, %i1
1848     1: cmp %i2, 0
1849     be,pt %XCC, 85f
1850     @@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1851     sub %i2, %g1, %i2
1852    
1853     1: subcc %g1, 1, %g1
1854     - EX_LD(LOAD(ldub, %i1, %i5))
1855     - EX_ST(STORE(stb, %i5, %i1 + %i3))
1856     + EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
1857     + EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
1858     bgu,pt %icc, 1b
1859     add %i1, 1, %i1
1860    
1861     @@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1862    
1863     8: mov 64, %i3
1864     andn %i1, 0x7, %i1
1865     - EX_LD(LOAD(ldx, %i1, %g2))
1866     + EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
1867     sub %i3, %g1, %i3
1868     andn %i2, 0x7, %i4
1869     sllx %g2, %g1, %g2
1870     1: add %i1, 0x8, %i1
1871     - EX_LD(LOAD(ldx, %i1, %g3))
1872     + EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
1873     subcc %i4, 0x8, %i4
1874     srlx %g3, %i3, %i5
1875     or %i5, %g2, %i5
1876     - EX_ST(STORE(stx, %i5, %o0))
1877     + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
1878     add %o0, 0x8, %o0
1879     bgu,pt %icc, 1b
1880     sllx %g3, %g1, %g2
1881     @@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1882    
1883     1:
1884     subcc %i2, 4, %i2
1885     - EX_LD(LOAD(lduw, %i1, %g1))
1886     - EX_ST(STORE(stw, %g1, %i1 + %i3))
1887     + EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
1888     + EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
1889     bgu,pt %XCC, 1b
1890     add %i1, 4, %i1
1891    
1892     @@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1893     .align 32
1894     90:
1895     subcc %i2, 1, %i2
1896     - EX_LD(LOAD(ldub, %i1, %g1))
1897     - EX_ST(STORE(stb, %g1, %i1 + %i3))
1898     + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
1899     + EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
1900     bgu,pt %XCC, 90b
1901     add %i1, 1, %i1
1902     ret
1903     diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S
1904     index ecc5692fa2b4..bb6ff73229e3 100644
1905     --- a/arch/sparc/lib/U1copy_from_user.S
1906     +++ b/arch/sparc/lib/U1copy_from_user.S
1907     @@ -3,19 +3,19 @@
1908     * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
1909     */
1910    
1911     -#define EX_LD(x) \
1912     +#define EX_LD(x,y) \
1913     98: x; \
1914     .section __ex_table,"a";\
1915     .align 4; \
1916     - .word 98b, __retl_one; \
1917     + .word 98b, y; \
1918     .text; \
1919     .align 4;
1920    
1921     -#define EX_LD_FP(x) \
1922     +#define EX_LD_FP(x,y) \
1923     98: x; \
1924     .section __ex_table,"a";\
1925     .align 4; \
1926     - .word 98b, __retl_one_fp;\
1927     + .word 98b, y; \
1928     .text; \
1929     .align 4;
1930    
1931     diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
1932     index 9eea392e44d4..ed92ce739558 100644
1933     --- a/arch/sparc/lib/U1copy_to_user.S
1934     +++ b/arch/sparc/lib/U1copy_to_user.S
1935     @@ -3,19 +3,19 @@
1936     * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
1937     */
1938    
1939     -#define EX_ST(x) \
1940     +#define EX_ST(x,y) \
1941     98: x; \
1942     .section __ex_table,"a";\
1943     .align 4; \
1944     - .word 98b, __retl_one; \
1945     + .word 98b, y; \
1946     .text; \
1947     .align 4;
1948    
1949     -#define EX_ST_FP(x) \
1950     +#define EX_ST_FP(x,y) \
1951     98: x; \
1952     .section __ex_table,"a";\
1953     .align 4; \
1954     - .word 98b, __retl_one_fp;\
1955     + .word 98b, y; \
1956     .text; \
1957     .align 4;
1958    
1959     diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
1960     index 3e6209ebb7d7..f30d2ab2c371 100644
1961     --- a/arch/sparc/lib/U1memcpy.S
1962     +++ b/arch/sparc/lib/U1memcpy.S
1963     @@ -5,6 +5,7 @@
1964     */
1965    
1966     #ifdef __KERNEL__
1967     +#include <linux/linkage.h>
1968     #include <asm/visasm.h>
1969     #include <asm/asi.h>
1970     #define GLOBAL_SPARE g7
1971     @@ -23,21 +24,17 @@
1972     #endif
1973    
1974     #ifndef EX_LD
1975     -#define EX_LD(x) x
1976     +#define EX_LD(x,y) x
1977     #endif
1978     #ifndef EX_LD_FP
1979     -#define EX_LD_FP(x) x
1980     +#define EX_LD_FP(x,y) x
1981     #endif
1982    
1983     #ifndef EX_ST
1984     -#define EX_ST(x) x
1985     +#define EX_ST(x,y) x
1986     #endif
1987     #ifndef EX_ST_FP
1988     -#define EX_ST_FP(x) x
1989     -#endif
1990     -
1991     -#ifndef EX_RETVAL
1992     -#define EX_RETVAL(x) x
1993     +#define EX_ST_FP(x,y) x
1994     #endif
1995    
1996     #ifndef LOAD
1997     @@ -78,53 +75,169 @@
1998     faligndata %f7, %f8, %f60; \
1999     faligndata %f8, %f9, %f62;
2000    
2001     -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \
2002     - EX_LD_FP(LOAD_BLK(%src, %fdest)); \
2003     - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
2004     - add %src, 0x40, %src; \
2005     - subcc %len, 0x40, %len; \
2006     - be,pn %xcc, jmptgt; \
2007     - add %dest, 0x40, %dest; \
2008     -
2009     -#define LOOP_CHUNK1(src, dest, len, branch_dest) \
2010     - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest)
2011     -#define LOOP_CHUNK2(src, dest, len, branch_dest) \
2012     - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
2013     -#define LOOP_CHUNK3(src, dest, len, branch_dest) \
2014     - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
2015     +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \
2016     + EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \
2017     + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
2018     + add %src, 0x40, %src; \
2019     + subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
2020     + be,pn %xcc, jmptgt; \
2021     + add %dest, 0x40, %dest; \
2022     +
2023     +#define LOOP_CHUNK1(src, dest, branch_dest) \
2024     + MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest)
2025     +#define LOOP_CHUNK2(src, dest, branch_dest) \
2026     + MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
2027     +#define LOOP_CHUNK3(src, dest, branch_dest) \
2028     + MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
2029    
2030     #define DO_SYNC membar #Sync;
2031     #define STORE_SYNC(dest, fsrc) \
2032     - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
2033     + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
2034     add %dest, 0x40, %dest; \
2035     DO_SYNC
2036    
2037     #define STORE_JUMP(dest, fsrc, target) \
2038     - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
2039     + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
2040     add %dest, 0x40, %dest; \
2041     ba,pt %xcc, target; \
2042     nop;
2043    
2044     -#define FINISH_VISCHUNK(dest, f0, f1, left) \
2045     - subcc %left, 8, %left;\
2046     - bl,pn %xcc, 95f; \
2047     - faligndata %f0, %f1, %f48; \
2048     - EX_ST_FP(STORE(std, %f48, %dest)); \
2049     +#define FINISH_VISCHUNK(dest, f0, f1) \
2050     + subcc %g3, 8, %g3; \
2051     + bl,pn %xcc, 95f; \
2052     + faligndata %f0, %f1, %f48; \
2053     + EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \
2054     add %dest, 8, %dest;
2055    
2056     -#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
2057     - subcc %left, 8, %left; \
2058     - bl,pn %xcc, 95f; \
2059     +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
2060     + subcc %g3, 8, %g3; \
2061     + bl,pn %xcc, 95f; \
2062     fsrc2 %f0, %f1;
2063    
2064     -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \
2065     - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
2066     +#define UNEVEN_VISCHUNK(dest, f0, f1) \
2067     + UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
2068     ba,a,pt %xcc, 93f;
2069    
2070     .register %g2,#scratch
2071     .register %g3,#scratch
2072    
2073     .text
2074     +#ifndef EX_RETVAL
2075     +#define EX_RETVAL(x) x
2076     +ENTRY(U1_g1_1_fp)
2077     + VISExitHalf
2078     + add %g1, 1, %g1
2079     + add %g1, %g2, %g1
2080     + retl
2081     + add %g1, %o2, %o0
2082     +ENDPROC(U1_g1_1_fp)
2083     +ENTRY(U1_g2_0_fp)
2084     + VISExitHalf
2085     + retl
2086     + add %g2, %o2, %o0
2087     +ENDPROC(U1_g2_0_fp)
2088     +ENTRY(U1_g2_8_fp)
2089     + VISExitHalf
2090     + add %g2, 8, %g2
2091     + retl
2092     + add %g2, %o2, %o0
2093     +ENDPROC(U1_g2_8_fp)
2094     +ENTRY(U1_gs_0_fp)
2095     + VISExitHalf
2096     + add %GLOBAL_SPARE, %g3, %o0
2097     + retl
2098     + add %o0, %o2, %o0
2099     +ENDPROC(U1_gs_0_fp)
2100     +ENTRY(U1_gs_80_fp)
2101     + VISExitHalf
2102     + add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
2103     + add %GLOBAL_SPARE, %g3, %o0
2104     + retl
2105     + add %o0, %o2, %o0
2106     +ENDPROC(U1_gs_80_fp)
2107     +ENTRY(U1_gs_40_fp)
2108     + VISExitHalf
2109     + add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
2110     + add %GLOBAL_SPARE, %g3, %o0
2111     + retl
2112     + add %o0, %o2, %o0
2113     +ENDPROC(U1_gs_40_fp)
2114     +ENTRY(U1_g3_0_fp)
2115     + VISExitHalf
2116     + retl
2117     + add %g3, %o2, %o0
2118     +ENDPROC(U1_g3_0_fp)
2119     +ENTRY(U1_g3_8_fp)
2120     + VISExitHalf
2121     + add %g3, 8, %g3
2122     + retl
2123     + add %g3, %o2, %o0
2124     +ENDPROC(U1_g3_8_fp)
2125     +ENTRY(U1_o2_0_fp)
2126     + VISExitHalf
2127     + retl
2128     + mov %o2, %o0
2129     +ENDPROC(U1_o2_0_fp)
2130     +ENTRY(U1_o2_1_fp)
2131     + VISExitHalf
2132     + retl
2133     + add %o2, 1, %o0
2134     +ENDPROC(U1_o2_1_fp)
2135     +ENTRY(U1_gs_0)
2136     + VISExitHalf
2137     + retl
2138     + add %GLOBAL_SPARE, %o2, %o0
2139     +ENDPROC(U1_gs_0)
2140     +ENTRY(U1_gs_8)
2141     + VISExitHalf
2142     + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
2143     + retl
2144     + add %GLOBAL_SPARE, 0x8, %o0
2145     +ENDPROC(U1_gs_8)
2146     +ENTRY(U1_gs_10)
2147     + VISExitHalf
2148     + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
2149     + retl
2150     + add %GLOBAL_SPARE, 0x10, %o0
2151     +ENDPROC(U1_gs_10)
2152     +ENTRY(U1_o2_0)
2153     + retl
2154     + mov %o2, %o0
2155     +ENDPROC(U1_o2_0)
2156     +ENTRY(U1_o2_8)
2157     + retl
2158     + add %o2, 8, %o0
2159     +ENDPROC(U1_o2_8)
2160     +ENTRY(U1_o2_4)
2161     + retl
2162     + add %o2, 4, %o0
2163     +ENDPROC(U1_o2_4)
2164     +ENTRY(U1_o2_1)
2165     + retl
2166     + add %o2, 1, %o0
2167     +ENDPROC(U1_o2_1)
2168     +ENTRY(U1_g1_0)
2169     + retl
2170     + add %g1, %o2, %o0
2171     +ENDPROC(U1_g1_0)
2172     +ENTRY(U1_g1_1)
2173     + add %g1, 1, %g1
2174     + retl
2175     + add %g1, %o2, %o0
2176     +ENDPROC(U1_g1_1)
2177     +ENTRY(U1_gs_0_o2_adj)
2178     + and %o2, 7, %o2
2179     + retl
2180     + add %GLOBAL_SPARE, %o2, %o0
2181     +ENDPROC(U1_gs_0_o2_adj)
2182     +ENTRY(U1_gs_8_o2_adj)
2183     + and %o2, 7, %o2
2184     + add %GLOBAL_SPARE, 8, %GLOBAL_SPARE
2185     + retl
2186     + add %GLOBAL_SPARE, %o2, %o0
2187     +ENDPROC(U1_gs_8_o2_adj)
2188     +#endif
2189     +
2190     .align 64
2191    
2192     .globl FUNC_NAME
2193     @@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2194     and %g2, 0x38, %g2
2195    
2196     1: subcc %g1, 0x1, %g1
2197     - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
2198     - EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
2199     + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
2200     + EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
2201     bgu,pt %XCC, 1b
2202     add %o1, 0x1, %o1
2203    
2204     @@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2205     be,pt %icc, 3f
2206     alignaddr %o1, %g0, %o1
2207    
2208     - EX_LD_FP(LOAD(ldd, %o1, %f4))
2209     -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
2210     + EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
2211     +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
2212     add %o1, 0x8, %o1
2213     subcc %g2, 0x8, %g2
2214     faligndata %f4, %f6, %f0
2215     - EX_ST_FP(STORE(std, %f0, %o0))
2216     + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
2217     be,pn %icc, 3f
2218     add %o0, 0x8, %o0
2219    
2220     - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
2221     + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
2222     add %o1, 0x8, %o1
2223     subcc %g2, 0x8, %g2
2224     faligndata %f6, %f4, %f0
2225     - EX_ST_FP(STORE(std, %f0, %o0))
2226     + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
2227     bne,pt %icc, 1b
2228     add %o0, 0x8, %o0
2229    
2230     @@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2231     add %g1, %GLOBAL_SPARE, %g1
2232     subcc %o2, %g3, %o2
2233    
2234     - EX_LD_FP(LOAD_BLK(%o1, %f0))
2235     + EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
2236     add %o1, 0x40, %o1
2237     add %g1, %g3, %g1
2238     - EX_LD_FP(LOAD_BLK(%o1, %f16))
2239     + EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
2240     add %o1, 0x40, %o1
2241     sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
2242     - EX_LD_FP(LOAD_BLK(%o1, %f32))
2243     + EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
2244     add %o1, 0x40, %o1
2245    
2246     /* There are 8 instances of the unrolled loop,
2247     @@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2248    
2249     .align 64
2250     1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
2251     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2252     + LOOP_CHUNK1(o1, o0, 1f)
2253     FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
2254     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2255     + LOOP_CHUNK2(o1, o0, 2f)
2256     FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
2257     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2258     + LOOP_CHUNK3(o1, o0, 3f)
2259     ba,pt %xcc, 1b+4
2260     faligndata %f0, %f2, %f48
2261     1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
2262     @@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2263     STORE_JUMP(o0, f48, 56f)
2264    
2265     1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
2266     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2267     + LOOP_CHUNK1(o1, o0, 1f)
2268     FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
2269     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2270     + LOOP_CHUNK2(o1, o0, 2f)
2271     FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
2272     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2273     + LOOP_CHUNK3(o1, o0, 3f)
2274     ba,pt %xcc, 1b+4
2275     faligndata %f2, %f4, %f48
2276     1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
2277     @@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2278     STORE_JUMP(o0, f48, 57f)
2279    
2280     1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
2281     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2282     + LOOP_CHUNK1(o1, o0, 1f)
2283     FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
2284     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2285     + LOOP_CHUNK2(o1, o0, 2f)
2286     FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
2287     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2288     + LOOP_CHUNK3(o1, o0, 3f)
2289     ba,pt %xcc, 1b+4
2290     faligndata %f4, %f6, %f48
2291     1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
2292     @@ -303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2293     STORE_JUMP(o0, f48, 58f)
2294    
2295     1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
2296     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2297     + LOOP_CHUNK1(o1, o0, 1f)
2298     FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
2299     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2300     + LOOP_CHUNK2(o1, o0, 2f)
2301     FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
2302     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2303     + LOOP_CHUNK3(o1, o0, 3f)
2304     ba,pt %xcc, 1b+4
2305     faligndata %f6, %f8, %f48
2306     1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
2307     @@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2308     STORE_JUMP(o0, f48, 59f)
2309    
2310     1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
2311     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2312     + LOOP_CHUNK1(o1, o0, 1f)
2313     FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
2314     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2315     + LOOP_CHUNK2(o1, o0, 2f)
2316     FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
2317     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2318     + LOOP_CHUNK3(o1, o0, 3f)
2319     ba,pt %xcc, 1b+4
2320     faligndata %f8, %f10, %f48
2321     1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
2322     @@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2323     STORE_JUMP(o0, f48, 60f)
2324    
2325     1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
2326     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2327     + LOOP_CHUNK1(o1, o0, 1f)
2328     FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
2329     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2330     + LOOP_CHUNK2(o1, o0, 2f)
2331     FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
2332     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2333     + LOOP_CHUNK3(o1, o0, 3f)
2334     ba,pt %xcc, 1b+4
2335     faligndata %f10, %f12, %f48
2336     1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
2337     @@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2338     STORE_JUMP(o0, f48, 61f)
2339    
2340     1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
2341     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2342     + LOOP_CHUNK1(o1, o0, 1f)
2343     FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
2344     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2345     + LOOP_CHUNK2(o1, o0, 2f)
2346     FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
2347     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2348     + LOOP_CHUNK3(o1, o0, 3f)
2349     ba,pt %xcc, 1b+4
2350     faligndata %f12, %f14, %f48
2351     1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
2352     @@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2353     STORE_JUMP(o0, f48, 62f)
2354    
2355     1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
2356     - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2357     + LOOP_CHUNK1(o1, o0, 1f)
2358     FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2359     - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2360     + LOOP_CHUNK2(o1, o0, 2f)
2361     FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
2362     - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2363     + LOOP_CHUNK3(o1, o0, 3f)
2364     ba,pt %xcc, 1b+4
2365     faligndata %f14, %f16, %f48
2366     1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2367     @@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2368     FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2369     STORE_JUMP(o0, f48, 63f)
2370    
2371     -40: FINISH_VISCHUNK(o0, f0, f2, g3)
2372     -41: FINISH_VISCHUNK(o0, f2, f4, g3)
2373     -42: FINISH_VISCHUNK(o0, f4, f6, g3)
2374     -43: FINISH_VISCHUNK(o0, f6, f8, g3)
2375     -44: FINISH_VISCHUNK(o0, f8, f10, g3)
2376     -45: FINISH_VISCHUNK(o0, f10, f12, g3)
2377     -46: FINISH_VISCHUNK(o0, f12, f14, g3)
2378     -47: UNEVEN_VISCHUNK(o0, f14, f0, g3)
2379     -48: FINISH_VISCHUNK(o0, f16, f18, g3)
2380     -49: FINISH_VISCHUNK(o0, f18, f20, g3)
2381     -50: FINISH_VISCHUNK(o0, f20, f22, g3)
2382     -51: FINISH_VISCHUNK(o0, f22, f24, g3)
2383     -52: FINISH_VISCHUNK(o0, f24, f26, g3)
2384     -53: FINISH_VISCHUNK(o0, f26, f28, g3)
2385     -54: FINISH_VISCHUNK(o0, f28, f30, g3)
2386     -55: UNEVEN_VISCHUNK(o0, f30, f0, g3)
2387     -56: FINISH_VISCHUNK(o0, f32, f34, g3)
2388     -57: FINISH_VISCHUNK(o0, f34, f36, g3)
2389     -58: FINISH_VISCHUNK(o0, f36, f38, g3)
2390     -59: FINISH_VISCHUNK(o0, f38, f40, g3)
2391     -60: FINISH_VISCHUNK(o0, f40, f42, g3)
2392     -61: FINISH_VISCHUNK(o0, f42, f44, g3)
2393     -62: FINISH_VISCHUNK(o0, f44, f46, g3)
2394     -63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3)
2395     -
2396     -93: EX_LD_FP(LOAD(ldd, %o1, %f2))
2397     +40: FINISH_VISCHUNK(o0, f0, f2)
2398     +41: FINISH_VISCHUNK(o0, f2, f4)
2399     +42: FINISH_VISCHUNK(o0, f4, f6)
2400     +43: FINISH_VISCHUNK(o0, f6, f8)
2401     +44: FINISH_VISCHUNK(o0, f8, f10)
2402     +45: FINISH_VISCHUNK(o0, f10, f12)
2403     +46: FINISH_VISCHUNK(o0, f12, f14)
2404     +47: UNEVEN_VISCHUNK(o0, f14, f0)
2405     +48: FINISH_VISCHUNK(o0, f16, f18)
2406     +49: FINISH_VISCHUNK(o0, f18, f20)
2407     +50: FINISH_VISCHUNK(o0, f20, f22)
2408     +51: FINISH_VISCHUNK(o0, f22, f24)
2409     +52: FINISH_VISCHUNK(o0, f24, f26)
2410     +53: FINISH_VISCHUNK(o0, f26, f28)
2411     +54: FINISH_VISCHUNK(o0, f28, f30)
2412     +55: UNEVEN_VISCHUNK(o0, f30, f0)
2413     +56: FINISH_VISCHUNK(o0, f32, f34)
2414     +57: FINISH_VISCHUNK(o0, f34, f36)
2415     +58: FINISH_VISCHUNK(o0, f36, f38)
2416     +59: FINISH_VISCHUNK(o0, f38, f40)
2417     +60: FINISH_VISCHUNK(o0, f40, f42)
2418     +61: FINISH_VISCHUNK(o0, f42, f44)
2419     +62: FINISH_VISCHUNK(o0, f44, f46)
2420     +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0)
2421     +
2422     +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
2423     add %o1, 8, %o1
2424     subcc %g3, 8, %g3
2425     faligndata %f0, %f2, %f8
2426     - EX_ST_FP(STORE(std, %f8, %o0))
2427     + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
2428     bl,pn %xcc, 95f
2429     add %o0, 8, %o0
2430     - EX_LD_FP(LOAD(ldd, %o1, %f0))
2431     + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
2432     add %o1, 8, %o1
2433     subcc %g3, 8, %g3
2434     faligndata %f2, %f0, %f8
2435     - EX_ST_FP(STORE(std, %f8, %o0))
2436     + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
2437     bge,pt %xcc, 93b
2438     add %o0, 8, %o0
2439    
2440     95: brz,pt %o2, 2f
2441     mov %g1, %o1
2442    
2443     -1: EX_LD_FP(LOAD(ldub, %o1, %o3))
2444     +1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
2445     add %o1, 1, %o1
2446     subcc %o2, 1, %o2
2447     - EX_ST_FP(STORE(stb, %o3, %o0))
2448     + EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
2449     bne,pt %xcc, 1b
2450     add %o0, 1, %o0
2451    
2452     @@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2453    
2454     72: andn %o2, 0xf, %GLOBAL_SPARE
2455     and %o2, 0xf, %o2
2456     -1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
2457     - EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
2458     +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
2459     + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
2460     subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
2461     - EX_ST(STORE(stx, %o5, %o1 + %o3))
2462     + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
2463     add %o1, 0x8, %o1
2464     - EX_ST(STORE(stx, %g1, %o1 + %o3))
2465     + EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
2466     bgu,pt %XCC, 1b
2467     add %o1, 0x8, %o1
2468     73: andcc %o2, 0x8, %g0
2469     be,pt %XCC, 1f
2470     nop
2471     - EX_LD(LOAD(ldx, %o1, %o5))
2472     + EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
2473     sub %o2, 0x8, %o2
2474     - EX_ST(STORE(stx, %o5, %o1 + %o3))
2475     + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
2476     add %o1, 0x8, %o1
2477     1: andcc %o2, 0x4, %g0
2478     be,pt %XCC, 1f
2479     nop
2480     - EX_LD(LOAD(lduw, %o1, %o5))
2481     + EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
2482     sub %o2, 0x4, %o2
2483     - EX_ST(STORE(stw, %o5, %o1 + %o3))
2484     + EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
2485     add %o1, 0x4, %o1
2486     1: cmp %o2, 0
2487     be,pt %XCC, 85f
2488     @@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2489     sub %g0, %g1, %g1
2490     sub %o2, %g1, %o2
2491    
2492     -1: EX_LD(LOAD(ldub, %o1, %o5))
2493     +1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
2494     subcc %g1, 1, %g1
2495     - EX_ST(STORE(stb, %o5, %o1 + %o3))
2496     + EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
2497     bgu,pt %icc, 1b
2498     add %o1, 1, %o1
2499    
2500     @@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2501    
2502     8: mov 64, %o3
2503     andn %o1, 0x7, %o1
2504     - EX_LD(LOAD(ldx, %o1, %g2))
2505     + EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
2506     sub %o3, %g1, %o3
2507     andn %o2, 0x7, %GLOBAL_SPARE
2508     sllx %g2, %g1, %g2
2509     -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
2510     +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
2511     subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
2512     add %o1, 0x8, %o1
2513     srlx %g3, %o3, %o5
2514     or %o5, %g2, %o5
2515     - EX_ST(STORE(stx, %o5, %o0))
2516     + EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
2517     add %o0, 0x8, %o0
2518     bgu,pt %icc, 1b
2519     sllx %g3, %g1, %g2
2520     @@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2521     bne,pn %XCC, 90f
2522     sub %o0, %o1, %o3
2523    
2524     -1: EX_LD(LOAD(lduw, %o1, %g1))
2525     +1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
2526     subcc %o2, 4, %o2
2527     - EX_ST(STORE(stw, %g1, %o1 + %o3))
2528     + EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
2529     bgu,pt %XCC, 1b
2530     add %o1, 4, %o1
2531    
2532     @@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2533     mov EX_RETVAL(%o4), %o0
2534    
2535     .align 32
2536     -90: EX_LD(LOAD(ldub, %o1, %g1))
2537     +90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
2538     subcc %o2, 1, %o2
2539     - EX_ST(STORE(stb, %g1, %o1 + %o3))
2540     + EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
2541     bgu,pt %XCC, 90b
2542     add %o1, 1, %o1
2543     retl
2544     diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S
2545     index 88ad73d86fe4..db73010a1af8 100644
2546     --- a/arch/sparc/lib/U3copy_from_user.S
2547     +++ b/arch/sparc/lib/U3copy_from_user.S
2548     @@ -3,19 +3,19 @@
2549     * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
2550     */
2551    
2552     -#define EX_LD(x) \
2553     +#define EX_LD(x,y) \
2554     98: x; \
2555     .section __ex_table,"a";\
2556     .align 4; \
2557     - .word 98b, __retl_one; \
2558     + .word 98b, y; \
2559     .text; \
2560     .align 4;
2561    
2562     -#define EX_LD_FP(x) \
2563     +#define EX_LD_FP(x,y) \
2564     98: x; \
2565     .section __ex_table,"a";\
2566     .align 4; \
2567     - .word 98b, __retl_one_fp;\
2568     + .word 98b, y##_fp; \
2569     .text; \
2570     .align 4;
2571    
2572     diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
2573     index 845139d75537..c4ee858e352a 100644
2574     --- a/arch/sparc/lib/U3copy_to_user.S
2575     +++ b/arch/sparc/lib/U3copy_to_user.S
2576     @@ -3,19 +3,19 @@
2577     * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
2578     */
2579    
2580     -#define EX_ST(x) \
2581     +#define EX_ST(x,y) \
2582     98: x; \
2583     .section __ex_table,"a";\
2584     .align 4; \
2585     - .word 98b, __retl_one; \
2586     + .word 98b, y; \
2587     .text; \
2588     .align 4;
2589    
2590     -#define EX_ST_FP(x) \
2591     +#define EX_ST_FP(x,y) \
2592     98: x; \
2593     .section __ex_table,"a";\
2594     .align 4; \
2595     - .word 98b, __retl_one_fp;\
2596     + .word 98b, y##_fp; \
2597     .text; \
2598     .align 4;
2599    
2600     diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
2601     index 491ee69e4995..54f98706b03b 100644
2602     --- a/arch/sparc/lib/U3memcpy.S
2603     +++ b/arch/sparc/lib/U3memcpy.S
2604     @@ -4,6 +4,7 @@
2605     */
2606    
2607     #ifdef __KERNEL__
2608     +#include <linux/linkage.h>
2609     #include <asm/visasm.h>
2610     #include <asm/asi.h>
2611     #define GLOBAL_SPARE %g7
2612     @@ -22,21 +23,17 @@
2613     #endif
2614    
2615     #ifndef EX_LD
2616     -#define EX_LD(x) x
2617     +#define EX_LD(x,y) x
2618     #endif
2619     #ifndef EX_LD_FP
2620     -#define EX_LD_FP(x) x
2621     +#define EX_LD_FP(x,y) x
2622     #endif
2623    
2624     #ifndef EX_ST
2625     -#define EX_ST(x) x
2626     +#define EX_ST(x,y) x
2627     #endif
2628     #ifndef EX_ST_FP
2629     -#define EX_ST_FP(x) x
2630     -#endif
2631     -
2632     -#ifndef EX_RETVAL
2633     -#define EX_RETVAL(x) x
2634     +#define EX_ST_FP(x,y) x
2635     #endif
2636    
2637     #ifndef LOAD
2638     @@ -77,6 +74,87 @@
2639     */
2640    
2641     .text
2642     +#ifndef EX_RETVAL
2643     +#define EX_RETVAL(x) x
2644     +__restore_fp: /* common tail for the *_fp stubs below: run VISExitHalf, then return with %o0 as set by the stub */
2645     + VISExitHalf
2646     + retl
2647     + nop
2648     +ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
2649     + add %g1, 1, %g1
2650     + add %g2, %g1, %g2
2651     + ba,pt %xcc, __restore_fp
2652     + add %o2, %g2, %o0 /* delay slot: %o0 = %o2 + %g2 + %g1 + 1 */
2653     +ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
2654     +ENTRY(U3_retl_o2_plus_g2_fp)
2655     + ba,pt %xcc, __restore_fp
2656     + add %o2, %g2, %o0 /* delay slot: %o0 = %o2 + %g2 */
2657     +ENDPROC(U3_retl_o2_plus_g2_fp)
2658     +ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
2659     + add %g2, 8, %g2
2660     + ba,pt %xcc, __restore_fp
2661     + add %o2, %g2, %o0 /* delay slot: %o0 = %o2 + %g2 + 8 */
2662     +ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
2663     +ENTRY(U3_retl_o2)
2664     + retl
2665     + mov %o2, %o0 /* delay slot: %o0 = %o2 */
2666     +ENDPROC(U3_retl_o2)
2667     +ENTRY(U3_retl_o2_plus_1)
2668     + retl
2669     + add %o2, 1, %o0 /* delay slot: %o0 = %o2 + 1 */
2670     +ENDPROC(U3_retl_o2_plus_1)
2671     +ENTRY(U3_retl_o2_plus_4)
2672     + retl
2673     + add %o2, 4, %o0 /* delay slot: %o0 = %o2 + 4 */
2674     +ENDPROC(U3_retl_o2_plus_4)
2675     +ENTRY(U3_retl_o2_plus_8)
2676     + retl
2677     + add %o2, 8, %o0 /* delay slot: %o0 = %o2 + 8 */
2678     +ENDPROC(U3_retl_o2_plus_8)
2679     +ENTRY(U3_retl_o2_plus_g1_plus_1)
2680     + add %g1, 1, %g1
2681     + retl
2682     + add %o2, %g1, %o0 /* delay slot: %o0 = %o2 + %g1 + 1 */
2683     +ENDPROC(U3_retl_o2_plus_g1_plus_1)
2684     +ENTRY(U3_retl_o2_fp)
2685     + ba,pt %xcc, __restore_fp
2686     + mov %o2, %o0 /* delay slot: %o0 = %o2 */
2687     +ENDPROC(U3_retl_o2_fp)
2688     +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
2689     + sll %o3, 6, %o3
2690     + add %o3, 0x80, %o3
2691     + ba,pt %xcc, __restore_fp
2692     + add %o2, %o3, %o0 /* delay slot: %o0 = %o2 + (%o3 << 6) + 0x80 */
2693     +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
2694     +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
2695     + sll %o3, 6, %o3
2696     + add %o3, 0x40, %o3
2697     + ba,pt %xcc, __restore_fp
2698     + add %o2, %o3, %o0 /* delay slot: %o0 = %o2 + (%o3 << 6) + 0x40 */
2699     +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
2700     +ENTRY(U3_retl_o2_plus_GS_plus_0x10)
2701     + add GLOBAL_SPARE, 0x10, GLOBAL_SPARE
2702     + retl
2703     + add %o2, GLOBAL_SPARE, %o0 /* delay slot: %o0 = %o2 + GLOBAL_SPARE + 0x10 */
2704     +ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
2705     +ENTRY(U3_retl_o2_plus_GS_plus_0x08)
2706     + add GLOBAL_SPARE, 0x08, GLOBAL_SPARE
2707     + retl
2708     + add %o2, GLOBAL_SPARE, %o0 /* delay slot: %o0 = %o2 + GLOBAL_SPARE + 0x08 */
2709     +ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
2710     +ENTRY(U3_retl_o2_and_7_plus_GS)
2711     + and %o2, 7, %o2 /* fixup stub: returns (%o2 & 7) + GLOBAL_SPARE */
2712     + retl
2713     + add %o2, GLOBAL_SPARE, %o0 /* delay slot: the result must land in %o0, as in every other U3_retl_* stub */
2714     +ENDPROC(U3_retl_o2_and_7_plus_GS)
2715     +ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
2716     + add GLOBAL_SPARE, 8, GLOBAL_SPARE /* fixup stub: returns (%o2 & 7) + GLOBAL_SPARE + 8 */
2717     + and %o2, 7, %o2
2718     + retl
2719     + add %o2, GLOBAL_SPARE, %o0 /* delay slot: the result must land in %o0, as in every other U3_retl_* stub */
2720     +ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
2721     +#endif
2722     +
2723     .align 64
2724    
2725     /* The cheetah's flexible spine, oversized liver, enlarged heart,
2726     @@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2727     and %g2, 0x38, %g2
2728    
2729     1: subcc %g1, 0x1, %g1
2730     - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
2731     - EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
2732     + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
2733     + EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
2734     bgu,pt %XCC, 1b
2735     add %o1, 0x1, %o1
2736    
2737     @@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2738     be,pt %icc, 3f
2739     alignaddr %o1, %g0, %o1
2740    
2741     - EX_LD_FP(LOAD(ldd, %o1, %f4))
2742     -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
2743     + EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
2744     +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
2745     add %o1, 0x8, %o1
2746     subcc %g2, 0x8, %g2
2747     faligndata %f4, %f6, %f0
2748     - EX_ST_FP(STORE(std, %f0, %o0))
2749     + EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
2750     be,pn %icc, 3f
2751     add %o0, 0x8, %o0
2752    
2753     - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
2754     + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
2755     add %o1, 0x8, %o1
2756     subcc %g2, 0x8, %g2
2757     faligndata %f6, %f4, %f2
2758     - EX_ST_FP(STORE(std, %f2, %o0))
2759     + EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
2760     bne,pt %icc, 1b
2761     add %o0, 0x8, %o0
2762    
2763     @@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2764     LOAD(prefetch, %o1 + 0x080, #one_read)
2765     LOAD(prefetch, %o1 + 0x0c0, #one_read)
2766     LOAD(prefetch, %o1 + 0x100, #one_read)
2767     - EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
2768     + EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
2769     LOAD(prefetch, %o1 + 0x140, #one_read)
2770     - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2771     + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
2772     LOAD(prefetch, %o1 + 0x180, #one_read)
2773     - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2774     + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
2775     LOAD(prefetch, %o1 + 0x1c0, #one_read)
2776     faligndata %f0, %f2, %f16
2777     - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2778     + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
2779     faligndata %f2, %f4, %f18
2780     - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2781     + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
2782     faligndata %f4, %f6, %f20
2783     - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2784     + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
2785     faligndata %f6, %f8, %f22
2786    
2787     - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2788     + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
2789     faligndata %f8, %f10, %f24
2790     - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2791     + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
2792     faligndata %f10, %f12, %f26
2793     - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2794     + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
2795    
2796     subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
2797     add %o1, 0x40, %o1
2798     @@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2799    
2800     .align 64
2801     1:
2802     - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2803     + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2804     faligndata %f12, %f14, %f28
2805     - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2806     + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2807     faligndata %f14, %f0, %f30
2808     - EX_ST_FP(STORE_BLK(%f16, %o0))
2809     - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2810     + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2811     + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2812     faligndata %f0, %f2, %f16
2813     add %o0, 0x40, %o0
2814    
2815     - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2816     + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2817     faligndata %f2, %f4, %f18
2818     - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2819     + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2820     faligndata %f4, %f6, %f20
2821     - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2822     + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2823     subcc %o3, 0x01, %o3
2824     faligndata %f6, %f8, %f22
2825     - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2826     + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2827    
2828     faligndata %f8, %f10, %f24
2829     - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2830     + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2831     LOAD(prefetch, %o1 + 0x1c0, #one_read)
2832     faligndata %f10, %f12, %f26
2833     bg,pt %XCC, 1b
2834     @@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2835    
2836     /* Finally we copy the last full 64-byte block. */
2837     2:
2838     - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2839     + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2840     faligndata %f12, %f14, %f28
2841     - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2842     + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2843     faligndata %f14, %f0, %f30
2844     - EX_ST_FP(STORE_BLK(%f16, %o0))
2845     - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2846     + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2847     + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2848     faligndata %f0, %f2, %f16
2849     - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2850     + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2851     faligndata %f2, %f4, %f18
2852     - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2853     + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2854     faligndata %f4, %f6, %f20
2855     - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2856     + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2857     faligndata %f6, %f8, %f22
2858     - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2859     + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2860     faligndata %f8, %f10, %f24
2861     cmp %g1, 0
2862     be,pt %XCC, 1f
2863     add %o0, 0x40, %o0
2864     - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2865     + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2866     1: faligndata %f10, %f12, %f26
2867     faligndata %f12, %f14, %f28
2868     faligndata %f14, %f0, %f30
2869     - EX_ST_FP(STORE_BLK(%f16, %o0))
2870     + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2871     add %o0, 0x40, %o0
2872     add %o1, 0x40, %o1
2873     membar #Sync
2874     @@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2875    
2876     sub %o2, %g2, %o2
2877     be,a,pt %XCC, 1f
2878     - EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
2879     + EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
2880    
2881     -1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
2882     +1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
2883     add %o1, 0x8, %o1
2884     subcc %g2, 0x8, %g2
2885     faligndata %f0, %f2, %f8
2886     - EX_ST_FP(STORE(std, %f8, %o0))
2887     + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
2888     be,pn %XCC, 2f
2889     add %o0, 0x8, %o0
2890     - EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
2891     + EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
2892     add %o1, 0x8, %o1
2893     subcc %g2, 0x8, %g2
2894     faligndata %f2, %f0, %f8
2895     - EX_ST_FP(STORE(std, %f8, %o0))
2896     + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
2897     bne,pn %XCC, 1b
2898     add %o0, 0x8, %o0
2899    
2900     @@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2901     andcc %o2, 0x8, %g0
2902     be,pt %icc, 1f
2903     nop
2904     - EX_LD(LOAD(ldx, %o1, %o5))
2905     - EX_ST(STORE(stx, %o5, %o1 + %o3))
2906     + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
2907     + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
2908     add %o1, 0x8, %o1
2909     + sub %o2, 8, %o2
2910    
2911     1: andcc %o2, 0x4, %g0
2912     be,pt %icc, 1f
2913     nop
2914     - EX_LD(LOAD(lduw, %o1, %o5))
2915     - EX_ST(STORE(stw, %o5, %o1 + %o3))
2916     + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
2917     + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
2918     add %o1, 0x4, %o1
2919     + sub %o2, 4, %o2
2920    
2921     1: andcc %o2, 0x2, %g0
2922     be,pt %icc, 1f
2923     nop
2924     - EX_LD(LOAD(lduh, %o1, %o5))
2925     - EX_ST(STORE(sth, %o5, %o1 + %o3))
2926     + EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
2927     + EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
2928     add %o1, 0x2, %o1
2929     + sub %o2, 2, %o2
2930    
2931     1: andcc %o2, 0x1, %g0
2932     be,pt %icc, 85f
2933     nop
2934     - EX_LD(LOAD(ldub, %o1, %o5))
2935     + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
2936     ba,pt %xcc, 85f
2937     - EX_ST(STORE(stb, %o5, %o1 + %o3))
2938     + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
2939    
2940     .align 64
2941     70: /* 16 < len <= 64 */
2942     @@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2943     andn %o2, 0xf, GLOBAL_SPARE
2944     and %o2, 0xf, %o2
2945     1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
2946     - EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
2947     - EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
2948     - EX_ST(STORE(stx, %o5, %o1 + %o3))
2949     + EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
2950     + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
2951     + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
2952     add %o1, 0x8, %o1
2953     - EX_ST(STORE(stx, %g1, %o1 + %o3))
2954     + EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
2955     bgu,pt %XCC, 1b
2956     add %o1, 0x8, %o1
2957     73: andcc %o2, 0x8, %g0
2958     be,pt %XCC, 1f
2959     nop
2960     sub %o2, 0x8, %o2
2961     - EX_LD(LOAD(ldx, %o1, %o5))
2962     - EX_ST(STORE(stx, %o5, %o1 + %o3))
2963     + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
2964     + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
2965     add %o1, 0x8, %o1
2966     1: andcc %o2, 0x4, %g0
2967     be,pt %XCC, 1f
2968     nop
2969     sub %o2, 0x4, %o2
2970     - EX_LD(LOAD(lduw, %o1, %o5))
2971     - EX_ST(STORE(stw, %o5, %o1 + %o3))
2972     + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
2973     + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
2974     add %o1, 0x4, %o1
2975     1: cmp %o2, 0
2976     be,pt %XCC, 85f
2977     @@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2978     sub %o2, %g1, %o2
2979    
2980     1: subcc %g1, 1, %g1
2981     - EX_LD(LOAD(ldub, %o1, %o5))
2982     - EX_ST(STORE(stb, %o5, %o1 + %o3))
2983     + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
2984     + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
2985     bgu,pt %icc, 1b
2986     add %o1, 1, %o1
2987    
2988     @@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2989    
2990     8: mov 64, %o3
2991     andn %o1, 0x7, %o1
2992     - EX_LD(LOAD(ldx, %o1, %g2))
2993     + EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
2994     sub %o3, %g1, %o3
2995     andn %o2, 0x7, GLOBAL_SPARE
2996     sllx %g2, %g1, %g2
2997     -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
2998     +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
2999     subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE
3000     add %o1, 0x8, %o1
3001     srlx %g3, %o3, %o5
3002     or %o5, %g2, %o5
3003     - EX_ST(STORE(stx, %o5, %o0))
3004     + EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
3005     add %o0, 0x8, %o0
3006     bgu,pt %icc, 1b
3007     sllx %g3, %g1, %g2
3008     @@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
3009    
3010     1:
3011     subcc %o2, 4, %o2
3012     - EX_LD(LOAD(lduw, %o1, %g1))
3013     - EX_ST(STORE(stw, %g1, %o1 + %o3))
3014     + EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
3015     + EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
3016     bgu,pt %XCC, 1b
3017     add %o1, 4, %o1
3018    
3019     @@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
3020     .align 32
3021     90:
3022     subcc %o2, 1, %o2
3023     - EX_LD(LOAD(ldub, %o1, %g1))
3024     - EX_ST(STORE(stb, %g1, %o1 + %o3))
3025     + EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
3026     + EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
3027     bgu,pt %XCC, 90b
3028     add %o1, 1, %o1
3029     retl
3030     diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S
3031     index 302c0e60dc2c..4c89b486fa0d 100644
3032     --- a/arch/sparc/lib/copy_in_user.S
3033     +++ b/arch/sparc/lib/copy_in_user.S
3034     @@ -8,18 +8,33 @@
3035    
3036     #define XCC xcc
3037    
3038     -#define EX(x,y) \
3039     +#define EX(x,y,z) \
3040     98: x,y; \
3041     .section __ex_table,"a";\
3042     .align 4; \
3043     - .word 98b, __retl_one; \
3044     + .word 98b, z; \
3045     .text; \
3046     .align 4;
3047    
3048     +#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
3049     +#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
3050     +#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
3051     +
3052     .register %g2,#scratch
3053     .register %g3,#scratch
3054    
3055     .text
3056     +__retl_o4_plus_8: /* exception-table target used by EX_O4 */
3057     + add %o4, %o2, %o4
3058     + retl
3059     + add %o4, 8, %o0 /* delay slot: %o0 = %o4 + %o2 + 8 */
3060     +__retl_o2_plus_4: /* exception-table target used by EX_O2_4 */
3061     + retl
3062     + add %o2, 4, %o0 /* delay slot: %o0 = %o2 + 4 */
3063     +__retl_o2_plus_1: /* exception-table target used by EX_O2_1 */
3064     + retl
3065     + add %o2, 1, %o0 /* delay slot: %o0 = %o2 + 1 */
3066     +
3067     .align 32
3068    
3069     /* Don't try to get too fancy here, just nice and
3070     @@ -44,8 +59,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
3071     andn %o2, 0x7, %o4
3072     and %o2, 0x7, %o2
3073     1: subcc %o4, 0x8, %o4
3074     - EX(ldxa [%o1] %asi, %o5)
3075     - EX(stxa %o5, [%o0] %asi)
3076     + EX_O4(ldxa [%o1] %asi, %o5)
3077     + EX_O4(stxa %o5, [%o0] %asi)
3078     add %o1, 0x8, %o1
3079     bgu,pt %XCC, 1b
3080     add %o0, 0x8, %o0
3081     @@ -53,8 +68,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
3082     be,pt %XCC, 1f
3083     nop
3084     sub %o2, 0x4, %o2
3085     - EX(lduwa [%o1] %asi, %o5)
3086     - EX(stwa %o5, [%o0] %asi)
3087     + EX_O2_4(lduwa [%o1] %asi, %o5)
3088     + EX_O2_4(stwa %o5, [%o0] %asi)
3089     add %o1, 0x4, %o1
3090     add %o0, 0x4, %o0
3091     1: cmp %o2, 0
3092     @@ -70,8 +85,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
3093    
3094     82:
3095     subcc %o2, 4, %o2
3096     - EX(lduwa [%o1] %asi, %g1)
3097     - EX(stwa %g1, [%o0] %asi)
3098     + EX_O2_4(lduwa [%o1] %asi, %g1)
3099     + EX_O2_4(stwa %g1, [%o0] %asi)
3100     add %o1, 4, %o1
3101     bgu,pt %XCC, 82b
3102     add %o0, 4, %o0
3103     @@ -82,8 +97,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
3104     .align 32
3105     90:
3106     subcc %o2, 1, %o2
3107     - EX(lduba [%o1] %asi, %g1)
3108     - EX(stba %g1, [%o0] %asi)
3109     + EX_O2_1(lduba [%o1] %asi, %g1)
3110     + EX_O2_1(stba %g1, [%o0] %asi)
3111     add %o1, 1, %o1
3112     bgu,pt %XCC, 90b
3113     add %o0, 1, %o0
3114     diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c
3115     deleted file mode 100644
3116     index ac96ae236709..000000000000
3117     --- a/arch/sparc/lib/user_fixup.c
3118     +++ /dev/null
3119     @@ -1,71 +0,0 @@
3120     -/* user_fixup.c: Fix up user copy faults.
3121     - *
3122     - * Copyright (C) 2004 David S. Miller <davem@redhat.com>
3123     - */
3124     -
3125     -#include <linux/compiler.h>
3126     -#include <linux/kernel.h>
3127     -#include <linux/string.h>
3128     -#include <linux/errno.h>
3129     -#include <linux/module.h>
3130     -
3131     -#include <asm/uaccess.h>
3132     -
3133     -/* Calculating the exact fault address when using
3134     - * block loads and stores can be very complicated.
3135     - *
3136     - * Instead of trying to be clever and handling all
3137     - * of the cases, just fix things up simply here.
3138     - */
3139     -
3140     -static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset)
3141     -{
3142     - unsigned long fault_addr = current_thread_info()->fault_address;
3143     - unsigned long end = start + size;
3144     -
3145     - if (fault_addr < start || fault_addr >= end) {
3146     - *offset = 0;
3147     - } else {
3148     - *offset = fault_addr - start;
3149     - size = end - fault_addr;
3150     - }
3151     - return size;
3152     -}
3153     -
3154     -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size)
3155     -{
3156     - unsigned long offset;
3157     -
3158     - size = compute_size((unsigned long) from, size, &offset);
3159     - if (likely(size))
3160     - memset(to + offset, 0, size);
3161     -
3162     - return size;
3163     -}
3164     -EXPORT_SYMBOL(copy_from_user_fixup);
3165     -
3166     -unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size)
3167     -{
3168     - unsigned long offset;
3169     -
3170     - return compute_size((unsigned long) to, size, &offset);
3171     -}
3172     -EXPORT_SYMBOL(copy_to_user_fixup);
3173     -
3174     -unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size)
3175     -{
3176     - unsigned long fault_addr = current_thread_info()->fault_address;
3177     - unsigned long start = (unsigned long) to;
3178     - unsigned long end = start + size;
3179     -
3180     - if (fault_addr >= start && fault_addr < end)
3181     - return end - fault_addr;
3182     -
3183     - start = (unsigned long) from;
3184     - end = start + size;
3185     - if (fault_addr >= start && fault_addr < end)
3186     - return end - fault_addr;
3187     -
3188     - return size;
3189     -}
3190     -EXPORT_SYMBOL(copy_in_user_fixup);
3191     diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
3192     index dbabe5713a15..e15f33715103 100644
3193     --- a/arch/sparc/mm/fault_64.c
3194     +++ b/arch/sparc/mm/fault_64.c
3195     @@ -479,14 +479,14 @@ good_area:
3196     up_read(&mm->mmap_sem);
3197    
3198     mm_rss = get_mm_rss(mm);
3199     -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3200     - mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
3201     +#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
3202     + mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE));
3203     #endif
3204     if (unlikely(mm_rss >
3205     mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
3206     tsb_grow(mm, MM_TSB_BASE, mm_rss);
3207     #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3208     - mm_rss = mm->context.huge_pte_count;
3209     + mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
3210     if (unlikely(mm_rss >
3211     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
3212     if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
3213     diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
3214     index 364d093f46c6..da1142401bf4 100644
3215     --- a/arch/sparc/mm/hugetlbpage.c
3216     +++ b/arch/sparc/mm/hugetlbpage.c
3217     @@ -180,7 +180,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
3218     unsigned long nptes;
3219    
3220     if (!pte_present(*ptep) && pte_present(entry))
3221     - mm->context.huge_pte_count++;
3222     + mm->context.hugetlb_pte_count++;
3223    
3224     addr &= HPAGE_MASK;
3225    
3226     @@ -212,7 +212,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
3227    
3228     entry = *ptep;
3229     if (pte_present(entry))
3230     - mm->context.huge_pte_count--;
3231     + mm->context.hugetlb_pte_count--;
3232    
3233     addr &= HPAGE_MASK;
3234     nptes = 1 << HUGETLB_PAGE_ORDER;
3235     diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
3236     index 3c4b8975fa76..a5331c336b2a 100644
3237     --- a/arch/sparc/mm/init_64.c
3238     +++ b/arch/sparc/mm/init_64.c
3239     @@ -346,7 +346,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
3240     spin_lock_irqsave(&mm->context.lock, flags);
3241    
3242     #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3243     - if (mm->context.huge_pte_count && is_hugetlb_pte(pte))
3244     + if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
3245     + is_hugetlb_pte(pte))
3246     __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
3247     address, pte_val(pte));
3248     else
3249     diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
3250     index f81cd9736700..3659d37b4d81 100644
3251     --- a/arch/sparc/mm/tlb.c
3252     +++ b/arch/sparc/mm/tlb.c
3253     @@ -175,9 +175,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
3254    
3255     if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
3256     if (pmd_val(pmd) & _PAGE_PMD_HUGE)
3257     - mm->context.huge_pte_count++;
3258     + mm->context.thp_pte_count++;
3259     else
3260     - mm->context.huge_pte_count--;
3261     + mm->context.thp_pte_count--;
3262    
3263     /* Do not try to allocate the TSB hash table if we
3264     * don't have one already. We have various locks held
3265     diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
3266     index a0604a493a36..9cdeca0fa955 100644
3267     --- a/arch/sparc/mm/tsb.c
3268     +++ b/arch/sparc/mm/tsb.c
3269     @@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr)
3270     return (tag == (vaddr >> 22));
3271     }
3272    
3273     +static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
3274     +{
3275     + unsigned long slot;
3276     +
3277     + /* Invalidate each kernel TSB entry whose rebuilt address lies in [start, end). */
3278     + for (slot = 0; slot < KERNEL_TSB_NENTRIES; slot++) {
3279     + struct tsb *entry = swapper_tsb + slot;
3280     + unsigned long addr = (entry->tag << 22) | (slot << 13);
3281     +
3282     + if (addr >= start && addr < end)
3283     + entry->tag = 1UL << TSB_TAG_INVALID_BIT;
3284     + }
3285     +}
3286     +
3287     /* TSB flushes need only occur on the processor initiating the address
3288     * space modification, not on each cpu the address space has run on.
3289     * Only the TLB flush needs that treatment.
3290     @@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
3291     {
3292     unsigned long v;
3293    
3294     + if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
3295     + return flush_tsb_kernel_range_scan(start, end);
3296     +
3297     for (v = start; v < end; v += PAGE_SIZE) {
3298     unsigned long hash = tsb_hash(v, PAGE_SHIFT,
3299     KERNEL_TSB_NENTRIES);
3300     @@ -470,7 +487,7 @@ retry_tsb_alloc:
3301     int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
3302     {
3303     #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3304     - unsigned long huge_pte_count;
3305     + unsigned long total_huge_pte_count;
3306     #endif
3307     unsigned int i;
3308    
3309     @@ -479,12 +496,14 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
3310     mm->context.sparc64_ctx_val = 0UL;
3311    
3312     #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3313     - /* We reset it to zero because the fork() page copying
3314     + /* We reset them to zero because the fork() page copying
3315     * will re-increment the counters as the parent PTEs are
3316     * copied into the child address space.
3317     */
3318     - huge_pte_count = mm->context.huge_pte_count;
3319     - mm->context.huge_pte_count = 0;
3320     + total_huge_pte_count = mm->context.hugetlb_pte_count +
3321     + mm->context.thp_pte_count;
3322     + mm->context.hugetlb_pte_count = 0;
3323     + mm->context.thp_pte_count = 0;
3324     #endif
3325    
3326     /* copy_mm() copies over the parent's mm_struct before calling
3327     @@ -500,8 +519,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
3328     tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
3329    
3330     #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3331     - if (unlikely(huge_pte_count))
3332     - tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
3333     + if (unlikely(total_huge_pte_count))
3334     + tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count);
3335     #endif
3336    
3337     if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
3338     diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
3339     index b4f4733abc6e..5d2fd6cd3189 100644
3340     --- a/arch/sparc/mm/ultra.S
3341     +++ b/arch/sparc/mm/ultra.S
3342     @@ -30,7 +30,7 @@
3343     .text
3344     .align 32
3345     .globl __flush_tlb_mm
3346     -__flush_tlb_mm: /* 18 insns */
3347     +__flush_tlb_mm: /* 19 insns */
3348     /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
3349     ldxa [%o1] ASI_DMMU, %g2
3350     cmp %g2, %o0
3351     @@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */
3352    
3353     .align 32
3354     .globl __flush_tlb_pending
3355     -__flush_tlb_pending: /* 26 insns */
3356     +__flush_tlb_pending: /* 27 insns */
3357     /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
3358     rdpr %pstate, %g7
3359     sllx %o1, 3, %o1
3360     @@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */
3361    
3362     .align 32
3363     .globl __flush_tlb_kernel_range
3364     -__flush_tlb_kernel_range: /* 16 insns */
3365     +__flush_tlb_kernel_range: /* 31 insns */
3366     /* %o0=start, %o1=end */
3367     cmp %o0, %o1
3368     be,pn %xcc, 2f
3369     + sub %o1, %o0, %o3
3370     + srlx %o3, 18, %o4
3371     + brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow
3372     sethi %hi(PAGE_SIZE), %o4
3373     - sub %o1, %o0, %o3
3374     sub %o3, %o4, %o3
3375     or %o0, 0x20, %o0 ! Nucleus
3376     1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
3377     @@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */
3378     retl
3379     nop
3380     nop
3381     + nop
3382     + nop
3383     + nop
3384     + nop
3385     + nop
3386     + nop
3387     + nop
3388     + nop
3389     + nop
3390     + nop
3391     + nop
3392     + nop
3393     + nop
3394     +
3395     +__spitfire_flush_tlb_kernel_range_slow: /* reached from __flush_tlb_kernel_range when (end - start) >> 18 is non-zero */
3396     + mov 63 * 8, %o4 /* %o4 = entry offset, stepped down by 8 from 63*8 to 0 */
3397     +1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3 /* read the ITLB data word at this offset */
3398     + andcc %o3, 0x40, %g0 /* _PAGE_L_4U */
3399     + bne,pn %xcc, 2f /* bit set: leave this ITLB entry untouched */
3400     + mov TLB_TAG_ACCESS, %o3
3401     + stxa %g0, [%o3] ASI_IMMU /* zero the IMMU tag access register ... */
3402     + stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS /* ... then overwrite the entry's data with zero */
3403     + membar #Sync
3404     +2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3 /* same test-and-clear for the DTLB entry at this offset */
3405     + andcc %o3, 0x40, %g0
3406     + bne,pn %xcc, 2f
3407     + mov TLB_TAG_ACCESS, %o3
3408     + stxa %g0, [%o3] ASI_DMMU
3409     + stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS
3410     + membar #Sync
3411     +2: sub %o4, 8, %o4
3412     + brgez,pt %o4, 1b /* keep looping while the offset is still >= 0 */
3413     + nop
3414     + retl
3415     + nop
3416    
3417     __spitfire_flush_tlb_mm_slow:
3418     rdpr %pstate, %g1
3419     @@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */
3420     retl
3421     wrpr %g7, 0x0, %pstate
3422    
3423     +__cheetah_flush_tlb_kernel_range: /* 31 insns */
3424     + /* %o0=start, %o1=end */
3425     + cmp %o0, %o1
3426     + be,pn %xcc, 2f /* start == end: skip the demap loop */
3427     + sub %o1, %o0, %o3 /* delay slot: %o3 = end - start */
3428     + srlx %o3, 18, %o4 /* non-zero when the range is 1 << 18 bytes or more */
3429     + brnz,pn %o4, 3f /* large range: take the two-store path at 3: */
3430     + sethi %hi(PAGE_SIZE), %o4 /* delay slot: %o4 = PAGE_SIZE, the loop step */
3431     + sub %o3, %o4, %o3 /* %o3 = offset of the last page in the range */
3432     + or %o0, 0x20, %o0 ! Nucleus
3433     +1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
3434     + stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP
3435     + membar #Sync
3436     + brnz,pt %o3, 1b /* loop until offset 0 has been demapped */
3437     + sub %o3, %o4, %o3 /* delay slot: step back one page */
3438     +2: sethi %hi(KERNBASE), %o3
3439     + flush %o3
3440     + retl
3441     + nop
3442     +3: mov 0x80, %o4 /* NOTE(review): presumably the demap-all operation encoding - confirm against the CPU manual */
3443     + stxa %g0, [%o4] ASI_DMMU_DEMAP
3444     + membar #Sync
3445     + stxa %g0, [%o4] ASI_IMMU_DEMAP
3446     + membar #Sync
3447     + retl
3448     + nop
3449     + nop /* padding up to the annotated 31 insns */
3450     + nop
3451     + nop
3452     + nop
3453     + nop
3454     + nop
3455     + nop
3456     +
3456     +
3457     #ifdef DCACHE_ALIASING_POSSIBLE
3458     __cheetah_flush_dcache_page: /* 11 insns */
3459     sethi %hi(PAGE_OFFSET), %g1
3460     @@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error:
3461     ret
3462     restore
3463    
3464     -__hypervisor_flush_tlb_mm: /* 10 insns */
3465     +__hypervisor_flush_tlb_mm: /* 19 insns */
3466     mov %o0, %o2 /* ARG2: mmu context */
3467     mov 0, %o0 /* ARG0: CPU lists unimplemented */
3468     mov 0, %o1 /* ARG1: CPU lists unimplemented */
3469     mov HV_MMU_ALL, %o3 /* ARG3: flags */
3470     mov HV_FAST_MMU_DEMAP_CTX, %o5
3471     ta HV_FAST_TRAP
3472     - brnz,pn %o0, __hypervisor_tlb_tl0_error
3473     + brnz,pn %o0, 1f
3474     mov HV_FAST_MMU_DEMAP_CTX, %o1
3475     retl
3476     nop
3477     +1: sethi %hi(__hypervisor_tlb_tl0_error), %o5
3478     + jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0
3479     + nop
3480     + nop
3481     + nop
3482     + nop
3483     + nop
3484     + nop
3485     + nop
3486    
3487     -__hypervisor_flush_tlb_page: /* 11 insns */
3488     +__hypervisor_flush_tlb_page: /* 22 insns */
3489     /* %o0 = context, %o1 = vaddr */
3490     mov %o0, %g2
3491     mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */
3492     @@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */
3493     srlx %o0, PAGE_SHIFT, %o0
3494     sllx %o0, PAGE_SHIFT, %o0
3495     ta HV_MMU_UNMAP_ADDR_TRAP
3496     - brnz,pn %o0, __hypervisor_tlb_tl0_error
3497     + brnz,pn %o0, 1f
3498     mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3499     retl
3500     nop
3501     +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3502     + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3503     + nop
3504     + nop
3505     + nop
3506     + nop
3507     + nop
3508     + nop
3509     + nop
3510     + nop
3511     + nop
3512    
3513     -__hypervisor_flush_tlb_pending: /* 16 insns */
3514     +__hypervisor_flush_tlb_pending: /* 27 insns */
3515     /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
3516     sllx %o1, 3, %g1
3517     mov %o2, %g2
3518     @@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */
3519     srlx %o0, PAGE_SHIFT, %o0
3520     sllx %o0, PAGE_SHIFT, %o0
3521     ta HV_MMU_UNMAP_ADDR_TRAP
3522     - brnz,pn %o0, __hypervisor_tlb_tl0_error
3523     + brnz,pn %o0, 1f
3524     mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3525     brnz,pt %g1, 1b
3526     nop
3527     retl
3528     nop
3529     +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3530     + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3531     + nop
3532     + nop
3533     + nop
3534     + nop
3535     + nop
3536     + nop
3537     + nop
3538     + nop
3539     + nop
3540    
3541     -__hypervisor_flush_tlb_kernel_range: /* 16 insns */
3542     +__hypervisor_flush_tlb_kernel_range: /* 31 insns */
3543     /* %o0=start, %o1=end */
3544     cmp %o0, %o1
3545     be,pn %xcc, 2f
3546     - sethi %hi(PAGE_SIZE), %g3
3547     - mov %o0, %g1
3548     - sub %o1, %g1, %g2
3549     + sub %o1, %o0, %g2
3550     + srlx %g2, 18, %g3
3551     + brnz,pn %g3, 4f
3552     + mov %o0, %g1
3553     + sethi %hi(PAGE_SIZE), %g3
3554     sub %g2, %g3, %g2
3555     1: add %g1, %g2, %o0 /* ARG0: virtual address */
3556     mov 0, %o1 /* ARG1: mmu context */
3557     mov HV_MMU_ALL, %o2 /* ARG2: flags */
3558     ta HV_MMU_UNMAP_ADDR_TRAP
3559     - brnz,pn %o0, __hypervisor_tlb_tl0_error
3560     + brnz,pn %o0, 3f
3561     mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3562     brnz,pt %g2, 1b
3563     sub %g2, %g3, %g2
3564     2: retl
3565     nop
3566     +3: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3567     + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3568     + nop
3569     +4: mov 0, %o0 /* ARG0: CPU lists unimplemented */
3570     + mov 0, %o1 /* ARG1: CPU lists unimplemented */
3571     + mov 0, %o2 /* ARG2: mmu context == nucleus */
3572     + mov HV_MMU_ALL, %o3 /* ARG3: flags */
3573     + mov HV_FAST_MMU_DEMAP_CTX, %o5
3574     + ta HV_FAST_TRAP
3575     + brnz,pn %o0, 3b
3576     + mov HV_FAST_MMU_DEMAP_CTX, %o1
3577     + retl
3578     + nop
3579    
3580     #ifdef DCACHE_ALIASING_POSSIBLE
3581     /* XXX Niagara and friends have an 8K cache, so no aliasing is
3582     @@ -394,43 +511,6 @@ tlb_patch_one:
3583     retl
3584     nop
3585    
3586     - .globl cheetah_patch_cachetlbops
3587     -cheetah_patch_cachetlbops:
3588     - save %sp, -128, %sp
3589     -
3590     - sethi %hi(__flush_tlb_mm), %o0
3591     - or %o0, %lo(__flush_tlb_mm), %o0
3592     - sethi %hi(__cheetah_flush_tlb_mm), %o1
3593     - or %o1, %lo(__cheetah_flush_tlb_mm), %o1
3594     - call tlb_patch_one
3595     - mov 19, %o2
3596     -
3597     - sethi %hi(__flush_tlb_page), %o0
3598     - or %o0, %lo(__flush_tlb_page), %o0
3599     - sethi %hi(__cheetah_flush_tlb_page), %o1
3600     - or %o1, %lo(__cheetah_flush_tlb_page), %o1
3601     - call tlb_patch_one
3602     - mov 22, %o2
3603     -
3604     - sethi %hi(__flush_tlb_pending), %o0
3605     - or %o0, %lo(__flush_tlb_pending), %o0
3606     - sethi %hi(__cheetah_flush_tlb_pending), %o1
3607     - or %o1, %lo(__cheetah_flush_tlb_pending), %o1
3608     - call tlb_patch_one
3609     - mov 27, %o2
3610     -
3611     -#ifdef DCACHE_ALIASING_POSSIBLE
3612     - sethi %hi(__flush_dcache_page), %o0
3613     - or %o0, %lo(__flush_dcache_page), %o0
3614     - sethi %hi(__cheetah_flush_dcache_page), %o1
3615     - or %o1, %lo(__cheetah_flush_dcache_page), %o1
3616     - call tlb_patch_one
3617     - mov 11, %o2
3618     -#endif /* DCACHE_ALIASING_POSSIBLE */
3619     -
3620     - ret
3621     - restore
3622     -
3623     #ifdef CONFIG_SMP
3624     /* These are all called by the slaves of a cross call, at
3625     * trap level 1, with interrupts fully disabled.
3626     @@ -447,7 +527,7 @@ cheetah_patch_cachetlbops:
3627     */
3628     .align 32
3629     .globl xcall_flush_tlb_mm
3630     -xcall_flush_tlb_mm: /* 21 insns */
3631     +xcall_flush_tlb_mm: /* 24 insns */
3632     mov PRIMARY_CONTEXT, %g2
3633     ldxa [%g2] ASI_DMMU, %g3
3634     srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
3635     @@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */
3636     nop
3637     nop
3638     nop
3639     + nop
3640     + nop
3641     + nop
3642    
3643     .globl xcall_flush_tlb_page
3644     -xcall_flush_tlb_page: /* 17 insns */
3645     +xcall_flush_tlb_page: /* 20 insns */
3646     /* %g5=context, %g1=vaddr */
3647     mov PRIMARY_CONTEXT, %g4
3648     ldxa [%g4] ASI_DMMU, %g2
3649     @@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */
3650     retry
3651     nop
3652     nop
3653     + nop
3654     + nop
3655     + nop
3656    
3657     .globl xcall_flush_tlb_kernel_range
3658     -xcall_flush_tlb_kernel_range: /* 25 insns */
3659     +xcall_flush_tlb_kernel_range: /* 44 insns */
3660     sethi %hi(PAGE_SIZE - 1), %g2
3661     or %g2, %lo(PAGE_SIZE - 1), %g2
3662     andn %g1, %g2, %g1
3663     andn %g7, %g2, %g7
3664     sub %g7, %g1, %g3
3665     - add %g2, 1, %g2
3666     + srlx %g3, 18, %g2 /* non-zero when the range is 1 << 18 bytes or more; clobbers the PAGE_SIZE - 1 mask */
3667     + brnz,pn %g2, 2f /* large range: walk the TLB entries at 2: instead */
3668     + sethi %hi(PAGE_SIZE), %g2 /* delay slot: reload the loop step; "add %g2, 1" would give 1 here */
3669     sub %g3, %g2, %g3
3670     or %g1, 0x20, %g1 ! Nucleus
3671     1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
3672     @@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */
3673     brnz,pt %g3, 1b
3674     sub %g3, %g2, %g3
3675     retry
3676     - nop
3677     - nop
3678     +2: mov 63 * 8, %g1 /* %g1 = entry offset, starts at 63 * 8 and counts down by 8 */
3679     +1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2
3680     + andcc %g2, 0x40, %g0 /* _PAGE_L_4U */
3681     + bne,pn %xcc, 2f
3682     + mov TLB_TAG_ACCESS, %g2
3683     + stxa %g0, [%g2] ASI_IMMU
3684     + stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS
3685     + membar #Sync
3686     +2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2
3687     + andcc %g2, 0x40, %g0
3688     + bne,pn %xcc, 2f
3689     + mov TLB_TAG_ACCESS, %g2
3690     + stxa %g0, [%g2] ASI_DMMU
3691     + stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS
3692     + membar #Sync
3693     +2: sub %g1, 8, %g1
3694     + brgez,pt %g1, 1b
3695     + nop
3696     + retry
3697     nop
3698     nop
3699     nop
3700     @@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4:
3701    
3702     retry
3703    
3704     +__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */
3705     + sethi %hi(PAGE_SIZE - 1), %g2
3706     + or %g2, %lo(PAGE_SIZE - 1), %g2 /* %g2 = PAGE_SIZE - 1, the in-page offset mask */
3707     + andn %g1, %g2, %g1 /* clear the in-page bits of %g1 */
3708     + andn %g7, %g2, %g7 /* clear the in-page bits of %g7 */
3709     + sub %g7, %g1, %g3 /* %g3 = %g7 - %g1, the range size in bytes */
3710     + srlx %g3, 18, %g2 /* non-zero when the range is 1 << 18 bytes or more; clobbers the mask */
3711     + brnz,pn %g2, 2f /* large range: take the two-store path at 2: */
3712     + sethi %hi(PAGE_SIZE), %g2 /* delay slot: reload the loop step; "add %g2, 1" would give 1 here */
3713     + sub %g3, %g2, %g3 /* %g3 = offset of the last page in the range */
3714     + or %g1, 0x20, %g1 ! Nucleus
3715     +1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
3716     + stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
3717     + membar #Sync
3718     + brnz,pt %g3, 1b /* loop until offset 0 has been demapped */
3719     + sub %g3, %g2, %g3 /* delay slot: step back one page */
3720     + retry
3721     +2: mov 0x80, %g2 /* NOTE(review): presumably the demap-all operation encoding - confirm against the CPU manual */
3722     + stxa %g0, [%g2] ASI_DMMU_DEMAP
3723     + membar #Sync
3724     + stxa %g0, [%g2] ASI_IMMU_DEMAP
3725     + membar #Sync
3726     + retry
3727     + nop /* padding up to the annotated 44 insns */
3728     + nop
3729     + nop
3730     + nop
3731     + nop
3732     + nop
3733     + nop
3734     + nop
3735     + nop
3736     + nop
3737     + nop
3738     + nop
3739     + nop
3740     + nop
3741     + nop
3742     + nop
3743     + nop
3744     + nop
3745     + nop
3746     + nop
3747     + nop
3748     + nop
3749     +
3749     +
3750     #ifdef DCACHE_ALIASING_POSSIBLE
3751     .align 32
3752     .globl xcall_flush_dcache_page_cheetah
3753     @@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error:
3754     ba,a,pt %xcc, rtrap
3755    
3756     .globl __hypervisor_xcall_flush_tlb_mm
3757     -__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3758     +__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
3759     /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
3760     mov %o0, %g2
3761     mov %o1, %g3
3762     @@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3763     mov HV_FAST_MMU_DEMAP_CTX, %o5
3764     ta HV_FAST_TRAP
3765     mov HV_FAST_MMU_DEMAP_CTX, %g6
3766     - brnz,pn %o0, __hypervisor_tlb_xcall_error
3767     + brnz,pn %o0, 1f
3768     mov %o0, %g5
3769     mov %g2, %o0
3770     mov %g3, %o1
3771     @@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3772     mov %g7, %o5
3773     membar #Sync
3774     retry
3775     +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3776     + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3777     + nop
3778    
3779     .globl __hypervisor_xcall_flush_tlb_page
3780     -__hypervisor_xcall_flush_tlb_page: /* 17 insns */
3781     +__hypervisor_xcall_flush_tlb_page: /* 20 insns */
3782     /* %g5=ctx, %g1=vaddr */
3783     mov %o0, %g2
3784     mov %o1, %g3
3785     @@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */
3786     sllx %o0, PAGE_SHIFT, %o0
3787     ta HV_MMU_UNMAP_ADDR_TRAP
3788     mov HV_MMU_UNMAP_ADDR_TRAP, %g6
3789     - brnz,a,pn %o0, __hypervisor_tlb_xcall_error
3790     + brnz,a,pn %o0, 1f
3791     mov %o0, %g5
3792     mov %g2, %o0
3793     mov %g3, %o1
3794     mov %g4, %o2
3795     membar #Sync
3796     retry
3797     +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3798     + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3799     + nop
3800    
3801     .globl __hypervisor_xcall_flush_tlb_kernel_range
3802     -__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
3803     +__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
3804     /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
3805     sethi %hi(PAGE_SIZE - 1), %g2
3806     or %g2, %lo(PAGE_SIZE - 1), %g2
3807     andn %g1, %g2, %g1
3808     andn %g7, %g2, %g7
3809     sub %g7, %g1, %g3
3810     + srlx %g3, 18, %g7
3811     add %g2, 1, %g2
3812     sub %g3, %g2, %g3
3813     mov %o0, %g2
3814     mov %o1, %g4
3815     - mov %o2, %g7
3816     + brnz,pn %g7, 2f
3817     + mov %o2, %g7
3818     1: add %g1, %g3, %o0 /* ARG0: virtual address */
3819     mov 0, %o1 /* ARG1: mmu context */
3820     mov HV_MMU_ALL, %o2 /* ARG2: flags */
3821     ta HV_MMU_UNMAP_ADDR_TRAP
3822     mov HV_MMU_UNMAP_ADDR_TRAP, %g6
3823     - brnz,pn %o0, __hypervisor_tlb_xcall_error
3824     + brnz,pn %o0, 1f
3825     mov %o0, %g5
3826     sethi %hi(PAGE_SIZE), %o2
3827     brnz,pt %g3, 1b
3828     sub %g3, %o2, %g3
3829     - mov %g2, %o0
3830     +5: mov %g2, %o0
3831     mov %g4, %o1
3832     mov %g7, %o2
3833     membar #Sync
3834     retry
3835     +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3836     + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3837     + nop
3838     +2: mov %o3, %g1
3839     + mov %o5, %g3
3840     + mov 0, %o0 /* ARG0: CPU lists unimplemented */
3841     + mov 0, %o1 /* ARG1: CPU lists unimplemented */
3842     + mov 0, %o2 /* ARG2: mmu context == nucleus */
3843     + mov HV_MMU_ALL, %o3 /* ARG3: flags */
3844     + mov HV_FAST_MMU_DEMAP_CTX, %o5
3845     + ta HV_FAST_TRAP
3846     + mov %g1, %o3
3847     + brz,pt %o0, 5b
3848     + mov %g3, %o5
3849     + mov HV_FAST_MMU_DEMAP_CTX, %g6
3850     + ba,pt %xcc, 1b
3851     + clr %g5
3852    
3853     /* These just get rescheduled to PIL vectors. */
3854     .globl xcall_call_function
3855     @@ -809,6 +985,58 @@ xcall_kgdb_capture:
3856    
3857     #endif /* CONFIG_SMP */
3858    
3859     + .globl cheetah_patch_cachetlbops
3860     +cheetah_patch_cachetlbops: /* for each pair below: %o0 = generic routine, %o1 = cheetah routine, %o2 = count; NOTE(review): presumably tlb_patch_one copies %o2 insns from %o1 over %o0 - confirm */
3861     + save %sp, -128, %sp
3862     +
3863     + sethi %hi(__flush_tlb_mm), %o0
3864     + or %o0, %lo(__flush_tlb_mm), %o0
3865     + sethi %hi(__cheetah_flush_tlb_mm), %o1
3866     + or %o1, %lo(__cheetah_flush_tlb_mm), %o1
3867     + call tlb_patch_one
3868     + mov 19, %o2 /* delay slot; matches "19 insns" on __flush_tlb_mm */
3869     +
3870     + sethi %hi(__flush_tlb_page), %o0
3871     + or %o0, %lo(__flush_tlb_page), %o0
3872     + sethi %hi(__cheetah_flush_tlb_page), %o1
3873     + or %o1, %lo(__cheetah_flush_tlb_page), %o1
3874     + call tlb_patch_one
3875     + mov 22, %o2 /* delay slot; matches "22 insns" on __flush_tlb_page */
3876     +
3877     + sethi %hi(__flush_tlb_pending), %o0
3878     + or %o0, %lo(__flush_tlb_pending), %o0
3879     + sethi %hi(__cheetah_flush_tlb_pending), %o1
3880     + or %o1, %lo(__cheetah_flush_tlb_pending), %o1
3881     + call tlb_patch_one
3882     + mov 27, %o2 /* delay slot; matches "27 insns" on __flush_tlb_pending */
3883     +
3884     + sethi %hi(__flush_tlb_kernel_range), %o0
3885     + or %o0, %lo(__flush_tlb_kernel_range), %o0
3886     + sethi %hi(__cheetah_flush_tlb_kernel_range), %o1
3887     + or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
3888     + call tlb_patch_one
3889     + mov 31, %o2 /* delay slot; matches "31 insns" on __flush_tlb_kernel_range */
3890     +
3891     +#ifdef DCACHE_ALIASING_POSSIBLE
3892     + sethi %hi(__flush_dcache_page), %o0
3893     + or %o0, %lo(__flush_dcache_page), %o0
3894     + sethi %hi(__cheetah_flush_dcache_page), %o1
3895     + or %o1, %lo(__cheetah_flush_dcache_page), %o1
3896     + call tlb_patch_one
3897     + mov 11, %o2 /* delay slot; matches "11 insns" on __cheetah_flush_dcache_page */
3898     +#endif /* DCACHE_ALIASING_POSSIBLE */
3899     +
3900     +#ifdef CONFIG_SMP
3901     + sethi %hi(xcall_flush_tlb_kernel_range), %o0
3902     + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
3903     + sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
3904     + or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
3905     + call tlb_patch_one
3906     + mov 44, %o2 /* delay slot; matches "44 insns" on xcall_flush_tlb_kernel_range */
3907     +#endif /* CONFIG_SMP */
3908     +
3909     + ret
3910     + restore
3911    
3912     .globl hypervisor_patch_cachetlbops
3913     hypervisor_patch_cachetlbops:
3914     @@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops:
3915     sethi %hi(__hypervisor_flush_tlb_mm), %o1
3916     or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
3917     call tlb_patch_one
3918     - mov 10, %o2
3919     + mov 19, %o2
3920    
3921     sethi %hi(__flush_tlb_page), %o0
3922     or %o0, %lo(__flush_tlb_page), %o0
3923     sethi %hi(__hypervisor_flush_tlb_page), %o1
3924     or %o1, %lo(__hypervisor_flush_tlb_page), %o1
3925     call tlb_patch_one
3926     - mov 11, %o2
3927     + mov 22, %o2
3928    
3929     sethi %hi(__flush_tlb_pending), %o0
3930     or %o0, %lo(__flush_tlb_pending), %o0
3931     sethi %hi(__hypervisor_flush_tlb_pending), %o1
3932     or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
3933     call tlb_patch_one
3934     - mov 16, %o2
3935     + mov 27, %o2
3936    
3937     sethi %hi(__flush_tlb_kernel_range), %o0
3938     or %o0, %lo(__flush_tlb_kernel_range), %o0
3939     sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
3940     or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
3941     call tlb_patch_one
3942     - mov 16, %o2
3943     + mov 31, %o2
3944    
3945     #ifdef DCACHE_ALIASING_POSSIBLE
3946     sethi %hi(__flush_dcache_page), %o0
3947     @@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops:
3948     sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
3949     or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
3950     call tlb_patch_one
3951     - mov 21, %o2
3952     + mov 24, %o2
3953    
3954     sethi %hi(xcall_flush_tlb_page), %o0
3955     or %o0, %lo(xcall_flush_tlb_page), %o0
3956     sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1
3957     or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
3958     call tlb_patch_one
3959     - mov 17, %o2
3960     + mov 20, %o2
3961    
3962     sethi %hi(xcall_flush_tlb_kernel_range), %o0
3963     or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
3964     sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
3965     or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
3966     call tlb_patch_one
3967     - mov 25, %o2
3968     + mov 44, %o2
3969     #endif /* CONFIG_SMP */
3970    
3971     ret
3972     diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
3973     index c32f5d32f811..b56c9c581359 100644
3974     --- a/drivers/net/ethernet/broadcom/bgmac.c
3975     +++ b/drivers/net/ethernet/broadcom/bgmac.c
3976     @@ -314,6 +314,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
3977     u32 ctl;
3978    
3979     ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
3980     +
3981     + /* preserve ONLY bits 16-17 from current hardware value */
3982     + ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
3983     +
3984     if (bgmac->core->id.rev >= 4) {
3985     ctl &= ~BGMAC_DMA_RX_BL_MASK;
3986     ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
3987     @@ -324,7 +328,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
3988     ctl &= ~BGMAC_DMA_RX_PT_MASK;
3989     ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT;
3990     }
3991     - ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
3992     ctl |= BGMAC_DMA_RX_ENABLE;
3993     ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
3994     ctl |= BGMAC_DMA_RX_OVERFLOW_CONT;
3995     diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
3996     index ca0d3802f2af..4e603d060e80 100644
3997     --- a/drivers/tty/serial/sunhv.c
3998     +++ b/drivers/tty/serial/sunhv.c
3999     @@ -490,12 +490,6 @@ static void sunhv_console_write_bychar(struct console *con, const char *s, unsig
4000     locked = spin_trylock_irqsave(&port->lock, flags);
4001     else
4002     spin_lock_irqsave(&port->lock, flags);
4003     - if (port->sysrq) {
4004     - locked = 0;
4005     - } else if (oops_in_progress) {
4006     - locked = spin_trylock(&port->lock);
4007     - } else
4008     - spin_lock(&port->lock);
4009    
4010     for (i = 0; i < n; i++) {
4011     if (*s == '\n')
4012     diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
4013     index 629e3c865072..9bee25cfa0be 100644
4014     --- a/drivers/tty/tty_ldisc.c
4015     +++ b/drivers/tty/tty_ldisc.c
4016     @@ -417,6 +417,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush);
4017     * they are not on hot paths so a little discipline won't do
4018     * any harm.
4019     *
4020     + * The line discipline-related tty_struct fields are reset to
4021     + * prevent the ldisc driver from re-using stale information for
4022     + * the new ldisc instance.
4023     + *
4024     * Locking: takes termios_rwsem
4025     */
4026    
4027     @@ -425,6 +429,9 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
4028     down_write(&tty->termios_rwsem);
4029     tty->termios.c_line = num;
4030     up_write(&tty->termios_rwsem);
4031     +
4032     + tty->disc_data = NULL;
4033     + tty->receive_room = 0;
4034     }
4035    
4036     /**
4037     diff --git a/include/linux/filter.h b/include/linux/filter.h
4038     index 5110d4211866..ccb98b459c59 100644
4039     --- a/include/linux/filter.h
4040     +++ b/include/linux/filter.h
4041     @@ -421,7 +421,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
4042     }
4043     #endif /* CONFIG_DEBUG_SET_MODULE_RONX */
4044    
4045     -int sk_filter(struct sock *sk, struct sk_buff *skb);
4046     +int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
4047     +static inline int sk_filter(struct sock *sk, struct sk_buff *skb) /* keeps the old sk_filter() signature on top of sk_filter_trim_cap() */
4048     +{
4049     + return sk_filter_trim_cap(sk, skb, 1); /* cap = 1: the callee trims to max(cap, pkt_len) */
4050     +}
4051    
4052     int bpf_prog_select_runtime(struct bpf_prog *fp);
4053     void bpf_prog_free(struct bpf_prog *fp);
4054     diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
4055     index ff788b665277..9c2c044153f6 100644
4056     --- a/include/net/ip6_tunnel.h
4057     +++ b/include/net/ip6_tunnel.h
4058     @@ -86,6 +86,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
4059     struct net_device_stats *stats = &dev->stats;
4060     int pkt_len, err;
4061    
4062     + memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4063     pkt_len = skb->len - skb_inner_network_offset(skb);
4064     err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
4065    
4066     diff --git a/include/net/tcp.h b/include/net/tcp.h
4067     index 9c3ab544d3a8..e9d7a8ef9a6d 100644
4068     --- a/include/net/tcp.h
4069     +++ b/include/net/tcp.h
4070     @@ -1156,6 +1156,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
4071     }
4072    
4073     bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
4074     +int tcp_filter(struct sock *sk, struct sk_buff *skb);
4075    
4076     #undef STATE_TRACE
4077    
4078     diff --git a/net/core/dev.c b/net/core/dev.c
4079     index b3fa4b86ab4c..9ca749c81b6c 100644
4080     --- a/net/core/dev.c
4081     +++ b/net/core/dev.c
4082     @@ -2462,7 +2462,7 @@ int skb_checksum_help(struct sk_buff *skb)
4083     goto out;
4084     }
4085    
4086     - *(__sum16 *)(skb->data + offset) = csum_fold(csum);
4087     + *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
4088     out_set_summed:
4089     skb->ip_summed = CHECKSUM_NONE;
4090     out:
4091     diff --git a/net/core/filter.c b/net/core/filter.c
4092     index 75e9b2b2336d..e94355452166 100644
4093     --- a/net/core/filter.c
4094     +++ b/net/core/filter.c
4095     @@ -52,9 +52,10 @@
4096     #include <net/dst.h>
4097    
4098     /**
4099     - * sk_filter - run a packet through a socket filter
4100     + * sk_filter_trim_cap - run a packet through a socket filter
4101     * @sk: sock associated with &sk_buff
4102     * @skb: buffer to filter
4103     + * @cap: limit on how short the eBPF program may trim the packet
4104     *
4105     * Run the eBPF program and then cut skb->data to correct size returned by
4106     * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
4107     @@ -63,7 +64,7 @@
4108     * be accepted or -EPERM if the packet should be tossed.
4109     *
4110     */
4111     -int sk_filter(struct sock *sk, struct sk_buff *skb)
4112     +int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
4113     {
4114     int err;
4115     struct sk_filter *filter;
4116     @@ -84,14 +85,13 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
4117     filter = rcu_dereference(sk->sk_filter);
4118     if (filter) {
4119     unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
4120     -
4121     - err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
4122     + err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
4123     }
4124     rcu_read_unlock();
4125    
4126     return err;
4127     }
4128     -EXPORT_SYMBOL(sk_filter);
4129     +EXPORT_SYMBOL(sk_filter_trim_cap);
4130    
4131     static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
4132     {
4133     diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
4134     index 4ab6ead3d8ee..9aba9e93c0a2 100644
4135     --- a/net/core/flow_dissector.c
4136     +++ b/net/core/flow_dissector.c
4137     @@ -131,7 +131,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
4138     struct flow_dissector_key_tags *key_tags;
4139     struct flow_dissector_key_keyid *key_keyid;
4140     u8 ip_proto = 0;
4141     - bool ret = false;
4142     + bool ret;
4143    
4144     if (!data) {
4145     data = skb->data;
4146     @@ -492,12 +492,17 @@ ip_proto_again:
4147     out_good:
4148     ret = true;
4149    
4150     -out_bad:
4151     + key_control->thoff = (u16)nhoff;
4152     +out:
4153     key_basic->n_proto = proto;
4154     key_basic->ip_proto = ip_proto;
4155     - key_control->thoff = (u16)nhoff;
4156    
4157     return ret;
4158     +
4159     +out_bad:
4160     + ret = false;
4161     + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
4162     + goto out;
4163     }
4164     EXPORT_SYMBOL(__skb_flow_dissect);
4165    
4166     diff --git a/net/core/sock.c b/net/core/sock.c
4167     index 0d91f7dca751..88f017854509 100644
4168     --- a/net/core/sock.c
4169     +++ b/net/core/sock.c
4170     @@ -1562,6 +1562,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
4171     }
4172    
4173     newsk->sk_err = 0;
4174     + newsk->sk_err_soft = 0;
4175     newsk->sk_priority = 0;
4176     newsk->sk_incoming_cpu = raw_smp_processor_id();
4177     atomic64_set(&newsk->sk_cookie, 0);
4178     diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
4179     index 8be8f27bfacc..861e1fa25d5e 100644
4180     --- a/net/dccp/ipv4.c
4181     +++ b/net/dccp/ipv4.c
4182     @@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
4183     {
4184     const struct iphdr *iph = (struct iphdr *)skb->data;
4185     const u8 offset = iph->ihl << 2;
4186     - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
4187     + const struct dccp_hdr *dh;
4188     struct dccp_sock *dp;
4189     struct inet_sock *inet;
4190     const int type = icmp_hdr(skb)->type;
4191     @@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
4192     int err;
4193     struct net *net = dev_net(skb->dev);
4194    
4195     - if (skb->len < offset + sizeof(*dh) ||
4196     - skb->len < offset + __dccp_basic_hdr_len(dh)) {
4197     - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
4198     - return;
4199     - }
4200     + /* Only need dccph_dport & dccph_sport which are the first
4201     + * 4 bytes in dccp header.
4202     + * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
4203     + */
4204     + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
4205     + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
4206     + dh = (struct dccp_hdr *)(skb->data + offset);
4207    
4208     sk = __inet_lookup_established(net, &dccp_hashinfo,
4209     iph->daddr, dh->dccph_dport,
4210     diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
4211     index b8608b71a66d..27c4e81efa24 100644
4212     --- a/net/dccp/ipv6.c
4213     +++ b/net/dccp/ipv6.c
4214     @@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
4215     u8 type, u8 code, int offset, __be32 info)
4216     {
4217     const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
4218     - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
4219     + const struct dccp_hdr *dh;
4220     struct dccp_sock *dp;
4221     struct ipv6_pinfo *np;
4222     struct sock *sk;
4223     @@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
4224     __u64 seq;
4225     struct net *net = dev_net(skb->dev);
4226    
4227     - if (skb->len < offset + sizeof(*dh) ||
4228     - skb->len < offset + __dccp_basic_hdr_len(dh)) {
4229     - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
4230     - ICMP6_MIB_INERRORS);
4231     - return;
4232     - }
4233     + /* Only need dccph_dport & dccph_sport which are the first
4234     + * 4 bytes in dccp header.
4235     + * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
4236     + */
4237     + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
4238     + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
4239     + dh = (struct dccp_hdr *)(skb->data + offset);
4240    
4241     sk = __inet6_lookup_established(net, &dccp_hashinfo,
4242     &hdr->daddr, dh->dccph_dport,
4243     @@ -947,6 +948,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
4244     .getsockopt = ipv6_getsockopt,
4245     .addr2sockaddr = inet6_csk_addr2sockaddr,
4246     .sockaddr_len = sizeof(struct sockaddr_in6),
4247     + .bind_conflict = inet6_csk_bind_conflict,
4248     #ifdef CONFIG_COMPAT
4249     .compat_setsockopt = compat_ipv6_setsockopt,
4250     .compat_getsockopt = compat_ipv6_getsockopt,
4251     diff --git a/net/dccp/proto.c b/net/dccp/proto.c
4252     index 41e65804ddf5..9fe25bf63296 100644
4253     --- a/net/dccp/proto.c
4254     +++ b/net/dccp/proto.c
4255     @@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout)
4256     __kfree_skb(skb);
4257     }
4258    
4259     + /* If socket has been already reset kill it. */
4260     + if (sk->sk_state == DCCP_CLOSED)
4261     + goto adjudge_to_death;
4262     +
4263     if (data_was_unread) {
4264     /* Unread data was tossed, send an appropriate Reset Code */
4265     DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
4266     diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
4267     index e5a3ff210fec..7c52afb98c42 100644
4268     --- a/net/ipv4/fib_trie.c
4269     +++ b/net/ipv4/fib_trie.c
4270     @@ -2456,22 +2456,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
4271     struct key_vector *l, **tp = &iter->tnode;
4272     t_key key;
4273    
4274     - /* use cache location of next-to-find key */
4275     + /* use cached location of previously found key */
4276     if (iter->pos > 0 && pos >= iter->pos) {
4277     - pos -= iter->pos;
4278     key = iter->key;
4279     } else {
4280     - iter->pos = 0;
4281     + iter->pos = 1;
4282     key = 0;
4283     }
4284    
4285     - while ((l = leaf_walk_rcu(tp, key)) != NULL) {
4286     + pos -= iter->pos;
4287     +
4288     + while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
4289     key = l->key + 1;
4290     iter->pos++;
4291     -
4292     - if (--pos <= 0)
4293     - break;
4294     -
4295     l = NULL;
4296    
4297     /* handle unlikely case of a key wrap */
4298     @@ -2480,7 +2477,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
4299     }
4300    
4301     if (l)
4302     - iter->key = key; /* remember it */
4303     + iter->key = l->key; /* remember it */
4304     else
4305     iter->pos = 0; /* forget it */
4306    
4307     @@ -2508,7 +2505,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
4308     return fib_route_get_idx(iter, *pos);
4309    
4310     iter->pos = 0;
4311     - iter->key = 0;
4312     + iter->key = KEY_MAX;
4313    
4314     return SEQ_START_TOKEN;
4315     }
4316     @@ -2517,7 +2514,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4317     {
4318     struct fib_route_iter *iter = seq->private;
4319     struct key_vector *l = NULL;
4320     - t_key key = iter->key;
4321     + t_key key = iter->key + 1;
4322    
4323     ++*pos;
4324    
4325     @@ -2526,7 +2523,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4326     l = leaf_walk_rcu(&iter->tnode, key);
4327    
4328     if (l) {
4329     - iter->key = l->key + 1;
4330     + iter->key = l->key;
4331     iter->pos++;
4332     } else {
4333     iter->pos = 0;
4334     diff --git a/net/ipv4/route.c b/net/ipv4/route.c
4335     index 8533a75a9328..7ceb8a574a50 100644
4336     --- a/net/ipv4/route.c
4337     +++ b/net/ipv4/route.c
4338     @@ -747,7 +747,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
4339     goto reject_redirect;
4340     }
4341    
4342     - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
4343     + n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
4344     + if (!n)
4345     + n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
4346     if (!IS_ERR(n)) {
4347     if (!(n->nud_state & NUD_VALID)) {
4348     neigh_event_send(n, NULL);
4349     diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
4350     index 036a76ba2ac2..69daa81736f6 100644
4351     --- a/net/ipv4/tcp.c
4352     +++ b/net/ipv4/tcp.c
4353     @@ -1212,7 +1212,7 @@ new_segment:
4354    
4355     if (!skb_can_coalesce(skb, i, pfrag->page,
4356     pfrag->offset)) {
4357     - if (i == sysctl_max_skb_frags || !sg) {
4358     + if (i >= sysctl_max_skb_frags || !sg) {
4359     tcp_mark_push(tp, skb);
4360     goto new_segment;
4361     }
4362     diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
4363     index 7e538f71f5fb..55d7da1d2ce9 100644
4364     --- a/net/ipv4/tcp_dctcp.c
4365     +++ b/net/ipv4/tcp_dctcp.c
4366     @@ -56,6 +56,7 @@ struct dctcp {
4367     u32 next_seq;
4368     u32 ce_state;
4369     u32 delayed_ack_reserved;
4370     + u32 loss_cwnd;
4371     };
4372    
4373     static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
4374     @@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
4375     ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
4376    
4377     ca->delayed_ack_reserved = 0;
4378     + ca->loss_cwnd = 0;
4379     ca->ce_state = 0;
4380    
4381     dctcp_reset(tp, ca);
4382     @@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
4383    
4384     static u32 dctcp_ssthresh(struct sock *sk)
4385     {
4386     - const struct dctcp *ca = inet_csk_ca(sk);
4387     + struct dctcp *ca = inet_csk_ca(sk);
4388     struct tcp_sock *tp = tcp_sk(sk);
4389    
4390     + ca->loss_cwnd = tp->snd_cwnd;
4391     return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
4392     }
4393    
4394     @@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
4395     return 0;
4396     }
4397    
4398     +static u32 dctcp_cwnd_undo(struct sock *sk)
4399     +{
4400     + const struct dctcp *ca = inet_csk_ca(sk);
4401     +
4402     + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
4403     +}
4404     +
4405     static struct tcp_congestion_ops dctcp __read_mostly = {
4406     .init = dctcp_init,
4407     .in_ack_event = dctcp_update_alpha,
4408     .cwnd_event = dctcp_cwnd_event,
4409     .ssthresh = dctcp_ssthresh,
4410     .cong_avoid = tcp_reno_cong_avoid,
4411     + .undo_cwnd = dctcp_cwnd_undo,
4412     .set_state = dctcp_state,
4413     .get_info = dctcp_get_info,
4414     .flags = TCP_CONG_NEEDS_ECN,
4415     diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
4416     index b5853cac3269..b58a38eea059 100644
4417     --- a/net/ipv4/tcp_ipv4.c
4418     +++ b/net/ipv4/tcp_ipv4.c
4419     @@ -1533,6 +1533,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
4420     }
4421     EXPORT_SYMBOL(tcp_prequeue);
4422    
4423     +int tcp_filter(struct sock *sk, struct sk_buff *skb)
4424     +{
4425     + struct tcphdr *th = (struct tcphdr *)skb->data;
4426     + unsigned int eaten = skb->len;
4427     + int err;
4428     +
4429     + err = sk_filter_trim_cap(sk, skb, th->doff * 4);
4430     + if (!err) {
4431     + eaten -= skb->len;
4432     + TCP_SKB_CB(skb)->end_seq -= eaten;
4433     + }
4434     + return err;
4435     +}
4436     +EXPORT_SYMBOL(tcp_filter);
4437     +
4438     /*
4439     * From tcp_input.c
4440     */
4441     @@ -1638,8 +1653,10 @@ process:
4442    
4443     nf_reset(skb);
4444    
4445     - if (sk_filter(sk, skb))
4446     + if (tcp_filter(sk, skb))
4447     goto discard_and_relse;
4448     + th = (const struct tcphdr *)skb->data;
4449     + iph = ip_hdr(skb);
4450    
4451     skb->dev = NULL;
4452    
4453     diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
4454     index fbd521fdae53..5f581616bf6a 100644
4455     --- a/net/ipv6/tcp_ipv6.c
4456     +++ b/net/ipv6/tcp_ipv6.c
4457     @@ -1214,7 +1214,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
4458     if (skb->protocol == htons(ETH_P_IP))
4459     return tcp_v4_do_rcv(sk, skb);
4460    
4461     - if (sk_filter(sk, skb))
4462     + if (tcp_filter(sk, skb))
4463     goto discard;
4464    
4465     /*
4466     @@ -1438,8 +1438,10 @@ process:
4467     if (tcp_v6_inbound_md5_hash(sk, skb))
4468     goto discard_and_relse;
4469    
4470     - if (sk_filter(sk, skb))
4471     + if (tcp_filter(sk, skb))
4472     goto discard_and_relse;
4473     + th = (const struct tcphdr *)skb->data;
4474     + hdr = ipv6_hdr(skb);
4475    
4476     skb->dev = NULL;
4477    
4478     diff --git a/net/sctp/socket.c b/net/sctp/socket.c
4479     index 402817be3873..b5fd4ab56156 100644
4480     --- a/net/sctp/socket.c
4481     +++ b/net/sctp/socket.c
4482     @@ -1212,9 +1212,12 @@ static int __sctp_connect(struct sock *sk,
4483    
4484     timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
4485    
4486     - err = sctp_wait_for_connect(asoc, &timeo);
4487     - if ((err == 0 || err == -EINPROGRESS) && assoc_id)
4488     + if (assoc_id)
4489     *assoc_id = asoc->assoc_id;
4490     + err = sctp_wait_for_connect(asoc, &timeo);
4491     + /* Note: the asoc may be freed after the return of
4492     + * sctp_wait_for_connect.
4493     + */
4494    
4495     /* Don't free association on exit. */
4496     asoc = NULL;
4497     diff --git a/net/socket.c b/net/socket.c
4498     index 263b334ec5e4..0090225eeb1e 100644
4499     --- a/net/socket.c
4500     +++ b/net/socket.c
4501     @@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
4502     if (err)
4503     break;
4504     ++datagrams;
4505     + if (msg_data_left(&msg_sys))
4506     + break;
4507     }
4508    
4509     fput_light(sock->file, fput_needed);