Magellan Linux

Contents of /trunk/kernel-alx/patches-4.4/0133-4.4.34-all-fixes.patch

Parent Directory | Revision Log


Revision 2869 - (show annotations) (download)
Mon Mar 27 13:49:12 2017 UTC (7 years, 1 month ago) by niro
File size: 133458 byte(s)
linux-4.4.34
1 diff --git a/Makefile b/Makefile
2 index a513c045c8de..30924aabf1b4 100644
3 --- a/Makefile
4 +++ b/Makefile
5 @@ -1,6 +1,6 @@
6 VERSION = 4
7 PATCHLEVEL = 4
8 -SUBLEVEL = 33
9 +SUBLEVEL = 34
10 EXTRAVERSION =
11 NAME = Blurry Fish Butt
12
13 diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
14 index 70067ce184b1..f7de0dbc38af 100644
15 --- a/arch/sparc/include/asm/mmu_64.h
16 +++ b/arch/sparc/include/asm/mmu_64.h
17 @@ -92,7 +92,8 @@ struct tsb_config {
18 typedef struct {
19 spinlock_t lock;
20 unsigned long sparc64_ctx_val;
21 - unsigned long huge_pte_count;
22 + unsigned long hugetlb_pte_count;
23 + unsigned long thp_pte_count;
24 struct tsb_config tsb_block[MM_NUM_TSBS];
25 struct hv_tsb_descr tsb_descr[MM_NUM_TSBS];
26 } mm_context_t;
27 diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
28 index ea6e9a20f3ff..f428512481f9 100644
29 --- a/arch/sparc/include/asm/uaccess_64.h
30 +++ b/arch/sparc/include/asm/uaccess_64.h
31 @@ -98,7 +98,6 @@ struct exception_table_entry {
32 unsigned int insn, fixup;
33 };
34
35 -void __ret_efault(void);
36 void __retl_efault(void);
37
38 /* Uh, these should become the main single-value transfer routines..
39 @@ -179,20 +178,6 @@ int __put_user_bad(void);
40 __gu_ret; \
41 })
42
43 -#define __get_user_nocheck_ret(data, addr, size, type, retval) ({ \
44 - register unsigned long __gu_val __asm__ ("l1"); \
45 - switch (size) { \
46 - case 1: __get_user_asm_ret(__gu_val, ub, addr, retval); break; \
47 - case 2: __get_user_asm_ret(__gu_val, uh, addr, retval); break; \
48 - case 4: __get_user_asm_ret(__gu_val, uw, addr, retval); break; \
49 - case 8: __get_user_asm_ret(__gu_val, x, addr, retval); break; \
50 - default: \
51 - if (__get_user_bad()) \
52 - return retval; \
53 - } \
54 - data = (__force type) __gu_val; \
55 -})
56 -
57 #define __get_user_asm(x, size, addr, ret) \
58 __asm__ __volatile__( \
59 "/* Get user asm, inline. */\n" \
60 @@ -214,80 +199,35 @@ __asm__ __volatile__( \
61 : "=r" (ret), "=r" (x) : "r" (__m(addr)), \
62 "i" (-EFAULT))
63
64 -#define __get_user_asm_ret(x, size, addr, retval) \
65 -if (__builtin_constant_p(retval) && retval == -EFAULT) \
66 - __asm__ __volatile__( \
67 - "/* Get user asm ret, inline. */\n" \
68 - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \
69 - ".section __ex_table,\"a\"\n\t" \
70 - ".align 4\n\t" \
71 - ".word 1b,__ret_efault\n\n\t" \
72 - ".previous\n\t" \
73 - : "=r" (x) : "r" (__m(addr))); \
74 -else \
75 - __asm__ __volatile__( \
76 - "/* Get user asm ret, inline. */\n" \
77 - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \
78 - ".section .fixup,#alloc,#execinstr\n\t" \
79 - ".align 4\n" \
80 - "3:\n\t" \
81 - "ret\n\t" \
82 - " restore %%g0, %2, %%o0\n\n\t" \
83 - ".previous\n\t" \
84 - ".section __ex_table,\"a\"\n\t" \
85 - ".align 4\n\t" \
86 - ".word 1b, 3b\n\n\t" \
87 - ".previous\n\t" \
88 - : "=r" (x) : "r" (__m(addr)), "i" (retval))
89 -
90 int __get_user_bad(void);
91
92 unsigned long __must_check ___copy_from_user(void *to,
93 const void __user *from,
94 unsigned long size);
95 -unsigned long copy_from_user_fixup(void *to, const void __user *from,
96 - unsigned long size);
97 static inline unsigned long __must_check
98 copy_from_user(void *to, const void __user *from, unsigned long size)
99 {
100 - unsigned long ret = ___copy_from_user(to, from, size);
101 -
102 - if (unlikely(ret))
103 - ret = copy_from_user_fixup(to, from, size);
104 -
105 - return ret;
106 + return ___copy_from_user(to, from, size);
107 }
108 #define __copy_from_user copy_from_user
109
110 unsigned long __must_check ___copy_to_user(void __user *to,
111 const void *from,
112 unsigned long size);
113 -unsigned long copy_to_user_fixup(void __user *to, const void *from,
114 - unsigned long size);
115 static inline unsigned long __must_check
116 copy_to_user(void __user *to, const void *from, unsigned long size)
117 {
118 - unsigned long ret = ___copy_to_user(to, from, size);
119 -
120 - if (unlikely(ret))
121 - ret = copy_to_user_fixup(to, from, size);
122 - return ret;
123 + return ___copy_to_user(to, from, size);
124 }
125 #define __copy_to_user copy_to_user
126
127 unsigned long __must_check ___copy_in_user(void __user *to,
128 const void __user *from,
129 unsigned long size);
130 -unsigned long copy_in_user_fixup(void __user *to, void __user *from,
131 - unsigned long size);
132 static inline unsigned long __must_check
133 copy_in_user(void __user *to, void __user *from, unsigned long size)
134 {
135 - unsigned long ret = ___copy_in_user(to, from, size);
136 -
137 - if (unlikely(ret))
138 - ret = copy_in_user_fixup(to, from, size);
139 - return ret;
140 + return ___copy_in_user(to, from, size);
141 }
142 #define __copy_in_user copy_in_user
143
144 diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S
145 index d668ca149e64..4087a62f96b0 100644
146 --- a/arch/sparc/kernel/dtlb_prot.S
147 +++ b/arch/sparc/kernel/dtlb_prot.S
148 @@ -25,13 +25,13 @@
149
150 /* PROT ** ICACHE line 2: More real fault processing */
151 ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5
152 + srlx %g5, PAGE_SHIFT, %g5
153 + sllx %g5, PAGE_SHIFT, %g5 ! Clear context ID bits
154 bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup
155 mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4
156 ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault
157 nop
158 nop
159 - nop
160 - nop
161
162 /* PROT ** ICACHE line 3: Unused... */
163 nop
164 diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
165 index 51faf92ace00..7eeeb1d5a410 100644
166 --- a/arch/sparc/kernel/head_64.S
167 +++ b/arch/sparc/kernel/head_64.S
168 @@ -922,47 +922,11 @@ prom_tba: .xword 0
169 tlb_type: .word 0 /* Must NOT end up in BSS */
170 .section ".fixup",#alloc,#execinstr
171
172 - .globl __ret_efault, __retl_efault, __ret_one, __retl_one
173 -ENTRY(__ret_efault)
174 - ret
175 - restore %g0, -EFAULT, %o0
176 -ENDPROC(__ret_efault)
177 -
178 ENTRY(__retl_efault)
179 retl
180 mov -EFAULT, %o0
181 ENDPROC(__retl_efault)
182
183 -ENTRY(__retl_one)
184 - retl
185 - mov 1, %o0
186 -ENDPROC(__retl_one)
187 -
188 -ENTRY(__retl_one_fp)
189 - VISExitHalf
190 - retl
191 - mov 1, %o0
192 -ENDPROC(__retl_one_fp)
193 -
194 -ENTRY(__ret_one_asi)
195 - wr %g0, ASI_AIUS, %asi
196 - ret
197 - restore %g0, 1, %o0
198 -ENDPROC(__ret_one_asi)
199 -
200 -ENTRY(__retl_one_asi)
201 - wr %g0, ASI_AIUS, %asi
202 - retl
203 - mov 1, %o0
204 -ENDPROC(__retl_one_asi)
205 -
206 -ENTRY(__retl_one_asi_fp)
207 - wr %g0, ASI_AIUS, %asi
208 - VISExitHalf
209 - retl
210 - mov 1, %o0
211 -ENDPROC(__retl_one_asi_fp)
212 -
213 ENTRY(__retl_o1)
214 retl
215 mov %o1, %o0
216 diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
217 index 59bbeff55024..07933b9e9ce0 100644
218 --- a/arch/sparc/kernel/jump_label.c
219 +++ b/arch/sparc/kernel/jump_label.c
220 @@ -13,19 +13,30 @@
221 void arch_jump_label_transform(struct jump_entry *entry,
222 enum jump_label_type type)
223 {
224 - u32 val;
225 u32 *insn = (u32 *) (unsigned long) entry->code;
226 + u32 val;
227
228 if (type == JUMP_LABEL_JMP) {
229 s32 off = (s32)entry->target - (s32)entry->code;
230 + bool use_v9_branch = false;
231 +
232 + BUG_ON(off & 3);
233
234 #ifdef CONFIG_SPARC64
235 - /* ba,pt %xcc, . + (off << 2) */
236 - val = 0x10680000 | ((u32) off >> 2);
237 -#else
238 - /* ba . + (off << 2) */
239 - val = 0x10800000 | ((u32) off >> 2);
240 + if (off <= 0xfffff && off >= -0x100000)
241 + use_v9_branch = true;
242 #endif
243 + if (use_v9_branch) {
244 + /* WDISP19 - target is . + immed << 2 */
245 + /* ba,pt %xcc, . + off */
246 + val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
247 + } else {
248 + /* WDISP22 - target is . + immed << 2 */
249 + BUG_ON(off > 0x7fffff);
250 + BUG_ON(off < -0x800000);
251 + /* ba . + off */
252 + val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
253 + }
254 } else {
255 val = 0x01000000;
256 }
257 diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
258 index ef0d8e9e1210..f22bec0db645 100644
259 --- a/arch/sparc/kernel/ktlb.S
260 +++ b/arch/sparc/kernel/ktlb.S
261 @@ -20,6 +20,10 @@ kvmap_itlb:
262 mov TLB_TAG_ACCESS, %g4
263 ldxa [%g4] ASI_IMMU, %g4
264
265 + /* The kernel executes in context zero, therefore we do not
266 + * need to clear the context ID bits out of %g4 here.
267 + */
268 +
269 /* sun4v_itlb_miss branches here with the missing virtual
270 * address already loaded into %g4
271 */
272 @@ -128,6 +132,10 @@ kvmap_dtlb:
273 mov TLB_TAG_ACCESS, %g4
274 ldxa [%g4] ASI_DMMU, %g4
275
276 + /* The kernel executes in context zero, therefore we do not
277 + * need to clear the context ID bits out of %g4 here.
278 + */
279 +
280 /* sun4v_dtlb_miss branches here with the missing virtual
281 * address already loaded into %g4
282 */
283 @@ -251,6 +259,10 @@ kvmap_dtlb_longpath:
284 nop
285 .previous
286
287 + /* The kernel executes in context zero, therefore we do not
288 + * need to clear the context ID bits out of %g5 here.
289 + */
290 +
291 be,pt %xcc, sparc64_realfault_common
292 mov FAULT_CODE_DTLB, %g4
293 ba,pt %xcc, winfix_trampoline
294 diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
295 index a92d5d2c46a3..51b25325a961 100644
296 --- a/arch/sparc/kernel/sparc_ksyms_64.c
297 +++ b/arch/sparc/kernel/sparc_ksyms_64.c
298 @@ -27,7 +27,6 @@ EXPORT_SYMBOL(__flushw_user);
299 EXPORT_SYMBOL_GPL(real_hard_smp_processor_id);
300
301 /* from head_64.S */
302 -EXPORT_SYMBOL(__ret_efault);
303 EXPORT_SYMBOL(tlb_type);
304 EXPORT_SYMBOL(sun4v_chip_type);
305 EXPORT_SYMBOL(prom_root_node);
306 diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
307 index be98685c14c6..d568c8207af7 100644
308 --- a/arch/sparc/kernel/tsb.S
309 +++ b/arch/sparc/kernel/tsb.S
310 @@ -29,13 +29,17 @@
311 */
312 tsb_miss_dtlb:
313 mov TLB_TAG_ACCESS, %g4
314 + ldxa [%g4] ASI_DMMU, %g4
315 + srlx %g4, PAGE_SHIFT, %g4
316 ba,pt %xcc, tsb_miss_page_table_walk
317 - ldxa [%g4] ASI_DMMU, %g4
318 + sllx %g4, PAGE_SHIFT, %g4
319
320 tsb_miss_itlb:
321 mov TLB_TAG_ACCESS, %g4
322 + ldxa [%g4] ASI_IMMU, %g4
323 + srlx %g4, PAGE_SHIFT, %g4
324 ba,pt %xcc, tsb_miss_page_table_walk
325 - ldxa [%g4] ASI_IMMU, %g4
326 + sllx %g4, PAGE_SHIFT, %g4
327
328 /* At this point we have:
329 * %g1 -- PAGE_SIZE TSB entry address
330 @@ -284,6 +288,10 @@ tsb_do_dtlb_fault:
331 nop
332 .previous
333
334 + /* Clear context ID bits. */
335 + srlx %g5, PAGE_SHIFT, %g5
336 + sllx %g5, PAGE_SHIFT, %g5
337 +
338 be,pt %xcc, sparc64_realfault_common
339 mov FAULT_CODE_DTLB, %g4
340 ba,pt %xcc, winfix_trampoline
341 diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S
342 index b7d0bd6b1406..69a439fa2fc1 100644
343 --- a/arch/sparc/lib/GENcopy_from_user.S
344 +++ b/arch/sparc/lib/GENcopy_from_user.S
345 @@ -3,11 +3,11 @@
346 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
347 */
348
349 -#define EX_LD(x) \
350 +#define EX_LD(x,y) \
351 98: x; \
352 .section __ex_table,"a";\
353 .align 4; \
354 - .word 98b, __retl_one; \
355 + .word 98b, y; \
356 .text; \
357 .align 4;
358
359 diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S
360 index 780550e1afc7..9947427ce354 100644
361 --- a/arch/sparc/lib/GENcopy_to_user.S
362 +++ b/arch/sparc/lib/GENcopy_to_user.S
363 @@ -3,11 +3,11 @@
364 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
365 */
366
367 -#define EX_ST(x) \
368 +#define EX_ST(x,y) \
369 98: x; \
370 .section __ex_table,"a";\
371 .align 4; \
372 - .word 98b, __retl_one; \
373 + .word 98b, y; \
374 .text; \
375 .align 4;
376
377 diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S
378 index 89358ee94851..059ea24ad73d 100644
379 --- a/arch/sparc/lib/GENmemcpy.S
380 +++ b/arch/sparc/lib/GENmemcpy.S
381 @@ -4,21 +4,18 @@
382 */
383
384 #ifdef __KERNEL__
385 +#include <linux/linkage.h>
386 #define GLOBAL_SPARE %g7
387 #else
388 #define GLOBAL_SPARE %g5
389 #endif
390
391 #ifndef EX_LD
392 -#define EX_LD(x) x
393 +#define EX_LD(x,y) x
394 #endif
395
396 #ifndef EX_ST
397 -#define EX_ST(x) x
398 -#endif
399 -
400 -#ifndef EX_RETVAL
401 -#define EX_RETVAL(x) x
402 +#define EX_ST(x,y) x
403 #endif
404
405 #ifndef LOAD
406 @@ -45,6 +42,29 @@
407 .register %g3,#scratch
408
409 .text
410 +
411 +#ifndef EX_RETVAL
412 +#define EX_RETVAL(x) x
413 +ENTRY(GEN_retl_o4_1)
414 + add %o4, %o2, %o4
415 + retl
416 + add %o4, 1, %o0
417 +ENDPROC(GEN_retl_o4_1)
418 +ENTRY(GEN_retl_g1_8)
419 + add %g1, %o2, %g1
420 + retl
421 + add %g1, 8, %o0
422 +ENDPROC(GEN_retl_g1_8)
423 +ENTRY(GEN_retl_o2_4)
424 + retl
425 + add %o2, 4, %o0
426 +ENDPROC(GEN_retl_o2_4)
427 +ENTRY(GEN_retl_o2_1)
428 + retl
429 + add %o2, 1, %o0
430 +ENDPROC(GEN_retl_o2_1)
431 +#endif
432 +
433 .align 64
434
435 .globl FUNC_NAME
436 @@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
437 sub %g0, %o4, %o4
438 sub %o2, %o4, %o2
439 1: subcc %o4, 1, %o4
440 - EX_LD(LOAD(ldub, %o1, %g1))
441 - EX_ST(STORE(stb, %g1, %o0))
442 + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
443 + EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
444 add %o1, 1, %o1
445 bne,pt %XCC, 1b
446 add %o0, 1, %o0
447 @@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
448 andn %o2, 0x7, %g1
449 sub %o2, %g1, %o2
450 1: subcc %g1, 0x8, %g1
451 - EX_LD(LOAD(ldx, %o1, %g2))
452 - EX_ST(STORE(stx, %g2, %o0))
453 + EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
454 + EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
455 add %o1, 0x8, %o1
456 bne,pt %XCC, 1b
457 add %o0, 0x8, %o0
458 @@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
459
460 1:
461 subcc %o2, 4, %o2
462 - EX_LD(LOAD(lduw, %o1, %g1))
463 - EX_ST(STORE(stw, %g1, %o1 + %o3))
464 + EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
465 + EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
466 bgu,pt %XCC, 1b
467 add %o1, 4, %o1
468
469 @@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
470 .align 32
471 90:
472 subcc %o2, 1, %o2
473 - EX_LD(LOAD(ldub, %o1, %g1))
474 - EX_ST(STORE(stb, %g1, %o1 + %o3))
475 + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
476 + EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
477 bgu,pt %XCC, 90b
478 add %o1, 1, %o1
479 retl
480 diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
481 index 3269b0234093..4f2384a4286a 100644
482 --- a/arch/sparc/lib/Makefile
483 +++ b/arch/sparc/lib/Makefile
484 @@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
485 lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
486 lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
487
488 -lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
489 +lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
490 lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
491
492 obj-$(CONFIG_SPARC64) += iomap.o
493 diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
494 index d5242b8c4f94..b79a6998d87c 100644
495 --- a/arch/sparc/lib/NG2copy_from_user.S
496 +++ b/arch/sparc/lib/NG2copy_from_user.S
497 @@ -3,19 +3,19 @@
498 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
499 */
500
501 -#define EX_LD(x) \
502 +#define EX_LD(x,y) \
503 98: x; \
504 .section __ex_table,"a";\
505 .align 4; \
506 - .word 98b, __retl_one_asi;\
507 + .word 98b, y; \
508 .text; \
509 .align 4;
510
511 -#define EX_LD_FP(x) \
512 +#define EX_LD_FP(x,y) \
513 98: x; \
514 .section __ex_table,"a";\
515 .align 4; \
516 - .word 98b, __retl_one_asi_fp;\
517 + .word 98b, y##_fp; \
518 .text; \
519 .align 4;
520
521 diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
522 index 4e962d993b10..dcec55f254ab 100644
523 --- a/arch/sparc/lib/NG2copy_to_user.S
524 +++ b/arch/sparc/lib/NG2copy_to_user.S
525 @@ -3,19 +3,19 @@
526 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
527 */
528
529 -#define EX_ST(x) \
530 +#define EX_ST(x,y) \
531 98: x; \
532 .section __ex_table,"a";\
533 .align 4; \
534 - .word 98b, __retl_one_asi;\
535 + .word 98b, y; \
536 .text; \
537 .align 4;
538
539 -#define EX_ST_FP(x) \
540 +#define EX_ST_FP(x,y) \
541 98: x; \
542 .section __ex_table,"a";\
543 .align 4; \
544 - .word 98b, __retl_one_asi_fp;\
545 + .word 98b, y##_fp; \
546 .text; \
547 .align 4;
548
549 diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
550 index d5f585df2f3f..c629dbd121b6 100644
551 --- a/arch/sparc/lib/NG2memcpy.S
552 +++ b/arch/sparc/lib/NG2memcpy.S
553 @@ -4,6 +4,7 @@
554 */
555
556 #ifdef __KERNEL__
557 +#include <linux/linkage.h>
558 #include <asm/visasm.h>
559 #include <asm/asi.h>
560 #define GLOBAL_SPARE %g7
561 @@ -32,21 +33,17 @@
562 #endif
563
564 #ifndef EX_LD
565 -#define EX_LD(x) x
566 +#define EX_LD(x,y) x
567 #endif
568 #ifndef EX_LD_FP
569 -#define EX_LD_FP(x) x
570 +#define EX_LD_FP(x,y) x
571 #endif
572
573 #ifndef EX_ST
574 -#define EX_ST(x) x
575 +#define EX_ST(x,y) x
576 #endif
577 #ifndef EX_ST_FP
578 -#define EX_ST_FP(x) x
579 -#endif
580 -
581 -#ifndef EX_RETVAL
582 -#define EX_RETVAL(x) x
583 +#define EX_ST_FP(x,y) x
584 #endif
585
586 #ifndef LOAD
587 @@ -140,45 +137,110 @@
588 fsrc2 %x6, %f12; \
589 fsrc2 %x7, %f14;
590 #define FREG_LOAD_1(base, x0) \
591 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
592 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
593 #define FREG_LOAD_2(base, x0, x1) \
594 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
595 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
596 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
597 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
598 #define FREG_LOAD_3(base, x0, x1, x2) \
599 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
600 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
601 - EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
602 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
603 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
604 + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
605 #define FREG_LOAD_4(base, x0, x1, x2, x3) \
606 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
607 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
608 - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
609 - EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
610 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
611 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
612 + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
613 + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
614 #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
615 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
616 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
617 - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
618 - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
619 - EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
620 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
621 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
622 + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
623 + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
624 + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
625 #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
626 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
627 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
628 - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
629 - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
630 - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
631 - EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
632 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
633 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
634 + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
635 + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
636 + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
637 + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
638 #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
639 - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
640 - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
641 - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
642 - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
643 - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
644 - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
645 - EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
646 + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
647 + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
648 + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
649 + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
650 + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
651 + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
652 + EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
653
654 .register %g2,#scratch
655 .register %g3,#scratch
656
657 .text
658 +#ifndef EX_RETVAL
659 +#define EX_RETVAL(x) x
660 +__restore_fp:
661 + VISExitHalf
662 +__restore_asi:
663 + retl
664 + wr %g0, ASI_AIUS, %asi
665 +ENTRY(NG2_retl_o2)
666 + ba,pt %xcc, __restore_asi
667 + mov %o2, %o0
668 +ENDPROC(NG2_retl_o2)
669 +ENTRY(NG2_retl_o2_plus_1)
670 + ba,pt %xcc, __restore_asi
671 + add %o2, 1, %o0
672 +ENDPROC(NG2_retl_o2_plus_1)
673 +ENTRY(NG2_retl_o2_plus_4)
674 + ba,pt %xcc, __restore_asi
675 + add %o2, 4, %o0
676 +ENDPROC(NG2_retl_o2_plus_4)
677 +ENTRY(NG2_retl_o2_plus_8)
678 + ba,pt %xcc, __restore_asi
679 + add %o2, 8, %o0
680 +ENDPROC(NG2_retl_o2_plus_8)
681 +ENTRY(NG2_retl_o2_plus_o4_plus_1)
682 + add %o4, 1, %o4
683 + ba,pt %xcc, __restore_asi
684 + add %o2, %o4, %o0
685 +ENDPROC(NG2_retl_o2_plus_o4_plus_1)
686 +ENTRY(NG2_retl_o2_plus_o4_plus_8)
687 + add %o4, 8, %o4
688 + ba,pt %xcc, __restore_asi
689 + add %o2, %o4, %o0
690 +ENDPROC(NG2_retl_o2_plus_o4_plus_8)
691 +ENTRY(NG2_retl_o2_plus_o4_plus_16)
692 + add %o4, 16, %o4
693 + ba,pt %xcc, __restore_asi
694 + add %o2, %o4, %o0
695 +ENDPROC(NG2_retl_o2_plus_o4_plus_16)
696 +ENTRY(NG2_retl_o2_plus_g1_fp)
697 + ba,pt %xcc, __restore_fp
698 + add %o2, %g1, %o0
699 +ENDPROC(NG2_retl_o2_plus_g1_fp)
700 +ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
701 + add %g1, 64, %g1
702 + ba,pt %xcc, __restore_fp
703 + add %o2, %g1, %o0
704 +ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
705 +ENTRY(NG2_retl_o2_plus_g1_plus_1)
706 + add %g1, 1, %g1
707 + ba,pt %xcc, __restore_asi
708 + add %o2, %g1, %o0
709 +ENDPROC(NG2_retl_o2_plus_g1_plus_1)
710 +ENTRY(NG2_retl_o2_and_7_plus_o4)
711 + and %o2, 7, %o2
712 + ba,pt %xcc, __restore_asi
713 + add %o2, %o4, %o0
714 +ENDPROC(NG2_retl_o2_and_7_plus_o4)
715 +ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
716 + and %o2, 7, %o2
717 + add %o4, 8, %o4
718 + ba,pt %xcc, __restore_asi
719 + add %o2, %o4, %o0
720 +ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
721 +#endif
722 +
723 .align 64
724
725 .globl FUNC_NAME
726 @@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
727 sub %g0, %o4, %o4 ! bytes to align dst
728 sub %o2, %o4, %o2
729 1: subcc %o4, 1, %o4
730 - EX_LD(LOAD(ldub, %o1, %g1))
731 - EX_ST(STORE(stb, %g1, %o0))
732 + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
733 + EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
734 add %o1, 1, %o1
735 bne,pt %XCC, 1b
736 add %o0, 1, %o0
737 @@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
738 nop
739 /* fall through for 0 < low bits < 8 */
740 110: sub %o4, 64, %g2
741 - EX_LD_FP(LOAD_BLK(%g2, %f0))
742 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
743 - EX_LD_FP(LOAD_BLK(%o4, %f16))
744 + EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
745 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
746 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
747 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
748 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
749 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
750 FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
751 subcc %g1, 64, %g1
752 add %o4, 64, %o4
753 @@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
754
755 120: sub %o4, 56, %g2
756 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
757 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
758 - EX_LD_FP(LOAD_BLK(%o4, %f16))
759 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
760 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
761 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
762 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
763 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
764 FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
765 subcc %g1, 64, %g1
766 add %o4, 64, %o4
767 @@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
768
769 130: sub %o4, 48, %g2
770 FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
771 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
772 - EX_LD_FP(LOAD_BLK(%o4, %f16))
773 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
774 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
775 FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
776 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
777 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
778 FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
779 subcc %g1, 64, %g1
780 add %o4, 64, %o4
781 @@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
782
783 140: sub %o4, 40, %g2
784 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
785 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
786 - EX_LD_FP(LOAD_BLK(%o4, %f16))
787 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
788 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
789 FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
790 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
791 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
792 FREG_MOVE_5(f22, f24, f26, f28, f30)
793 subcc %g1, 64, %g1
794 add %o4, 64, %o4
795 @@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
796
797 150: sub %o4, 32, %g2
798 FREG_LOAD_4(%g2, f0, f2, f4, f6)
799 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
800 - EX_LD_FP(LOAD_BLK(%o4, %f16))
801 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
802 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
803 FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
804 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
805 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
806 FREG_MOVE_4(f24, f26, f28, f30)
807 subcc %g1, 64, %g1
808 add %o4, 64, %o4
809 @@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
810
811 160: sub %o4, 24, %g2
812 FREG_LOAD_3(%g2, f0, f2, f4)
813 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
814 - EX_LD_FP(LOAD_BLK(%o4, %f16))
815 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
816 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
817 FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
818 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
819 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
820 FREG_MOVE_3(f26, f28, f30)
821 subcc %g1, 64, %g1
822 add %o4, 64, %o4
823 @@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
824
825 170: sub %o4, 16, %g2
826 FREG_LOAD_2(%g2, f0, f2)
827 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
828 - EX_LD_FP(LOAD_BLK(%o4, %f16))
829 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
830 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
831 FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
832 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
833 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
834 FREG_MOVE_2(f28, f30)
835 subcc %g1, 64, %g1
836 add %o4, 64, %o4
837 @@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
838
839 180: sub %o4, 8, %g2
840 FREG_LOAD_1(%g2, f0)
841 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
842 - EX_LD_FP(LOAD_BLK(%o4, %f16))
843 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
844 + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
845 FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
846 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
847 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
848 FREG_MOVE_1(f30)
849 subcc %g1, 64, %g1
850 add %o4, 64, %o4
851 @@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
852 nop
853
854 190:
855 -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
856 +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
857 subcc %g1, 64, %g1
858 - EX_LD_FP(LOAD_BLK(%o4, %f0))
859 - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
860 + EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
861 + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
862 add %o4, 64, %o4
863 bne,pt %xcc, 1b
864 LOAD(prefetch, %o4 + 64, #one_read)
865 @@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
866 andn %o2, 0xf, %o4
867 and %o2, 0xf, %o2
868 1: subcc %o4, 0x10, %o4
869 - EX_LD(LOAD(ldx, %o1, %o5))
870 + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
871 add %o1, 0x08, %o1
872 - EX_LD(LOAD(ldx, %o1, %g1))
873 + EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
874 sub %o1, 0x08, %o1
875 - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
876 + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
877 add %o1, 0x8, %o1
878 - EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))
879 + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
880 bgu,pt %XCC, 1b
881 add %o1, 0x8, %o1
882 73: andcc %o2, 0x8, %g0
883 be,pt %XCC, 1f
884 nop
885 sub %o2, 0x8, %o2
886 - EX_LD(LOAD(ldx, %o1, %o5))
887 - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
888 + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
889 + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
890 add %o1, 0x8, %o1
891 1: andcc %o2, 0x4, %g0
892 be,pt %XCC, 1f
893 nop
894 sub %o2, 0x4, %o2
895 - EX_LD(LOAD(lduw, %o1, %o5))
896 - EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))
897 + EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
898 + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
899 add %o1, 0x4, %o1
900 1: cmp %o2, 0
901 be,pt %XCC, 85f
902 @@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
903 sub %o2, %g1, %o2
904
905 1: subcc %g1, 1, %g1
906 - EX_LD(LOAD(ldub, %o1, %o5))
907 - EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))
908 + EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
909 + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
910 bgu,pt %icc, 1b
911 add %o1, 1, %o1
912
913 @@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
914
915 8: mov 64, GLOBAL_SPARE
916 andn %o1, 0x7, %o1
917 - EX_LD(LOAD(ldx, %o1, %g2))
918 + EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
919 sub GLOBAL_SPARE, %g1, GLOBAL_SPARE
920 andn %o2, 0x7, %o4
921 sllx %g2, %g1, %g2
922 1: add %o1, 0x8, %o1
923 - EX_LD(LOAD(ldx, %o1, %g3))
924 + EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
925 subcc %o4, 0x8, %o4
926 srlx %g3, GLOBAL_SPARE, %o5
927 or %o5, %g2, %o5
928 - EX_ST(STORE(stx, %o5, %o0))
929 + EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
930 add %o0, 0x8, %o0
931 bgu,pt %icc, 1b
932 sllx %g3, %g1, %g2
933 @@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
934
935 1:
936 subcc %o2, 4, %o2
937 - EX_LD(LOAD(lduw, %o1, %g1))
938 - EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))
939 + EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
940 + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
941 bgu,pt %XCC, 1b
942 add %o1, 4, %o1
943
944 @@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
945 .align 32
946 90:
947 subcc %o2, 1, %o2
948 - EX_LD(LOAD(ldub, %o1, %g1))
949 - EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))
950 + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
951 + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
952 bgu,pt %XCC, 90b
953 add %o1, 1, %o1
954 retl
955 diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
956 index 2e8ee7ad07a9..16a286c1a528 100644
957 --- a/arch/sparc/lib/NG4copy_from_user.S
958 +++ b/arch/sparc/lib/NG4copy_from_user.S
959 @@ -3,19 +3,19 @@
960 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
961 */
962
963 -#define EX_LD(x) \
964 +#define EX_LD(x, y) \
965 98: x; \
966 .section __ex_table,"a";\
967 .align 4; \
968 - .word 98b, __retl_one_asi;\
969 + .word 98b, y; \
970 .text; \
971 .align 4;
972
973 -#define EX_LD_FP(x) \
974 +#define EX_LD_FP(x,y) \
975 98: x; \
976 .section __ex_table,"a";\
977 .align 4; \
978 - .word 98b, __retl_one_asi_fp;\
979 + .word 98b, y##_fp; \
980 .text; \
981 .align 4;
982
983 diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
984 index be0bf4590df8..6b0276ffc858 100644
985 --- a/arch/sparc/lib/NG4copy_to_user.S
986 +++ b/arch/sparc/lib/NG4copy_to_user.S
987 @@ -3,19 +3,19 @@
988 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
989 */
990
991 -#define EX_ST(x) \
992 +#define EX_ST(x,y) \
993 98: x; \
994 .section __ex_table,"a";\
995 .align 4; \
996 - .word 98b, __retl_one_asi;\
997 + .word 98b, y; \
998 .text; \
999 .align 4;
1000
1001 -#define EX_ST_FP(x) \
1002 +#define EX_ST_FP(x,y) \
1003 98: x; \
1004 .section __ex_table,"a";\
1005 .align 4; \
1006 - .word 98b, __retl_one_asi_fp;\
1007 + .word 98b, y##_fp; \
1008 .text; \
1009 .align 4;
1010
1011 diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
1012 index 8e13ee1f4454..75bb93b1437f 100644
1013 --- a/arch/sparc/lib/NG4memcpy.S
1014 +++ b/arch/sparc/lib/NG4memcpy.S
1015 @@ -4,6 +4,7 @@
1016 */
1017
1018 #ifdef __KERNEL__
1019 +#include <linux/linkage.h>
1020 #include <asm/visasm.h>
1021 #include <asm/asi.h>
1022 #define GLOBAL_SPARE %g7
1023 @@ -46,22 +47,19 @@
1024 #endif
1025
1026 #ifndef EX_LD
1027 -#define EX_LD(x) x
1028 +#define EX_LD(x,y) x
1029 #endif
1030 #ifndef EX_LD_FP
1031 -#define EX_LD_FP(x) x
1032 +#define EX_LD_FP(x,y) x
1033 #endif
1034
1035 #ifndef EX_ST
1036 -#define EX_ST(x) x
1037 +#define EX_ST(x,y) x
1038 #endif
1039 #ifndef EX_ST_FP
1040 -#define EX_ST_FP(x) x
1041 +#define EX_ST_FP(x,y) x
1042 #endif
1043
1044 -#ifndef EX_RETVAL
1045 -#define EX_RETVAL(x) x
1046 -#endif
1047
1048 #ifndef LOAD
1049 #define LOAD(type,addr,dest) type [addr], dest
1050 @@ -94,6 +92,158 @@
1051 .register %g3,#scratch
1052
1053 .text
1054 +#ifndef EX_RETVAL
1055 +#define EX_RETVAL(x) x
1056 +__restore_asi_fp:
1057 + VISExitHalf
1058 +__restore_asi:
1059 + retl
1060 + wr %g0, ASI_AIUS, %asi
1061 +
1062 +ENTRY(NG4_retl_o2)
1063 + ba,pt %xcc, __restore_asi
1064 + mov %o2, %o0
1065 +ENDPROC(NG4_retl_o2)
1066 +ENTRY(NG4_retl_o2_plus_1)
1067 + ba,pt %xcc, __restore_asi
1068 + add %o2, 1, %o0
1069 +ENDPROC(NG4_retl_o2_plus_1)
1070 +ENTRY(NG4_retl_o2_plus_4)
1071 + ba,pt %xcc, __restore_asi
1072 + add %o2, 4, %o0
1073 +ENDPROC(NG4_retl_o2_plus_4)
1074 +ENTRY(NG4_retl_o2_plus_o5)
1075 + ba,pt %xcc, __restore_asi
1076 + add %o2, %o5, %o0
1077 +ENDPROC(NG4_retl_o2_plus_o5)
1078 +ENTRY(NG4_retl_o2_plus_o5_plus_4)
1079 + add %o5, 4, %o5
1080 + ba,pt %xcc, __restore_asi
1081 + add %o2, %o5, %o0
1082 +ENDPROC(NG4_retl_o2_plus_o5_plus_4)
1083 +ENTRY(NG4_retl_o2_plus_o5_plus_8)
1084 + add %o5, 8, %o5
1085 + ba,pt %xcc, __restore_asi
1086 + add %o2, %o5, %o0
1087 +ENDPROC(NG4_retl_o2_plus_o5_plus_8)
1088 +ENTRY(NG4_retl_o2_plus_o5_plus_16)
1089 + add %o5, 16, %o5
1090 + ba,pt %xcc, __restore_asi
1091 + add %o2, %o5, %o0
1092 +ENDPROC(NG4_retl_o2_plus_o5_plus_16)
1093 +ENTRY(NG4_retl_o2_plus_o5_plus_24)
1094 + add %o5, 24, %o5
1095 + ba,pt %xcc, __restore_asi
1096 + add %o2, %o5, %o0
1097 +ENDPROC(NG4_retl_o2_plus_o5_plus_24)
1098 +ENTRY(NG4_retl_o2_plus_o5_plus_32)
1099 + add %o5, 32, %o5
1100 + ba,pt %xcc, __restore_asi
1101 + add %o2, %o5, %o0
1102 +ENDPROC(NG4_retl_o2_plus_o5_plus_32)
1103 +ENTRY(NG4_retl_o2_plus_g1)
1104 + ba,pt %xcc, __restore_asi
1105 + add %o2, %g1, %o0
1106 +ENDPROC(NG4_retl_o2_plus_g1)
1107 +ENTRY(NG4_retl_o2_plus_g1_plus_1)
1108 + add %g1, 1, %g1
1109 + ba,pt %xcc, __restore_asi
1110 + add %o2, %g1, %o0
1111 +ENDPROC(NG4_retl_o2_plus_g1_plus_1)
1112 +ENTRY(NG4_retl_o2_plus_g1_plus_8)
1113 + add %g1, 8, %g1
1114 + ba,pt %xcc, __restore_asi
1115 + add %o2, %g1, %o0
1116 +ENDPROC(NG4_retl_o2_plus_g1_plus_8)
1117 +ENTRY(NG4_retl_o2_plus_o4)
1118 + ba,pt %xcc, __restore_asi
1119 + add %o2, %o4, %o0
1120 +ENDPROC(NG4_retl_o2_plus_o4)
1121 +ENTRY(NG4_retl_o2_plus_o4_plus_8)
1122 + add %o4, 8, %o4
1123 + ba,pt %xcc, __restore_asi
1124 + add %o2, %o4, %o0
1125 +ENDPROC(NG4_retl_o2_plus_o4_plus_8)
1126 +ENTRY(NG4_retl_o2_plus_o4_plus_16)
1127 + add %o4, 16, %o4
1128 + ba,pt %xcc, __restore_asi
1129 + add %o2, %o4, %o0
1130 +ENDPROC(NG4_retl_o2_plus_o4_plus_16)
1131 +ENTRY(NG4_retl_o2_plus_o4_plus_24)
1132 + add %o4, 24, %o4
1133 + ba,pt %xcc, __restore_asi
1134 + add %o2, %o4, %o0
1135 +ENDPROC(NG4_retl_o2_plus_o4_plus_24)
1136 +ENTRY(NG4_retl_o2_plus_o4_plus_32)
1137 + add %o4, 32, %o4
1138 + ba,pt %xcc, __restore_asi
1139 + add %o2, %o4, %o0
1140 +ENDPROC(NG4_retl_o2_plus_o4_plus_32)
1141 +ENTRY(NG4_retl_o2_plus_o4_plus_40)
1142 + add %o4, 40, %o4
1143 + ba,pt %xcc, __restore_asi
1144 + add %o2, %o4, %o0
1145 +ENDPROC(NG4_retl_o2_plus_o4_plus_40)
1146 +ENTRY(NG4_retl_o2_plus_o4_plus_48)
1147 + add %o4, 48, %o4
1148 + ba,pt %xcc, __restore_asi
1149 + add %o2, %o4, %o0
1150 +ENDPROC(NG4_retl_o2_plus_o4_plus_48)
1151 +ENTRY(NG4_retl_o2_plus_o4_plus_56)
1152 + add %o4, 56, %o4
1153 + ba,pt %xcc, __restore_asi
1154 + add %o2, %o4, %o0
1155 +ENDPROC(NG4_retl_o2_plus_o4_plus_56)
1156 +ENTRY(NG4_retl_o2_plus_o4_plus_64)
1157 + add %o4, 64, %o4
1158 + ba,pt %xcc, __restore_asi
1159 + add %o2, %o4, %o0
1160 +ENDPROC(NG4_retl_o2_plus_o4_plus_64)
1161 +ENTRY(NG4_retl_o2_plus_o4_fp)
1162 + ba,pt %xcc, __restore_asi_fp
1163 + add %o2, %o4, %o0
1164 +ENDPROC(NG4_retl_o2_plus_o4_fp)
1165 +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
1166 + add %o4, 8, %o4
1167 + ba,pt %xcc, __restore_asi_fp
1168 + add %o2, %o4, %o0
1169 +ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
1170 +ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
1171 + add %o4, 16, %o4
1172 + ba,pt %xcc, __restore_asi_fp
1173 + add %o2, %o4, %o0
1174 +ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
1175 +ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
1176 + add %o4, 24, %o4
1177 + ba,pt %xcc, __restore_asi_fp
1178 + add %o2, %o4, %o0
1179 +ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
1180 +ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
1181 + add %o4, 32, %o4
1182 + ba,pt %xcc, __restore_asi_fp
1183 + add %o2, %o4, %o0
1184 +ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
1185 +ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
1186 + add %o4, 40, %o4
1187 + ba,pt %xcc, __restore_asi_fp
1188 + add %o2, %o4, %o0
1189 +ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
1190 +ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
1191 + add %o4, 48, %o4
1192 + ba,pt %xcc, __restore_asi_fp
1193 + add %o2, %o4, %o0
1194 +ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
1195 +ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
1196 + add %o4, 56, %o4
1197 + ba,pt %xcc, __restore_asi_fp
1198 + add %o2, %o4, %o0
1199 +ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
1200 +ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
1201 + add %o4, 64, %o4
1202 + ba,pt %xcc, __restore_asi_fp
1203 + add %o2, %o4, %o0
1204 +ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
1205 +#endif
1206 .align 64
1207
1208 .globl FUNC_NAME
1209 @@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1210 brz,pt %g1, 51f
1211 sub %o2, %g1, %o2
1212
1213 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
1214 +
1215 +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1216 add %o1, 1, %o1
1217 subcc %g1, 1, %g1
1218 add %o0, 1, %o0
1219 bne,pt %icc, 1b
1220 - EX_ST(STORE(stb, %g2, %o0 - 0x01))
1221 + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
1222
1223 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
1224 LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
1225 @@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1226 brz,pt %g1, .Llarge_aligned
1227 sub %o2, %g1, %o2
1228
1229 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
1230 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1231 add %o1, 8, %o1
1232 subcc %g1, 8, %g1
1233 add %o0, 8, %o0
1234 bne,pt %icc, 1b
1235 - EX_ST(STORE(stx, %g2, %o0 - 0x08))
1236 + EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
1237
1238 .Llarge_aligned:
1239 /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
1240 andn %o2, 0x3f, %o4
1241 sub %o2, %o4, %o2
1242
1243 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1244 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
1245 add %o1, 0x40, %o1
1246 - EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
1247 + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
1248 subcc %o4, 0x40, %o4
1249 - EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
1250 - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
1251 - EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
1252 - EX_ST(STORE_INIT(%g1, %o0))
1253 + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
1254 + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
1255 + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
1256 + EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
1257 add %o0, 0x08, %o0
1258 - EX_ST(STORE_INIT(%g2, %o0))
1259 + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
1260 add %o0, 0x08, %o0
1261 - EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
1262 - EX_ST(STORE_INIT(%g3, %o0))
1263 + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
1264 + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
1265 add %o0, 0x08, %o0
1266 - EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
1267 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
1268 + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
1269 + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
1270 add %o0, 0x08, %o0
1271 - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
1272 - EX_ST(STORE_INIT(%o5, %o0))
1273 + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
1274 + EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
1275 add %o0, 0x08, %o0
1276 - EX_ST(STORE_INIT(%g2, %o0))
1277 + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
1278 add %o0, 0x08, %o0
1279 - EX_ST(STORE_INIT(%g3, %o0))
1280 + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
1281 add %o0, 0x08, %o0
1282 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
1283 + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
1284 add %o0, 0x08, %o0
1285 bne,pt %icc, 1b
1286 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
1287 @@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1288 sub %o2, %o4, %o2
1289 alignaddr %o1, %g0, %g1
1290 add %o1, %o4, %o1
1291 - EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
1292 -1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
1293 + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
1294 +1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
1295 subcc %o4, 0x40, %o4
1296 - EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
1297 - EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
1298 - EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
1299 - EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
1300 - EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
1301 - EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
1302 + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
1303 + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
1304 + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
1305 + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
1306 + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
1307 + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
1308 faligndata %f0, %f2, %f16
1309 - EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
1310 + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
1311 faligndata %f2, %f4, %f18
1312 add %g1, 0x40, %g1
1313 faligndata %f4, %f6, %f20
1314 @@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1315 faligndata %f10, %f12, %f26
1316 faligndata %f12, %f14, %f28
1317 faligndata %f14, %f0, %f30
1318 - EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
1319 - EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
1320 - EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
1321 - EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
1322 - EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
1323 - EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
1324 - EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
1325 - EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
1326 + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
1327 + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
1328 + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
1329 + EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
1330 + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
1331 + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
1332 + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
1333 + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
1334 add %o0, 0x40, %o0
1335 bne,pt %icc, 1b
1336 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
1337 @@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1338 andncc %o2, 0x20 - 1, %o5
1339 be,pn %icc, 2f
1340 sub %o2, %o5, %o2
1341 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1342 - EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
1343 - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
1344 - EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
1345 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1346 + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
1347 + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
1348 + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
1349 add %o1, 0x20, %o1
1350 subcc %o5, 0x20, %o5
1351 - EX_ST(STORE(stx, %g1, %o0 + 0x00))
1352 - EX_ST(STORE(stx, %g2, %o0 + 0x08))
1353 - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
1354 - EX_ST(STORE(stx, %o4, %o0 + 0x18))
1355 + EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
1356 + EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
1357 + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24)
1358 + EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
1359 bne,pt %icc, 1b
1360 add %o0, 0x20, %o0
1361 2: andcc %o2, 0x18, %o5
1362 be,pt %icc, 3f
1363 sub %o2, %o5, %o2
1364 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1365 +
1366 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1367 add %o1, 0x08, %o1
1368 add %o0, 0x08, %o0
1369 subcc %o5, 0x08, %o5
1370 bne,pt %icc, 1b
1371 - EX_ST(STORE(stx, %g1, %o0 - 0x08))
1372 + EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
1373 3: brz,pt %o2, .Lexit
1374 cmp %o2, 0x04
1375 bl,pn %icc, .Ltiny
1376 nop
1377 - EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
1378 + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
1379 add %o1, 0x04, %o1
1380 add %o0, 0x04, %o0
1381 subcc %o2, 0x04, %o2
1382 bne,pn %icc, .Ltiny
1383 - EX_ST(STORE(stw, %g1, %o0 - 0x04))
1384 + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
1385 ba,a,pt %icc, .Lexit
1386 .Lmedium_unaligned:
1387 /* First get dest 8 byte aligned. */
1388 @@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1389 brz,pt %g1, 2f
1390 sub %o2, %g1, %o2
1391
1392 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
1393 +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
1394 add %o1, 1, %o1
1395 subcc %g1, 1, %g1
1396 add %o0, 1, %o0
1397 bne,pt %icc, 1b
1398 - EX_ST(STORE(stb, %g2, %o0 - 0x01))
1399 + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
1400 2:
1401 and %o1, 0x7, %g1
1402 brz,pn %g1, .Lmedium_noprefetch
1403 @@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1404 mov 64, %g2
1405 sub %g2, %g1, %g2
1406 andn %o1, 0x7, %o1
1407 - EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
1408 + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
1409 sllx %o4, %g1, %o4
1410 andn %o2, 0x08 - 1, %o5
1411 sub %o2, %o5, %o2
1412 -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
1413 +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
1414 add %o1, 0x08, %o1
1415 subcc %o5, 0x08, %o5
1416 srlx %g3, %g2, GLOBAL_SPARE
1417 or GLOBAL_SPARE, %o4, GLOBAL_SPARE
1418 - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
1419 + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
1420 add %o0, 0x08, %o0
1421 bne,pt %icc, 1b
1422 sllx %g3, %g1, %o4
1423 @@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1424 ba,pt %icc, .Lsmall_unaligned
1425
1426 .Ltiny:
1427 - EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
1428 + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
1429 subcc %o2, 1, %o2
1430 be,pn %icc, .Lexit
1431 - EX_ST(STORE(stb, %g1, %o0 + 0x00))
1432 - EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
1433 + EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
1434 + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
1435 subcc %o2, 1, %o2
1436 be,pn %icc, .Lexit
1437 - EX_ST(STORE(stb, %g1, %o0 + 0x01))
1438 - EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
1439 + EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
1440 + EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
1441 ba,pt %icc, .Lexit
1442 - EX_ST(STORE(stb, %g1, %o0 + 0x02))
1443 + EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
1444
1445 .Lsmall:
1446 andcc %g2, 0x3, %g0
1447 @@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1448 andn %o2, 0x4 - 1, %o5
1449 sub %o2, %o5, %o2
1450 1:
1451 - EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
1452 + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
1453 add %o1, 0x04, %o1
1454 subcc %o5, 0x04, %o5
1455 add %o0, 0x04, %o0
1456 bne,pt %icc, 1b
1457 - EX_ST(STORE(stw, %g1, %o0 - 0x04))
1458 + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
1459 brz,pt %o2, .Lexit
1460 nop
1461 ba,a,pt %icc, .Ltiny
1462
1463 .Lsmall_unaligned:
1464 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
1465 +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
1466 add %o1, 1, %o1
1467 add %o0, 1, %o0
1468 subcc %o2, 1, %o2
1469 bne,pt %icc, 1b
1470 - EX_ST(STORE(stb, %g1, %o0 - 0x01))
1471 + EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
1472 ba,a,pt %icc, .Lexit
1473 .size FUNC_NAME, .-FUNC_NAME
1474 diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S
1475 index 5d1e4d1ac21e..9cd42fcbc781 100644
1476 --- a/arch/sparc/lib/NGcopy_from_user.S
1477 +++ b/arch/sparc/lib/NGcopy_from_user.S
1478 @@ -3,11 +3,11 @@
1479 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
1480 */
1481
1482 -#define EX_LD(x) \
1483 +#define EX_LD(x,y) \
1484 98: x; \
1485 .section __ex_table,"a";\
1486 .align 4; \
1487 - .word 98b, __ret_one_asi;\
1488 + .word 98b, y; \
1489 .text; \
1490 .align 4;
1491
1492 diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S
1493 index ff630dcb273c..5c358afd464e 100644
1494 --- a/arch/sparc/lib/NGcopy_to_user.S
1495 +++ b/arch/sparc/lib/NGcopy_to_user.S
1496 @@ -3,11 +3,11 @@
1497 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
1498 */
1499
1500 -#define EX_ST(x) \
1501 +#define EX_ST(x,y) \
1502 98: x; \
1503 .section __ex_table,"a";\
1504 .align 4; \
1505 - .word 98b, __ret_one_asi;\
1506 + .word 98b, y; \
1507 .text; \
1508 .align 4;
1509
1510 diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S
1511 index 96a14caf6966..d88c4ed50a00 100644
1512 --- a/arch/sparc/lib/NGmemcpy.S
1513 +++ b/arch/sparc/lib/NGmemcpy.S
1514 @@ -4,6 +4,7 @@
1515 */
1516
1517 #ifdef __KERNEL__
1518 +#include <linux/linkage.h>
1519 #include <asm/asi.h>
1520 #include <asm/thread_info.h>
1521 #define GLOBAL_SPARE %g7
1522 @@ -27,15 +28,11 @@
1523 #endif
1524
1525 #ifndef EX_LD
1526 -#define EX_LD(x) x
1527 +#define EX_LD(x,y) x
1528 #endif
1529
1530 #ifndef EX_ST
1531 -#define EX_ST(x) x
1532 -#endif
1533 -
1534 -#ifndef EX_RETVAL
1535 -#define EX_RETVAL(x) x
1536 +#define EX_ST(x,y) x
1537 #endif
1538
1539 #ifndef LOAD
1540 @@ -79,6 +76,92 @@
1541 .register %g3,#scratch
1542
1543 .text
1544 +#ifndef EX_RETVAL
1545 +#define EX_RETVAL(x) x
1546 +__restore_asi:
1547 + ret
1548 + wr %g0, ASI_AIUS, %asi
1549 + restore
1550 +ENTRY(NG_ret_i2_plus_i4_plus_1)
1551 + ba,pt %xcc, __restore_asi
1552 + add %i2, %i5, %i0
1553 +ENDPROC(NG_ret_i2_plus_i4_plus_1)
1554 +ENTRY(NG_ret_i2_plus_g1)
1555 + ba,pt %xcc, __restore_asi
1556 + add %i2, %g1, %i0
1557 +ENDPROC(NG_ret_i2_plus_g1)
1558 +ENTRY(NG_ret_i2_plus_g1_minus_8)
1559 + sub %g1, 8, %g1
1560 + ba,pt %xcc, __restore_asi
1561 + add %i2, %g1, %i0
1562 +ENDPROC(NG_ret_i2_plus_g1_minus_8)
1563 +ENTRY(NG_ret_i2_plus_g1_minus_16)
1564 + sub %g1, 16, %g1
1565 + ba,pt %xcc, __restore_asi
1566 + add %i2, %g1, %i0
1567 +ENDPROC(NG_ret_i2_plus_g1_minus_16)
1568 +ENTRY(NG_ret_i2_plus_g1_minus_24)
1569 + sub %g1, 24, %g1
1570 + ba,pt %xcc, __restore_asi
1571 + add %i2, %g1, %i0
1572 +ENDPROC(NG_ret_i2_plus_g1_minus_24)
1573 +ENTRY(NG_ret_i2_plus_g1_minus_32)
1574 + sub %g1, 32, %g1
1575 + ba,pt %xcc, __restore_asi
1576 + add %i2, %g1, %i0
1577 +ENDPROC(NG_ret_i2_plus_g1_minus_32)
1578 +ENTRY(NG_ret_i2_plus_g1_minus_40)
1579 + sub %g1, 40, %g1
1580 + ba,pt %xcc, __restore_asi
1581 + add %i2, %g1, %i0
1582 +ENDPROC(NG_ret_i2_plus_g1_minus_40)
1583 +ENTRY(NG_ret_i2_plus_g1_minus_48)
1584 + sub %g1, 48, %g1
1585 + ba,pt %xcc, __restore_asi
1586 + add %i2, %g1, %i0
1587 +ENDPROC(NG_ret_i2_plus_g1_minus_48)
1588 +ENTRY(NG_ret_i2_plus_g1_minus_56)
1589 + sub %g1, 56, %g1
1590 + ba,pt %xcc, __restore_asi
1591 + add %i2, %g1, %i0
1592 +ENDPROC(NG_ret_i2_plus_g1_minus_56)
1593 +ENTRY(NG_ret_i2_plus_i4)
1594 + ba,pt %xcc, __restore_asi
1595 + add %i2, %i4, %i0
1596 +ENDPROC(NG_ret_i2_plus_i4)
1597 +ENTRY(NG_ret_i2_plus_i4_minus_8)
1598 + sub %i4, 8, %i4
1599 + ba,pt %xcc, __restore_asi
1600 + add %i2, %i4, %i0
1601 +ENDPROC(NG_ret_i2_plus_i4_minus_8)
1602 +ENTRY(NG_ret_i2_plus_8)
1603 + ba,pt %xcc, __restore_asi
1604 + add %i2, 8, %i0
1605 +ENDPROC(NG_ret_i2_plus_8)
1606 +ENTRY(NG_ret_i2_plus_4)
1607 + ba,pt %xcc, __restore_asi
1608 + add %i2, 4, %i0
1609 +ENDPROC(NG_ret_i2_plus_4)
1610 +ENTRY(NG_ret_i2_plus_1)
1611 + ba,pt %xcc, __restore_asi
1612 + add %i2, 1, %i0
1613 +ENDPROC(NG_ret_i2_plus_1)
1614 +ENTRY(NG_ret_i2_plus_g1_plus_1)
1615 + add %g1, 1, %g1
1616 + ba,pt %xcc, __restore_asi
1617 + add %i2, %g1, %i0
1618 +ENDPROC(NG_ret_i2_plus_g1_plus_1)
1619 +ENTRY(NG_ret_i2)
1620 + ba,pt %xcc, __restore_asi
1621 + mov %i2, %i0
1622 +ENDPROC(NG_ret_i2)
1623 +ENTRY(NG_ret_i2_and_7_plus_i4)
1624 + and %i2, 7, %i2
1625 + ba,pt %xcc, __restore_asi
1626 + add %i2, %i4, %i0
1627 +ENDPROC(NG_ret_i2_and_7_plus_i4)
1628 +#endif
1629 +
1630 .align 64
1631
1632 .globl FUNC_NAME
1633 @@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1634 sub %g0, %i4, %i4 ! bytes to align dst
1635 sub %i2, %i4, %i2
1636 1: subcc %i4, 1, %i4
1637 - EX_LD(LOAD(ldub, %i1, %g1))
1638 - EX_ST(STORE(stb, %g1, %o0))
1639 + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
1640 + EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
1641 add %i1, 1, %i1
1642 bne,pt %XCC, 1b
1643 add %o0, 1, %o0
1644 @@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1645 and %i4, 0x7, GLOBAL_SPARE
1646 sll GLOBAL_SPARE, 3, GLOBAL_SPARE
1647 mov 64, %i5
1648 - EX_LD(LOAD_TWIN(%i1, %g2, %g3))
1649 + EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
1650 sub %i5, GLOBAL_SPARE, %i5
1651 mov 16, %o4
1652 mov 32, %o5
1653 @@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1654 srlx WORD3, PRE_SHIFT, TMP; \
1655 or WORD2, TMP, WORD2;
1656
1657 -8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
1658 +8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
1659 MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
1660 LOAD(prefetch, %i1 + %i3, #one_read)
1661
1662 - EX_ST(STORE_INIT(%g2, %o0 + 0x00))
1663 - EX_ST(STORE_INIT(%g3, %o0 + 0x08))
1664 + EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
1665 + EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1666
1667 - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
1668 + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
1669 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
1670
1671 - EX_ST(STORE_INIT(%o2, %o0 + 0x10))
1672 - EX_ST(STORE_INIT(%o3, %o0 + 0x18))
1673 + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1674 + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1675
1676 - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1677 + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1678 MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
1679
1680 - EX_ST(STORE_INIT(%g2, %o0 + 0x20))
1681 - EX_ST(STORE_INIT(%g3, %o0 + 0x28))
1682 + EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1683 + EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1684
1685 - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
1686 + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
1687 add %i1, 64, %i1
1688 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
1689
1690 - EX_ST(STORE_INIT(%o2, %o0 + 0x30))
1691 - EX_ST(STORE_INIT(%o3, %o0 + 0x38))
1692 + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1693 + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1694
1695 subcc %g1, 64, %g1
1696 bne,pt %XCC, 8b
1697 @@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1698 ba,pt %XCC, 60f
1699 add %i1, %i4, %i1
1700
1701 -9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
1702 +9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
1703 MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
1704 LOAD(prefetch, %i1 + %i3, #one_read)
1705
1706 - EX_ST(STORE_INIT(%g3, %o0 + 0x00))
1707 - EX_ST(STORE_INIT(%o2, %o0 + 0x08))
1708 + EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
1709 + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1710
1711 - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
1712 + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
1713 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
1714
1715 - EX_ST(STORE_INIT(%o3, %o0 + 0x10))
1716 - EX_ST(STORE_INIT(%g2, %o0 + 0x18))
1717 + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1718 + EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1719
1720 - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1721 + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1722 MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
1723
1724 - EX_ST(STORE_INIT(%g3, %o0 + 0x20))
1725 - EX_ST(STORE_INIT(%o2, %o0 + 0x28))
1726 + EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1727 + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1728
1729 - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
1730 + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
1731 add %i1, 64, %i1
1732 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
1733
1734 - EX_ST(STORE_INIT(%o3, %o0 + 0x30))
1735 - EX_ST(STORE_INIT(%g2, %o0 + 0x38))
1736 + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1737 + EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1738
1739 subcc %g1, 64, %g1
1740 bne,pt %XCC, 9b
1741 @@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1742 * one twin load ahead, then add 8 back into source when
1743 * we finish the loop.
1744 */
1745 - EX_LD(LOAD_TWIN(%i1, %o4, %o5))
1746 + EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
1747 mov 16, %o7
1748 mov 32, %g2
1749 mov 48, %g3
1750 mov 64, %o1
1751 -1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1752 +1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
1753 LOAD(prefetch, %i1 + %o1, #one_read)
1754 - EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line
1755 - EX_ST(STORE_INIT(%o2, %o0 + 0x08))
1756 - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
1757 - EX_ST(STORE_INIT(%o3, %o0 + 0x10))
1758 - EX_ST(STORE_INIT(%o4, %o0 + 0x18))
1759 - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
1760 - EX_ST(STORE_INIT(%o5, %o0 + 0x20))
1761 - EX_ST(STORE_INIT(%o2, %o0 + 0x28))
1762 - EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5))
1763 + EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
1764 + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1765 + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
1766 + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1767 + EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1768 + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1769 + EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1770 + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1771 + EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
1772 add %i1, 64, %i1
1773 - EX_ST(STORE_INIT(%o3, %o0 + 0x30))
1774 - EX_ST(STORE_INIT(%o4, %o0 + 0x38))
1775 + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1776 + EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1777 subcc %g1, 64, %g1
1778 bne,pt %XCC, 1b
1779 add %o0, 64, %o0
1780 @@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1781 mov 32, %g2
1782 mov 48, %g3
1783 mov 64, %o1
1784 -1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5))
1785 - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
1786 +1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
1787 + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
1788 LOAD(prefetch, %i1 + %o1, #one_read)
1789 - EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line
1790 - EX_ST(STORE_INIT(%o5, %o0 + 0x08))
1791 - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
1792 - EX_ST(STORE_INIT(%o2, %o0 + 0x10))
1793 - EX_ST(STORE_INIT(%o3, %o0 + 0x18))
1794 - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
1795 + EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
1796 + EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
1797 + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
1798 + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
1799 + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
1800 + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
1801 add %i1, 64, %i1
1802 - EX_ST(STORE_INIT(%o4, %o0 + 0x20))
1803 - EX_ST(STORE_INIT(%o5, %o0 + 0x28))
1804 - EX_ST(STORE_INIT(%o2, %o0 + 0x30))
1805 - EX_ST(STORE_INIT(%o3, %o0 + 0x38))
1806 + EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
1807 + EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
1808 + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
1809 + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
1810 subcc %g1, 64, %g1
1811 bne,pt %XCC, 1b
1812 add %o0, 64, %o0
1813 @@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1814 andn %i2, 0xf, %i4
1815 and %i2, 0xf, %i2
1816 1: subcc %i4, 0x10, %i4
1817 - EX_LD(LOAD(ldx, %i1, %o4))
1818 + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
1819 add %i1, 0x08, %i1
1820 - EX_LD(LOAD(ldx, %i1, %g1))
1821 + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
1822 sub %i1, 0x08, %i1
1823 - EX_ST(STORE(stx, %o4, %i1 + %i3))
1824 + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
1825 add %i1, 0x8, %i1
1826 - EX_ST(STORE(stx, %g1, %i1 + %i3))
1827 + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
1828 bgu,pt %XCC, 1b
1829 add %i1, 0x8, %i1
1830 73: andcc %i2, 0x8, %g0
1831 be,pt %XCC, 1f
1832 nop
1833 sub %i2, 0x8, %i2
1834 - EX_LD(LOAD(ldx, %i1, %o4))
1835 - EX_ST(STORE(stx, %o4, %i1 + %i3))
1836 + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
1837 + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
1838 add %i1, 0x8, %i1
1839 1: andcc %i2, 0x4, %g0
1840 be,pt %XCC, 1f
1841 nop
1842 sub %i2, 0x4, %i2
1843 - EX_LD(LOAD(lduw, %i1, %i5))
1844 - EX_ST(STORE(stw, %i5, %i1 + %i3))
1845 + EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
1846 + EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
1847 add %i1, 0x4, %i1
1848 1: cmp %i2, 0
1849 be,pt %XCC, 85f
1850 @@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1851 sub %i2, %g1, %i2
1852
1853 1: subcc %g1, 1, %g1
1854 - EX_LD(LOAD(ldub, %i1, %i5))
1855 - EX_ST(STORE(stb, %i5, %i1 + %i3))
1856 + EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
1857 + EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
1858 bgu,pt %icc, 1b
1859 add %i1, 1, %i1
1860
1861 @@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1862
1863 8: mov 64, %i3
1864 andn %i1, 0x7, %i1
1865 - EX_LD(LOAD(ldx, %i1, %g2))
1866 + EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
1867 sub %i3, %g1, %i3
1868 andn %i2, 0x7, %i4
1869 sllx %g2, %g1, %g2
1870 1: add %i1, 0x8, %i1
1871 - EX_LD(LOAD(ldx, %i1, %g3))
1872 + EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
1873 subcc %i4, 0x8, %i4
1874 srlx %g3, %i3, %i5
1875 or %i5, %g2, %i5
1876 - EX_ST(STORE(stx, %i5, %o0))
1877 + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
1878 add %o0, 0x8, %o0
1879 bgu,pt %icc, 1b
1880 sllx %g3, %g1, %g2
1881 @@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1882
1883 1:
1884 subcc %i2, 4, %i2
1885 - EX_LD(LOAD(lduw, %i1, %g1))
1886 - EX_ST(STORE(stw, %g1, %i1 + %i3))
1887 + EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
1888 + EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
1889 bgu,pt %XCC, 1b
1890 add %i1, 4, %i1
1891
1892 @@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1893 .align 32
1894 90:
1895 subcc %i2, 1, %i2
1896 - EX_LD(LOAD(ldub, %i1, %g1))
1897 - EX_ST(STORE(stb, %g1, %i1 + %i3))
1898 + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
1899 + EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
1900 bgu,pt %XCC, 90b
1901 add %i1, 1, %i1
1902 ret
1903 diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S
1904 index ecc5692fa2b4..bb6ff73229e3 100644
1905 --- a/arch/sparc/lib/U1copy_from_user.S
1906 +++ b/arch/sparc/lib/U1copy_from_user.S
1907 @@ -3,19 +3,19 @@
1908 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
1909 */
1910
1911 -#define EX_LD(x) \
1912 +#define EX_LD(x,y) \
1913 98: x; \
1914 .section __ex_table,"a";\
1915 .align 4; \
1916 - .word 98b, __retl_one; \
1917 + .word 98b, y; \
1918 .text; \
1919 .align 4;
1920
1921 -#define EX_LD_FP(x) \
1922 +#define EX_LD_FP(x,y) \
1923 98: x; \
1924 .section __ex_table,"a";\
1925 .align 4; \
1926 - .word 98b, __retl_one_fp;\
1927 + .word 98b, y; \
1928 .text; \
1929 .align 4;
1930
1931 diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
1932 index 9eea392e44d4..ed92ce739558 100644
1933 --- a/arch/sparc/lib/U1copy_to_user.S
1934 +++ b/arch/sparc/lib/U1copy_to_user.S
1935 @@ -3,19 +3,19 @@
1936 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
1937 */
1938
1939 -#define EX_ST(x) \
1940 +#define EX_ST(x,y) \
1941 98: x; \
1942 .section __ex_table,"a";\
1943 .align 4; \
1944 - .word 98b, __retl_one; \
1945 + .word 98b, y; \
1946 .text; \
1947 .align 4;
1948
1949 -#define EX_ST_FP(x) \
1950 +#define EX_ST_FP(x,y) \
1951 98: x; \
1952 .section __ex_table,"a";\
1953 .align 4; \
1954 - .word 98b, __retl_one_fp;\
1955 + .word 98b, y; \
1956 .text; \
1957 .align 4;
1958
1959 diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
1960 index 3e6209ebb7d7..f30d2ab2c371 100644
1961 --- a/arch/sparc/lib/U1memcpy.S
1962 +++ b/arch/sparc/lib/U1memcpy.S
1963 @@ -5,6 +5,7 @@
1964 */
1965
1966 #ifdef __KERNEL__
1967 +#include <linux/linkage.h>
1968 #include <asm/visasm.h>
1969 #include <asm/asi.h>
1970 #define GLOBAL_SPARE g7
1971 @@ -23,21 +24,17 @@
1972 #endif
1973
1974 #ifndef EX_LD
1975 -#define EX_LD(x) x
1976 +#define EX_LD(x,y) x
1977 #endif
1978 #ifndef EX_LD_FP
1979 -#define EX_LD_FP(x) x
1980 +#define EX_LD_FP(x,y) x
1981 #endif
1982
1983 #ifndef EX_ST
1984 -#define EX_ST(x) x
1985 +#define EX_ST(x,y) x
1986 #endif
1987 #ifndef EX_ST_FP
1988 -#define EX_ST_FP(x) x
1989 -#endif
1990 -
1991 -#ifndef EX_RETVAL
1992 -#define EX_RETVAL(x) x
1993 +#define EX_ST_FP(x,y) x
1994 #endif
1995
1996 #ifndef LOAD
1997 @@ -78,53 +75,169 @@
1998 faligndata %f7, %f8, %f60; \
1999 faligndata %f8, %f9, %f62;
2000
2001 -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \
2002 - EX_LD_FP(LOAD_BLK(%src, %fdest)); \
2003 - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
2004 - add %src, 0x40, %src; \
2005 - subcc %len, 0x40, %len; \
2006 - be,pn %xcc, jmptgt; \
2007 - add %dest, 0x40, %dest; \
2008 -
2009 -#define LOOP_CHUNK1(src, dest, len, branch_dest) \
2010 - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest)
2011 -#define LOOP_CHUNK2(src, dest, len, branch_dest) \
2012 - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
2013 -#define LOOP_CHUNK3(src, dest, len, branch_dest) \
2014 - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
2015 +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \
2016 + EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \
2017 + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
2018 + add %src, 0x40, %src; \
2019 + subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
2020 + be,pn %xcc, jmptgt; \
2021 + add %dest, 0x40, %dest; \
2022 +
2023 +#define LOOP_CHUNK1(src, dest, branch_dest) \
2024 + MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest)
2025 +#define LOOP_CHUNK2(src, dest, branch_dest) \
2026 + MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
2027 +#define LOOP_CHUNK3(src, dest, branch_dest) \
2028 + MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
2029
2030 #define DO_SYNC membar #Sync;
2031 #define STORE_SYNC(dest, fsrc) \
2032 - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
2033 + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
2034 add %dest, 0x40, %dest; \
2035 DO_SYNC
2036
2037 #define STORE_JUMP(dest, fsrc, target) \
2038 - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
2039 + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
2040 add %dest, 0x40, %dest; \
2041 ba,pt %xcc, target; \
2042 nop;
2043
2044 -#define FINISH_VISCHUNK(dest, f0, f1, left) \
2045 - subcc %left, 8, %left;\
2046 - bl,pn %xcc, 95f; \
2047 - faligndata %f0, %f1, %f48; \
2048 - EX_ST_FP(STORE(std, %f48, %dest)); \
2049 +#define FINISH_VISCHUNK(dest, f0, f1) \
2050 + subcc %g3, 8, %g3; \
2051 + bl,pn %xcc, 95f; \
2052 + faligndata %f0, %f1, %f48; \
2053 + EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \
2054 add %dest, 8, %dest;
2055
2056 -#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
2057 - subcc %left, 8, %left; \
2058 - bl,pn %xcc, 95f; \
2059 +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
2060 + subcc %g3, 8, %g3; \
2061 + bl,pn %xcc, 95f; \
2062 fsrc2 %f0, %f1;
2063
2064 -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \
2065 - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
2066 +#define UNEVEN_VISCHUNK(dest, f0, f1) \
2067 + UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
2068 ba,a,pt %xcc, 93f;
2069
2070 .register %g2,#scratch
2071 .register %g3,#scratch
2072
2073 .text
2074 +#ifndef EX_RETVAL
2075 +#define EX_RETVAL(x) x
2076 +ENTRY(U1_g1_1_fp)
2077 + VISExitHalf
2078 + add %g1, 1, %g1
2079 + add %g1, %g2, %g1
2080 + retl
2081 + add %g1, %o2, %o0
2082 +ENDPROC(U1_g1_1_fp)
2083 +ENTRY(U1_g2_0_fp)
2084 + VISExitHalf
2085 + retl
2086 + add %g2, %o2, %o0
2087 +ENDPROC(U1_g2_0_fp)
2088 +ENTRY(U1_g2_8_fp)
2089 + VISExitHalf
2090 + add %g2, 8, %g2
2091 + retl
2092 + add %g2, %o2, %o0
2093 +ENDPROC(U1_g2_8_fp)
2094 +ENTRY(U1_gs_0_fp)
2095 + VISExitHalf
2096 + add %GLOBAL_SPARE, %g3, %o0
2097 + retl
2098 + add %o0, %o2, %o0
2099 +ENDPROC(U1_gs_0_fp)
2100 +ENTRY(U1_gs_80_fp)
2101 + VISExitHalf
2102 + add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
2103 + add %GLOBAL_SPARE, %g3, %o0
2104 + retl
2105 + add %o0, %o2, %o0
2106 +ENDPROC(U1_gs_80_fp)
2107 +ENTRY(U1_gs_40_fp)
2108 + VISExitHalf
2109 + add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
2110 + add %GLOBAL_SPARE, %g3, %o0
2111 + retl
2112 + add %o0, %o2, %o0
2113 +ENDPROC(U1_gs_40_fp)
2114 +ENTRY(U1_g3_0_fp)
2115 + VISExitHalf
2116 + retl
2117 + add %g3, %o2, %o0
2118 +ENDPROC(U1_g3_0_fp)
2119 +ENTRY(U1_g3_8_fp)
2120 + VISExitHalf
2121 + add %g3, 8, %g3
2122 + retl
2123 + add %g3, %o2, %o0
2124 +ENDPROC(U1_g3_8_fp)
2125 +ENTRY(U1_o2_0_fp)
2126 + VISExitHalf
2127 + retl
2128 + mov %o2, %o0
2129 +ENDPROC(U1_o2_0_fp)
2130 +ENTRY(U1_o2_1_fp)
2131 + VISExitHalf
2132 + retl
2133 + add %o2, 1, %o0
2134 +ENDPROC(U1_o2_1_fp)
2135 +ENTRY(U1_gs_0)
2136 + VISExitHalf
2137 + retl
2138 + add %GLOBAL_SPARE, %o2, %o0
2139 +ENDPROC(U1_gs_0)
2140 +ENTRY(U1_gs_8)
2141 + VISExitHalf
2142 + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
2143 + retl
2144 + add %GLOBAL_SPARE, 0x8, %o0
2145 +ENDPROC(U1_gs_8)
2146 +ENTRY(U1_gs_10)
2147 + VISExitHalf
2148 + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
2149 + retl
2150 + add %GLOBAL_SPARE, 0x10, %o0
2151 +ENDPROC(U1_gs_10)
2152 +ENTRY(U1_o2_0)
2153 + retl
2154 + mov %o2, %o0
2155 +ENDPROC(U1_o2_0)
2156 +ENTRY(U1_o2_8)
2157 + retl
2158 + add %o2, 8, %o0
2159 +ENDPROC(U1_o2_8)
2160 +ENTRY(U1_o2_4)
2161 + retl
2162 + add %o2, 4, %o0
2163 +ENDPROC(U1_o2_4)
2164 +ENTRY(U1_o2_1)
2165 + retl
2166 + add %o2, 1, %o0
2167 +ENDPROC(U1_o2_1)
2168 +ENTRY(U1_g1_0)
2169 + retl
2170 + add %g1, %o2, %o0
2171 +ENDPROC(U1_g1_0)
2172 +ENTRY(U1_g1_1)
2173 + add %g1, 1, %g1
2174 + retl
2175 + add %g1, %o2, %o0
2176 +ENDPROC(U1_g1_1)
2177 +ENTRY(U1_gs_0_o2_adj)
2178 + and %o2, 7, %o2
2179 + retl
2180 + add %GLOBAL_SPARE, %o2, %o0
2181 +ENDPROC(U1_gs_0_o2_adj)
2182 +ENTRY(U1_gs_8_o2_adj)
2183 + and %o2, 7, %o2
2184 + add %GLOBAL_SPARE, 8, %GLOBAL_SPARE
2185 + retl
2186 + add %GLOBAL_SPARE, %o2, %o0
2187 +ENDPROC(U1_gs_8_o2_adj)
2188 +#endif
2189 +
2190 .align 64
2191
2192 .globl FUNC_NAME
2193 @@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2194 and %g2, 0x38, %g2
2195
2196 1: subcc %g1, 0x1, %g1
2197 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
2198 - EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
2199 + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
2200 + EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
2201 bgu,pt %XCC, 1b
2202 add %o1, 0x1, %o1
2203
2204 @@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2205 be,pt %icc, 3f
2206 alignaddr %o1, %g0, %o1
2207
2208 - EX_LD_FP(LOAD(ldd, %o1, %f4))
2209 -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
2210 + EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
2211 +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
2212 add %o1, 0x8, %o1
2213 subcc %g2, 0x8, %g2
2214 faligndata %f4, %f6, %f0
2215 - EX_ST_FP(STORE(std, %f0, %o0))
2216 + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
2217 be,pn %icc, 3f
2218 add %o0, 0x8, %o0
2219
2220 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
2221 + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
2222 add %o1, 0x8, %o1
2223 subcc %g2, 0x8, %g2
2224 faligndata %f6, %f4, %f0
2225 - EX_ST_FP(STORE(std, %f0, %o0))
2226 + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
2227 bne,pt %icc, 1b
2228 add %o0, 0x8, %o0
2229
2230 @@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2231 add %g1, %GLOBAL_SPARE, %g1
2232 subcc %o2, %g3, %o2
2233
2234 - EX_LD_FP(LOAD_BLK(%o1, %f0))
2235 + EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
2236 add %o1, 0x40, %o1
2237 add %g1, %g3, %g1
2238 - EX_LD_FP(LOAD_BLK(%o1, %f16))
2239 + EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
2240 add %o1, 0x40, %o1
2241 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
2242 - EX_LD_FP(LOAD_BLK(%o1, %f32))
2243 + EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
2244 add %o1, 0x40, %o1
2245
2246 /* There are 8 instances of the unrolled loop,
2247 @@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2248
2249 .align 64
2250 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
2251 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2252 + LOOP_CHUNK1(o1, o0, 1f)
2253 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
2254 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2255 + LOOP_CHUNK2(o1, o0, 2f)
2256 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
2257 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2258 + LOOP_CHUNK3(o1, o0, 3f)
2259 ba,pt %xcc, 1b+4
2260 faligndata %f0, %f2, %f48
2261 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
2262 @@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2263 STORE_JUMP(o0, f48, 56f)
2264
2265 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
2266 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2267 + LOOP_CHUNK1(o1, o0, 1f)
2268 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
2269 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2270 + LOOP_CHUNK2(o1, o0, 2f)
2271 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
2272 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2273 + LOOP_CHUNK3(o1, o0, 3f)
2274 ba,pt %xcc, 1b+4
2275 faligndata %f2, %f4, %f48
2276 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
2277 @@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2278 STORE_JUMP(o0, f48, 57f)
2279
2280 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
2281 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2282 + LOOP_CHUNK1(o1, o0, 1f)
2283 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
2284 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2285 + LOOP_CHUNK2(o1, o0, 2f)
2286 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
2287 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2288 + LOOP_CHUNK3(o1, o0, 3f)
2289 ba,pt %xcc, 1b+4
2290 faligndata %f4, %f6, %f48
2291 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
2292 @@ -303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2293 STORE_JUMP(o0, f48, 58f)
2294
2295 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
2296 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2297 + LOOP_CHUNK1(o1, o0, 1f)
2298 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
2299 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2300 + LOOP_CHUNK2(o1, o0, 2f)
2301 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
2302 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2303 + LOOP_CHUNK3(o1, o0, 3f)
2304 ba,pt %xcc, 1b+4
2305 faligndata %f6, %f8, %f48
2306 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
2307 @@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2308 STORE_JUMP(o0, f48, 59f)
2309
2310 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
2311 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2312 + LOOP_CHUNK1(o1, o0, 1f)
2313 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
2314 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2315 + LOOP_CHUNK2(o1, o0, 2f)
2316 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
2317 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2318 + LOOP_CHUNK3(o1, o0, 3f)
2319 ba,pt %xcc, 1b+4
2320 faligndata %f8, %f10, %f48
2321 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
2322 @@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2323 STORE_JUMP(o0, f48, 60f)
2324
2325 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
2326 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2327 + LOOP_CHUNK1(o1, o0, 1f)
2328 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
2329 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2330 + LOOP_CHUNK2(o1, o0, 2f)
2331 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
2332 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2333 + LOOP_CHUNK3(o1, o0, 3f)
2334 ba,pt %xcc, 1b+4
2335 faligndata %f10, %f12, %f48
2336 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
2337 @@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2338 STORE_JUMP(o0, f48, 61f)
2339
2340 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
2341 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2342 + LOOP_CHUNK1(o1, o0, 1f)
2343 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
2344 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2345 + LOOP_CHUNK2(o1, o0, 2f)
2346 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
2347 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2348 + LOOP_CHUNK3(o1, o0, 3f)
2349 ba,pt %xcc, 1b+4
2350 faligndata %f12, %f14, %f48
2351 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
2352 @@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2353 STORE_JUMP(o0, f48, 62f)
2354
2355 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
2356 - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
2357 + LOOP_CHUNK1(o1, o0, 1f)
2358 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2359 - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
2360 + LOOP_CHUNK2(o1, o0, 2f)
2361 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
2362 - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
2363 + LOOP_CHUNK3(o1, o0, 3f)
2364 ba,pt %xcc, 1b+4
2365 faligndata %f14, %f16, %f48
2366 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2367 @@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2368 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
2369 STORE_JUMP(o0, f48, 63f)
2370
2371 -40: FINISH_VISCHUNK(o0, f0, f2, g3)
2372 -41: FINISH_VISCHUNK(o0, f2, f4, g3)
2373 -42: FINISH_VISCHUNK(o0, f4, f6, g3)
2374 -43: FINISH_VISCHUNK(o0, f6, f8, g3)
2375 -44: FINISH_VISCHUNK(o0, f8, f10, g3)
2376 -45: FINISH_VISCHUNK(o0, f10, f12, g3)
2377 -46: FINISH_VISCHUNK(o0, f12, f14, g3)
2378 -47: UNEVEN_VISCHUNK(o0, f14, f0, g3)
2379 -48: FINISH_VISCHUNK(o0, f16, f18, g3)
2380 -49: FINISH_VISCHUNK(o0, f18, f20, g3)
2381 -50: FINISH_VISCHUNK(o0, f20, f22, g3)
2382 -51: FINISH_VISCHUNK(o0, f22, f24, g3)
2383 -52: FINISH_VISCHUNK(o0, f24, f26, g3)
2384 -53: FINISH_VISCHUNK(o0, f26, f28, g3)
2385 -54: FINISH_VISCHUNK(o0, f28, f30, g3)
2386 -55: UNEVEN_VISCHUNK(o0, f30, f0, g3)
2387 -56: FINISH_VISCHUNK(o0, f32, f34, g3)
2388 -57: FINISH_VISCHUNK(o0, f34, f36, g3)
2389 -58: FINISH_VISCHUNK(o0, f36, f38, g3)
2390 -59: FINISH_VISCHUNK(o0, f38, f40, g3)
2391 -60: FINISH_VISCHUNK(o0, f40, f42, g3)
2392 -61: FINISH_VISCHUNK(o0, f42, f44, g3)
2393 -62: FINISH_VISCHUNK(o0, f44, f46, g3)
2394 -63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3)
2395 -
2396 -93: EX_LD_FP(LOAD(ldd, %o1, %f2))
2397 +40: FINISH_VISCHUNK(o0, f0, f2)
2398 +41: FINISH_VISCHUNK(o0, f2, f4)
2399 +42: FINISH_VISCHUNK(o0, f4, f6)
2400 +43: FINISH_VISCHUNK(o0, f6, f8)
2401 +44: FINISH_VISCHUNK(o0, f8, f10)
2402 +45: FINISH_VISCHUNK(o0, f10, f12)
2403 +46: FINISH_VISCHUNK(o0, f12, f14)
2404 +47: UNEVEN_VISCHUNK(o0, f14, f0)
2405 +48: FINISH_VISCHUNK(o0, f16, f18)
2406 +49: FINISH_VISCHUNK(o0, f18, f20)
2407 +50: FINISH_VISCHUNK(o0, f20, f22)
2408 +51: FINISH_VISCHUNK(o0, f22, f24)
2409 +52: FINISH_VISCHUNK(o0, f24, f26)
2410 +53: FINISH_VISCHUNK(o0, f26, f28)
2411 +54: FINISH_VISCHUNK(o0, f28, f30)
2412 +55: UNEVEN_VISCHUNK(o0, f30, f0)
2413 +56: FINISH_VISCHUNK(o0, f32, f34)
2414 +57: FINISH_VISCHUNK(o0, f34, f36)
2415 +58: FINISH_VISCHUNK(o0, f36, f38)
2416 +59: FINISH_VISCHUNK(o0, f38, f40)
2417 +60: FINISH_VISCHUNK(o0, f40, f42)
2418 +61: FINISH_VISCHUNK(o0, f42, f44)
2419 +62: FINISH_VISCHUNK(o0, f44, f46)
2420 +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0)
2421 +
2422 +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
2423 add %o1, 8, %o1
2424 subcc %g3, 8, %g3
2425 faligndata %f0, %f2, %f8
2426 - EX_ST_FP(STORE(std, %f8, %o0))
2427 + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
2428 bl,pn %xcc, 95f
2429 add %o0, 8, %o0
2430 - EX_LD_FP(LOAD(ldd, %o1, %f0))
2431 + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
2432 add %o1, 8, %o1
2433 subcc %g3, 8, %g3
2434 faligndata %f2, %f0, %f8
2435 - EX_ST_FP(STORE(std, %f8, %o0))
2436 + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
2437 bge,pt %xcc, 93b
2438 add %o0, 8, %o0
2439
2440 95: brz,pt %o2, 2f
2441 mov %g1, %o1
2442
2443 -1: EX_LD_FP(LOAD(ldub, %o1, %o3))
2444 +1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
2445 add %o1, 1, %o1
2446 subcc %o2, 1, %o2
2447 - EX_ST_FP(STORE(stb, %o3, %o0))
2448 + EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
2449 bne,pt %xcc, 1b
2450 add %o0, 1, %o0
2451
2452 @@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2453
2454 72: andn %o2, 0xf, %GLOBAL_SPARE
2455 and %o2, 0xf, %o2
2456 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
2457 - EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
2458 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
2459 + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
2460 subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
2461 - EX_ST(STORE(stx, %o5, %o1 + %o3))
2462 + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
2463 add %o1, 0x8, %o1
2464 - EX_ST(STORE(stx, %g1, %o1 + %o3))
2465 + EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
2466 bgu,pt %XCC, 1b
2467 add %o1, 0x8, %o1
2468 73: andcc %o2, 0x8, %g0
2469 be,pt %XCC, 1f
2470 nop
2471 - EX_LD(LOAD(ldx, %o1, %o5))
2472 + EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
2473 sub %o2, 0x8, %o2
2474 - EX_ST(STORE(stx, %o5, %o1 + %o3))
2475 + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
2476 add %o1, 0x8, %o1
2477 1: andcc %o2, 0x4, %g0
2478 be,pt %XCC, 1f
2479 nop
2480 - EX_LD(LOAD(lduw, %o1, %o5))
2481 + EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
2482 sub %o2, 0x4, %o2
2483 - EX_ST(STORE(stw, %o5, %o1 + %o3))
2484 + EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
2485 add %o1, 0x4, %o1
2486 1: cmp %o2, 0
2487 be,pt %XCC, 85f
2488 @@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2489 sub %g0, %g1, %g1
2490 sub %o2, %g1, %o2
2491
2492 -1: EX_LD(LOAD(ldub, %o1, %o5))
2493 +1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
2494 subcc %g1, 1, %g1
2495 - EX_ST(STORE(stb, %o5, %o1 + %o3))
2496 + EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
2497 bgu,pt %icc, 1b
2498 add %o1, 1, %o1
2499
2500 @@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2501
2502 8: mov 64, %o3
2503 andn %o1, 0x7, %o1
2504 - EX_LD(LOAD(ldx, %o1, %g2))
2505 + EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
2506 sub %o3, %g1, %o3
2507 andn %o2, 0x7, %GLOBAL_SPARE
2508 sllx %g2, %g1, %g2
2509 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
2510 +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
2511 subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
2512 add %o1, 0x8, %o1
2513 srlx %g3, %o3, %o5
2514 or %o5, %g2, %o5
2515 - EX_ST(STORE(stx, %o5, %o0))
2516 + EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
2517 add %o0, 0x8, %o0
2518 bgu,pt %icc, 1b
2519 sllx %g3, %g1, %g2
2520 @@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2521 bne,pn %XCC, 90f
2522 sub %o0, %o1, %o3
2523
2524 -1: EX_LD(LOAD(lduw, %o1, %g1))
2525 +1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
2526 subcc %o2, 4, %o2
2527 - EX_ST(STORE(stw, %g1, %o1 + %o3))
2528 + EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
2529 bgu,pt %XCC, 1b
2530 add %o1, 4, %o1
2531
2532 @@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2533 mov EX_RETVAL(%o4), %o0
2534
2535 .align 32
2536 -90: EX_LD(LOAD(ldub, %o1, %g1))
2537 +90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
2538 subcc %o2, 1, %o2
2539 - EX_ST(STORE(stb, %g1, %o1 + %o3))
2540 + EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
2541 bgu,pt %XCC, 90b
2542 add %o1, 1, %o1
2543 retl
2544 diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S
2545 index 88ad73d86fe4..db73010a1af8 100644
2546 --- a/arch/sparc/lib/U3copy_from_user.S
2547 +++ b/arch/sparc/lib/U3copy_from_user.S
2548 @@ -3,19 +3,19 @@
2549 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
2550 */
2551
2552 -#define EX_LD(x) \
2553 +#define EX_LD(x,y) \
2554 98: x; \
2555 .section __ex_table,"a";\
2556 .align 4; \
2557 - .word 98b, __retl_one; \
2558 + .word 98b, y; \
2559 .text; \
2560 .align 4;
2561
2562 -#define EX_LD_FP(x) \
2563 +#define EX_LD_FP(x,y) \
2564 98: x; \
2565 .section __ex_table,"a";\
2566 .align 4; \
2567 - .word 98b, __retl_one_fp;\
2568 + .word 98b, y##_fp; \
2569 .text; \
2570 .align 4;
2571
2572 diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
2573 index 845139d75537..c4ee858e352a 100644
2574 --- a/arch/sparc/lib/U3copy_to_user.S
2575 +++ b/arch/sparc/lib/U3copy_to_user.S
2576 @@ -3,19 +3,19 @@
2577 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
2578 */
2579
2580 -#define EX_ST(x) \
2581 +#define EX_ST(x,y) \
2582 98: x; \
2583 .section __ex_table,"a";\
2584 .align 4; \
2585 - .word 98b, __retl_one; \
2586 + .word 98b, y; \
2587 .text; \
2588 .align 4;
2589
2590 -#define EX_ST_FP(x) \
2591 +#define EX_ST_FP(x,y) \
2592 98: x; \
2593 .section __ex_table,"a";\
2594 .align 4; \
2595 - .word 98b, __retl_one_fp;\
2596 + .word 98b, y##_fp; \
2597 .text; \
2598 .align 4;
2599
2600 diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
2601 index 491ee69e4995..54f98706b03b 100644
2602 --- a/arch/sparc/lib/U3memcpy.S
2603 +++ b/arch/sparc/lib/U3memcpy.S
2604 @@ -4,6 +4,7 @@
2605 */
2606
2607 #ifdef __KERNEL__
2608 +#include <linux/linkage.h>
2609 #include <asm/visasm.h>
2610 #include <asm/asi.h>
2611 #define GLOBAL_SPARE %g7
2612 @@ -22,21 +23,17 @@
2613 #endif
2614
2615 #ifndef EX_LD
2616 -#define EX_LD(x) x
2617 +#define EX_LD(x,y) x
2618 #endif
2619 #ifndef EX_LD_FP
2620 -#define EX_LD_FP(x) x
2621 +#define EX_LD_FP(x,y) x
2622 #endif
2623
2624 #ifndef EX_ST
2625 -#define EX_ST(x) x
2626 +#define EX_ST(x,y) x
2627 #endif
2628 #ifndef EX_ST_FP
2629 -#define EX_ST_FP(x) x
2630 -#endif
2631 -
2632 -#ifndef EX_RETVAL
2633 -#define EX_RETVAL(x) x
2634 +#define EX_ST_FP(x,y) x
2635 #endif
2636
2637 #ifndef LOAD
2638 @@ -77,6 +74,87 @@
2639 */
2640
2641 .text
2642 +#ifndef EX_RETVAL
2643 +#define EX_RETVAL(x) x
2644 +__restore_fp:
2645 + VISExitHalf
2646 + retl
2647 + nop
2648 +ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
2649 + add %g1, 1, %g1
2650 + add %g2, %g1, %g2
2651 + ba,pt %xcc, __restore_fp
2652 + add %o2, %g2, %o0
2653 +ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
2654 +ENTRY(U3_retl_o2_plus_g2_fp)
2655 + ba,pt %xcc, __restore_fp
2656 + add %o2, %g2, %o0
2657 +ENDPROC(U3_retl_o2_plus_g2_fp)
2658 +ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
2659 + add %g2, 8, %g2
2660 + ba,pt %xcc, __restore_fp
2661 + add %o2, %g2, %o0
2662 +ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
2663 +ENTRY(U3_retl_o2)
2664 + retl
2665 + mov %o2, %o0
2666 +ENDPROC(U3_retl_o2)
2667 +ENTRY(U3_retl_o2_plus_1)
2668 + retl
2669 + add %o2, 1, %o0
2670 +ENDPROC(U3_retl_o2_plus_1)
2671 +ENTRY(U3_retl_o2_plus_4)
2672 + retl
2673 + add %o2, 4, %o0
2674 +ENDPROC(U3_retl_o2_plus_4)
2675 +ENTRY(U3_retl_o2_plus_8)
2676 + retl
2677 + add %o2, 8, %o0
2678 +ENDPROC(U3_retl_o2_plus_8)
2679 +ENTRY(U3_retl_o2_plus_g1_plus_1)
2680 + add %g1, 1, %g1
2681 + retl
2682 + add %o2, %g1, %o0
2683 +ENDPROC(U3_retl_o2_plus_g1_plus_1)
2684 +ENTRY(U3_retl_o2_fp)
2685 + ba,pt %xcc, __restore_fp
2686 + mov %o2, %o0
2687 +ENDPROC(U3_retl_o2_fp)
2688 +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
2689 + sll %o3, 6, %o3
2690 + add %o3, 0x80, %o3
2691 + ba,pt %xcc, __restore_fp
2692 + add %o2, %o3, %o0
2693 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
2694 +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
2695 + sll %o3, 6, %o3
2696 + add %o3, 0x40, %o3
2697 + ba,pt %xcc, __restore_fp
2698 + add %o2, %o3, %o0
2699 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
2700 +ENTRY(U3_retl_o2_plus_GS_plus_0x10)
2701 + add GLOBAL_SPARE, 0x10, GLOBAL_SPARE
2702 + retl
2703 + add %o2, GLOBAL_SPARE, %o0
2704 +ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
2705 +ENTRY(U3_retl_o2_plus_GS_plus_0x08)
2706 + add GLOBAL_SPARE, 0x08, GLOBAL_SPARE
2707 + retl
2708 + add %o2, GLOBAL_SPARE, %o0
2709 +ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
2710 +ENTRY(U3_retl_o2_and_7_plus_GS)
2711 + and %o2, 7, %o2
2712 + retl
2713 + add %o2, GLOBAL_SPARE, %o2
2714 +ENDPROC(U3_retl_o2_and_7_plus_GS)
2715 +ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
2716 + add GLOBAL_SPARE, 8, GLOBAL_SPARE
2717 + and %o2, 7, %o2
2718 + retl
2719 + add %o2, GLOBAL_SPARE, %o2
2720 +ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
2721 +#endif
2722 +
2723 .align 64
2724
2725 /* The cheetah's flexible spine, oversized liver, enlarged heart,
2726 @@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2727 and %g2, 0x38, %g2
2728
2729 1: subcc %g1, 0x1, %g1
2730 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
2731 - EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
2732 + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
2733 + EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
2734 bgu,pt %XCC, 1b
2735 add %o1, 0x1, %o1
2736
2737 @@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2738 be,pt %icc, 3f
2739 alignaddr %o1, %g0, %o1
2740
2741 - EX_LD_FP(LOAD(ldd, %o1, %f4))
2742 -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
2743 + EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
2744 +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
2745 add %o1, 0x8, %o1
2746 subcc %g2, 0x8, %g2
2747 faligndata %f4, %f6, %f0
2748 - EX_ST_FP(STORE(std, %f0, %o0))
2749 + EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
2750 be,pn %icc, 3f
2751 add %o0, 0x8, %o0
2752
2753 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
2754 + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
2755 add %o1, 0x8, %o1
2756 subcc %g2, 0x8, %g2
2757 faligndata %f6, %f4, %f2
2758 - EX_ST_FP(STORE(std, %f2, %o0))
2759 + EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
2760 bne,pt %icc, 1b
2761 add %o0, 0x8, %o0
2762
2763 @@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2764 LOAD(prefetch, %o1 + 0x080, #one_read)
2765 LOAD(prefetch, %o1 + 0x0c0, #one_read)
2766 LOAD(prefetch, %o1 + 0x100, #one_read)
2767 - EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
2768 + EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
2769 LOAD(prefetch, %o1 + 0x140, #one_read)
2770 - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2771 + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
2772 LOAD(prefetch, %o1 + 0x180, #one_read)
2773 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2774 + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
2775 LOAD(prefetch, %o1 + 0x1c0, #one_read)
2776 faligndata %f0, %f2, %f16
2777 - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2778 + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
2779 faligndata %f2, %f4, %f18
2780 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2781 + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
2782 faligndata %f4, %f6, %f20
2783 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2784 + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
2785 faligndata %f6, %f8, %f22
2786
2787 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2788 + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
2789 faligndata %f8, %f10, %f24
2790 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2791 + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
2792 faligndata %f10, %f12, %f26
2793 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2794 + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
2795
2796 subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
2797 add %o1, 0x40, %o1
2798 @@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2799
2800 .align 64
2801 1:
2802 - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2803 + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2804 faligndata %f12, %f14, %f28
2805 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2806 + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2807 faligndata %f14, %f0, %f30
2808 - EX_ST_FP(STORE_BLK(%f16, %o0))
2809 - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2810 + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2811 + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2812 faligndata %f0, %f2, %f16
2813 add %o0, 0x40, %o0
2814
2815 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2816 + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2817 faligndata %f2, %f4, %f18
2818 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2819 + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2820 faligndata %f4, %f6, %f20
2821 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2822 + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2823 subcc %o3, 0x01, %o3
2824 faligndata %f6, %f8, %f22
2825 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2826 + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2827
2828 faligndata %f8, %f10, %f24
2829 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2830 + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2831 LOAD(prefetch, %o1 + 0x1c0, #one_read)
2832 faligndata %f10, %f12, %f26
2833 bg,pt %XCC, 1b
2834 @@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2835
2836 /* Finally we copy the last full 64-byte block. */
2837 2:
2838 - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
2839 + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2840 faligndata %f12, %f14, %f28
2841 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
2842 + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2843 faligndata %f14, %f0, %f30
2844 - EX_ST_FP(STORE_BLK(%f16, %o0))
2845 - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
2846 + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
2847 + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2848 faligndata %f0, %f2, %f16
2849 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
2850 + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2851 faligndata %f2, %f4, %f18
2852 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
2853 + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2854 faligndata %f4, %f6, %f20
2855 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
2856 + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2857 faligndata %f6, %f8, %f22
2858 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
2859 + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2860 faligndata %f8, %f10, %f24
2861 cmp %g1, 0
2862 be,pt %XCC, 1f
2863 add %o0, 0x40, %o0
2864 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2865 + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2866 1: faligndata %f10, %f12, %f26
2867 faligndata %f12, %f14, %f28
2868 faligndata %f14, %f0, %f30
2869 - EX_ST_FP(STORE_BLK(%f16, %o0))
2870 + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2871 add %o0, 0x40, %o0
2872 add %o1, 0x40, %o1
2873 membar #Sync
2874 @@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2875
2876 sub %o2, %g2, %o2
2877 be,a,pt %XCC, 1f
2878 - EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
2879 + EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
2880
2881 -1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
2882 +1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
2883 add %o1, 0x8, %o1
2884 subcc %g2, 0x8, %g2
2885 faligndata %f0, %f2, %f8
2886 - EX_ST_FP(STORE(std, %f8, %o0))
2887 + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
2888 be,pn %XCC, 2f
2889 add %o0, 0x8, %o0
2890 - EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
2891 + EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
2892 add %o1, 0x8, %o1
2893 subcc %g2, 0x8, %g2
2894 faligndata %f2, %f0, %f8
2895 - EX_ST_FP(STORE(std, %f8, %o0))
2896 + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
2897 bne,pn %XCC, 1b
2898 add %o0, 0x8, %o0
2899
2900 @@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2901 andcc %o2, 0x8, %g0
2902 be,pt %icc, 1f
2903 nop
2904 - EX_LD(LOAD(ldx, %o1, %o5))
2905 - EX_ST(STORE(stx, %o5, %o1 + %o3))
2906 + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
2907 + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
2908 add %o1, 0x8, %o1
2909 + sub %o2, 8, %o2
2910
2911 1: andcc %o2, 0x4, %g0
2912 be,pt %icc, 1f
2913 nop
2914 - EX_LD(LOAD(lduw, %o1, %o5))
2915 - EX_ST(STORE(stw, %o5, %o1 + %o3))
2916 + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
2917 + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
2918 add %o1, 0x4, %o1
2919 + sub %o2, 4, %o2
2920
2921 1: andcc %o2, 0x2, %g0
2922 be,pt %icc, 1f
2923 nop
2924 - EX_LD(LOAD(lduh, %o1, %o5))
2925 - EX_ST(STORE(sth, %o5, %o1 + %o3))
2926 + EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
2927 + EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
2928 add %o1, 0x2, %o1
2929 + sub %o2, 2, %o2
2930
2931 1: andcc %o2, 0x1, %g0
2932 be,pt %icc, 85f
2933 nop
2934 - EX_LD(LOAD(ldub, %o1, %o5))
2935 + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
2936 ba,pt %xcc, 85f
2937 - EX_ST(STORE(stb, %o5, %o1 + %o3))
2938 + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
2939
2940 .align 64
2941 70: /* 16 < len <= 64 */
2942 @@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2943 andn %o2, 0xf, GLOBAL_SPARE
2944 and %o2, 0xf, %o2
2945 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
2946 - EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
2947 - EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
2948 - EX_ST(STORE(stx, %o5, %o1 + %o3))
2949 + EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
2950 + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
2951 + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
2952 add %o1, 0x8, %o1
2953 - EX_ST(STORE(stx, %g1, %o1 + %o3))
2954 + EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
2955 bgu,pt %XCC, 1b
2956 add %o1, 0x8, %o1
2957 73: andcc %o2, 0x8, %g0
2958 be,pt %XCC, 1f
2959 nop
2960 sub %o2, 0x8, %o2
2961 - EX_LD(LOAD(ldx, %o1, %o5))
2962 - EX_ST(STORE(stx, %o5, %o1 + %o3))
2963 + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
2964 + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
2965 add %o1, 0x8, %o1
2966 1: andcc %o2, 0x4, %g0
2967 be,pt %XCC, 1f
2968 nop
2969 sub %o2, 0x4, %o2
2970 - EX_LD(LOAD(lduw, %o1, %o5))
2971 - EX_ST(STORE(stw, %o5, %o1 + %o3))
2972 + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
2973 + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
2974 add %o1, 0x4, %o1
2975 1: cmp %o2, 0
2976 be,pt %XCC, 85f
2977 @@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2978 sub %o2, %g1, %o2
2979
2980 1: subcc %g1, 1, %g1
2981 - EX_LD(LOAD(ldub, %o1, %o5))
2982 - EX_ST(STORE(stb, %o5, %o1 + %o3))
2983 + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
2984 + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
2985 bgu,pt %icc, 1b
2986 add %o1, 1, %o1
2987
2988 @@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
2989
2990 8: mov 64, %o3
2991 andn %o1, 0x7, %o1
2992 - EX_LD(LOAD(ldx, %o1, %g2))
2993 + EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
2994 sub %o3, %g1, %o3
2995 andn %o2, 0x7, GLOBAL_SPARE
2996 sllx %g2, %g1, %g2
2997 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
2998 +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
2999 subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE
3000 add %o1, 0x8, %o1
3001 srlx %g3, %o3, %o5
3002 or %o5, %g2, %o5
3003 - EX_ST(STORE(stx, %o5, %o0))
3004 + EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
3005 add %o0, 0x8, %o0
3006 bgu,pt %icc, 1b
3007 sllx %g3, %g1, %g2
3008 @@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
3009
3010 1:
3011 subcc %o2, 4, %o2
3012 - EX_LD(LOAD(lduw, %o1, %g1))
3013 - EX_ST(STORE(stw, %g1, %o1 + %o3))
3014 + EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
3015 + EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
3016 bgu,pt %XCC, 1b
3017 add %o1, 4, %o1
3018
3019 @@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
3020 .align 32
3021 90:
3022 subcc %o2, 1, %o2
3023 - EX_LD(LOAD(ldub, %o1, %g1))
3024 - EX_ST(STORE(stb, %g1, %o1 + %o3))
3025 + EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
3026 + EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
3027 bgu,pt %XCC, 90b
3028 add %o1, 1, %o1
3029 retl
3030 diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S
3031 index 302c0e60dc2c..4c89b486fa0d 100644
3032 --- a/arch/sparc/lib/copy_in_user.S
3033 +++ b/arch/sparc/lib/copy_in_user.S
3034 @@ -8,18 +8,33 @@
3035
3036 #define XCC xcc
3037
3038 -#define EX(x,y) \
3039 +#define EX(x,y,z) \
3040 98: x,y; \
3041 .section __ex_table,"a";\
3042 .align 4; \
3043 - .word 98b, __retl_one; \
3044 + .word 98b, z; \
3045 .text; \
3046 .align 4;
3047
3048 +#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
3049 +#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
3050 +#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
3051 +
3052 .register %g2,#scratch
3053 .register %g3,#scratch
3054
3055 .text
3056 +__retl_o4_plus_8:
3057 + add %o4, %o2, %o4
3058 + retl
3059 + add %o4, 8, %o0
3060 +__retl_o2_plus_4:
3061 + retl
3062 + add %o2, 4, %o0
3063 +__retl_o2_plus_1:
3064 + retl
3065 + add %o2, 1, %o0
3066 +
3067 .align 32
3068
3069 /* Don't try to get too fancy here, just nice and
3070 @@ -44,8 +59,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
3071 andn %o2, 0x7, %o4
3072 and %o2, 0x7, %o2
3073 1: subcc %o4, 0x8, %o4
3074 - EX(ldxa [%o1] %asi, %o5)
3075 - EX(stxa %o5, [%o0] %asi)
3076 + EX_O4(ldxa [%o1] %asi, %o5)
3077 + EX_O4(stxa %o5, [%o0] %asi)
3078 add %o1, 0x8, %o1
3079 bgu,pt %XCC, 1b
3080 add %o0, 0x8, %o0
3081 @@ -53,8 +68,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
3082 be,pt %XCC, 1f
3083 nop
3084 sub %o2, 0x4, %o2
3085 - EX(lduwa [%o1] %asi, %o5)
3086 - EX(stwa %o5, [%o0] %asi)
3087 + EX_O2_4(lduwa [%o1] %asi, %o5)
3088 + EX_O2_4(stwa %o5, [%o0] %asi)
3089 add %o1, 0x4, %o1
3090 add %o0, 0x4, %o0
3091 1: cmp %o2, 0
3092 @@ -70,8 +85,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
3093
3094 82:
3095 subcc %o2, 4, %o2
3096 - EX(lduwa [%o1] %asi, %g1)
3097 - EX(stwa %g1, [%o0] %asi)
3098 + EX_O2_4(lduwa [%o1] %asi, %g1)
3099 + EX_O2_4(stwa %g1, [%o0] %asi)
3100 add %o1, 4, %o1
3101 bgu,pt %XCC, 82b
3102 add %o0, 4, %o0
3103 @@ -82,8 +97,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
3104 .align 32
3105 90:
3106 subcc %o2, 1, %o2
3107 - EX(lduba [%o1] %asi, %g1)
3108 - EX(stba %g1, [%o0] %asi)
3109 + EX_O2_1(lduba [%o1] %asi, %g1)
3110 + EX_O2_1(stba %g1, [%o0] %asi)
3111 add %o1, 1, %o1
3112 bgu,pt %XCC, 90b
3113 add %o0, 1, %o0
3114 diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c
3115 deleted file mode 100644
3116 index ac96ae236709..000000000000
3117 --- a/arch/sparc/lib/user_fixup.c
3118 +++ /dev/null
3119 @@ -1,71 +0,0 @@
3120 -/* user_fixup.c: Fix up user copy faults.
3121 - *
3122 - * Copyright (C) 2004 David S. Miller <davem@redhat.com>
3123 - */
3124 -
3125 -#include <linux/compiler.h>
3126 -#include <linux/kernel.h>
3127 -#include <linux/string.h>
3128 -#include <linux/errno.h>
3129 -#include <linux/module.h>
3130 -
3131 -#include <asm/uaccess.h>
3132 -
3133 -/* Calculating the exact fault address when using
3134 - * block loads and stores can be very complicated.
3135 - *
3136 - * Instead of trying to be clever and handling all
3137 - * of the cases, just fix things up simply here.
3138 - */
3139 -
3140 -static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset)
3141 -{
3142 - unsigned long fault_addr = current_thread_info()->fault_address;
3143 - unsigned long end = start + size;
3144 -
3145 - if (fault_addr < start || fault_addr >= end) {
3146 - *offset = 0;
3147 - } else {
3148 - *offset = fault_addr - start;
3149 - size = end - fault_addr;
3150 - }
3151 - return size;
3152 -}
3153 -
3154 -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size)
3155 -{
3156 - unsigned long offset;
3157 -
3158 - size = compute_size((unsigned long) from, size, &offset);
3159 - if (likely(size))
3160 - memset(to + offset, 0, size);
3161 -
3162 - return size;
3163 -}
3164 -EXPORT_SYMBOL(copy_from_user_fixup);
3165 -
3166 -unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size)
3167 -{
3168 - unsigned long offset;
3169 -
3170 - return compute_size((unsigned long) to, size, &offset);
3171 -}
3172 -EXPORT_SYMBOL(copy_to_user_fixup);
3173 -
3174 -unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size)
3175 -{
3176 - unsigned long fault_addr = current_thread_info()->fault_address;
3177 - unsigned long start = (unsigned long) to;
3178 - unsigned long end = start + size;
3179 -
3180 - if (fault_addr >= start && fault_addr < end)
3181 - return end - fault_addr;
3182 -
3183 - start = (unsigned long) from;
3184 - end = start + size;
3185 - if (fault_addr >= start && fault_addr < end)
3186 - return end - fault_addr;
3187 -
3188 - return size;
3189 -}
3190 -EXPORT_SYMBOL(copy_in_user_fixup);
3191 diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
3192 index dbabe5713a15..e15f33715103 100644
3193 --- a/arch/sparc/mm/fault_64.c
3194 +++ b/arch/sparc/mm/fault_64.c
3195 @@ -479,14 +479,14 @@ good_area:
3196 up_read(&mm->mmap_sem);
3197
3198 mm_rss = get_mm_rss(mm);
3199 -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3200 - mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
3201 +#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
3202 + mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE));
3203 #endif
3204 if (unlikely(mm_rss >
3205 mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
3206 tsb_grow(mm, MM_TSB_BASE, mm_rss);
3207 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3208 - mm_rss = mm->context.huge_pte_count;
3209 + mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
3210 if (unlikely(mm_rss >
3211 mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
3212 if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
3213 diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
3214 index 364d093f46c6..da1142401bf4 100644
3215 --- a/arch/sparc/mm/hugetlbpage.c
3216 +++ b/arch/sparc/mm/hugetlbpage.c
3217 @@ -180,7 +180,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
3218 unsigned long nptes;
3219
3220 if (!pte_present(*ptep) && pte_present(entry))
3221 - mm->context.huge_pte_count++;
3222 + mm->context.hugetlb_pte_count++;
3223
3224 addr &= HPAGE_MASK;
3225
3226 @@ -212,7 +212,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
3227
3228 entry = *ptep;
3229 if (pte_present(entry))
3230 - mm->context.huge_pte_count--;
3231 + mm->context.hugetlb_pte_count--;
3232
3233 addr &= HPAGE_MASK;
3234 nptes = 1 << HUGETLB_PAGE_ORDER;
3235 diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
3236 index 3c4b8975fa76..a5331c336b2a 100644
3237 --- a/arch/sparc/mm/init_64.c
3238 +++ b/arch/sparc/mm/init_64.c
3239 @@ -346,7 +346,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
3240 spin_lock_irqsave(&mm->context.lock, flags);
3241
3242 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3243 - if (mm->context.huge_pte_count && is_hugetlb_pte(pte))
3244 + if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
3245 + is_hugetlb_pte(pte))
3246 __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
3247 address, pte_val(pte));
3248 else
3249 diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
3250 index f81cd9736700..3659d37b4d81 100644
3251 --- a/arch/sparc/mm/tlb.c
3252 +++ b/arch/sparc/mm/tlb.c
3253 @@ -175,9 +175,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
3254
3255 if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
3256 if (pmd_val(pmd) & _PAGE_PMD_HUGE)
3257 - mm->context.huge_pte_count++;
3258 + mm->context.thp_pte_count++;
3259 else
3260 - mm->context.huge_pte_count--;
3261 + mm->context.thp_pte_count--;
3262
3263 /* Do not try to allocate the TSB hash table if we
3264 * don't have one already. We have various locks held
3265 diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
3266 index a0604a493a36..9cdeca0fa955 100644
3267 --- a/arch/sparc/mm/tsb.c
3268 +++ b/arch/sparc/mm/tsb.c
3269 @@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr)
3270 return (tag == (vaddr >> 22));
3271 }
3272
3273 +static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
3274 +{
3275 + unsigned long idx;
3276 +
3277 + for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
3278 + struct tsb *ent = &swapper_tsb[idx];
3279 + unsigned long match = idx << 13;
3280 +
3281 + match |= (ent->tag << 22);
3282 + if (match >= start && match < end)
3283 + ent->tag = (1UL << TSB_TAG_INVALID_BIT);
3284 + }
3285 +}
3286 +
3287 /* TSB flushes need only occur on the processor initiating the address
3288 * space modification, not on each cpu the address space has run on.
3289 * Only the TLB flush needs that treatment.
3290 @@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
3291 {
3292 unsigned long v;
3293
3294 + if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
3295 + return flush_tsb_kernel_range_scan(start, end);
3296 +
3297 for (v = start; v < end; v += PAGE_SIZE) {
3298 unsigned long hash = tsb_hash(v, PAGE_SHIFT,
3299 KERNEL_TSB_NENTRIES);
3300 @@ -470,7 +487,7 @@ retry_tsb_alloc:
3301 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
3302 {
3303 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3304 - unsigned long huge_pte_count;
3305 + unsigned long total_huge_pte_count;
3306 #endif
3307 unsigned int i;
3308
3309 @@ -479,12 +496,14 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
3310 mm->context.sparc64_ctx_val = 0UL;
3311
3312 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3313 - /* We reset it to zero because the fork() page copying
3314 + /* We reset them to zero because the fork() page copying
3315 * will re-increment the counters as the parent PTEs are
3316 * copied into the child address space.
3317 */
3318 - huge_pte_count = mm->context.huge_pte_count;
3319 - mm->context.huge_pte_count = 0;
3320 + total_huge_pte_count = mm->context.hugetlb_pte_count +
3321 + mm->context.thp_pte_count;
3322 + mm->context.hugetlb_pte_count = 0;
3323 + mm->context.thp_pte_count = 0;
3324 #endif
3325
3326 /* copy_mm() copies over the parent's mm_struct before calling
3327 @@ -500,8 +519,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
3328 tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
3329
3330 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
3331 - if (unlikely(huge_pte_count))
3332 - tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
3333 + if (unlikely(total_huge_pte_count))
3334 + tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count);
3335 #endif
3336
3337 if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
3338 diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
3339 index b4f4733abc6e..5d2fd6cd3189 100644
3340 --- a/arch/sparc/mm/ultra.S
3341 +++ b/arch/sparc/mm/ultra.S
3342 @@ -30,7 +30,7 @@
3343 .text
3344 .align 32
3345 .globl __flush_tlb_mm
3346 -__flush_tlb_mm: /* 18 insns */
3347 +__flush_tlb_mm: /* 19 insns */
3348 /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
3349 ldxa [%o1] ASI_DMMU, %g2
3350 cmp %g2, %o0
3351 @@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */
3352
3353 .align 32
3354 .globl __flush_tlb_pending
3355 -__flush_tlb_pending: /* 26 insns */
3356 +__flush_tlb_pending: /* 27 insns */
3357 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
3358 rdpr %pstate, %g7
3359 sllx %o1, 3, %o1
3360 @@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */
3361
3362 .align 32
3363 .globl __flush_tlb_kernel_range
3364 -__flush_tlb_kernel_range: /* 16 insns */
3365 +__flush_tlb_kernel_range: /* 31 insns */
3366 /* %o0=start, %o1=end */
3367 cmp %o0, %o1
3368 be,pn %xcc, 2f
3369 + sub %o1, %o0, %o3
3370 + srlx %o3, 18, %o4
3371 + brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow
3372 sethi %hi(PAGE_SIZE), %o4
3373 - sub %o1, %o0, %o3
3374 sub %o3, %o4, %o3
3375 or %o0, 0x20, %o0 ! Nucleus
3376 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
3377 @@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */
3378 retl
3379 nop
3380 nop
3381 + nop
3382 + nop
3383 + nop
3384 + nop
3385 + nop
3386 + nop
3387 + nop
3388 + nop
3389 + nop
3390 + nop
3391 + nop
3392 + nop
3393 + nop
3394 +
3395 +__spitfire_flush_tlb_kernel_range_slow:
3396 + mov 63 * 8, %o4
3397 +1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3
3398 + andcc %o3, 0x40, %g0 /* _PAGE_L_4U */
3399 + bne,pn %xcc, 2f
3400 + mov TLB_TAG_ACCESS, %o3
3401 + stxa %g0, [%o3] ASI_IMMU
3402 + stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS
3403 + membar #Sync
3404 +2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3
3405 + andcc %o3, 0x40, %g0
3406 + bne,pn %xcc, 2f
3407 + mov TLB_TAG_ACCESS, %o3
3408 + stxa %g0, [%o3] ASI_DMMU
3409 + stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS
3410 + membar #Sync
3411 +2: sub %o4, 8, %o4
3412 + brgez,pt %o4, 1b
3413 + nop
3414 + retl
3415 + nop
3416
3417 __spitfire_flush_tlb_mm_slow:
3418 rdpr %pstate, %g1
3419 @@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */
3420 retl
3421 wrpr %g7, 0x0, %pstate
3422
3423 +__cheetah_flush_tlb_kernel_range: /* 31 insns */
3424 + /* %o0=start, %o1=end */
3425 + cmp %o0, %o1
3426 + be,pn %xcc, 2f
3427 + sub %o1, %o0, %o3
3428 + srlx %o3, 18, %o4
3429 + brnz,pn %o4, 3f
3430 + sethi %hi(PAGE_SIZE), %o4
3431 + sub %o3, %o4, %o3
3432 + or %o0, 0x20, %o0 ! Nucleus
3433 +1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
3434 + stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP
3435 + membar #Sync
3436 + brnz,pt %o3, 1b
3437 + sub %o3, %o4, %o3
3438 +2: sethi %hi(KERNBASE), %o3
3439 + flush %o3
3440 + retl
3441 + nop
3442 +3: mov 0x80, %o4
3443 + stxa %g0, [%o4] ASI_DMMU_DEMAP
3444 + membar #Sync
3445 + stxa %g0, [%o4] ASI_IMMU_DEMAP
3446 + membar #Sync
3447 + retl
3448 + nop
3449 + nop
3450 + nop
3451 + nop
3452 + nop
3453 + nop
3454 + nop
3455 + nop
3456 +
3457 #ifdef DCACHE_ALIASING_POSSIBLE
3458 __cheetah_flush_dcache_page: /* 11 insns */
3459 sethi %hi(PAGE_OFFSET), %g1
3460 @@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error:
3461 ret
3462 restore
3463
3464 -__hypervisor_flush_tlb_mm: /* 10 insns */
3465 +__hypervisor_flush_tlb_mm: /* 19 insns */
3466 mov %o0, %o2 /* ARG2: mmu context */
3467 mov 0, %o0 /* ARG0: CPU lists unimplemented */
3468 mov 0, %o1 /* ARG1: CPU lists unimplemented */
3469 mov HV_MMU_ALL, %o3 /* ARG3: flags */
3470 mov HV_FAST_MMU_DEMAP_CTX, %o5
3471 ta HV_FAST_TRAP
3472 - brnz,pn %o0, __hypervisor_tlb_tl0_error
3473 + brnz,pn %o0, 1f
3474 mov HV_FAST_MMU_DEMAP_CTX, %o1
3475 retl
3476 nop
3477 +1: sethi %hi(__hypervisor_tlb_tl0_error), %o5
3478 + jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0
3479 + nop
3480 + nop
3481 + nop
3482 + nop
3483 + nop
3484 + nop
3485 + nop
3486
3487 -__hypervisor_flush_tlb_page: /* 11 insns */
3488 +__hypervisor_flush_tlb_page: /* 22 insns */
3489 /* %o0 = context, %o1 = vaddr */
3490 mov %o0, %g2
3491 mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */
3492 @@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */
3493 srlx %o0, PAGE_SHIFT, %o0
3494 sllx %o0, PAGE_SHIFT, %o0
3495 ta HV_MMU_UNMAP_ADDR_TRAP
3496 - brnz,pn %o0, __hypervisor_tlb_tl0_error
3497 + brnz,pn %o0, 1f
3498 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3499 retl
3500 nop
3501 +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3502 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3503 + nop
3504 + nop
3505 + nop
3506 + nop
3507 + nop
3508 + nop
3509 + nop
3510 + nop
3511 + nop
3512
3513 -__hypervisor_flush_tlb_pending: /* 16 insns */
3514 +__hypervisor_flush_tlb_pending: /* 27 insns */
3515 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
3516 sllx %o1, 3, %g1
3517 mov %o2, %g2
3518 @@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */
3519 srlx %o0, PAGE_SHIFT, %o0
3520 sllx %o0, PAGE_SHIFT, %o0
3521 ta HV_MMU_UNMAP_ADDR_TRAP
3522 - brnz,pn %o0, __hypervisor_tlb_tl0_error
3523 + brnz,pn %o0, 1f
3524 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3525 brnz,pt %g1, 1b
3526 nop
3527 retl
3528 nop
3529 +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3530 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3531 + nop
3532 + nop
3533 + nop
3534 + nop
3535 + nop
3536 + nop
3537 + nop
3538 + nop
3539 + nop
3540
3541 -__hypervisor_flush_tlb_kernel_range: /* 16 insns */
3542 +__hypervisor_flush_tlb_kernel_range: /* 31 insns */
3543 /* %o0=start, %o1=end */
3544 cmp %o0, %o1
3545 be,pn %xcc, 2f
3546 - sethi %hi(PAGE_SIZE), %g3
3547 - mov %o0, %g1
3548 - sub %o1, %g1, %g2
3549 + sub %o1, %o0, %g2
3550 + srlx %g2, 18, %g3
3551 + brnz,pn %g3, 4f
3552 + mov %o0, %g1
3553 + sethi %hi(PAGE_SIZE), %g3
3554 sub %g2, %g3, %g2
3555 1: add %g1, %g2, %o0 /* ARG0: virtual address */
3556 mov 0, %o1 /* ARG1: mmu context */
3557 mov HV_MMU_ALL, %o2 /* ARG2: flags */
3558 ta HV_MMU_UNMAP_ADDR_TRAP
3559 - brnz,pn %o0, __hypervisor_tlb_tl0_error
3560 + brnz,pn %o0, 3f
3561 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
3562 brnz,pt %g2, 1b
3563 sub %g2, %g3, %g2
3564 2: retl
3565 nop
3566 +3: sethi %hi(__hypervisor_tlb_tl0_error), %o2
3567 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
3568 + nop
3569 +4: mov 0, %o0 /* ARG0: CPU lists unimplemented */
3570 + mov 0, %o1 /* ARG1: CPU lists unimplemented */
3571 + mov 0, %o2 /* ARG2: mmu context == nucleus */
3572 + mov HV_MMU_ALL, %o3 /* ARG3: flags */
3573 + mov HV_FAST_MMU_DEMAP_CTX, %o5
3574 + ta HV_FAST_TRAP
3575 + brnz,pn %o0, 3b
3576 + mov HV_FAST_MMU_DEMAP_CTX, %o1
3577 + retl
3578 + nop
3579
3580 #ifdef DCACHE_ALIASING_POSSIBLE
3581 /* XXX Niagara and friends have an 8K cache, so no aliasing is
3582 @@ -394,43 +511,6 @@ tlb_patch_one:
3583 retl
3584 nop
3585
3586 - .globl cheetah_patch_cachetlbops
3587 -cheetah_patch_cachetlbops:
3588 - save %sp, -128, %sp
3589 -
3590 - sethi %hi(__flush_tlb_mm), %o0
3591 - or %o0, %lo(__flush_tlb_mm), %o0
3592 - sethi %hi(__cheetah_flush_tlb_mm), %o1
3593 - or %o1, %lo(__cheetah_flush_tlb_mm), %o1
3594 - call tlb_patch_one
3595 - mov 19, %o2
3596 -
3597 - sethi %hi(__flush_tlb_page), %o0
3598 - or %o0, %lo(__flush_tlb_page), %o0
3599 - sethi %hi(__cheetah_flush_tlb_page), %o1
3600 - or %o1, %lo(__cheetah_flush_tlb_page), %o1
3601 - call tlb_patch_one
3602 - mov 22, %o2
3603 -
3604 - sethi %hi(__flush_tlb_pending), %o0
3605 - or %o0, %lo(__flush_tlb_pending), %o0
3606 - sethi %hi(__cheetah_flush_tlb_pending), %o1
3607 - or %o1, %lo(__cheetah_flush_tlb_pending), %o1
3608 - call tlb_patch_one
3609 - mov 27, %o2
3610 -
3611 -#ifdef DCACHE_ALIASING_POSSIBLE
3612 - sethi %hi(__flush_dcache_page), %o0
3613 - or %o0, %lo(__flush_dcache_page), %o0
3614 - sethi %hi(__cheetah_flush_dcache_page), %o1
3615 - or %o1, %lo(__cheetah_flush_dcache_page), %o1
3616 - call tlb_patch_one
3617 - mov 11, %o2
3618 -#endif /* DCACHE_ALIASING_POSSIBLE */
3619 -
3620 - ret
3621 - restore
3622 -
3623 #ifdef CONFIG_SMP
3624 /* These are all called by the slaves of a cross call, at
3625 * trap level 1, with interrupts fully disabled.
3626 @@ -447,7 +527,7 @@ cheetah_patch_cachetlbops:
3627 */
3628 .align 32
3629 .globl xcall_flush_tlb_mm
3630 -xcall_flush_tlb_mm: /* 21 insns */
3631 +xcall_flush_tlb_mm: /* 24 insns */
3632 mov PRIMARY_CONTEXT, %g2
3633 ldxa [%g2] ASI_DMMU, %g3
3634 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
3635 @@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */
3636 nop
3637 nop
3638 nop
3639 + nop
3640 + nop
3641 + nop
3642
3643 .globl xcall_flush_tlb_page
3644 -xcall_flush_tlb_page: /* 17 insns */
3645 +xcall_flush_tlb_page: /* 20 insns */
3646 /* %g5=context, %g1=vaddr */
3647 mov PRIMARY_CONTEXT, %g4
3648 ldxa [%g4] ASI_DMMU, %g2
3649 @@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */
3650 retry
3651 nop
3652 nop
3653 + nop
3654 + nop
3655 + nop
3656
3657 .globl xcall_flush_tlb_kernel_range
3658 -xcall_flush_tlb_kernel_range: /* 25 insns */
3659 +xcall_flush_tlb_kernel_range: /* 44 insns */
3660 sethi %hi(PAGE_SIZE - 1), %g2
3661 or %g2, %lo(PAGE_SIZE - 1), %g2
3662 andn %g1, %g2, %g1
3663 andn %g7, %g2, %g7
3664 sub %g7, %g1, %g3
3665 - add %g2, 1, %g2
3666 + srlx %g3, 18, %g2
3667 + brnz,pn %g2, 2f
3668 + add %g2, 1, %g2
3669 sub %g3, %g2, %g3
3670 or %g1, 0x20, %g1 ! Nucleus
3671 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
3672 @@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */
3673 brnz,pt %g3, 1b
3674 sub %g3, %g2, %g3
3675 retry
3676 - nop
3677 - nop
3678 +2: mov 63 * 8, %g1
3679 +1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2
3680 + andcc %g2, 0x40, %g0 /* _PAGE_L_4U */
3681 + bne,pn %xcc, 2f
3682 + mov TLB_TAG_ACCESS, %g2
3683 + stxa %g0, [%g2] ASI_IMMU
3684 + stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS
3685 + membar #Sync
3686 +2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2
3687 + andcc %g2, 0x40, %g0
3688 + bne,pn %xcc, 2f
3689 + mov TLB_TAG_ACCESS, %g2
3690 + stxa %g0, [%g2] ASI_DMMU
3691 + stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS
3692 + membar #Sync
3693 +2: sub %g1, 8, %g1
3694 + brgez,pt %g1, 1b
3695 + nop
3696 + retry
3697 nop
3698 nop
3699 nop
3700 @@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4:
3701
3702 retry
3703
3704 +__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */
3705 + sethi %hi(PAGE_SIZE - 1), %g2
3706 + or %g2, %lo(PAGE_SIZE - 1), %g2
3707 + andn %g1, %g2, %g1
3708 + andn %g7, %g2, %g7
3709 + sub %g7, %g1, %g3
3710 + srlx %g3, 18, %g2
3711 + brnz,pn %g2, 2f
3712 + add %g2, 1, %g2
3713 + sub %g3, %g2, %g3
3714 + or %g1, 0x20, %g1 ! Nucleus
3715 +1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
3716 + stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
3717 + membar #Sync
3718 + brnz,pt %g3, 1b
3719 + sub %g3, %g2, %g3
3720 + retry
3721 +2: mov 0x80, %g2
3722 + stxa %g0, [%g2] ASI_DMMU_DEMAP
3723 + membar #Sync
3724 + stxa %g0, [%g2] ASI_IMMU_DEMAP
3725 + membar #Sync
3726 + retry
3727 + nop
3728 + nop
3729 + nop
3730 + nop
3731 + nop
3732 + nop
3733 + nop
3734 + nop
3735 + nop
3736 + nop
3737 + nop
3738 + nop
3739 + nop
3740 + nop
3741 + nop
3742 + nop
3743 + nop
3744 + nop
3745 + nop
3746 + nop
3747 + nop
3748 + nop
3749 +
3750 #ifdef DCACHE_ALIASING_POSSIBLE
3751 .align 32
3752 .globl xcall_flush_dcache_page_cheetah
3753 @@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error:
3754 ba,a,pt %xcc, rtrap
3755
3756 .globl __hypervisor_xcall_flush_tlb_mm
3757 -__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3758 +__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
3759 /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
3760 mov %o0, %g2
3761 mov %o1, %g3
3762 @@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3763 mov HV_FAST_MMU_DEMAP_CTX, %o5
3764 ta HV_FAST_TRAP
3765 mov HV_FAST_MMU_DEMAP_CTX, %g6
3766 - brnz,pn %o0, __hypervisor_tlb_xcall_error
3767 + brnz,pn %o0, 1f
3768 mov %o0, %g5
3769 mov %g2, %o0
3770 mov %g3, %o1
3771 @@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
3772 mov %g7, %o5
3773 membar #Sync
3774 retry
3775 +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3776 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3777 + nop
3778
3779 .globl __hypervisor_xcall_flush_tlb_page
3780 -__hypervisor_xcall_flush_tlb_page: /* 17 insns */
3781 +__hypervisor_xcall_flush_tlb_page: /* 20 insns */
3782 /* %g5=ctx, %g1=vaddr */
3783 mov %o0, %g2
3784 mov %o1, %g3
3785 @@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */
3786 sllx %o0, PAGE_SHIFT, %o0
3787 ta HV_MMU_UNMAP_ADDR_TRAP
3788 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
3789 - brnz,a,pn %o0, __hypervisor_tlb_xcall_error
3790 + brnz,a,pn %o0, 1f
3791 mov %o0, %g5
3792 mov %g2, %o0
3793 mov %g3, %o1
3794 mov %g4, %o2
3795 membar #Sync
3796 retry
3797 +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3798 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3799 + nop
3800
3801 .globl __hypervisor_xcall_flush_tlb_kernel_range
3802 -__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
3803 +__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
3804 /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
3805 sethi %hi(PAGE_SIZE - 1), %g2
3806 or %g2, %lo(PAGE_SIZE - 1), %g2
3807 andn %g1, %g2, %g1
3808 andn %g7, %g2, %g7
3809 sub %g7, %g1, %g3
3810 + srlx %g3, 18, %g7
3811 add %g2, 1, %g2
3812 sub %g3, %g2, %g3
3813 mov %o0, %g2
3814 mov %o1, %g4
3815 - mov %o2, %g7
3816 + brnz,pn %g7, 2f
3817 + mov %o2, %g7
3818 1: add %g1, %g3, %o0 /* ARG0: virtual address */
3819 mov 0, %o1 /* ARG1: mmu context */
3820 mov HV_MMU_ALL, %o2 /* ARG2: flags */
3821 ta HV_MMU_UNMAP_ADDR_TRAP
3822 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
3823 - brnz,pn %o0, __hypervisor_tlb_xcall_error
3824 + brnz,pn %o0, 1f
3825 mov %o0, %g5
3826 sethi %hi(PAGE_SIZE), %o2
3827 brnz,pt %g3, 1b
3828 sub %g3, %o2, %g3
3829 - mov %g2, %o0
3830 +5: mov %g2, %o0
3831 mov %g4, %o1
3832 mov %g7, %o2
3833 membar #Sync
3834 retry
3835 +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
3836 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
3837 + nop
3838 +2: mov %o3, %g1
3839 + mov %o5, %g3
3840 + mov 0, %o0 /* ARG0: CPU lists unimplemented */
3841 + mov 0, %o1 /* ARG1: CPU lists unimplemented */
3842 + mov 0, %o2 /* ARG2: mmu context == nucleus */
3843 + mov HV_MMU_ALL, %o3 /* ARG3: flags */
3844 + mov HV_FAST_MMU_DEMAP_CTX, %o5
3845 + ta HV_FAST_TRAP
3846 + mov %g1, %o3
3847 + brz,pt %o0, 5b
3848 + mov %g3, %o5
3849 + mov HV_FAST_MMU_DEMAP_CTX, %g6
3850 + ba,pt %xcc, 1b
3851 + clr %g5
3852
3853 /* These just get rescheduled to PIL vectors. */
3854 .globl xcall_call_function
3855 @@ -809,6 +985,58 @@ xcall_kgdb_capture:
3856
3857 #endif /* CONFIG_SMP */
3858
3859 + .globl cheetah_patch_cachetlbops
3860 +cheetah_patch_cachetlbops:
3861 + save %sp, -128, %sp
3862 +
3863 + sethi %hi(__flush_tlb_mm), %o0
3864 + or %o0, %lo(__flush_tlb_mm), %o0
3865 + sethi %hi(__cheetah_flush_tlb_mm), %o1
3866 + or %o1, %lo(__cheetah_flush_tlb_mm), %o1
3867 + call tlb_patch_one
3868 + mov 19, %o2
3869 +
3870 + sethi %hi(__flush_tlb_page), %o0
3871 + or %o0, %lo(__flush_tlb_page), %o0
3872 + sethi %hi(__cheetah_flush_tlb_page), %o1
3873 + or %o1, %lo(__cheetah_flush_tlb_page), %o1
3874 + call tlb_patch_one
3875 + mov 22, %o2
3876 +
3877 + sethi %hi(__flush_tlb_pending), %o0
3878 + or %o0, %lo(__flush_tlb_pending), %o0
3879 + sethi %hi(__cheetah_flush_tlb_pending), %o1
3880 + or %o1, %lo(__cheetah_flush_tlb_pending), %o1
3881 + call tlb_patch_one
3882 + mov 27, %o2
3883 +
3884 + sethi %hi(__flush_tlb_kernel_range), %o0
3885 + or %o0, %lo(__flush_tlb_kernel_range), %o0
3886 + sethi %hi(__cheetah_flush_tlb_kernel_range), %o1
3887 + or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
3888 + call tlb_patch_one
3889 + mov 31, %o2
3890 +
3891 +#ifdef DCACHE_ALIASING_POSSIBLE
3892 + sethi %hi(__flush_dcache_page), %o0
3893 + or %o0, %lo(__flush_dcache_page), %o0
3894 + sethi %hi(__cheetah_flush_dcache_page), %o1
3895 + or %o1, %lo(__cheetah_flush_dcache_page), %o1
3896 + call tlb_patch_one
3897 + mov 11, %o2
3898 +#endif /* DCACHE_ALIASING_POSSIBLE */
3899 +
3900 +#ifdef CONFIG_SMP
3901 + sethi %hi(xcall_flush_tlb_kernel_range), %o0
3902 + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
3903 + sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
3904 + or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
3905 + call tlb_patch_one
3906 + mov 44, %o2
3907 +#endif /* CONFIG_SMP */
3908 +
3909 + ret
3910 + restore
3911
3912 .globl hypervisor_patch_cachetlbops
3913 hypervisor_patch_cachetlbops:
3914 @@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops:
3915 sethi %hi(__hypervisor_flush_tlb_mm), %o1
3916 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
3917 call tlb_patch_one
3918 - mov 10, %o2
3919 + mov 19, %o2
3920
3921 sethi %hi(__flush_tlb_page), %o0
3922 or %o0, %lo(__flush_tlb_page), %o0
3923 sethi %hi(__hypervisor_flush_tlb_page), %o1
3924 or %o1, %lo(__hypervisor_flush_tlb_page), %o1
3925 call tlb_patch_one
3926 - mov 11, %o2
3927 + mov 22, %o2
3928
3929 sethi %hi(__flush_tlb_pending), %o0
3930 or %o0, %lo(__flush_tlb_pending), %o0
3931 sethi %hi(__hypervisor_flush_tlb_pending), %o1
3932 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
3933 call tlb_patch_one
3934 - mov 16, %o2
3935 + mov 27, %o2
3936
3937 sethi %hi(__flush_tlb_kernel_range), %o0
3938 or %o0, %lo(__flush_tlb_kernel_range), %o0
3939 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
3940 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
3941 call tlb_patch_one
3942 - mov 16, %o2
3943 + mov 31, %o2
3944
3945 #ifdef DCACHE_ALIASING_POSSIBLE
3946 sethi %hi(__flush_dcache_page), %o0
3947 @@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops:
3948 sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
3949 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
3950 call tlb_patch_one
3951 - mov 21, %o2
3952 + mov 24, %o2
3953
3954 sethi %hi(xcall_flush_tlb_page), %o0
3955 or %o0, %lo(xcall_flush_tlb_page), %o0
3956 sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1
3957 or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
3958 call tlb_patch_one
3959 - mov 17, %o2
3960 + mov 20, %o2
3961
3962 sethi %hi(xcall_flush_tlb_kernel_range), %o0
3963 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
3964 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
3965 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
3966 call tlb_patch_one
3967 - mov 25, %o2
3968 + mov 44, %o2
3969 #endif /* CONFIG_SMP */
3970
3971 ret
3972 diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
3973 index c32f5d32f811..b56c9c581359 100644
3974 --- a/drivers/net/ethernet/broadcom/bgmac.c
3975 +++ b/drivers/net/ethernet/broadcom/bgmac.c
3976 @@ -314,6 +314,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
3977 u32 ctl;
3978
3979 ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
3980 +
3981 + /* preserve ONLY bits 16-17 from current hardware value */
3982 + ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
3983 +
3984 if (bgmac->core->id.rev >= 4) {
3985 ctl &= ~BGMAC_DMA_RX_BL_MASK;
3986 ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
3987 @@ -324,7 +328,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
3988 ctl &= ~BGMAC_DMA_RX_PT_MASK;
3989 ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT;
3990 }
3991 - ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
3992 ctl |= BGMAC_DMA_RX_ENABLE;
3993 ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
3994 ctl |= BGMAC_DMA_RX_OVERFLOW_CONT;
3995 diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
3996 index ca0d3802f2af..4e603d060e80 100644
3997 --- a/drivers/tty/serial/sunhv.c
3998 +++ b/drivers/tty/serial/sunhv.c
3999 @@ -490,12 +490,6 @@ static void sunhv_console_write_bychar(struct console *con, const char *s, unsig
4000 locked = spin_trylock_irqsave(&port->lock, flags);
4001 else
4002 spin_lock_irqsave(&port->lock, flags);
4003 - if (port->sysrq) {
4004 - locked = 0;
4005 - } else if (oops_in_progress) {
4006 - locked = spin_trylock(&port->lock);
4007 - } else
4008 - spin_lock(&port->lock);
4009
4010 for (i = 0; i < n; i++) {
4011 if (*s == '\n')
4012 diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
4013 index 629e3c865072..9bee25cfa0be 100644
4014 --- a/drivers/tty/tty_ldisc.c
4015 +++ b/drivers/tty/tty_ldisc.c
4016 @@ -417,6 +417,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush);
4017 * they are not on hot paths so a little discipline won't do
4018 * any harm.
4019 *
4020 + * The line discipline-related tty_struct fields are reset to
4021 + * prevent the ldisc driver from re-using stale information for
4022 + * the new ldisc instance.
4023 + *
4024 * Locking: takes termios_rwsem
4025 */
4026
4027 @@ -425,6 +429,9 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
4028 down_write(&tty->termios_rwsem);
4029 tty->termios.c_line = num;
4030 up_write(&tty->termios_rwsem);
4031 +
4032 + tty->disc_data = NULL;
4033 + tty->receive_room = 0;
4034 }
4035
4036 /**
4037 diff --git a/include/linux/filter.h b/include/linux/filter.h
4038 index 5110d4211866..ccb98b459c59 100644
4039 --- a/include/linux/filter.h
4040 +++ b/include/linux/filter.h
4041 @@ -421,7 +421,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
4042 }
4043 #endif /* CONFIG_DEBUG_SET_MODULE_RONX */
4044
4045 -int sk_filter(struct sock *sk, struct sk_buff *skb);
4046 +int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
4047 +static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
4048 +{
4049 + return sk_filter_trim_cap(sk, skb, 1);
4050 +}
4051
4052 int bpf_prog_select_runtime(struct bpf_prog *fp);
4053 void bpf_prog_free(struct bpf_prog *fp);
4054 diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
4055 index ff788b665277..9c2c044153f6 100644
4056 --- a/include/net/ip6_tunnel.h
4057 +++ b/include/net/ip6_tunnel.h
4058 @@ -86,6 +86,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
4059 struct net_device_stats *stats = &dev->stats;
4060 int pkt_len, err;
4061
4062 + memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4063 pkt_len = skb->len - skb_inner_network_offset(skb);
4064 err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
4065
4066 diff --git a/include/net/tcp.h b/include/net/tcp.h
4067 index 9c3ab544d3a8..e9d7a8ef9a6d 100644
4068 --- a/include/net/tcp.h
4069 +++ b/include/net/tcp.h
4070 @@ -1156,6 +1156,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
4071 }
4072
4073 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
4074 +int tcp_filter(struct sock *sk, struct sk_buff *skb);
4075
4076 #undef STATE_TRACE
4077
4078 diff --git a/net/core/dev.c b/net/core/dev.c
4079 index b3fa4b86ab4c..9ca749c81b6c 100644
4080 --- a/net/core/dev.c
4081 +++ b/net/core/dev.c
4082 @@ -2462,7 +2462,7 @@ int skb_checksum_help(struct sk_buff *skb)
4083 goto out;
4084 }
4085
4086 - *(__sum16 *)(skb->data + offset) = csum_fold(csum);
4087 + *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
4088 out_set_summed:
4089 skb->ip_summed = CHECKSUM_NONE;
4090 out:
4091 diff --git a/net/core/filter.c b/net/core/filter.c
4092 index 75e9b2b2336d..e94355452166 100644
4093 --- a/net/core/filter.c
4094 +++ b/net/core/filter.c
4095 @@ -52,9 +52,10 @@
4096 #include <net/dst.h>
4097
4098 /**
4099 - * sk_filter - run a packet through a socket filter
4100 + * sk_filter_trim_cap - run a packet through a socket filter
4101 * @sk: sock associated with &sk_buff
4102 * @skb: buffer to filter
4103 + * @cap: limit on how short the eBPF program may trim the packet
4104 *
4105 * Run the eBPF program and then cut skb->data to correct size returned by
4106 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
4107 @@ -63,7 +64,7 @@
4108 * be accepted or -EPERM if the packet should be tossed.
4109 *
4110 */
4111 -int sk_filter(struct sock *sk, struct sk_buff *skb)
4112 +int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
4113 {
4114 int err;
4115 struct sk_filter *filter;
4116 @@ -84,14 +85,13 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
4117 filter = rcu_dereference(sk->sk_filter);
4118 if (filter) {
4119 unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
4120 -
4121 - err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
4122 + err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
4123 }
4124 rcu_read_unlock();
4125
4126 return err;
4127 }
4128 -EXPORT_SYMBOL(sk_filter);
4129 +EXPORT_SYMBOL(sk_filter_trim_cap);
4130
4131 static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
4132 {
4133 diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
4134 index 4ab6ead3d8ee..9aba9e93c0a2 100644
4135 --- a/net/core/flow_dissector.c
4136 +++ b/net/core/flow_dissector.c
4137 @@ -131,7 +131,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
4138 struct flow_dissector_key_tags *key_tags;
4139 struct flow_dissector_key_keyid *key_keyid;
4140 u8 ip_proto = 0;
4141 - bool ret = false;
4142 + bool ret;
4143
4144 if (!data) {
4145 data = skb->data;
4146 @@ -492,12 +492,17 @@ ip_proto_again:
4147 out_good:
4148 ret = true;
4149
4150 -out_bad:
4151 + key_control->thoff = (u16)nhoff;
4152 +out:
4153 key_basic->n_proto = proto;
4154 key_basic->ip_proto = ip_proto;
4155 - key_control->thoff = (u16)nhoff;
4156
4157 return ret;
4158 +
4159 +out_bad:
4160 + ret = false;
4161 + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
4162 + goto out;
4163 }
4164 EXPORT_SYMBOL(__skb_flow_dissect);
4165
4166 diff --git a/net/core/sock.c b/net/core/sock.c
4167 index 0d91f7dca751..88f017854509 100644
4168 --- a/net/core/sock.c
4169 +++ b/net/core/sock.c
4170 @@ -1562,6 +1562,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
4171 }
4172
4173 newsk->sk_err = 0;
4174 + newsk->sk_err_soft = 0;
4175 newsk->sk_priority = 0;
4176 newsk->sk_incoming_cpu = raw_smp_processor_id();
4177 atomic64_set(&newsk->sk_cookie, 0);
4178 diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
4179 index 8be8f27bfacc..861e1fa25d5e 100644
4180 --- a/net/dccp/ipv4.c
4181 +++ b/net/dccp/ipv4.c
4182 @@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
4183 {
4184 const struct iphdr *iph = (struct iphdr *)skb->data;
4185 const u8 offset = iph->ihl << 2;
4186 - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
4187 + const struct dccp_hdr *dh;
4188 struct dccp_sock *dp;
4189 struct inet_sock *inet;
4190 const int type = icmp_hdr(skb)->type;
4191 @@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
4192 int err;
4193 struct net *net = dev_net(skb->dev);
4194
4195 - if (skb->len < offset + sizeof(*dh) ||
4196 - skb->len < offset + __dccp_basic_hdr_len(dh)) {
4197 - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
4198 - return;
4199 - }
4200 + /* Only need dccph_dport & dccph_sport which are the first
4201 + * 4 bytes in dccp header.
4202 + * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
4203 + */
4204 + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
4205 + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
4206 + dh = (struct dccp_hdr *)(skb->data + offset);
4207
4208 sk = __inet_lookup_established(net, &dccp_hashinfo,
4209 iph->daddr, dh->dccph_dport,
4210 diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
4211 index b8608b71a66d..27c4e81efa24 100644
4212 --- a/net/dccp/ipv6.c
4213 +++ b/net/dccp/ipv6.c
4214 @@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
4215 u8 type, u8 code, int offset, __be32 info)
4216 {
4217 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
4218 - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
4219 + const struct dccp_hdr *dh;
4220 struct dccp_sock *dp;
4221 struct ipv6_pinfo *np;
4222 struct sock *sk;
4223 @@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
4224 __u64 seq;
4225 struct net *net = dev_net(skb->dev);
4226
4227 - if (skb->len < offset + sizeof(*dh) ||
4228 - skb->len < offset + __dccp_basic_hdr_len(dh)) {
4229 - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
4230 - ICMP6_MIB_INERRORS);
4231 - return;
4232 - }
4233 + /* Only need dccph_dport & dccph_sport which are the first
4234 + * 4 bytes in dccp header.
4235 + * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
4236 + */
4237 + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
4238 + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
4239 + dh = (struct dccp_hdr *)(skb->data + offset);
4240
4241 sk = __inet6_lookup_established(net, &dccp_hashinfo,
4242 &hdr->daddr, dh->dccph_dport,
4243 @@ -947,6 +948,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
4244 .getsockopt = ipv6_getsockopt,
4245 .addr2sockaddr = inet6_csk_addr2sockaddr,
4246 .sockaddr_len = sizeof(struct sockaddr_in6),
4247 + .bind_conflict = inet6_csk_bind_conflict,
4248 #ifdef CONFIG_COMPAT
4249 .compat_setsockopt = compat_ipv6_setsockopt,
4250 .compat_getsockopt = compat_ipv6_getsockopt,
4251 diff --git a/net/dccp/proto.c b/net/dccp/proto.c
4252 index 41e65804ddf5..9fe25bf63296 100644
4253 --- a/net/dccp/proto.c
4254 +++ b/net/dccp/proto.c
4255 @@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout)
4256 __kfree_skb(skb);
4257 }
4258
4259 + /* If socket has been already reset kill it. */
4260 + if (sk->sk_state == DCCP_CLOSED)
4261 + goto adjudge_to_death;
4262 +
4263 if (data_was_unread) {
4264 /* Unread data was tossed, send an appropriate Reset Code */
4265 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
4266 diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
4267 index e5a3ff210fec..7c52afb98c42 100644
4268 --- a/net/ipv4/fib_trie.c
4269 +++ b/net/ipv4/fib_trie.c
4270 @@ -2456,22 +2456,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
4271 struct key_vector *l, **tp = &iter->tnode;
4272 t_key key;
4273
4274 - /* use cache location of next-to-find key */
4275 + /* use cached location of previously found key */
4276 if (iter->pos > 0 && pos >= iter->pos) {
4277 - pos -= iter->pos;
4278 key = iter->key;
4279 } else {
4280 - iter->pos = 0;
4281 + iter->pos = 1;
4282 key = 0;
4283 }
4284
4285 - while ((l = leaf_walk_rcu(tp, key)) != NULL) {
4286 + pos -= iter->pos;
4287 +
4288 + while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
4289 key = l->key + 1;
4290 iter->pos++;
4291 -
4292 - if (--pos <= 0)
4293 - break;
4294 -
4295 l = NULL;
4296
4297 /* handle unlikely case of a key wrap */
4298 @@ -2480,7 +2477,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
4299 }
4300
4301 if (l)
4302 - iter->key = key; /* remember it */
4303 + iter->key = l->key; /* remember it */
4304 else
4305 iter->pos = 0; /* forget it */
4306
4307 @@ -2508,7 +2505,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
4308 return fib_route_get_idx(iter, *pos);
4309
4310 iter->pos = 0;
4311 - iter->key = 0;
4312 + iter->key = KEY_MAX;
4313
4314 return SEQ_START_TOKEN;
4315 }
4316 @@ -2517,7 +2514,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4317 {
4318 struct fib_route_iter *iter = seq->private;
4319 struct key_vector *l = NULL;
4320 - t_key key = iter->key;
4321 + t_key key = iter->key + 1;
4322
4323 ++*pos;
4324
4325 @@ -2526,7 +2523,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4326 l = leaf_walk_rcu(&iter->tnode, key);
4327
4328 if (l) {
4329 - iter->key = l->key + 1;
4330 + iter->key = l->key;
4331 iter->pos++;
4332 } else {
4333 iter->pos = 0;
4334 diff --git a/net/ipv4/route.c b/net/ipv4/route.c
4335 index 8533a75a9328..7ceb8a574a50 100644
4336 --- a/net/ipv4/route.c
4337 +++ b/net/ipv4/route.c
4338 @@ -747,7 +747,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
4339 goto reject_redirect;
4340 }
4341
4342 - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
4343 + n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
4344 + if (!n)
4345 + n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
4346 if (!IS_ERR(n)) {
4347 if (!(n->nud_state & NUD_VALID)) {
4348 neigh_event_send(n, NULL);
4349 diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
4350 index 036a76ba2ac2..69daa81736f6 100644
4351 --- a/net/ipv4/tcp.c
4352 +++ b/net/ipv4/tcp.c
4353 @@ -1212,7 +1212,7 @@ new_segment:
4354
4355 if (!skb_can_coalesce(skb, i, pfrag->page,
4356 pfrag->offset)) {
4357 - if (i == sysctl_max_skb_frags || !sg) {
4358 + if (i >= sysctl_max_skb_frags || !sg) {
4359 tcp_mark_push(tp, skb);
4360 goto new_segment;
4361 }
4362 diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
4363 index 7e538f71f5fb..55d7da1d2ce9 100644
4364 --- a/net/ipv4/tcp_dctcp.c
4365 +++ b/net/ipv4/tcp_dctcp.c
4366 @@ -56,6 +56,7 @@ struct dctcp {
4367 u32 next_seq;
4368 u32 ce_state;
4369 u32 delayed_ack_reserved;
4370 + u32 loss_cwnd;
4371 };
4372
4373 static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
4374 @@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
4375 ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
4376
4377 ca->delayed_ack_reserved = 0;
4378 + ca->loss_cwnd = 0;
4379 ca->ce_state = 0;
4380
4381 dctcp_reset(tp, ca);
4382 @@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
4383
4384 static u32 dctcp_ssthresh(struct sock *sk)
4385 {
4386 - const struct dctcp *ca = inet_csk_ca(sk);
4387 + struct dctcp *ca = inet_csk_ca(sk);
4388 struct tcp_sock *tp = tcp_sk(sk);
4389
4390 + ca->loss_cwnd = tp->snd_cwnd;
4391 return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
4392 }
4393
4394 @@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
4395 return 0;
4396 }
4397
4398 +static u32 dctcp_cwnd_undo(struct sock *sk)
4399 +{
4400 + const struct dctcp *ca = inet_csk_ca(sk);
4401 +
4402 + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
4403 +}
4404 +
4405 static struct tcp_congestion_ops dctcp __read_mostly = {
4406 .init = dctcp_init,
4407 .in_ack_event = dctcp_update_alpha,
4408 .cwnd_event = dctcp_cwnd_event,
4409 .ssthresh = dctcp_ssthresh,
4410 .cong_avoid = tcp_reno_cong_avoid,
4411 + .undo_cwnd = dctcp_cwnd_undo,
4412 .set_state = dctcp_state,
4413 .get_info = dctcp_get_info,
4414 .flags = TCP_CONG_NEEDS_ECN,
4415 diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
4416 index b5853cac3269..b58a38eea059 100644
4417 --- a/net/ipv4/tcp_ipv4.c
4418 +++ b/net/ipv4/tcp_ipv4.c
4419 @@ -1533,6 +1533,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
4420 }
4421 EXPORT_SYMBOL(tcp_prequeue);
4422
4423 +int tcp_filter(struct sock *sk, struct sk_buff *skb)
4424 +{
4425 + struct tcphdr *th = (struct tcphdr *)skb->data;
4426 + unsigned int eaten = skb->len;
4427 + int err;
4428 +
4429 + err = sk_filter_trim_cap(sk, skb, th->doff * 4);
4430 + if (!err) {
4431 + eaten -= skb->len;
4432 + TCP_SKB_CB(skb)->end_seq -= eaten;
4433 + }
4434 + return err;
4435 +}
4436 +EXPORT_SYMBOL(tcp_filter);
4437 +
4438 /*
4439 * From tcp_input.c
4440 */
4441 @@ -1638,8 +1653,10 @@ process:
4442
4443 nf_reset(skb);
4444
4445 - if (sk_filter(sk, skb))
4446 + if (tcp_filter(sk, skb))
4447 goto discard_and_relse;
4448 + th = (const struct tcphdr *)skb->data;
4449 + iph = ip_hdr(skb);
4450
4451 skb->dev = NULL;
4452
4453 diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
4454 index fbd521fdae53..5f581616bf6a 100644
4455 --- a/net/ipv6/tcp_ipv6.c
4456 +++ b/net/ipv6/tcp_ipv6.c
4457 @@ -1214,7 +1214,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
4458 if (skb->protocol == htons(ETH_P_IP))
4459 return tcp_v4_do_rcv(sk, skb);
4460
4461 - if (sk_filter(sk, skb))
4462 + if (tcp_filter(sk, skb))
4463 goto discard;
4464
4465 /*
4466 @@ -1438,8 +1438,10 @@ process:
4467 if (tcp_v6_inbound_md5_hash(sk, skb))
4468 goto discard_and_relse;
4469
4470 - if (sk_filter(sk, skb))
4471 + if (tcp_filter(sk, skb))
4472 goto discard_and_relse;
4473 + th = (const struct tcphdr *)skb->data;
4474 + hdr = ipv6_hdr(skb);
4475
4476 skb->dev = NULL;
4477
4478 diff --git a/net/sctp/socket.c b/net/sctp/socket.c
4479 index 402817be3873..b5fd4ab56156 100644
4480 --- a/net/sctp/socket.c
4481 +++ b/net/sctp/socket.c
4482 @@ -1212,9 +1212,12 @@ static int __sctp_connect(struct sock *sk,
4483
4484 timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
4485
4486 - err = sctp_wait_for_connect(asoc, &timeo);
4487 - if ((err == 0 || err == -EINPROGRESS) && assoc_id)
4488 + if (assoc_id)
4489 *assoc_id = asoc->assoc_id;
4490 + err = sctp_wait_for_connect(asoc, &timeo);
4491 + /* Note: the asoc may be freed after the return of
4492 + * sctp_wait_for_connect.
4493 + */
4494
4495 /* Don't free association on exit. */
4496 asoc = NULL;
4497 diff --git a/net/socket.c b/net/socket.c
4498 index 263b334ec5e4..0090225eeb1e 100644
4499 --- a/net/socket.c
4500 +++ b/net/socket.c
4501 @@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
4502 if (err)
4503 break;
4504 ++datagrams;
4505 + if (msg_data_left(&msg_sys))
4506 + break;
4507 }
4508
4509 fput_light(sock->file, fput_needed);