Annotation of /trunk/kernel-alx/patches-4.4/0133-4.4.34-all-fixes.patch
Parent Directory | Revision Log
Revision 2869 -
(hide annotations)
(download)
Mon Mar 27 13:49:12 2017 UTC (7 years, 6 months ago) by niro
File size: 133458 byte(s)
linux-4.4.34
1 | niro | 2869 | diff --git a/Makefile b/Makefile |
2 | index a513c045c8de..30924aabf1b4 100644 | ||
3 | --- a/Makefile | ||
4 | +++ b/Makefile | ||
5 | @@ -1,6 +1,6 @@ | ||
6 | VERSION = 4 | ||
7 | PATCHLEVEL = 4 | ||
8 | -SUBLEVEL = 33 | ||
9 | +SUBLEVEL = 34 | ||
10 | EXTRAVERSION = | ||
11 | NAME = Blurry Fish Butt | ||
12 | |||
13 | diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h | ||
14 | index 70067ce184b1..f7de0dbc38af 100644 | ||
15 | --- a/arch/sparc/include/asm/mmu_64.h | ||
16 | +++ b/arch/sparc/include/asm/mmu_64.h | ||
17 | @@ -92,7 +92,8 @@ struct tsb_config { | ||
18 | typedef struct { | ||
19 | spinlock_t lock; | ||
20 | unsigned long sparc64_ctx_val; | ||
21 | - unsigned long huge_pte_count; | ||
22 | + unsigned long hugetlb_pte_count; | ||
23 | + unsigned long thp_pte_count; | ||
24 | struct tsb_config tsb_block[MM_NUM_TSBS]; | ||
25 | struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; | ||
26 | } mm_context_t; | ||
27 | diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h | ||
28 | index ea6e9a20f3ff..f428512481f9 100644 | ||
29 | --- a/arch/sparc/include/asm/uaccess_64.h | ||
30 | +++ b/arch/sparc/include/asm/uaccess_64.h | ||
31 | @@ -98,7 +98,6 @@ struct exception_table_entry { | ||
32 | unsigned int insn, fixup; | ||
33 | }; | ||
34 | |||
35 | -void __ret_efault(void); | ||
36 | void __retl_efault(void); | ||
37 | |||
38 | /* Uh, these should become the main single-value transfer routines.. | ||
39 | @@ -179,20 +178,6 @@ int __put_user_bad(void); | ||
40 | __gu_ret; \ | ||
41 | }) | ||
42 | |||
43 | -#define __get_user_nocheck_ret(data, addr, size, type, retval) ({ \ | ||
44 | - register unsigned long __gu_val __asm__ ("l1"); \ | ||
45 | - switch (size) { \ | ||
46 | - case 1: __get_user_asm_ret(__gu_val, ub, addr, retval); break; \ | ||
47 | - case 2: __get_user_asm_ret(__gu_val, uh, addr, retval); break; \ | ||
48 | - case 4: __get_user_asm_ret(__gu_val, uw, addr, retval); break; \ | ||
49 | - case 8: __get_user_asm_ret(__gu_val, x, addr, retval); break; \ | ||
50 | - default: \ | ||
51 | - if (__get_user_bad()) \ | ||
52 | - return retval; \ | ||
53 | - } \ | ||
54 | - data = (__force type) __gu_val; \ | ||
55 | -}) | ||
56 | - | ||
57 | #define __get_user_asm(x, size, addr, ret) \ | ||
58 | __asm__ __volatile__( \ | ||
59 | "/* Get user asm, inline. */\n" \ | ||
60 | @@ -214,80 +199,35 @@ __asm__ __volatile__( \ | ||
61 | : "=r" (ret), "=r" (x) : "r" (__m(addr)), \ | ||
62 | "i" (-EFAULT)) | ||
63 | |||
64 | -#define __get_user_asm_ret(x, size, addr, retval) \ | ||
65 | -if (__builtin_constant_p(retval) && retval == -EFAULT) \ | ||
66 | - __asm__ __volatile__( \ | ||
67 | - "/* Get user asm ret, inline. */\n" \ | ||
68 | - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ | ||
69 | - ".section __ex_table,\"a\"\n\t" \ | ||
70 | - ".align 4\n\t" \ | ||
71 | - ".word 1b,__ret_efault\n\n\t" \ | ||
72 | - ".previous\n\t" \ | ||
73 | - : "=r" (x) : "r" (__m(addr))); \ | ||
74 | -else \ | ||
75 | - __asm__ __volatile__( \ | ||
76 | - "/* Get user asm ret, inline. */\n" \ | ||
77 | - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ | ||
78 | - ".section .fixup,#alloc,#execinstr\n\t" \ | ||
79 | - ".align 4\n" \ | ||
80 | - "3:\n\t" \ | ||
81 | - "ret\n\t" \ | ||
82 | - " restore %%g0, %2, %%o0\n\n\t" \ | ||
83 | - ".previous\n\t" \ | ||
84 | - ".section __ex_table,\"a\"\n\t" \ | ||
85 | - ".align 4\n\t" \ | ||
86 | - ".word 1b, 3b\n\n\t" \ | ||
87 | - ".previous\n\t" \ | ||
88 | - : "=r" (x) : "r" (__m(addr)), "i" (retval)) | ||
89 | - | ||
90 | int __get_user_bad(void); | ||
91 | |||
92 | unsigned long __must_check ___copy_from_user(void *to, | ||
93 | const void __user *from, | ||
94 | unsigned long size); | ||
95 | -unsigned long copy_from_user_fixup(void *to, const void __user *from, | ||
96 | - unsigned long size); | ||
97 | static inline unsigned long __must_check | ||
98 | copy_from_user(void *to, const void __user *from, unsigned long size) | ||
99 | { | ||
100 | - unsigned long ret = ___copy_from_user(to, from, size); | ||
101 | - | ||
102 | - if (unlikely(ret)) | ||
103 | - ret = copy_from_user_fixup(to, from, size); | ||
104 | - | ||
105 | - return ret; | ||
106 | + return ___copy_from_user(to, from, size); | ||
107 | } | ||
108 | #define __copy_from_user copy_from_user | ||
109 | |||
110 | unsigned long __must_check ___copy_to_user(void __user *to, | ||
111 | const void *from, | ||
112 | unsigned long size); | ||
113 | -unsigned long copy_to_user_fixup(void __user *to, const void *from, | ||
114 | - unsigned long size); | ||
115 | static inline unsigned long __must_check | ||
116 | copy_to_user(void __user *to, const void *from, unsigned long size) | ||
117 | { | ||
118 | - unsigned long ret = ___copy_to_user(to, from, size); | ||
119 | - | ||
120 | - if (unlikely(ret)) | ||
121 | - ret = copy_to_user_fixup(to, from, size); | ||
122 | - return ret; | ||
123 | + return ___copy_to_user(to, from, size); | ||
124 | } | ||
125 | #define __copy_to_user copy_to_user | ||
126 | |||
127 | unsigned long __must_check ___copy_in_user(void __user *to, | ||
128 | const void __user *from, | ||
129 | unsigned long size); | ||
130 | -unsigned long copy_in_user_fixup(void __user *to, void __user *from, | ||
131 | - unsigned long size); | ||
132 | static inline unsigned long __must_check | ||
133 | copy_in_user(void __user *to, void __user *from, unsigned long size) | ||
134 | { | ||
135 | - unsigned long ret = ___copy_in_user(to, from, size); | ||
136 | - | ||
137 | - if (unlikely(ret)) | ||
138 | - ret = copy_in_user_fixup(to, from, size); | ||
139 | - return ret; | ||
140 | + return ___copy_in_user(to, from, size); | ||
141 | } | ||
142 | #define __copy_in_user copy_in_user | ||
143 | |||
144 | diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S | ||
145 | index d668ca149e64..4087a62f96b0 100644 | ||
146 | --- a/arch/sparc/kernel/dtlb_prot.S | ||
147 | +++ b/arch/sparc/kernel/dtlb_prot.S | ||
148 | @@ -25,13 +25,13 @@ | ||
149 | |||
150 | /* PROT ** ICACHE line 2: More real fault processing */ | ||
151 | ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 | ||
152 | + srlx %g5, PAGE_SHIFT, %g5 | ||
153 | + sllx %g5, PAGE_SHIFT, %g5 ! Clear context ID bits | ||
154 | bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup | ||
155 | mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 | ||
156 | ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault | ||
157 | nop | ||
158 | nop | ||
159 | - nop | ||
160 | - nop | ||
161 | |||
162 | /* PROT ** ICACHE line 3: Unused... */ | ||
163 | nop | ||
164 | diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S | ||
165 | index 51faf92ace00..7eeeb1d5a410 100644 | ||
166 | --- a/arch/sparc/kernel/head_64.S | ||
167 | +++ b/arch/sparc/kernel/head_64.S | ||
168 | @@ -922,47 +922,11 @@ prom_tba: .xword 0 | ||
169 | tlb_type: .word 0 /* Must NOT end up in BSS */ | ||
170 | .section ".fixup",#alloc,#execinstr | ||
171 | |||
172 | - .globl __ret_efault, __retl_efault, __ret_one, __retl_one | ||
173 | -ENTRY(__ret_efault) | ||
174 | - ret | ||
175 | - restore %g0, -EFAULT, %o0 | ||
176 | -ENDPROC(__ret_efault) | ||
177 | - | ||
178 | ENTRY(__retl_efault) | ||
179 | retl | ||
180 | mov -EFAULT, %o0 | ||
181 | ENDPROC(__retl_efault) | ||
182 | |||
183 | -ENTRY(__retl_one) | ||
184 | - retl | ||
185 | - mov 1, %o0 | ||
186 | -ENDPROC(__retl_one) | ||
187 | - | ||
188 | -ENTRY(__retl_one_fp) | ||
189 | - VISExitHalf | ||
190 | - retl | ||
191 | - mov 1, %o0 | ||
192 | -ENDPROC(__retl_one_fp) | ||
193 | - | ||
194 | -ENTRY(__ret_one_asi) | ||
195 | - wr %g0, ASI_AIUS, %asi | ||
196 | - ret | ||
197 | - restore %g0, 1, %o0 | ||
198 | -ENDPROC(__ret_one_asi) | ||
199 | - | ||
200 | -ENTRY(__retl_one_asi) | ||
201 | - wr %g0, ASI_AIUS, %asi | ||
202 | - retl | ||
203 | - mov 1, %o0 | ||
204 | -ENDPROC(__retl_one_asi) | ||
205 | - | ||
206 | -ENTRY(__retl_one_asi_fp) | ||
207 | - wr %g0, ASI_AIUS, %asi | ||
208 | - VISExitHalf | ||
209 | - retl | ||
210 | - mov 1, %o0 | ||
211 | -ENDPROC(__retl_one_asi_fp) | ||
212 | - | ||
213 | ENTRY(__retl_o1) | ||
214 | retl | ||
215 | mov %o1, %o0 | ||
216 | diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c | ||
217 | index 59bbeff55024..07933b9e9ce0 100644 | ||
218 | --- a/arch/sparc/kernel/jump_label.c | ||
219 | +++ b/arch/sparc/kernel/jump_label.c | ||
220 | @@ -13,19 +13,30 @@ | ||
221 | void arch_jump_label_transform(struct jump_entry *entry, | ||
222 | enum jump_label_type type) | ||
223 | { | ||
224 | - u32 val; | ||
225 | u32 *insn = (u32 *) (unsigned long) entry->code; | ||
226 | + u32 val; | ||
227 | |||
228 | if (type == JUMP_LABEL_JMP) { | ||
229 | s32 off = (s32)entry->target - (s32)entry->code; | ||
230 | + bool use_v9_branch = false; | ||
231 | + | ||
232 | + BUG_ON(off & 3); | ||
233 | |||
234 | #ifdef CONFIG_SPARC64 | ||
235 | - /* ba,pt %xcc, . + (off << 2) */ | ||
236 | - val = 0x10680000 | ((u32) off >> 2); | ||
237 | -#else | ||
238 | - /* ba . + (off << 2) */ | ||
239 | - val = 0x10800000 | ((u32) off >> 2); | ||
240 | + if (off <= 0xfffff && off >= -0x100000) | ||
241 | + use_v9_branch = true; | ||
242 | #endif | ||
243 | + if (use_v9_branch) { | ||
244 | + /* WDISP19 - target is . + immed << 2 */ | ||
245 | + /* ba,pt %xcc, . + off */ | ||
246 | + val = 0x10680000 | (((u32) off >> 2) & 0x7ffff); | ||
247 | + } else { | ||
248 | + /* WDISP22 - target is . + immed << 2 */ | ||
249 | + BUG_ON(off > 0x7fffff); | ||
250 | + BUG_ON(off < -0x800000); | ||
251 | + /* ba . + off */ | ||
252 | + val = 0x10800000 | (((u32) off >> 2) & 0x3fffff); | ||
253 | + } | ||
254 | } else { | ||
255 | val = 0x01000000; | ||
256 | } | ||
257 | diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S | ||
258 | index ef0d8e9e1210..f22bec0db645 100644 | ||
259 | --- a/arch/sparc/kernel/ktlb.S | ||
260 | +++ b/arch/sparc/kernel/ktlb.S | ||
261 | @@ -20,6 +20,10 @@ kvmap_itlb: | ||
262 | mov TLB_TAG_ACCESS, %g4 | ||
263 | ldxa [%g4] ASI_IMMU, %g4 | ||
264 | |||
265 | + /* The kernel executes in context zero, therefore we do not | ||
266 | + * need to clear the context ID bits out of %g4 here. | ||
267 | + */ | ||
268 | + | ||
269 | /* sun4v_itlb_miss branches here with the missing virtual | ||
270 | * address already loaded into %g4 | ||
271 | */ | ||
272 | @@ -128,6 +132,10 @@ kvmap_dtlb: | ||
273 | mov TLB_TAG_ACCESS, %g4 | ||
274 | ldxa [%g4] ASI_DMMU, %g4 | ||
275 | |||
276 | + /* The kernel executes in context zero, therefore we do not | ||
277 | + * need to clear the context ID bits out of %g4 here. | ||
278 | + */ | ||
279 | + | ||
280 | /* sun4v_dtlb_miss branches here with the missing virtual | ||
281 | * address already loaded into %g4 | ||
282 | */ | ||
283 | @@ -251,6 +259,10 @@ kvmap_dtlb_longpath: | ||
284 | nop | ||
285 | .previous | ||
286 | |||
287 | + /* The kernel executes in context zero, therefore we do not | ||
288 | + * need to clear the context ID bits out of %g5 here. | ||
289 | + */ | ||
290 | + | ||
291 | be,pt %xcc, sparc64_realfault_common | ||
292 | mov FAULT_CODE_DTLB, %g4 | ||
293 | ba,pt %xcc, winfix_trampoline | ||
294 | diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c | ||
295 | index a92d5d2c46a3..51b25325a961 100644 | ||
296 | --- a/arch/sparc/kernel/sparc_ksyms_64.c | ||
297 | +++ b/arch/sparc/kernel/sparc_ksyms_64.c | ||
298 | @@ -27,7 +27,6 @@ EXPORT_SYMBOL(__flushw_user); | ||
299 | EXPORT_SYMBOL_GPL(real_hard_smp_processor_id); | ||
300 | |||
301 | /* from head_64.S */ | ||
302 | -EXPORT_SYMBOL(__ret_efault); | ||
303 | EXPORT_SYMBOL(tlb_type); | ||
304 | EXPORT_SYMBOL(sun4v_chip_type); | ||
305 | EXPORT_SYMBOL(prom_root_node); | ||
306 | diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S | ||
307 | index be98685c14c6..d568c8207af7 100644 | ||
308 | --- a/arch/sparc/kernel/tsb.S | ||
309 | +++ b/arch/sparc/kernel/tsb.S | ||
310 | @@ -29,13 +29,17 @@ | ||
311 | */ | ||
312 | tsb_miss_dtlb: | ||
313 | mov TLB_TAG_ACCESS, %g4 | ||
314 | + ldxa [%g4] ASI_DMMU, %g4 | ||
315 | + srlx %g4, PAGE_SHIFT, %g4 | ||
316 | ba,pt %xcc, tsb_miss_page_table_walk | ||
317 | - ldxa [%g4] ASI_DMMU, %g4 | ||
318 | + sllx %g4, PAGE_SHIFT, %g4 | ||
319 | |||
320 | tsb_miss_itlb: | ||
321 | mov TLB_TAG_ACCESS, %g4 | ||
322 | + ldxa [%g4] ASI_IMMU, %g4 | ||
323 | + srlx %g4, PAGE_SHIFT, %g4 | ||
324 | ba,pt %xcc, tsb_miss_page_table_walk | ||
325 | - ldxa [%g4] ASI_IMMU, %g4 | ||
326 | + sllx %g4, PAGE_SHIFT, %g4 | ||
327 | |||
328 | /* At this point we have: | ||
329 | * %g1 -- PAGE_SIZE TSB entry address | ||
330 | @@ -284,6 +288,10 @@ tsb_do_dtlb_fault: | ||
331 | nop | ||
332 | .previous | ||
333 | |||
334 | + /* Clear context ID bits. */ | ||
335 | + srlx %g5, PAGE_SHIFT, %g5 | ||
336 | + sllx %g5, PAGE_SHIFT, %g5 | ||
337 | + | ||
338 | be,pt %xcc, sparc64_realfault_common | ||
339 | mov FAULT_CODE_DTLB, %g4 | ||
340 | ba,pt %xcc, winfix_trampoline | ||
341 | diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S | ||
342 | index b7d0bd6b1406..69a439fa2fc1 100644 | ||
343 | --- a/arch/sparc/lib/GENcopy_from_user.S | ||
344 | +++ b/arch/sparc/lib/GENcopy_from_user.S | ||
345 | @@ -3,11 +3,11 @@ | ||
346 | * Copyright (C) 2007 David S. Miller (davem@davemloft.net) | ||
347 | */ | ||
348 | |||
349 | -#define EX_LD(x) \ | ||
350 | +#define EX_LD(x,y) \ | ||
351 | 98: x; \ | ||
352 | .section __ex_table,"a";\ | ||
353 | .align 4; \ | ||
354 | - .word 98b, __retl_one; \ | ||
355 | + .word 98b, y; \ | ||
356 | .text; \ | ||
357 | .align 4; | ||
358 | |||
359 | diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S | ||
360 | index 780550e1afc7..9947427ce354 100644 | ||
361 | --- a/arch/sparc/lib/GENcopy_to_user.S | ||
362 | +++ b/arch/sparc/lib/GENcopy_to_user.S | ||
363 | @@ -3,11 +3,11 @@ | ||
364 | * Copyright (C) 2007 David S. Miller (davem@davemloft.net) | ||
365 | */ | ||
366 | |||
367 | -#define EX_ST(x) \ | ||
368 | +#define EX_ST(x,y) \ | ||
369 | 98: x; \ | ||
370 | .section __ex_table,"a";\ | ||
371 | .align 4; \ | ||
372 | - .word 98b, __retl_one; \ | ||
373 | + .word 98b, y; \ | ||
374 | .text; \ | ||
375 | .align 4; | ||
376 | |||
377 | diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S | ||
378 | index 89358ee94851..059ea24ad73d 100644 | ||
379 | --- a/arch/sparc/lib/GENmemcpy.S | ||
380 | +++ b/arch/sparc/lib/GENmemcpy.S | ||
381 | @@ -4,21 +4,18 @@ | ||
382 | */ | ||
383 | |||
384 | #ifdef __KERNEL__ | ||
385 | +#include <linux/linkage.h> | ||
386 | #define GLOBAL_SPARE %g7 | ||
387 | #else | ||
388 | #define GLOBAL_SPARE %g5 | ||
389 | #endif | ||
390 | |||
391 | #ifndef EX_LD | ||
392 | -#define EX_LD(x) x | ||
393 | +#define EX_LD(x,y) x | ||
394 | #endif | ||
395 | |||
396 | #ifndef EX_ST | ||
397 | -#define EX_ST(x) x | ||
398 | -#endif | ||
399 | - | ||
400 | -#ifndef EX_RETVAL | ||
401 | -#define EX_RETVAL(x) x | ||
402 | +#define EX_ST(x,y) x | ||
403 | #endif | ||
404 | |||
405 | #ifndef LOAD | ||
406 | @@ -45,6 +42,29 @@ | ||
407 | .register %g3,#scratch | ||
408 | |||
409 | .text | ||
410 | + | ||
411 | +#ifndef EX_RETVAL | ||
412 | +#define EX_RETVAL(x) x | ||
413 | +ENTRY(GEN_retl_o4_1) | ||
414 | + add %o4, %o2, %o4 | ||
415 | + retl | ||
416 | + add %o4, 1, %o0 | ||
417 | +ENDPROC(GEN_retl_o4_1) | ||
418 | +ENTRY(GEN_retl_g1_8) | ||
419 | + add %g1, %o2, %g1 | ||
420 | + retl | ||
421 | + add %g1, 8, %o0 | ||
422 | +ENDPROC(GEN_retl_g1_8) | ||
423 | +ENTRY(GEN_retl_o2_4) | ||
424 | + retl | ||
425 | + add %o2, 4, %o0 | ||
426 | +ENDPROC(GEN_retl_o2_4) | ||
427 | +ENTRY(GEN_retl_o2_1) | ||
428 | + retl | ||
429 | + add %o2, 1, %o0 | ||
430 | +ENDPROC(GEN_retl_o2_1) | ||
431 | +#endif | ||
432 | + | ||
433 | .align 64 | ||
434 | |||
435 | .globl FUNC_NAME | ||
436 | @@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
437 | sub %g0, %o4, %o4 | ||
438 | sub %o2, %o4, %o2 | ||
439 | 1: subcc %o4, 1, %o4 | ||
440 | - EX_LD(LOAD(ldub, %o1, %g1)) | ||
441 | - EX_ST(STORE(stb, %g1, %o0)) | ||
442 | + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1) | ||
443 | + EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1) | ||
444 | add %o1, 1, %o1 | ||
445 | bne,pt %XCC, 1b | ||
446 | add %o0, 1, %o0 | ||
447 | @@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
448 | andn %o2, 0x7, %g1 | ||
449 | sub %o2, %g1, %o2 | ||
450 | 1: subcc %g1, 0x8, %g1 | ||
451 | - EX_LD(LOAD(ldx, %o1, %g2)) | ||
452 | - EX_ST(STORE(stx, %g2, %o0)) | ||
453 | + EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8) | ||
454 | + EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8) | ||
455 | add %o1, 0x8, %o1 | ||
456 | bne,pt %XCC, 1b | ||
457 | add %o0, 0x8, %o0 | ||
458 | @@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
459 | |||
460 | 1: | ||
461 | subcc %o2, 4, %o2 | ||
462 | - EX_LD(LOAD(lduw, %o1, %g1)) | ||
463 | - EX_ST(STORE(stw, %g1, %o1 + %o3)) | ||
464 | + EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4) | ||
465 | + EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4) | ||
466 | bgu,pt %XCC, 1b | ||
467 | add %o1, 4, %o1 | ||
468 | |||
469 | @@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
470 | .align 32 | ||
471 | 90: | ||
472 | subcc %o2, 1, %o2 | ||
473 | - EX_LD(LOAD(ldub, %o1, %g1)) | ||
474 | - EX_ST(STORE(stb, %g1, %o1 + %o3)) | ||
475 | + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1) | ||
476 | + EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1) | ||
477 | bgu,pt %XCC, 90b | ||
478 | add %o1, 1, %o1 | ||
479 | retl | ||
480 | diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile | ||
481 | index 3269b0234093..4f2384a4286a 100644 | ||
482 | --- a/arch/sparc/lib/Makefile | ||
483 | +++ b/arch/sparc/lib/Makefile | ||
484 | @@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o | ||
485 | lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o | ||
486 | lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o | ||
487 | |||
488 | -lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o | ||
489 | +lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o | ||
490 | lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o | ||
491 | |||
492 | obj-$(CONFIG_SPARC64) += iomap.o | ||
493 | diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S | ||
494 | index d5242b8c4f94..b79a6998d87c 100644 | ||
495 | --- a/arch/sparc/lib/NG2copy_from_user.S | ||
496 | +++ b/arch/sparc/lib/NG2copy_from_user.S | ||
497 | @@ -3,19 +3,19 @@ | ||
498 | * Copyright (C) 2007 David S. Miller (davem@davemloft.net) | ||
499 | */ | ||
500 | |||
501 | -#define EX_LD(x) \ | ||
502 | +#define EX_LD(x,y) \ | ||
503 | 98: x; \ | ||
504 | .section __ex_table,"a";\ | ||
505 | .align 4; \ | ||
506 | - .word 98b, __retl_one_asi;\ | ||
507 | + .word 98b, y; \ | ||
508 | .text; \ | ||
509 | .align 4; | ||
510 | |||
511 | -#define EX_LD_FP(x) \ | ||
512 | +#define EX_LD_FP(x,y) \ | ||
513 | 98: x; \ | ||
514 | .section __ex_table,"a";\ | ||
515 | .align 4; \ | ||
516 | - .word 98b, __retl_one_asi_fp;\ | ||
517 | + .word 98b, y##_fp; \ | ||
518 | .text; \ | ||
519 | .align 4; | ||
520 | |||
521 | diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S | ||
522 | index 4e962d993b10..dcec55f254ab 100644 | ||
523 | --- a/arch/sparc/lib/NG2copy_to_user.S | ||
524 | +++ b/arch/sparc/lib/NG2copy_to_user.S | ||
525 | @@ -3,19 +3,19 @@ | ||
526 | * Copyright (C) 2007 David S. Miller (davem@davemloft.net) | ||
527 | */ | ||
528 | |||
529 | -#define EX_ST(x) \ | ||
530 | +#define EX_ST(x,y) \ | ||
531 | 98: x; \ | ||
532 | .section __ex_table,"a";\ | ||
533 | .align 4; \ | ||
534 | - .word 98b, __retl_one_asi;\ | ||
535 | + .word 98b, y; \ | ||
536 | .text; \ | ||
537 | .align 4; | ||
538 | |||
539 | -#define EX_ST_FP(x) \ | ||
540 | +#define EX_ST_FP(x,y) \ | ||
541 | 98: x; \ | ||
542 | .section __ex_table,"a";\ | ||
543 | .align 4; \ | ||
544 | - .word 98b, __retl_one_asi_fp;\ | ||
545 | + .word 98b, y##_fp; \ | ||
546 | .text; \ | ||
547 | .align 4; | ||
548 | |||
549 | diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S | ||
550 | index d5f585df2f3f..c629dbd121b6 100644 | ||
551 | --- a/arch/sparc/lib/NG2memcpy.S | ||
552 | +++ b/arch/sparc/lib/NG2memcpy.S | ||
553 | @@ -4,6 +4,7 @@ | ||
554 | */ | ||
555 | |||
556 | #ifdef __KERNEL__ | ||
557 | +#include <linux/linkage.h> | ||
558 | #include <asm/visasm.h> | ||
559 | #include <asm/asi.h> | ||
560 | #define GLOBAL_SPARE %g7 | ||
561 | @@ -32,21 +33,17 @@ | ||
562 | #endif | ||
563 | |||
564 | #ifndef EX_LD | ||
565 | -#define EX_LD(x) x | ||
566 | +#define EX_LD(x,y) x | ||
567 | #endif | ||
568 | #ifndef EX_LD_FP | ||
569 | -#define EX_LD_FP(x) x | ||
570 | +#define EX_LD_FP(x,y) x | ||
571 | #endif | ||
572 | |||
573 | #ifndef EX_ST | ||
574 | -#define EX_ST(x) x | ||
575 | +#define EX_ST(x,y) x | ||
576 | #endif | ||
577 | #ifndef EX_ST_FP | ||
578 | -#define EX_ST_FP(x) x | ||
579 | -#endif | ||
580 | - | ||
581 | -#ifndef EX_RETVAL | ||
582 | -#define EX_RETVAL(x) x | ||
583 | +#define EX_ST_FP(x,y) x | ||
584 | #endif | ||
585 | |||
586 | #ifndef LOAD | ||
587 | @@ -140,45 +137,110 @@ | ||
588 | fsrc2 %x6, %f12; \ | ||
589 | fsrc2 %x7, %f14; | ||
590 | #define FREG_LOAD_1(base, x0) \ | ||
591 | - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)) | ||
592 | + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1) | ||
593 | #define FREG_LOAD_2(base, x0, x1) \ | ||
594 | - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | ||
595 | - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); | ||
596 | + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ | ||
597 | + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); | ||
598 | #define FREG_LOAD_3(base, x0, x1, x2) \ | ||
599 | - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | ||
600 | - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ | ||
601 | - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); | ||
602 | + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ | ||
603 | + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ | ||
604 | + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); | ||
605 | #define FREG_LOAD_4(base, x0, x1, x2, x3) \ | ||
606 | - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | ||
607 | - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ | ||
608 | - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ | ||
609 | - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); | ||
610 | + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ | ||
611 | + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ | ||
612 | + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ | ||
613 | + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); | ||
614 | #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ | ||
615 | - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | ||
616 | - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ | ||
617 | - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ | ||
618 | - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ | ||
619 | - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); | ||
620 | + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ | ||
621 | + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ | ||
622 | + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ | ||
623 | + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ | ||
624 | + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); | ||
625 | #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ | ||
626 | - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | ||
627 | - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ | ||
628 | - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ | ||
629 | - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ | ||
630 | - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ | ||
631 | - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); | ||
632 | + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ | ||
633 | + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ | ||
634 | + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ | ||
635 | + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ | ||
636 | + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ | ||
637 | + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); | ||
638 | #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ | ||
639 | - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | ||
640 | - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ | ||
641 | - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ | ||
642 | - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ | ||
643 | - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ | ||
644 | - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \ | ||
645 | - EX_LD_FP(LOAD(ldd, base + 0x30, %x6)); | ||
646 | + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ | ||
647 | + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ | ||
648 | + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ | ||
649 | + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ | ||
650 | + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ | ||
651 | + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \ | ||
652 | + EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1); | ||
653 | |||
654 | .register %g2,#scratch | ||
655 | .register %g3,#scratch | ||
656 | |||
657 | .text | ||
658 | +#ifndef EX_RETVAL | ||
659 | +#define EX_RETVAL(x) x | ||
660 | +__restore_fp: | ||
661 | + VISExitHalf | ||
662 | +__restore_asi: | ||
663 | + retl | ||
664 | + wr %g0, ASI_AIUS, %asi | ||
665 | +ENTRY(NG2_retl_o2) | ||
666 | + ba,pt %xcc, __restore_asi | ||
667 | + mov %o2, %o0 | ||
668 | +ENDPROC(NG2_retl_o2) | ||
669 | +ENTRY(NG2_retl_o2_plus_1) | ||
670 | + ba,pt %xcc, __restore_asi | ||
671 | + add %o2, 1, %o0 | ||
672 | +ENDPROC(NG2_retl_o2_plus_1) | ||
673 | +ENTRY(NG2_retl_o2_plus_4) | ||
674 | + ba,pt %xcc, __restore_asi | ||
675 | + add %o2, 4, %o0 | ||
676 | +ENDPROC(NG2_retl_o2_plus_4) | ||
677 | +ENTRY(NG2_retl_o2_plus_8) | ||
678 | + ba,pt %xcc, __restore_asi | ||
679 | + add %o2, 8, %o0 | ||
680 | +ENDPROC(NG2_retl_o2_plus_8) | ||
681 | +ENTRY(NG2_retl_o2_plus_o4_plus_1) | ||
682 | + add %o4, 1, %o4 | ||
683 | + ba,pt %xcc, __restore_asi | ||
684 | + add %o2, %o4, %o0 | ||
685 | +ENDPROC(NG2_retl_o2_plus_o4_plus_1) | ||
686 | +ENTRY(NG2_retl_o2_plus_o4_plus_8) | ||
687 | + add %o4, 8, %o4 | ||
688 | + ba,pt %xcc, __restore_asi | ||
689 | + add %o2, %o4, %o0 | ||
690 | +ENDPROC(NG2_retl_o2_plus_o4_plus_8) | ||
691 | +ENTRY(NG2_retl_o2_plus_o4_plus_16) | ||
692 | + add %o4, 16, %o4 | ||
693 | + ba,pt %xcc, __restore_asi | ||
694 | + add %o2, %o4, %o0 | ||
695 | +ENDPROC(NG2_retl_o2_plus_o4_plus_16) | ||
696 | +ENTRY(NG2_retl_o2_plus_g1_fp) | ||
697 | + ba,pt %xcc, __restore_fp | ||
698 | + add %o2, %g1, %o0 | ||
699 | +ENDPROC(NG2_retl_o2_plus_g1_fp) | ||
700 | +ENTRY(NG2_retl_o2_plus_g1_plus_64_fp) | ||
701 | + add %g1, 64, %g1 | ||
702 | + ba,pt %xcc, __restore_fp | ||
703 | + add %o2, %g1, %o0 | ||
704 | +ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp) | ||
705 | +ENTRY(NG2_retl_o2_plus_g1_plus_1) | ||
706 | + add %g1, 1, %g1 | ||
707 | + ba,pt %xcc, __restore_asi | ||
708 | + add %o2, %g1, %o0 | ||
709 | +ENDPROC(NG2_retl_o2_plus_g1_plus_1) | ||
710 | +ENTRY(NG2_retl_o2_and_7_plus_o4) | ||
711 | + and %o2, 7, %o2 | ||
712 | + ba,pt %xcc, __restore_asi | ||
713 | + add %o2, %o4, %o0 | ||
714 | +ENDPROC(NG2_retl_o2_and_7_plus_o4) | ||
715 | +ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8) | ||
716 | + and %o2, 7, %o2 | ||
717 | + add %o4, 8, %o4 | ||
718 | + ba,pt %xcc, __restore_asi | ||
719 | + add %o2, %o4, %o0 | ||
720 | +ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8) | ||
721 | +#endif | ||
722 | + | ||
723 | .align 64 | ||
724 | |||
725 | .globl FUNC_NAME | ||
726 | @@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
727 | sub %g0, %o4, %o4 ! bytes to align dst | ||
728 | sub %o2, %o4, %o2 | ||
729 | 1: subcc %o4, 1, %o4 | ||
730 | - EX_LD(LOAD(ldub, %o1, %g1)) | ||
731 | - EX_ST(STORE(stb, %g1, %o0)) | ||
732 | + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1) | ||
733 | + EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1) | ||
734 | add %o1, 1, %o1 | ||
735 | bne,pt %XCC, 1b | ||
736 | add %o0, 1, %o0 | ||
737 | @@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
738 | nop | ||
739 | /* fall through for 0 < low bits < 8 */ | ||
740 | 110: sub %o4, 64, %g2 | ||
741 | - EX_LD_FP(LOAD_BLK(%g2, %f0)) | ||
742 | -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | ||
743 | - EX_LD_FP(LOAD_BLK(%o4, %f16)) | ||
744 | + EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1) | ||
745 | +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
746 | + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) | ||
747 | FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) | ||
748 | - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | ||
749 | + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
750 | FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) | ||
751 | subcc %g1, 64, %g1 | ||
752 | add %o4, 64, %o4 | ||
753 | @@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
754 | |||
755 | 120: sub %o4, 56, %g2 | ||
756 | FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) | ||
757 | -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | ||
758 | - EX_LD_FP(LOAD_BLK(%o4, %f16)) | ||
759 | +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
760 | + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) | ||
761 | FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) | ||
762 | - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | ||
763 | + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
764 | FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) | ||
765 | subcc %g1, 64, %g1 | ||
766 | add %o4, 64, %o4 | ||
767 | @@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
768 | |||
769 | 130: sub %o4, 48, %g2 | ||
770 | FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) | ||
771 | -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | ||
772 | - EX_LD_FP(LOAD_BLK(%o4, %f16)) | ||
773 | +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
774 | + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) | ||
775 | FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) | ||
776 | - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | ||
777 | + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
778 | FREG_MOVE_6(f20, f22, f24, f26, f28, f30) | ||
779 | subcc %g1, 64, %g1 | ||
780 | add %o4, 64, %o4 | ||
781 | @@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
782 | |||
783 | 140: sub %o4, 40, %g2 | ||
784 | FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) | ||
785 | -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | ||
786 | - EX_LD_FP(LOAD_BLK(%o4, %f16)) | ||
787 | +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
788 | + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) | ||
789 | FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) | ||
790 | - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | ||
791 | + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
792 | FREG_MOVE_5(f22, f24, f26, f28, f30) | ||
793 | subcc %g1, 64, %g1 | ||
794 | add %o4, 64, %o4 | ||
795 | @@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
796 | |||
797 | 150: sub %o4, 32, %g2 | ||
798 | FREG_LOAD_4(%g2, f0, f2, f4, f6) | ||
799 | -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | ||
800 | - EX_LD_FP(LOAD_BLK(%o4, %f16)) | ||
801 | +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
802 | + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) | ||
803 | FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) | ||
804 | - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | ||
805 | + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
806 | FREG_MOVE_4(f24, f26, f28, f30) | ||
807 | subcc %g1, 64, %g1 | ||
808 | add %o4, 64, %o4 | ||
809 | @@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
810 | |||
811 | 160: sub %o4, 24, %g2 | ||
812 | FREG_LOAD_3(%g2, f0, f2, f4) | ||
813 | -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | ||
814 | - EX_LD_FP(LOAD_BLK(%o4, %f16)) | ||
815 | +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
816 | + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) | ||
817 | FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) | ||
818 | - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | ||
819 | + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
820 | FREG_MOVE_3(f26, f28, f30) | ||
821 | subcc %g1, 64, %g1 | ||
822 | add %o4, 64, %o4 | ||
823 | @@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
824 | |||
825 | 170: sub %o4, 16, %g2 | ||
826 | FREG_LOAD_2(%g2, f0, f2) | ||
827 | -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | ||
828 | - EX_LD_FP(LOAD_BLK(%o4, %f16)) | ||
829 | +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
830 | + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) | ||
831 | FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) | ||
832 | - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | ||
833 | + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
834 | FREG_MOVE_2(f28, f30) | ||
835 | subcc %g1, 64, %g1 | ||
836 | add %o4, 64, %o4 | ||
837 | @@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
838 | |||
839 | 180: sub %o4, 8, %g2 | ||
840 | FREG_LOAD_1(%g2, f0) | ||
841 | -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | ||
842 | - EX_LD_FP(LOAD_BLK(%o4, %f16)) | ||
843 | +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
844 | + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) | ||
845 | FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) | ||
846 | - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | ||
847 | + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
848 | FREG_MOVE_1(f30) | ||
849 | subcc %g1, 64, %g1 | ||
850 | add %o4, 64, %o4 | ||
851 | @@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
852 | nop | ||
853 | |||
854 | 190: | ||
855 | -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | ||
856 | +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) | ||
857 | subcc %g1, 64, %g1 | ||
858 | - EX_LD_FP(LOAD_BLK(%o4, %f0)) | ||
859 | - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | ||
860 | + EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64) | ||
861 | + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64) | ||
862 | add %o4, 64, %o4 | ||
863 | bne,pt %xcc, 1b | ||
864 | LOAD(prefetch, %o4 + 64, #one_read) | ||
865 | @@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
866 | andn %o2, 0xf, %o4 | ||
867 | and %o2, 0xf, %o2 | ||
868 | 1: subcc %o4, 0x10, %o4 | ||
869 | - EX_LD(LOAD(ldx, %o1, %o5)) | ||
870 | + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16) | ||
871 | add %o1, 0x08, %o1 | ||
872 | - EX_LD(LOAD(ldx, %o1, %g1)) | ||
873 | + EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16) | ||
874 | sub %o1, 0x08, %o1 | ||
875 | - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) | ||
876 | + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16) | ||
877 | add %o1, 0x8, %o1 | ||
878 | - EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) | ||
879 | + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8) | ||
880 | bgu,pt %XCC, 1b | ||
881 | add %o1, 0x8, %o1 | ||
882 | 73: andcc %o2, 0x8, %g0 | ||
883 | be,pt %XCC, 1f | ||
884 | nop | ||
885 | sub %o2, 0x8, %o2 | ||
886 | - EX_LD(LOAD(ldx, %o1, %o5)) | ||
887 | - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) | ||
888 | + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8) | ||
889 | + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8) | ||
890 | add %o1, 0x8, %o1 | ||
891 | 1: andcc %o2, 0x4, %g0 | ||
892 | be,pt %XCC, 1f | ||
893 | nop | ||
894 | sub %o2, 0x4, %o2 | ||
895 | - EX_LD(LOAD(lduw, %o1, %o5)) | ||
896 | - EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) | ||
897 | + EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4) | ||
898 | + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) | ||
899 | add %o1, 0x4, %o1 | ||
900 | 1: cmp %o2, 0 | ||
901 | be,pt %XCC, 85f | ||
902 | @@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
903 | sub %o2, %g1, %o2 | ||
904 | |||
905 | 1: subcc %g1, 1, %g1 | ||
906 | - EX_LD(LOAD(ldub, %o1, %o5)) | ||
907 | - EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) | ||
908 | + EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1) | ||
909 | + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1) | ||
910 | bgu,pt %icc, 1b | ||
911 | add %o1, 1, %o1 | ||
912 | |||
913 | @@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
914 | |||
915 | 8: mov 64, GLOBAL_SPARE | ||
916 | andn %o1, 0x7, %o1 | ||
917 | - EX_LD(LOAD(ldx, %o1, %g2)) | ||
918 | + EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2) | ||
919 | sub GLOBAL_SPARE, %g1, GLOBAL_SPARE | ||
920 | andn %o2, 0x7, %o4 | ||
921 | sllx %g2, %g1, %g2 | ||
922 | 1: add %o1, 0x8, %o1 | ||
923 | - EX_LD(LOAD(ldx, %o1, %g3)) | ||
924 | + EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4) | ||
925 | subcc %o4, 0x8, %o4 | ||
926 | srlx %g3, GLOBAL_SPARE, %o5 | ||
927 | or %o5, %g2, %o5 | ||
928 | - EX_ST(STORE(stx, %o5, %o0)) | ||
929 | + EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8) | ||
930 | add %o0, 0x8, %o0 | ||
931 | bgu,pt %icc, 1b | ||
932 | sllx %g3, %g1, %g2 | ||
933 | @@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
934 | |||
935 | 1: | ||
936 | subcc %o2, 4, %o2 | ||
937 | - EX_LD(LOAD(lduw, %o1, %g1)) | ||
938 | - EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) | ||
939 | + EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4) | ||
940 | + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) | ||
941 | bgu,pt %XCC, 1b | ||
942 | add %o1, 4, %o1 | ||
943 | |||
944 | @@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
945 | .align 32 | ||
946 | 90: | ||
947 | subcc %o2, 1, %o2 | ||
948 | - EX_LD(LOAD(ldub, %o1, %g1)) | ||
949 | - EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) | ||
950 | + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1) | ||
951 | + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1) | ||
952 | bgu,pt %XCC, 90b | ||
953 | add %o1, 1, %o1 | ||
954 | retl | ||
955 | diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S | ||
956 | index 2e8ee7ad07a9..16a286c1a528 100644 | ||
957 | --- a/arch/sparc/lib/NG4copy_from_user.S | ||
958 | +++ b/arch/sparc/lib/NG4copy_from_user.S | ||
959 | @@ -3,19 +3,19 @@ | ||
960 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
961 | */ | ||
962 | |||
963 | -#define EX_LD(x) \ | ||
964 | +#define EX_LD(x, y) \ | ||
965 | 98: x; \ | ||
966 | .section __ex_table,"a";\ | ||
967 | .align 4; \ | ||
968 | - .word 98b, __retl_one_asi;\ | ||
969 | + .word 98b, y; \ | ||
970 | .text; \ | ||
971 | .align 4; | ||
972 | |||
973 | -#define EX_LD_FP(x) \ | ||
974 | +#define EX_LD_FP(x,y) \ | ||
975 | 98: x; \ | ||
976 | .section __ex_table,"a";\ | ||
977 | .align 4; \ | ||
978 | - .word 98b, __retl_one_asi_fp;\ | ||
979 | + .word 98b, y##_fp; \ | ||
980 | .text; \ | ||
981 | .align 4; | ||
982 | |||
983 | diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S | ||
984 | index be0bf4590df8..6b0276ffc858 100644 | ||
985 | --- a/arch/sparc/lib/NG4copy_to_user.S | ||
986 | +++ b/arch/sparc/lib/NG4copy_to_user.S | ||
987 | @@ -3,19 +3,19 @@ | ||
988 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
989 | */ | ||
990 | |||
991 | -#define EX_ST(x) \ | ||
992 | +#define EX_ST(x,y) \ | ||
993 | 98: x; \ | ||
994 | .section __ex_table,"a";\ | ||
995 | .align 4; \ | ||
996 | - .word 98b, __retl_one_asi;\ | ||
997 | + .word 98b, y; \ | ||
998 | .text; \ | ||
999 | .align 4; | ||
1000 | |||
1001 | -#define EX_ST_FP(x) \ | ||
1002 | +#define EX_ST_FP(x,y) \ | ||
1003 | 98: x; \ | ||
1004 | .section __ex_table,"a";\ | ||
1005 | .align 4; \ | ||
1006 | - .word 98b, __retl_one_asi_fp;\ | ||
1007 | + .word 98b, y##_fp; \ | ||
1008 | .text; \ | ||
1009 | .align 4; | ||
1010 | |||
1011 | diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S | ||
1012 | index 8e13ee1f4454..75bb93b1437f 100644 | ||
1013 | --- a/arch/sparc/lib/NG4memcpy.S | ||
1014 | +++ b/arch/sparc/lib/NG4memcpy.S | ||
1015 | @@ -4,6 +4,7 @@ | ||
1016 | */ | ||
1017 | |||
1018 | #ifdef __KERNEL__ | ||
1019 | +#include <linux/linkage.h> | ||
1020 | #include <asm/visasm.h> | ||
1021 | #include <asm/asi.h> | ||
1022 | #define GLOBAL_SPARE %g7 | ||
1023 | @@ -46,22 +47,19 @@ | ||
1024 | #endif | ||
1025 | |||
1026 | #ifndef EX_LD | ||
1027 | -#define EX_LD(x) x | ||
1028 | +#define EX_LD(x,y) x | ||
1029 | #endif | ||
1030 | #ifndef EX_LD_FP | ||
1031 | -#define EX_LD_FP(x) x | ||
1032 | +#define EX_LD_FP(x,y) x | ||
1033 | #endif | ||
1034 | |||
1035 | #ifndef EX_ST | ||
1036 | -#define EX_ST(x) x | ||
1037 | +#define EX_ST(x,y) x | ||
1038 | #endif | ||
1039 | #ifndef EX_ST_FP | ||
1040 | -#define EX_ST_FP(x) x | ||
1041 | +#define EX_ST_FP(x,y) x | ||
1042 | #endif | ||
1043 | |||
1044 | -#ifndef EX_RETVAL | ||
1045 | -#define EX_RETVAL(x) x | ||
1046 | -#endif | ||
1047 | |||
1048 | #ifndef LOAD | ||
1049 | #define LOAD(type,addr,dest) type [addr], dest | ||
1050 | @@ -94,6 +92,158 @@ | ||
1051 | .register %g3,#scratch | ||
1052 | |||
1053 | .text | ||
1054 | +#ifndef EX_RETVAL | ||
1055 | +#define EX_RETVAL(x) x | ||
1056 | +__restore_asi_fp: | ||
1057 | + VISExitHalf | ||
1058 | +__restore_asi: | ||
1059 | + retl | ||
1060 | + wr %g0, ASI_AIUS, %asi | ||
1061 | + | ||
1062 | +ENTRY(NG4_retl_o2) | ||
1063 | + ba,pt %xcc, __restore_asi | ||
1064 | + mov %o2, %o0 | ||
1065 | +ENDPROC(NG4_retl_o2) | ||
1066 | +ENTRY(NG4_retl_o2_plus_1) | ||
1067 | + ba,pt %xcc, __restore_asi | ||
1068 | + add %o2, 1, %o0 | ||
1069 | +ENDPROC(NG4_retl_o2_plus_1) | ||
1070 | +ENTRY(NG4_retl_o2_plus_4) | ||
1071 | + ba,pt %xcc, __restore_asi | ||
1072 | + add %o2, 4, %o0 | ||
1073 | +ENDPROC(NG4_retl_o2_plus_4) | ||
1074 | +ENTRY(NG4_retl_o2_plus_o5) | ||
1075 | + ba,pt %xcc, __restore_asi | ||
1076 | + add %o2, %o5, %o0 | ||
1077 | +ENDPROC(NG4_retl_o2_plus_o5) | ||
1078 | +ENTRY(NG4_retl_o2_plus_o5_plus_4) | ||
1079 | + add %o5, 4, %o5 | ||
1080 | + ba,pt %xcc, __restore_asi | ||
1081 | + add %o2, %o5, %o0 | ||
1082 | +ENDPROC(NG4_retl_o2_plus_o5_plus_4) | ||
1083 | +ENTRY(NG4_retl_o2_plus_o5_plus_8) | ||
1084 | + add %o5, 8, %o5 | ||
1085 | + ba,pt %xcc, __restore_asi | ||
1086 | + add %o2, %o5, %o0 | ||
1087 | +ENDPROC(NG4_retl_o2_plus_o5_plus_8) | ||
1088 | +ENTRY(NG4_retl_o2_plus_o5_plus_16) | ||
1089 | + add %o5, 16, %o5 | ||
1090 | + ba,pt %xcc, __restore_asi | ||
1091 | + add %o2, %o5, %o0 | ||
1092 | +ENDPROC(NG4_retl_o2_plus_o5_plus_16) | ||
1093 | +ENTRY(NG4_retl_o2_plus_o5_plus_24) | ||
1094 | + add %o5, 24, %o5 | ||
1095 | + ba,pt %xcc, __restore_asi | ||
1096 | + add %o2, %o5, %o0 | ||
1097 | +ENDPROC(NG4_retl_o2_plus_o5_plus_24) | ||
1098 | +ENTRY(NG4_retl_o2_plus_o5_plus_32) | ||
1099 | + add %o5, 32, %o5 | ||
1100 | + ba,pt %xcc, __restore_asi | ||
1101 | + add %o2, %o5, %o0 | ||
1102 | +ENDPROC(NG4_retl_o2_plus_o5_plus_32) | ||
1103 | +ENTRY(NG4_retl_o2_plus_g1) | ||
1104 | + ba,pt %xcc, __restore_asi | ||
1105 | + add %o2, %g1, %o0 | ||
1106 | +ENDPROC(NG4_retl_o2_plus_g1) | ||
1107 | +ENTRY(NG4_retl_o2_plus_g1_plus_1) | ||
1108 | + add %g1, 1, %g1 | ||
1109 | + ba,pt %xcc, __restore_asi | ||
1110 | + add %o2, %g1, %o0 | ||
1111 | +ENDPROC(NG4_retl_o2_plus_g1_plus_1) | ||
1112 | +ENTRY(NG4_retl_o2_plus_g1_plus_8) | ||
1113 | + add %g1, 8, %g1 | ||
1114 | + ba,pt %xcc, __restore_asi | ||
1115 | + add %o2, %g1, %o0 | ||
1116 | +ENDPROC(NG4_retl_o2_plus_g1_plus_8) | ||
1117 | +ENTRY(NG4_retl_o2_plus_o4) | ||
1118 | + ba,pt %xcc, __restore_asi | ||
1119 | + add %o2, %o4, %o0 | ||
1120 | +ENDPROC(NG4_retl_o2_plus_o4) | ||
1121 | +ENTRY(NG4_retl_o2_plus_o4_plus_8) | ||
1122 | + add %o4, 8, %o4 | ||
1123 | + ba,pt %xcc, __restore_asi | ||
1124 | + add %o2, %o4, %o0 | ||
1125 | +ENDPROC(NG4_retl_o2_plus_o4_plus_8) | ||
1126 | +ENTRY(NG4_retl_o2_plus_o4_plus_16) | ||
1127 | + add %o4, 16, %o4 | ||
1128 | + ba,pt %xcc, __restore_asi | ||
1129 | + add %o2, %o4, %o0 | ||
1130 | +ENDPROC(NG4_retl_o2_plus_o4_plus_16) | ||
1131 | +ENTRY(NG4_retl_o2_plus_o4_plus_24) | ||
1132 | + add %o4, 24, %o4 | ||
1133 | + ba,pt %xcc, __restore_asi | ||
1134 | + add %o2, %o4, %o0 | ||
1135 | +ENDPROC(NG4_retl_o2_plus_o4_plus_24) | ||
1136 | +ENTRY(NG4_retl_o2_plus_o4_plus_32) | ||
1137 | + add %o4, 32, %o4 | ||
1138 | + ba,pt %xcc, __restore_asi | ||
1139 | + add %o2, %o4, %o0 | ||
1140 | +ENDPROC(NG4_retl_o2_plus_o4_plus_32) | ||
1141 | +ENTRY(NG4_retl_o2_plus_o4_plus_40) | ||
1142 | + add %o4, 40, %o4 | ||
1143 | + ba,pt %xcc, __restore_asi | ||
1144 | + add %o2, %o4, %o0 | ||
1145 | +ENDPROC(NG4_retl_o2_plus_o4_plus_40) | ||
1146 | +ENTRY(NG4_retl_o2_plus_o4_plus_48) | ||
1147 | + add %o4, 48, %o4 | ||
1148 | + ba,pt %xcc, __restore_asi | ||
1149 | + add %o2, %o4, %o0 | ||
1150 | +ENDPROC(NG4_retl_o2_plus_o4_plus_48) | ||
1151 | +ENTRY(NG4_retl_o2_plus_o4_plus_56) | ||
1152 | + add %o4, 56, %o4 | ||
1153 | + ba,pt %xcc, __restore_asi | ||
1154 | + add %o2, %o4, %o0 | ||
1155 | +ENDPROC(NG4_retl_o2_plus_o4_plus_56) | ||
1156 | +ENTRY(NG4_retl_o2_plus_o4_plus_64) | ||
1157 | + add %o4, 64, %o4 | ||
1158 | + ba,pt %xcc, __restore_asi | ||
1159 | + add %o2, %o4, %o0 | ||
1160 | +ENDPROC(NG4_retl_o2_plus_o4_plus_64) | ||
1161 | +ENTRY(NG4_retl_o2_plus_o4_fp) | ||
1162 | + ba,pt %xcc, __restore_asi_fp | ||
1163 | + add %o2, %o4, %o0 | ||
1164 | +ENDPROC(NG4_retl_o2_plus_o4_fp) | ||
1165 | +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) | ||
1166 | + add %o4, 8, %o4 | ||
1167 | + ba,pt %xcc, __restore_asi_fp | ||
1168 | + add %o2, %o4, %o0 | ||
1169 | +ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp) | ||
1170 | +ENTRY(NG4_retl_o2_plus_o4_plus_16_fp) | ||
1171 | + add %o4, 16, %o4 | ||
1172 | + ba,pt %xcc, __restore_asi_fp | ||
1173 | + add %o2, %o4, %o0 | ||
1174 | +ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp) | ||
1175 | +ENTRY(NG4_retl_o2_plus_o4_plus_24_fp) | ||
1176 | + add %o4, 24, %o4 | ||
1177 | + ba,pt %xcc, __restore_asi_fp | ||
1178 | + add %o2, %o4, %o0 | ||
1179 | +ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp) | ||
1180 | +ENTRY(NG4_retl_o2_plus_o4_plus_32_fp) | ||
1181 | + add %o4, 32, %o4 | ||
1182 | + ba,pt %xcc, __restore_asi_fp | ||
1183 | + add %o2, %o4, %o0 | ||
1184 | +ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp) | ||
1185 | +ENTRY(NG4_retl_o2_plus_o4_plus_40_fp) | ||
1186 | + add %o4, 40, %o4 | ||
1187 | + ba,pt %xcc, __restore_asi_fp | ||
1188 | + add %o2, %o4, %o0 | ||
1189 | +ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp) | ||
1190 | +ENTRY(NG4_retl_o2_plus_o4_plus_48_fp) | ||
1191 | + add %o4, 48, %o4 | ||
1192 | + ba,pt %xcc, __restore_asi_fp | ||
1193 | + add %o2, %o4, %o0 | ||
1194 | +ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp) | ||
1195 | +ENTRY(NG4_retl_o2_plus_o4_plus_56_fp) | ||
1196 | + add %o4, 56, %o4 | ||
1197 | + ba,pt %xcc, __restore_asi_fp | ||
1198 | + add %o2, %o4, %o0 | ||
1199 | +ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp) | ||
1200 | +ENTRY(NG4_retl_o2_plus_o4_plus_64_fp) | ||
1201 | + add %o4, 64, %o4 | ||
1202 | + ba,pt %xcc, __restore_asi_fp | ||
1203 | + add %o2, %o4, %o0 | ||
1204 | +ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp) | ||
1205 | +#endif | ||
1206 | .align 64 | ||
1207 | |||
1208 | .globl FUNC_NAME | ||
1209 | @@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
1210 | brz,pt %g1, 51f | ||
1211 | sub %o2, %g1, %o2 | ||
1212 | |||
1213 | -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) | ||
1214 | + | ||
1215 | +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) | ||
1216 | add %o1, 1, %o1 | ||
1217 | subcc %g1, 1, %g1 | ||
1218 | add %o0, 1, %o0 | ||
1219 | bne,pt %icc, 1b | ||
1220 | - EX_ST(STORE(stb, %g2, %o0 - 0x01)) | ||
1221 | + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) | ||
1222 | |||
1223 | 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) | ||
1224 | LOAD(prefetch, %o1 + 0x080, #n_reads_strong) | ||
1225 | @@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
1226 | brz,pt %g1, .Llarge_aligned | ||
1227 | sub %o2, %g1, %o2 | ||
1228 | |||
1229 | -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) | ||
1230 | +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) | ||
1231 | add %o1, 8, %o1 | ||
1232 | subcc %g1, 8, %g1 | ||
1233 | add %o0, 8, %o0 | ||
1234 | bne,pt %icc, 1b | ||
1235 | - EX_ST(STORE(stx, %g2, %o0 - 0x08)) | ||
1236 | + EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8) | ||
1237 | |||
1238 | .Llarge_aligned: | ||
1239 | /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ | ||
1240 | andn %o2, 0x3f, %o4 | ||
1241 | sub %o2, %o4, %o2 | ||
1242 | |||
1243 | -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
1244 | +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4) | ||
1245 | add %o1, 0x40, %o1 | ||
1246 | - EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) | ||
1247 | + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4) | ||
1248 | subcc %o4, 0x40, %o4 | ||
1249 | - EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) | ||
1250 | - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) | ||
1251 | - EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) | ||
1252 | - EX_ST(STORE_INIT(%g1, %o0)) | ||
1253 | + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64) | ||
1254 | + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64) | ||
1255 | + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64) | ||
1256 | + EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64) | ||
1257 | add %o0, 0x08, %o0 | ||
1258 | - EX_ST(STORE_INIT(%g2, %o0)) | ||
1259 | + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56) | ||
1260 | add %o0, 0x08, %o0 | ||
1261 | - EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) | ||
1262 | - EX_ST(STORE_INIT(%g3, %o0)) | ||
1263 | + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48) | ||
1264 | + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48) | ||
1265 | add %o0, 0x08, %o0 | ||
1266 | - EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) | ||
1267 | - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) | ||
1268 | + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40) | ||
1269 | + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40) | ||
1270 | add %o0, 0x08, %o0 | ||
1271 | - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) | ||
1272 | - EX_ST(STORE_INIT(%o5, %o0)) | ||
1273 | + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32) | ||
1274 | + EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32) | ||
1275 | add %o0, 0x08, %o0 | ||
1276 | - EX_ST(STORE_INIT(%g2, %o0)) | ||
1277 | + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24) | ||
1278 | add %o0, 0x08, %o0 | ||
1279 | - EX_ST(STORE_INIT(%g3, %o0)) | ||
1280 | + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16) | ||
1281 | add %o0, 0x08, %o0 | ||
1282 | - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) | ||
1283 | + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8) | ||
1284 | add %o0, 0x08, %o0 | ||
1285 | bne,pt %icc, 1b | ||
1286 | LOAD(prefetch, %o1 + 0x200, #n_reads_strong) | ||
1287 | @@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
1288 | sub %o2, %o4, %o2 | ||
1289 | alignaddr %o1, %g0, %g1 | ||
1290 | add %o1, %o4, %o1 | ||
1291 | - EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0)) | ||
1292 | -1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2)) | ||
1293 | + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4) | ||
1294 | +1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4) | ||
1295 | subcc %o4, 0x40, %o4 | ||
1296 | - EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4)) | ||
1297 | - EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6)) | ||
1298 | - EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8)) | ||
1299 | - EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10)) | ||
1300 | - EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12)) | ||
1301 | - EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14)) | ||
1302 | + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64) | ||
1303 | + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64) | ||
1304 | + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64) | ||
1305 | + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64) | ||
1306 | + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64) | ||
1307 | + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64) | ||
1308 | faligndata %f0, %f2, %f16 | ||
1309 | - EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0)) | ||
1310 | + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64) | ||
1311 | faligndata %f2, %f4, %f18 | ||
1312 | add %g1, 0x40, %g1 | ||
1313 | faligndata %f4, %f6, %f20 | ||
1314 | @@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
1315 | faligndata %f10, %f12, %f26 | ||
1316 | faligndata %f12, %f14, %f28 | ||
1317 | faligndata %f14, %f0, %f30 | ||
1318 | - EX_ST_FP(STORE(std, %f16, %o0 + 0x00)) | ||
1319 | - EX_ST_FP(STORE(std, %f18, %o0 + 0x08)) | ||
1320 | - EX_ST_FP(STORE(std, %f20, %o0 + 0x10)) | ||
1321 | - EX_ST_FP(STORE(std, %f22, %o0 + 0x18)) | ||
1322 | - EX_ST_FP(STORE(std, %f24, %o0 + 0x20)) | ||
1323 | - EX_ST_FP(STORE(std, %f26, %o0 + 0x28)) | ||
1324 | - EX_ST_FP(STORE(std, %f28, %o0 + 0x30)) | ||
1325 | - EX_ST_FP(STORE(std, %f30, %o0 + 0x38)) | ||
1326 | + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64) | ||
1327 | + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56) | ||
1328 | + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48) | ||
1329 | + EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40) | ||
1330 | + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32) | ||
1331 | + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24) | ||
1332 | + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16) | ||
1333 | + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8) | ||
1334 | add %o0, 0x40, %o0 | ||
1335 | bne,pt %icc, 1b | ||
1336 | LOAD(prefetch, %g1 + 0x200, #n_reads_strong) | ||
1337 | @@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
1338 | andncc %o2, 0x20 - 1, %o5 | ||
1339 | be,pn %icc, 2f | ||
1340 | sub %o2, %o5, %o2 | ||
1341 | -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
1342 | - EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) | ||
1343 | - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) | ||
1344 | - EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) | ||
1345 | +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) | ||
1346 | + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5) | ||
1347 | + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5) | ||
1348 | + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5) | ||
1349 | add %o1, 0x20, %o1 | ||
1350 | subcc %o5, 0x20, %o5 | ||
1351 | - EX_ST(STORE(stx, %g1, %o0 + 0x00)) | ||
1352 | - EX_ST(STORE(stx, %g2, %o0 + 0x08)) | ||
1353 | - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) | ||
1354 | - EX_ST(STORE(stx, %o4, %o0 + 0x18)) | ||
1355 | + EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32) | ||
1356 | + EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24) | ||
1357 | + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24) | ||
1358 | + EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8) | ||
1359 | bne,pt %icc, 1b | ||
1360 | add %o0, 0x20, %o0 | ||
1361 | 2: andcc %o2, 0x18, %o5 | ||
1362 | be,pt %icc, 3f | ||
1363 | sub %o2, %o5, %o2 | ||
1364 | -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
1365 | + | ||
1366 | +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) | ||
1367 | add %o1, 0x08, %o1 | ||
1368 | add %o0, 0x08, %o0 | ||
1369 | subcc %o5, 0x08, %o5 | ||
1370 | bne,pt %icc, 1b | ||
1371 | - EX_ST(STORE(stx, %g1, %o0 - 0x08)) | ||
1372 | + EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8) | ||
1373 | 3: brz,pt %o2, .Lexit | ||
1374 | cmp %o2, 0x04 | ||
1375 | bl,pn %icc, .Ltiny | ||
1376 | nop | ||
1377 | - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) | ||
1378 | + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2) | ||
1379 | add %o1, 0x04, %o1 | ||
1380 | add %o0, 0x04, %o0 | ||
1381 | subcc %o2, 0x04, %o2 | ||
1382 | bne,pn %icc, .Ltiny | ||
1383 | - EX_ST(STORE(stw, %g1, %o0 - 0x04)) | ||
1384 | + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4) | ||
1385 | ba,a,pt %icc, .Lexit | ||
1386 | .Lmedium_unaligned: | ||
1387 | /* First get dest 8 byte aligned. */ | ||
1388 | @@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
1389 | brz,pt %g1, 2f | ||
1390 | sub %o2, %g1, %o2 | ||
1391 | |||
1392 | -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) | ||
1393 | +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) | ||
1394 | add %o1, 1, %o1 | ||
1395 | subcc %g1, 1, %g1 | ||
1396 | add %o0, 1, %o0 | ||
1397 | bne,pt %icc, 1b | ||
1398 | - EX_ST(STORE(stb, %g2, %o0 - 0x01)) | ||
1399 | + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) | ||
1400 | 2: | ||
1401 | and %o1, 0x7, %g1 | ||
1402 | brz,pn %g1, .Lmedium_noprefetch | ||
1403 | @@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
1404 | mov 64, %g2 | ||
1405 | sub %g2, %g1, %g2 | ||
1406 | andn %o1, 0x7, %o1 | ||
1407 | - EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) | ||
1408 | + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2) | ||
1409 | sllx %o4, %g1, %o4 | ||
1410 | andn %o2, 0x08 - 1, %o5 | ||
1411 | sub %o2, %o5, %o2 | ||
1412 | -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) | ||
1413 | +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5) | ||
1414 | add %o1, 0x08, %o1 | ||
1415 | subcc %o5, 0x08, %o5 | ||
1416 | srlx %g3, %g2, GLOBAL_SPARE | ||
1417 | or GLOBAL_SPARE, %o4, GLOBAL_SPARE | ||
1418 | - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) | ||
1419 | + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8) | ||
1420 | add %o0, 0x08, %o0 | ||
1421 | bne,pt %icc, 1b | ||
1422 | sllx %g3, %g1, %o4 | ||
1423 | @@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
1424 | ba,pt %icc, .Lsmall_unaligned | ||
1425 | |||
1426 | .Ltiny: | ||
1427 | - EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) | ||
1428 | + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) | ||
1429 | subcc %o2, 1, %o2 | ||
1430 | be,pn %icc, .Lexit | ||
1431 | - EX_ST(STORE(stb, %g1, %o0 + 0x00)) | ||
1432 | - EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) | ||
1433 | + EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1) | ||
1434 | + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2) | ||
1435 | subcc %o2, 1, %o2 | ||
1436 | be,pn %icc, .Lexit | ||
1437 | - EX_ST(STORE(stb, %g1, %o0 + 0x01)) | ||
1438 | - EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) | ||
1439 | + EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1) | ||
1440 | + EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2) | ||
1441 | ba,pt %icc, .Lexit | ||
1442 | - EX_ST(STORE(stb, %g1, %o0 + 0x02)) | ||
1443 | + EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2) | ||
1444 | |||
1445 | .Lsmall: | ||
1446 | andcc %g2, 0x3, %g0 | ||
1447 | @@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
1448 | andn %o2, 0x4 - 1, %o5 | ||
1449 | sub %o2, %o5, %o2 | ||
1450 | 1: | ||
1451 | - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) | ||
1452 | + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) | ||
1453 | add %o1, 0x04, %o1 | ||
1454 | subcc %o5, 0x04, %o5 | ||
1455 | add %o0, 0x04, %o0 | ||
1456 | bne,pt %icc, 1b | ||
1457 | - EX_ST(STORE(stw, %g1, %o0 - 0x04)) | ||
1458 | + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4) | ||
1459 | brz,pt %o2, .Lexit | ||
1460 | nop | ||
1461 | ba,a,pt %icc, .Ltiny | ||
1462 | |||
1463 | .Lsmall_unaligned: | ||
1464 | -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) | ||
1465 | +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) | ||
1466 | add %o1, 1, %o1 | ||
1467 | add %o0, 1, %o0 | ||
1468 | subcc %o2, 1, %o2 | ||
1469 | bne,pt %icc, 1b | ||
1470 | - EX_ST(STORE(stb, %g1, %o0 - 0x01)) | ||
1471 | + EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) | ||
1472 | ba,a,pt %icc, .Lexit | ||
1473 | .size FUNC_NAME, .-FUNC_NAME | ||
1474 | diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S | ||
1475 | index 5d1e4d1ac21e..9cd42fcbc781 100644 | ||
1476 | --- a/arch/sparc/lib/NGcopy_from_user.S | ||
1477 | +++ b/arch/sparc/lib/NGcopy_from_user.S | ||
1478 | @@ -3,11 +3,11 @@ | ||
1479 | * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) | ||
1480 | */ | ||
1481 | |||
1482 | -#define EX_LD(x) \ | ||
1483 | +#define EX_LD(x,y) \ | ||
1484 | 98: x; \ | ||
1485 | .section __ex_table,"a";\ | ||
1486 | .align 4; \ | ||
1487 | - .word 98b, __ret_one_asi;\ | ||
1488 | + .word 98b, y; \ | ||
1489 | .text; \ | ||
1490 | .align 4; | ||
1491 | |||
1492 | diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S | ||
1493 | index ff630dcb273c..5c358afd464e 100644 | ||
1494 | --- a/arch/sparc/lib/NGcopy_to_user.S | ||
1495 | +++ b/arch/sparc/lib/NGcopy_to_user.S | ||
1496 | @@ -3,11 +3,11 @@ | ||
1497 | * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) | ||
1498 | */ | ||
1499 | |||
1500 | -#define EX_ST(x) \ | ||
1501 | +#define EX_ST(x,y) \ | ||
1502 | 98: x; \ | ||
1503 | .section __ex_table,"a";\ | ||
1504 | .align 4; \ | ||
1505 | - .word 98b, __ret_one_asi;\ | ||
1506 | + .word 98b, y; \ | ||
1507 | .text; \ | ||
1508 | .align 4; | ||
1509 | |||
1510 | diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S | ||
1511 | index 96a14caf6966..d88c4ed50a00 100644 | ||
1512 | --- a/arch/sparc/lib/NGmemcpy.S | ||
1513 | +++ b/arch/sparc/lib/NGmemcpy.S | ||
1514 | @@ -4,6 +4,7 @@ | ||
1515 | */ | ||
1516 | |||
1517 | #ifdef __KERNEL__ | ||
1518 | +#include <linux/linkage.h> | ||
1519 | #include <asm/asi.h> | ||
1520 | #include <asm/thread_info.h> | ||
1521 | #define GLOBAL_SPARE %g7 | ||
1522 | @@ -27,15 +28,11 @@ | ||
1523 | #endif | ||
1524 | |||
1525 | #ifndef EX_LD | ||
1526 | -#define EX_LD(x) x | ||
1527 | +#define EX_LD(x,y) x | ||
1528 | #endif | ||
1529 | |||
1530 | #ifndef EX_ST | ||
1531 | -#define EX_ST(x) x | ||
1532 | -#endif | ||
1533 | - | ||
1534 | -#ifndef EX_RETVAL | ||
1535 | -#define EX_RETVAL(x) x | ||
1536 | +#define EX_ST(x,y) x | ||
1537 | #endif | ||
1538 | |||
1539 | #ifndef LOAD | ||
1540 | @@ -79,6 +76,92 @@ | ||
1541 | .register %g3,#scratch | ||
1542 | |||
1543 | .text | ||
1544 | +#ifndef EX_RETVAL | ||
1545 | +#define EX_RETVAL(x) x | ||
1546 | +__restore_asi: | ||
1547 | + ret | ||
1548 | + wr %g0, ASI_AIUS, %asi | ||
1549 | + restore | ||
1550 | +ENTRY(NG_ret_i2_plus_i4_plus_1) | ||
1551 | + ba,pt %xcc, __restore_asi | ||
1552 | + add %i2, %i5, %i0 | ||
1553 | +ENDPROC(NG_ret_i2_plus_i4_plus_1) | ||
1554 | +ENTRY(NG_ret_i2_plus_g1) | ||
1555 | + ba,pt %xcc, __restore_asi | ||
1556 | + add %i2, %g1, %i0 | ||
1557 | +ENDPROC(NG_ret_i2_plus_g1) | ||
1558 | +ENTRY(NG_ret_i2_plus_g1_minus_8) | ||
1559 | + sub %g1, 8, %g1 | ||
1560 | + ba,pt %xcc, __restore_asi | ||
1561 | + add %i2, %g1, %i0 | ||
1562 | +ENDPROC(NG_ret_i2_plus_g1_minus_8) | ||
1563 | +ENTRY(NG_ret_i2_plus_g1_minus_16) | ||
1564 | + sub %g1, 16, %g1 | ||
1565 | + ba,pt %xcc, __restore_asi | ||
1566 | + add %i2, %g1, %i0 | ||
1567 | +ENDPROC(NG_ret_i2_plus_g1_minus_16) | ||
1568 | +ENTRY(NG_ret_i2_plus_g1_minus_24) | ||
1569 | + sub %g1, 24, %g1 | ||
1570 | + ba,pt %xcc, __restore_asi | ||
1571 | + add %i2, %g1, %i0 | ||
1572 | +ENDPROC(NG_ret_i2_plus_g1_minus_24) | ||
1573 | +ENTRY(NG_ret_i2_plus_g1_minus_32) | ||
1574 | + sub %g1, 32, %g1 | ||
1575 | + ba,pt %xcc, __restore_asi | ||
1576 | + add %i2, %g1, %i0 | ||
1577 | +ENDPROC(NG_ret_i2_plus_g1_minus_32) | ||
1578 | +ENTRY(NG_ret_i2_plus_g1_minus_40) | ||
1579 | + sub %g1, 40, %g1 | ||
1580 | + ba,pt %xcc, __restore_asi | ||
1581 | + add %i2, %g1, %i0 | ||
1582 | +ENDPROC(NG_ret_i2_plus_g1_minus_40) | ||
1583 | +ENTRY(NG_ret_i2_plus_g1_minus_48) | ||
1584 | + sub %g1, 48, %g1 | ||
1585 | + ba,pt %xcc, __restore_asi | ||
1586 | + add %i2, %g1, %i0 | ||
1587 | +ENDPROC(NG_ret_i2_plus_g1_minus_48) | ||
1588 | +ENTRY(NG_ret_i2_plus_g1_minus_56) | ||
1589 | + sub %g1, 56, %g1 | ||
1590 | + ba,pt %xcc, __restore_asi | ||
1591 | + add %i2, %g1, %i0 | ||
1592 | +ENDPROC(NG_ret_i2_plus_g1_minus_56) | ||
1593 | +ENTRY(NG_ret_i2_plus_i4) | ||
1594 | + ba,pt %xcc, __restore_asi | ||
1595 | + add %i2, %i4, %i0 | ||
1596 | +ENDPROC(NG_ret_i2_plus_i4) | ||
1597 | +ENTRY(NG_ret_i2_plus_i4_minus_8) | ||
1598 | + sub %i4, 8, %i4 | ||
1599 | + ba,pt %xcc, __restore_asi | ||
1600 | + add %i2, %i4, %i0 | ||
1601 | +ENDPROC(NG_ret_i2_plus_i4_minus_8) | ||
1602 | +ENTRY(NG_ret_i2_plus_8) | ||
1603 | + ba,pt %xcc, __restore_asi | ||
1604 | + add %i2, 8, %i0 | ||
1605 | +ENDPROC(NG_ret_i2_plus_8) | ||
1606 | +ENTRY(NG_ret_i2_plus_4) | ||
1607 | + ba,pt %xcc, __restore_asi | ||
1608 | + add %i2, 4, %i0 | ||
1609 | +ENDPROC(NG_ret_i2_plus_4) | ||
1610 | +ENTRY(NG_ret_i2_plus_1) | ||
1611 | + ba,pt %xcc, __restore_asi | ||
1612 | + add %i2, 1, %i0 | ||
1613 | +ENDPROC(NG_ret_i2_plus_1) | ||
1614 | +ENTRY(NG_ret_i2_plus_g1_plus_1) | ||
1615 | + add %g1, 1, %g1 | ||
1616 | + ba,pt %xcc, __restore_asi | ||
1617 | + add %i2, %g1, %i0 | ||
1618 | +ENDPROC(NG_ret_i2_plus_g1_plus_1) | ||
1619 | +ENTRY(NG_ret_i2) | ||
1620 | + ba,pt %xcc, __restore_asi | ||
1621 | + mov %i2, %i0 | ||
1622 | +ENDPROC(NG_ret_i2) | ||
1623 | +ENTRY(NG_ret_i2_and_7_plus_i4) | ||
1624 | + and %i2, 7, %i2 | ||
1625 | + ba,pt %xcc, __restore_asi | ||
1626 | + add %i2, %i4, %i0 | ||
1627 | +ENDPROC(NG_ret_i2_and_7_plus_i4) | ||
1628 | +#endif | ||
1629 | + | ||
1630 | .align 64 | ||
1631 | |||
1632 | .globl FUNC_NAME | ||
1633 | @@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | ||
1634 | sub %g0, %i4, %i4 ! bytes to align dst | ||
1635 | sub %i2, %i4, %i2 | ||
1636 | 1: subcc %i4, 1, %i4 | ||
1637 | - EX_LD(LOAD(ldub, %i1, %g1)) | ||
1638 | - EX_ST(STORE(stb, %g1, %o0)) | ||
1639 | + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1) | ||
1640 | + EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1) | ||
1641 | add %i1, 1, %i1 | ||
1642 | bne,pt %XCC, 1b | ||
1643 | add %o0, 1, %o0 | ||
1644 | @@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | ||
1645 | and %i4, 0x7, GLOBAL_SPARE | ||
1646 | sll GLOBAL_SPARE, 3, GLOBAL_SPARE | ||
1647 | mov 64, %i5 | ||
1648 | - EX_LD(LOAD_TWIN(%i1, %g2, %g3)) | ||
1649 | + EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1) | ||
1650 | sub %i5, GLOBAL_SPARE, %i5 | ||
1651 | mov 16, %o4 | ||
1652 | mov 32, %o5 | ||
1653 | @@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | ||
1654 | srlx WORD3, PRE_SHIFT, TMP; \ | ||
1655 | or WORD2, TMP, WORD2; | ||
1656 | |||
1657 | -8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) | ||
1658 | +8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) | ||
1659 | MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) | ||
1660 | LOAD(prefetch, %i1 + %i3, #one_read) | ||
1661 | |||
1662 | - EX_ST(STORE_INIT(%g2, %o0 + 0x00)) | ||
1663 | - EX_ST(STORE_INIT(%g3, %o0 + 0x08)) | ||
1664 | + EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1) | ||
1665 | + EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) | ||
1666 | |||
1667 | - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) | ||
1668 | + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) | ||
1669 | MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) | ||
1670 | |||
1671 | - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) | ||
1672 | - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) | ||
1673 | + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) | ||
1674 | + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) | ||
1675 | |||
1676 | - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) | ||
1677 | + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) | ||
1678 | MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) | ||
1679 | |||
1680 | - EX_ST(STORE_INIT(%g2, %o0 + 0x20)) | ||
1681 | - EX_ST(STORE_INIT(%g3, %o0 + 0x28)) | ||
1682 | + EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) | ||
1683 | + EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) | ||
1684 | |||
1685 | - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) | ||
1686 | + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) | ||
1687 | add %i1, 64, %i1 | ||
1688 | MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) | ||
1689 | |||
1690 | - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) | ||
1691 | - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) | ||
1692 | + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) | ||
1693 | + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) | ||
1694 | |||
1695 | subcc %g1, 64, %g1 | ||
1696 | bne,pt %XCC, 8b | ||
1697 | @@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | ||
1698 | ba,pt %XCC, 60f | ||
1699 | add %i1, %i4, %i1 | ||
1700 | |||
1701 | -9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) | ||
1702 | +9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) | ||
1703 | MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) | ||
1704 | LOAD(prefetch, %i1 + %i3, #one_read) | ||
1705 | |||
1706 | - EX_ST(STORE_INIT(%g3, %o0 + 0x00)) | ||
1707 | - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) | ||
1708 | + EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1) | ||
1709 | + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) | ||
1710 | |||
1711 | - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) | ||
1712 | + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) | ||
1713 | MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) | ||
1714 | |||
1715 | - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) | ||
1716 | - EX_ST(STORE_INIT(%g2, %o0 + 0x18)) | ||
1717 | + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) | ||
1718 | + EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) | ||
1719 | |||
1720 | - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) | ||
1721 | + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) | ||
1722 | MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) | ||
1723 | |||
1724 | - EX_ST(STORE_INIT(%g3, %o0 + 0x20)) | ||
1725 | - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) | ||
1726 | + EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) | ||
1727 | + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) | ||
1728 | |||
1729 | - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) | ||
1730 | + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) | ||
1731 | add %i1, 64, %i1 | ||
1732 | MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) | ||
1733 | |||
1734 | - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) | ||
1735 | - EX_ST(STORE_INIT(%g2, %o0 + 0x38)) | ||
1736 | + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) | ||
1737 | + EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) | ||
1738 | |||
1739 | subcc %g1, 64, %g1 | ||
1740 | bne,pt %XCC, 9b | ||
1741 | @@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | ||
1742 | * one twin load ahead, then add 8 back into source when | ||
1743 | * we finish the loop. | ||
1744 | */ | ||
1745 | - EX_LD(LOAD_TWIN(%i1, %o4, %o5)) | ||
1746 | + EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1) | ||
1747 | mov 16, %o7 | ||
1748 | mov 32, %g2 | ||
1749 | mov 48, %g3 | ||
1750 | mov 64, %o1 | ||
1751 | -1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) | ||
1752 | +1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) | ||
1753 | LOAD(prefetch, %i1 + %o1, #one_read) | ||
1754 | - EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line | ||
1755 | - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) | ||
1756 | - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) | ||
1757 | - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) | ||
1758 | - EX_ST(STORE_INIT(%o4, %o0 + 0x18)) | ||
1759 | - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) | ||
1760 | - EX_ST(STORE_INIT(%o5, %o0 + 0x20)) | ||
1761 | - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) | ||
1762 | - EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) | ||
1763 | + EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line | ||
1764 | + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) | ||
1765 | + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) | ||
1766 | + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) | ||
1767 | + EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) | ||
1768 | + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) | ||
1769 | + EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) | ||
1770 | + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) | ||
1771 | + EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48) | ||
1772 | add %i1, 64, %i1 | ||
1773 | - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) | ||
1774 | - EX_ST(STORE_INIT(%o4, %o0 + 0x38)) | ||
1775 | + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) | ||
1776 | + EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) | ||
1777 | subcc %g1, 64, %g1 | ||
1778 | bne,pt %XCC, 1b | ||
1779 | add %o0, 64, %o0 | ||
1780 | @@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | ||
1781 | mov 32, %g2 | ||
1782 | mov 48, %g3 | ||
1783 | mov 64, %o1 | ||
1784 | -1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) | ||
1785 | - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) | ||
1786 | +1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1) | ||
1787 | + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) | ||
1788 | LOAD(prefetch, %i1 + %o1, #one_read) | ||
1789 | - EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line | ||
1790 | - EX_ST(STORE_INIT(%o5, %o0 + 0x08)) | ||
1791 | - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) | ||
1792 | - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) | ||
1793 | - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) | ||
1794 | - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) | ||
1795 | + EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line | ||
1796 | + EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) | ||
1797 | + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) | ||
1798 | + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) | ||
1799 | + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) | ||
1800 | + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) | ||
1801 | add %i1, 64, %i1 | ||
1802 | - EX_ST(STORE_INIT(%o4, %o0 + 0x20)) | ||
1803 | - EX_ST(STORE_INIT(%o5, %o0 + 0x28)) | ||
1804 | - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) | ||
1805 | - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) | ||
1806 | + EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) | ||
1807 | + EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) | ||
1808 | + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) | ||
1809 | + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) | ||
1810 | subcc %g1, 64, %g1 | ||
1811 | bne,pt %XCC, 1b | ||
1812 | add %o0, 64, %o0 | ||
1813 | @@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | ||
1814 | andn %i2, 0xf, %i4 | ||
1815 | and %i2, 0xf, %i2 | ||
1816 | 1: subcc %i4, 0x10, %i4 | ||
1817 | - EX_LD(LOAD(ldx, %i1, %o4)) | ||
1818 | + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4) | ||
1819 | add %i1, 0x08, %i1 | ||
1820 | - EX_LD(LOAD(ldx, %i1, %g1)) | ||
1821 | + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4) | ||
1822 | sub %i1, 0x08, %i1 | ||
1823 | - EX_ST(STORE(stx, %o4, %i1 + %i3)) | ||
1824 | + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4) | ||
1825 | add %i1, 0x8, %i1 | ||
1826 | - EX_ST(STORE(stx, %g1, %i1 + %i3)) | ||
1827 | + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8) | ||
1828 | bgu,pt %XCC, 1b | ||
1829 | add %i1, 0x8, %i1 | ||
1830 | 73: andcc %i2, 0x8, %g0 | ||
1831 | be,pt %XCC, 1f | ||
1832 | nop | ||
1833 | sub %i2, 0x8, %i2 | ||
1834 | - EX_LD(LOAD(ldx, %i1, %o4)) | ||
1835 | - EX_ST(STORE(stx, %o4, %i1 + %i3)) | ||
1836 | + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8) | ||
1837 | + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8) | ||
1838 | add %i1, 0x8, %i1 | ||
1839 | 1: andcc %i2, 0x4, %g0 | ||
1840 | be,pt %XCC, 1f | ||
1841 | nop | ||
1842 | sub %i2, 0x4, %i2 | ||
1843 | - EX_LD(LOAD(lduw, %i1, %i5)) | ||
1844 | - EX_ST(STORE(stw, %i5, %i1 + %i3)) | ||
1845 | + EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4) | ||
1846 | + EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4) | ||
1847 | add %i1, 0x4, %i1 | ||
1848 | 1: cmp %i2, 0 | ||
1849 | be,pt %XCC, 85f | ||
1850 | @@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | ||
1851 | sub %i2, %g1, %i2 | ||
1852 | |||
1853 | 1: subcc %g1, 1, %g1 | ||
1854 | - EX_LD(LOAD(ldub, %i1, %i5)) | ||
1855 | - EX_ST(STORE(stb, %i5, %i1 + %i3)) | ||
1856 | + EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1) | ||
1857 | + EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1) | ||
1858 | bgu,pt %icc, 1b | ||
1859 | add %i1, 1, %i1 | ||
1860 | |||
1861 | @@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | ||
1862 | |||
1863 | 8: mov 64, %i3 | ||
1864 | andn %i1, 0x7, %i1 | ||
1865 | - EX_LD(LOAD(ldx, %i1, %g2)) | ||
1866 | + EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2) | ||
1867 | sub %i3, %g1, %i3 | ||
1868 | andn %i2, 0x7, %i4 | ||
1869 | sllx %g2, %g1, %g2 | ||
1870 | 1: add %i1, 0x8, %i1 | ||
1871 | - EX_LD(LOAD(ldx, %i1, %g3)) | ||
1872 | + EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4) | ||
1873 | subcc %i4, 0x8, %i4 | ||
1874 | srlx %g3, %i3, %i5 | ||
1875 | or %i5, %g2, %i5 | ||
1876 | - EX_ST(STORE(stx, %i5, %o0)) | ||
1877 | + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4) | ||
1878 | add %o0, 0x8, %o0 | ||
1879 | bgu,pt %icc, 1b | ||
1880 | sllx %g3, %g1, %g2 | ||
1881 | @@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | ||
1882 | |||
1883 | 1: | ||
1884 | subcc %i2, 4, %i2 | ||
1885 | - EX_LD(LOAD(lduw, %i1, %g1)) | ||
1886 | - EX_ST(STORE(stw, %g1, %i1 + %i3)) | ||
1887 | + EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4) | ||
1888 | + EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4) | ||
1889 | bgu,pt %XCC, 1b | ||
1890 | add %i1, 4, %i1 | ||
1891 | |||
1892 | @@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | ||
1893 | .align 32 | ||
1894 | 90: | ||
1895 | subcc %i2, 1, %i2 | ||
1896 | - EX_LD(LOAD(ldub, %i1, %g1)) | ||
1897 | - EX_ST(STORE(stb, %g1, %i1 + %i3)) | ||
1898 | + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1) | ||
1899 | + EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1) | ||
1900 | bgu,pt %XCC, 90b | ||
1901 | add %i1, 1, %i1 | ||
1902 | ret | ||
1903 | diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S | ||
1904 | index ecc5692fa2b4..bb6ff73229e3 100644 | ||
1905 | --- a/arch/sparc/lib/U1copy_from_user.S | ||
1906 | +++ b/arch/sparc/lib/U1copy_from_user.S | ||
1907 | @@ -3,19 +3,19 @@ | ||
1908 | * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) | ||
1909 | */ | ||
1910 | |||
1911 | -#define EX_LD(x) \ | ||
1912 | +#define EX_LD(x,y) \ | ||
1913 | 98: x; \ | ||
1914 | .section __ex_table,"a";\ | ||
1915 | .align 4; \ | ||
1916 | - .word 98b, __retl_one; \ | ||
1917 | + .word 98b, y; \ | ||
1918 | .text; \ | ||
1919 | .align 4; | ||
1920 | |||
1921 | -#define EX_LD_FP(x) \ | ||
1922 | +#define EX_LD_FP(x,y) \ | ||
1923 | 98: x; \ | ||
1924 | .section __ex_table,"a";\ | ||
1925 | .align 4; \ | ||
1926 | - .word 98b, __retl_one_fp;\ | ||
1927 | + .word 98b, y; \ | ||
1928 | .text; \ | ||
1929 | .align 4; | ||
1930 | |||
1931 | diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S | ||
1932 | index 9eea392e44d4..ed92ce739558 100644 | ||
1933 | --- a/arch/sparc/lib/U1copy_to_user.S | ||
1934 | +++ b/arch/sparc/lib/U1copy_to_user.S | ||
1935 | @@ -3,19 +3,19 @@ | ||
1936 | * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) | ||
1937 | */ | ||
1938 | |||
1939 | -#define EX_ST(x) \ | ||
1940 | +#define EX_ST(x,y) \ | ||
1941 | 98: x; \ | ||
1942 | .section __ex_table,"a";\ | ||
1943 | .align 4; \ | ||
1944 | - .word 98b, __retl_one; \ | ||
1945 | + .word 98b, y; \ | ||
1946 | .text; \ | ||
1947 | .align 4; | ||
1948 | |||
1949 | -#define EX_ST_FP(x) \ | ||
1950 | +#define EX_ST_FP(x,y) \ | ||
1951 | 98: x; \ | ||
1952 | .section __ex_table,"a";\ | ||
1953 | .align 4; \ | ||
1954 | - .word 98b, __retl_one_fp;\ | ||
1955 | + .word 98b, y; \ | ||
1956 | .text; \ | ||
1957 | .align 4; | ||
1958 | |||
1959 | diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S | ||
1960 | index 3e6209ebb7d7..f30d2ab2c371 100644 | ||
1961 | --- a/arch/sparc/lib/U1memcpy.S | ||
1962 | +++ b/arch/sparc/lib/U1memcpy.S | ||
1963 | @@ -5,6 +5,7 @@ | ||
1964 | */ | ||
1965 | |||
1966 | #ifdef __KERNEL__ | ||
1967 | +#include <linux/linkage.h> | ||
1968 | #include <asm/visasm.h> | ||
1969 | #include <asm/asi.h> | ||
1970 | #define GLOBAL_SPARE g7 | ||
1971 | @@ -23,21 +24,17 @@ | ||
1972 | #endif | ||
1973 | |||
1974 | #ifndef EX_LD | ||
1975 | -#define EX_LD(x) x | ||
1976 | +#define EX_LD(x,y) x | ||
1977 | #endif | ||
1978 | #ifndef EX_LD_FP | ||
1979 | -#define EX_LD_FP(x) x | ||
1980 | +#define EX_LD_FP(x,y) x | ||
1981 | #endif | ||
1982 | |||
1983 | #ifndef EX_ST | ||
1984 | -#define EX_ST(x) x | ||
1985 | +#define EX_ST(x,y) x | ||
1986 | #endif | ||
1987 | #ifndef EX_ST_FP | ||
1988 | -#define EX_ST_FP(x) x | ||
1989 | -#endif | ||
1990 | - | ||
1991 | -#ifndef EX_RETVAL | ||
1992 | -#define EX_RETVAL(x) x | ||
1993 | +#define EX_ST_FP(x,y) x | ||
1994 | #endif | ||
1995 | |||
1996 | #ifndef LOAD | ||
1997 | @@ -78,53 +75,169 @@ | ||
1998 | faligndata %f7, %f8, %f60; \ | ||
1999 | faligndata %f8, %f9, %f62; | ||
2000 | |||
2001 | -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ | ||
2002 | - EX_LD_FP(LOAD_BLK(%src, %fdest)); \ | ||
2003 | - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ | ||
2004 | - add %src, 0x40, %src; \ | ||
2005 | - subcc %len, 0x40, %len; \ | ||
2006 | - be,pn %xcc, jmptgt; \ | ||
2007 | - add %dest, 0x40, %dest; \ | ||
2008 | - | ||
2009 | -#define LOOP_CHUNK1(src, dest, len, branch_dest) \ | ||
2010 | - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) | ||
2011 | -#define LOOP_CHUNK2(src, dest, len, branch_dest) \ | ||
2012 | - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) | ||
2013 | -#define LOOP_CHUNK3(src, dest, len, branch_dest) \ | ||
2014 | - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) | ||
2015 | +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \ | ||
2016 | + EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \ | ||
2017 | + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ | ||
2018 | + add %src, 0x40, %src; \ | ||
2019 | + subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \ | ||
2020 | + be,pn %xcc, jmptgt; \ | ||
2021 | + add %dest, 0x40, %dest; \ | ||
2022 | + | ||
2023 | +#define LOOP_CHUNK1(src, dest, branch_dest) \ | ||
2024 | + MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest) | ||
2025 | +#define LOOP_CHUNK2(src, dest, branch_dest) \ | ||
2026 | + MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest) | ||
2027 | +#define LOOP_CHUNK3(src, dest, branch_dest) \ | ||
2028 | + MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest) | ||
2029 | |||
2030 | #define DO_SYNC membar #Sync; | ||
2031 | #define STORE_SYNC(dest, fsrc) \ | ||
2032 | - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ | ||
2033 | + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ | ||
2034 | add %dest, 0x40, %dest; \ | ||
2035 | DO_SYNC | ||
2036 | |||
2037 | #define STORE_JUMP(dest, fsrc, target) \ | ||
2038 | - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ | ||
2039 | + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \ | ||
2040 | add %dest, 0x40, %dest; \ | ||
2041 | ba,pt %xcc, target; \ | ||
2042 | nop; | ||
2043 | |||
2044 | -#define FINISH_VISCHUNK(dest, f0, f1, left) \ | ||
2045 | - subcc %left, 8, %left;\ | ||
2046 | - bl,pn %xcc, 95f; \ | ||
2047 | - faligndata %f0, %f1, %f48; \ | ||
2048 | - EX_ST_FP(STORE(std, %f48, %dest)); \ | ||
2049 | +#define FINISH_VISCHUNK(dest, f0, f1) \ | ||
2050 | + subcc %g3, 8, %g3; \ | ||
2051 | + bl,pn %xcc, 95f; \ | ||
2052 | + faligndata %f0, %f1, %f48; \ | ||
2053 | + EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \ | ||
2054 | add %dest, 8, %dest; | ||
2055 | |||
2056 | -#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ | ||
2057 | - subcc %left, 8, %left; \ | ||
2058 | - bl,pn %xcc, 95f; \ | ||
2059 | +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ | ||
2060 | + subcc %g3, 8, %g3; \ | ||
2061 | + bl,pn %xcc, 95f; \ | ||
2062 | fsrc2 %f0, %f1; | ||
2063 | |||
2064 | -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ | ||
2065 | - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ | ||
2066 | +#define UNEVEN_VISCHUNK(dest, f0, f1) \ | ||
2067 | + UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ | ||
2068 | ba,a,pt %xcc, 93f; | ||
2069 | |||
2070 | .register %g2,#scratch | ||
2071 | .register %g3,#scratch | ||
2072 | |||
2073 | .text | ||
2074 | +#ifndef EX_RETVAL | ||
2075 | +#define EX_RETVAL(x) x | ||
2076 | +ENTRY(U1_g1_1_fp) | ||
2077 | + VISExitHalf | ||
2078 | + add %g1, 1, %g1 | ||
2079 | + add %g1, %g2, %g1 | ||
2080 | + retl | ||
2081 | + add %g1, %o2, %o0 | ||
2082 | +ENDPROC(U1_g1_1_fp) | ||
2083 | +ENTRY(U1_g2_0_fp) | ||
2084 | + VISExitHalf | ||
2085 | + retl | ||
2086 | + add %g2, %o2, %o0 | ||
2087 | +ENDPROC(U1_g2_0_fp) | ||
2088 | +ENTRY(U1_g2_8_fp) | ||
2089 | + VISExitHalf | ||
2090 | + add %g2, 8, %g2 | ||
2091 | + retl | ||
2092 | + add %g2, %o2, %o0 | ||
2093 | +ENDPROC(U1_g2_8_fp) | ||
2094 | +ENTRY(U1_gs_0_fp) | ||
2095 | + VISExitHalf | ||
2096 | + add %GLOBAL_SPARE, %g3, %o0 | ||
2097 | + retl | ||
2098 | + add %o0, %o2, %o0 | ||
2099 | +ENDPROC(U1_gs_0_fp) | ||
2100 | +ENTRY(U1_gs_80_fp) | ||
2101 | + VISExitHalf | ||
2102 | + add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE | ||
2103 | + add %GLOBAL_SPARE, %g3, %o0 | ||
2104 | + retl | ||
2105 | + add %o0, %o2, %o0 | ||
2106 | +ENDPROC(U1_gs_80_fp) | ||
2107 | +ENTRY(U1_gs_40_fp) | ||
2108 | + VISExitHalf | ||
2109 | + add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE | ||
2110 | + add %GLOBAL_SPARE, %g3, %o0 | ||
2111 | + retl | ||
2112 | + add %o0, %o2, %o0 | ||
2113 | +ENDPROC(U1_gs_40_fp) | ||
2114 | +ENTRY(U1_g3_0_fp) | ||
2115 | + VISExitHalf | ||
2116 | + retl | ||
2117 | + add %g3, %o2, %o0 | ||
2118 | +ENDPROC(U1_g3_0_fp) | ||
2119 | +ENTRY(U1_g3_8_fp) | ||
2120 | + VISExitHalf | ||
2121 | + add %g3, 8, %g3 | ||
2122 | + retl | ||
2123 | + add %g3, %o2, %o0 | ||
2124 | +ENDPROC(U1_g3_8_fp) | ||
2125 | +ENTRY(U1_o2_0_fp) | ||
2126 | + VISExitHalf | ||
2127 | + retl | ||
2128 | + mov %o2, %o0 | ||
2129 | +ENDPROC(U1_o2_0_fp) | ||
2130 | +ENTRY(U1_o2_1_fp) | ||
2131 | + VISExitHalf | ||
2132 | + retl | ||
2133 | + add %o2, 1, %o0 | ||
2134 | +ENDPROC(U1_o2_1_fp) | ||
2135 | +ENTRY(U1_gs_0) | ||
2136 | + VISExitHalf | ||
2137 | + retl | ||
2138 | + add %GLOBAL_SPARE, %o2, %o0 | ||
2139 | +ENDPROC(U1_gs_0) | ||
2140 | +ENTRY(U1_gs_8) | ||
2141 | + VISExitHalf | ||
2142 | + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE | ||
2143 | + retl | ||
2144 | + add %GLOBAL_SPARE, 0x8, %o0 | ||
2145 | +ENDPROC(U1_gs_8) | ||
2146 | +ENTRY(U1_gs_10) | ||
2147 | + VISExitHalf | ||
2148 | + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE | ||
2149 | + retl | ||
2150 | + add %GLOBAL_SPARE, 0x10, %o0 | ||
2151 | +ENDPROC(U1_gs_10) | ||
2152 | +ENTRY(U1_o2_0) | ||
2153 | + retl | ||
2154 | + mov %o2, %o0 | ||
2155 | +ENDPROC(U1_o2_0) | ||
2156 | +ENTRY(U1_o2_8) | ||
2157 | + retl | ||
2158 | + add %o2, 8, %o0 | ||
2159 | +ENDPROC(U1_o2_8) | ||
2160 | +ENTRY(U1_o2_4) | ||
2161 | + retl | ||
2162 | + add %o2, 4, %o0 | ||
2163 | +ENDPROC(U1_o2_4) | ||
2164 | +ENTRY(U1_o2_1) | ||
2165 | + retl | ||
2166 | + add %o2, 1, %o0 | ||
2167 | +ENDPROC(U1_o2_1) | ||
2168 | +ENTRY(U1_g1_0) | ||
2169 | + retl | ||
2170 | + add %g1, %o2, %o0 | ||
2171 | +ENDPROC(U1_g1_0) | ||
2172 | +ENTRY(U1_g1_1) | ||
2173 | + add %g1, 1, %g1 | ||
2174 | + retl | ||
2175 | + add %g1, %o2, %o0 | ||
2176 | +ENDPROC(U1_g1_1) | ||
2177 | +ENTRY(U1_gs_0_o2_adj) | ||
2178 | + and %o2, 7, %o2 | ||
2179 | + retl | ||
2180 | + add %GLOBAL_SPARE, %o2, %o0 | ||
2181 | +ENDPROC(U1_gs_0_o2_adj) | ||
2182 | +ENTRY(U1_gs_8_o2_adj) | ||
2183 | + and %o2, 7, %o2 | ||
2184 | + add %GLOBAL_SPARE, 8, %GLOBAL_SPARE | ||
2185 | + retl | ||
2186 | + add %GLOBAL_SPARE, %o2, %o0 | ||
2187 | +ENDPROC(U1_gs_8_o2_adj) | ||
2188 | +#endif | ||
2189 | + | ||
2190 | .align 64 | ||
2191 | |||
2192 | .globl FUNC_NAME | ||
2193 | @@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2194 | and %g2, 0x38, %g2 | ||
2195 | |||
2196 | 1: subcc %g1, 0x1, %g1 | ||
2197 | - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) | ||
2198 | - EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) | ||
2199 | + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp) | ||
2200 | + EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp) | ||
2201 | bgu,pt %XCC, 1b | ||
2202 | add %o1, 0x1, %o1 | ||
2203 | |||
2204 | @@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2205 | be,pt %icc, 3f | ||
2206 | alignaddr %o1, %g0, %o1 | ||
2207 | |||
2208 | - EX_LD_FP(LOAD(ldd, %o1, %f4)) | ||
2209 | -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) | ||
2210 | + EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp) | ||
2211 | +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp) | ||
2212 | add %o1, 0x8, %o1 | ||
2213 | subcc %g2, 0x8, %g2 | ||
2214 | faligndata %f4, %f6, %f0 | ||
2215 | - EX_ST_FP(STORE(std, %f0, %o0)) | ||
2216 | + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) | ||
2217 | be,pn %icc, 3f | ||
2218 | add %o0, 0x8, %o0 | ||
2219 | |||
2220 | - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) | ||
2221 | + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp) | ||
2222 | add %o1, 0x8, %o1 | ||
2223 | subcc %g2, 0x8, %g2 | ||
2224 | faligndata %f6, %f4, %f0 | ||
2225 | - EX_ST_FP(STORE(std, %f0, %o0)) | ||
2226 | + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) | ||
2227 | bne,pt %icc, 1b | ||
2228 | add %o0, 0x8, %o0 | ||
2229 | |||
2230 | @@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2231 | add %g1, %GLOBAL_SPARE, %g1 | ||
2232 | subcc %o2, %g3, %o2 | ||
2233 | |||
2234 | - EX_LD_FP(LOAD_BLK(%o1, %f0)) | ||
2235 | + EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp) | ||
2236 | add %o1, 0x40, %o1 | ||
2237 | add %g1, %g3, %g1 | ||
2238 | - EX_LD_FP(LOAD_BLK(%o1, %f16)) | ||
2239 | + EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp) | ||
2240 | add %o1, 0x40, %o1 | ||
2241 | sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE | ||
2242 | - EX_LD_FP(LOAD_BLK(%o1, %f32)) | ||
2243 | + EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp) | ||
2244 | add %o1, 0x40, %o1 | ||
2245 | |||
2246 | /* There are 8 instances of the unrolled loop, | ||
2247 | @@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2248 | |||
2249 | .align 64 | ||
2250 | 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) | ||
2251 | - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | ||
2252 | + LOOP_CHUNK1(o1, o0, 1f) | ||
2253 | FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) | ||
2254 | - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) | ||
2255 | + LOOP_CHUNK2(o1, o0, 2f) | ||
2256 | FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) | ||
2257 | - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) | ||
2258 | + LOOP_CHUNK3(o1, o0, 3f) | ||
2259 | ba,pt %xcc, 1b+4 | ||
2260 | faligndata %f0, %f2, %f48 | ||
2261 | 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) | ||
2262 | @@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2263 | STORE_JUMP(o0, f48, 56f) | ||
2264 | |||
2265 | 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) | ||
2266 | - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | ||
2267 | + LOOP_CHUNK1(o1, o0, 1f) | ||
2268 | FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) | ||
2269 | - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) | ||
2270 | + LOOP_CHUNK2(o1, o0, 2f) | ||
2271 | FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) | ||
2272 | - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) | ||
2273 | + LOOP_CHUNK3(o1, o0, 3f) | ||
2274 | ba,pt %xcc, 1b+4 | ||
2275 | faligndata %f2, %f4, %f48 | ||
2276 | 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) | ||
2277 | @@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2278 | STORE_JUMP(o0, f48, 57f) | ||
2279 | |||
2280 | 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) | ||
2281 | - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | ||
2282 | + LOOP_CHUNK1(o1, o0, 1f) | ||
2283 | FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) | ||
2284 | - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) | ||
2285 | + LOOP_CHUNK2(o1, o0, 2f) | ||
2286 | FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) | ||
2287 | - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) | ||
2288 | + LOOP_CHUNK3(o1, o0, 3f) | ||
2289 | ba,pt %xcc, 1b+4 | ||
2290 | faligndata %f4, %f6, %f48 | ||
2291 | 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) | ||
2292 | @@ -303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2293 | STORE_JUMP(o0, f48, 58f) | ||
2294 | |||
2295 | 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) | ||
2296 | - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | ||
2297 | + LOOP_CHUNK1(o1, o0, 1f) | ||
2298 | FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) | ||
2299 | - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) | ||
2300 | + LOOP_CHUNK2(o1, o0, 2f) | ||
2301 | FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) | ||
2302 | - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) | ||
2303 | + LOOP_CHUNK3(o1, o0, 3f) | ||
2304 | ba,pt %xcc, 1b+4 | ||
2305 | faligndata %f6, %f8, %f48 | ||
2306 | 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) | ||
2307 | @@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2308 | STORE_JUMP(o0, f48, 59f) | ||
2309 | |||
2310 | 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) | ||
2311 | - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | ||
2312 | + LOOP_CHUNK1(o1, o0, 1f) | ||
2313 | FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) | ||
2314 | - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) | ||
2315 | + LOOP_CHUNK2(o1, o0, 2f) | ||
2316 | FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) | ||
2317 | - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) | ||
2318 | + LOOP_CHUNK3(o1, o0, 3f) | ||
2319 | ba,pt %xcc, 1b+4 | ||
2320 | faligndata %f8, %f10, %f48 | ||
2321 | 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) | ||
2322 | @@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2323 | STORE_JUMP(o0, f48, 60f) | ||
2324 | |||
2325 | 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) | ||
2326 | - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | ||
2327 | + LOOP_CHUNK1(o1, o0, 1f) | ||
2328 | FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) | ||
2329 | - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) | ||
2330 | + LOOP_CHUNK2(o1, o0, 2f) | ||
2331 | FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) | ||
2332 | - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) | ||
2333 | + LOOP_CHUNK3(o1, o0, 3f) | ||
2334 | ba,pt %xcc, 1b+4 | ||
2335 | faligndata %f10, %f12, %f48 | ||
2336 | 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) | ||
2337 | @@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2338 | STORE_JUMP(o0, f48, 61f) | ||
2339 | |||
2340 | 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) | ||
2341 | - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | ||
2342 | + LOOP_CHUNK1(o1, o0, 1f) | ||
2343 | FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) | ||
2344 | - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) | ||
2345 | + LOOP_CHUNK2(o1, o0, 2f) | ||
2346 | FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) | ||
2347 | - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) | ||
2348 | + LOOP_CHUNK3(o1, o0, 3f) | ||
2349 | ba,pt %xcc, 1b+4 | ||
2350 | faligndata %f12, %f14, %f48 | ||
2351 | 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) | ||
2352 | @@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2353 | STORE_JUMP(o0, f48, 62f) | ||
2354 | |||
2355 | 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) | ||
2356 | - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | ||
2357 | + LOOP_CHUNK1(o1, o0, 1f) | ||
2358 | FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) | ||
2359 | - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) | ||
2360 | + LOOP_CHUNK2(o1, o0, 2f) | ||
2361 | FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) | ||
2362 | - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) | ||
2363 | + LOOP_CHUNK3(o1, o0, 3f) | ||
2364 | ba,pt %xcc, 1b+4 | ||
2365 | faligndata %f14, %f16, %f48 | ||
2366 | 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) | ||
2367 | @@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2368 | FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) | ||
2369 | STORE_JUMP(o0, f48, 63f) | ||
2370 | |||
2371 | -40: FINISH_VISCHUNK(o0, f0, f2, g3) | ||
2372 | -41: FINISH_VISCHUNK(o0, f2, f4, g3) | ||
2373 | -42: FINISH_VISCHUNK(o0, f4, f6, g3) | ||
2374 | -43: FINISH_VISCHUNK(o0, f6, f8, g3) | ||
2375 | -44: FINISH_VISCHUNK(o0, f8, f10, g3) | ||
2376 | -45: FINISH_VISCHUNK(o0, f10, f12, g3) | ||
2377 | -46: FINISH_VISCHUNK(o0, f12, f14, g3) | ||
2378 | -47: UNEVEN_VISCHUNK(o0, f14, f0, g3) | ||
2379 | -48: FINISH_VISCHUNK(o0, f16, f18, g3) | ||
2380 | -49: FINISH_VISCHUNK(o0, f18, f20, g3) | ||
2381 | -50: FINISH_VISCHUNK(o0, f20, f22, g3) | ||
2382 | -51: FINISH_VISCHUNK(o0, f22, f24, g3) | ||
2383 | -52: FINISH_VISCHUNK(o0, f24, f26, g3) | ||
2384 | -53: FINISH_VISCHUNK(o0, f26, f28, g3) | ||
2385 | -54: FINISH_VISCHUNK(o0, f28, f30, g3) | ||
2386 | -55: UNEVEN_VISCHUNK(o0, f30, f0, g3) | ||
2387 | -56: FINISH_VISCHUNK(o0, f32, f34, g3) | ||
2388 | -57: FINISH_VISCHUNK(o0, f34, f36, g3) | ||
2389 | -58: FINISH_VISCHUNK(o0, f36, f38, g3) | ||
2390 | -59: FINISH_VISCHUNK(o0, f38, f40, g3) | ||
2391 | -60: FINISH_VISCHUNK(o0, f40, f42, g3) | ||
2392 | -61: FINISH_VISCHUNK(o0, f42, f44, g3) | ||
2393 | -62: FINISH_VISCHUNK(o0, f44, f46, g3) | ||
2394 | -63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) | ||
2395 | - | ||
2396 | -93: EX_LD_FP(LOAD(ldd, %o1, %f2)) | ||
2397 | +40: FINISH_VISCHUNK(o0, f0, f2) | ||
2398 | +41: FINISH_VISCHUNK(o0, f2, f4) | ||
2399 | +42: FINISH_VISCHUNK(o0, f4, f6) | ||
2400 | +43: FINISH_VISCHUNK(o0, f6, f8) | ||
2401 | +44: FINISH_VISCHUNK(o0, f8, f10) | ||
2402 | +45: FINISH_VISCHUNK(o0, f10, f12) | ||
2403 | +46: FINISH_VISCHUNK(o0, f12, f14) | ||
2404 | +47: UNEVEN_VISCHUNK(o0, f14, f0) | ||
2405 | +48: FINISH_VISCHUNK(o0, f16, f18) | ||
2406 | +49: FINISH_VISCHUNK(o0, f18, f20) | ||
2407 | +50: FINISH_VISCHUNK(o0, f20, f22) | ||
2408 | +51: FINISH_VISCHUNK(o0, f22, f24) | ||
2409 | +52: FINISH_VISCHUNK(o0, f24, f26) | ||
2410 | +53: FINISH_VISCHUNK(o0, f26, f28) | ||
2411 | +54: FINISH_VISCHUNK(o0, f28, f30) | ||
2412 | +55: UNEVEN_VISCHUNK(o0, f30, f0) | ||
2413 | +56: FINISH_VISCHUNK(o0, f32, f34) | ||
2414 | +57: FINISH_VISCHUNK(o0, f34, f36) | ||
2415 | +58: FINISH_VISCHUNK(o0, f36, f38) | ||
2416 | +59: FINISH_VISCHUNK(o0, f38, f40) | ||
2417 | +60: FINISH_VISCHUNK(o0, f40, f42) | ||
2418 | +61: FINISH_VISCHUNK(o0, f42, f44) | ||
2419 | +62: FINISH_VISCHUNK(o0, f44, f46) | ||
2420 | +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0) | ||
2421 | + | ||
2422 | +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp) | ||
2423 | add %o1, 8, %o1 | ||
2424 | subcc %g3, 8, %g3 | ||
2425 | faligndata %f0, %f2, %f8 | ||
2426 | - EX_ST_FP(STORE(std, %f8, %o0)) | ||
2427 | + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) | ||
2428 | bl,pn %xcc, 95f | ||
2429 | add %o0, 8, %o0 | ||
2430 | - EX_LD_FP(LOAD(ldd, %o1, %f0)) | ||
2431 | + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp) | ||
2432 | add %o1, 8, %o1 | ||
2433 | subcc %g3, 8, %g3 | ||
2434 | faligndata %f2, %f0, %f8 | ||
2435 | - EX_ST_FP(STORE(std, %f8, %o0)) | ||
2436 | + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) | ||
2437 | bge,pt %xcc, 93b | ||
2438 | add %o0, 8, %o0 | ||
2439 | |||
2440 | 95: brz,pt %o2, 2f | ||
2441 | mov %g1, %o1 | ||
2442 | |||
2443 | -1: EX_LD_FP(LOAD(ldub, %o1, %o3)) | ||
2444 | +1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp) | ||
2445 | add %o1, 1, %o1 | ||
2446 | subcc %o2, 1, %o2 | ||
2447 | - EX_ST_FP(STORE(stb, %o3, %o0)) | ||
2448 | + EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp) | ||
2449 | bne,pt %xcc, 1b | ||
2450 | add %o0, 1, %o0 | ||
2451 | |||
2452 | @@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2453 | |||
2454 | 72: andn %o2, 0xf, %GLOBAL_SPARE | ||
2455 | and %o2, 0xf, %o2 | ||
2456 | -1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) | ||
2457 | - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) | ||
2458 | +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0) | ||
2459 | + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0) | ||
2460 | subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE | ||
2461 | - EX_ST(STORE(stx, %o5, %o1 + %o3)) | ||
2462 | + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10) | ||
2463 | add %o1, 0x8, %o1 | ||
2464 | - EX_ST(STORE(stx, %g1, %o1 + %o3)) | ||
2465 | + EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8) | ||
2466 | bgu,pt %XCC, 1b | ||
2467 | add %o1, 0x8, %o1 | ||
2468 | 73: andcc %o2, 0x8, %g0 | ||
2469 | be,pt %XCC, 1f | ||
2470 | nop | ||
2471 | - EX_LD(LOAD(ldx, %o1, %o5)) | ||
2472 | + EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0) | ||
2473 | sub %o2, 0x8, %o2 | ||
2474 | - EX_ST(STORE(stx, %o5, %o1 + %o3)) | ||
2475 | + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8) | ||
2476 | add %o1, 0x8, %o1 | ||
2477 | 1: andcc %o2, 0x4, %g0 | ||
2478 | be,pt %XCC, 1f | ||
2479 | nop | ||
2480 | - EX_LD(LOAD(lduw, %o1, %o5)) | ||
2481 | + EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0) | ||
2482 | sub %o2, 0x4, %o2 | ||
2483 | - EX_ST(STORE(stw, %o5, %o1 + %o3)) | ||
2484 | + EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4) | ||
2485 | add %o1, 0x4, %o1 | ||
2486 | 1: cmp %o2, 0 | ||
2487 | be,pt %XCC, 85f | ||
2488 | @@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2489 | sub %g0, %g1, %g1 | ||
2490 | sub %o2, %g1, %o2 | ||
2491 | |||
2492 | -1: EX_LD(LOAD(ldub, %o1, %o5)) | ||
2493 | +1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0) | ||
2494 | subcc %g1, 1, %g1 | ||
2495 | - EX_ST(STORE(stb, %o5, %o1 + %o3)) | ||
2496 | + EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1) | ||
2497 | bgu,pt %icc, 1b | ||
2498 | add %o1, 1, %o1 | ||
2499 | |||
2500 | @@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2501 | |||
2502 | 8: mov 64, %o3 | ||
2503 | andn %o1, 0x7, %o1 | ||
2504 | - EX_LD(LOAD(ldx, %o1, %g2)) | ||
2505 | + EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0) | ||
2506 | sub %o3, %g1, %o3 | ||
2507 | andn %o2, 0x7, %GLOBAL_SPARE | ||
2508 | sllx %g2, %g1, %g2 | ||
2509 | -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) | ||
2510 | +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj) | ||
2511 | subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE | ||
2512 | add %o1, 0x8, %o1 | ||
2513 | srlx %g3, %o3, %o5 | ||
2514 | or %o5, %g2, %o5 | ||
2515 | - EX_ST(STORE(stx, %o5, %o0)) | ||
2516 | + EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj) | ||
2517 | add %o0, 0x8, %o0 | ||
2518 | bgu,pt %icc, 1b | ||
2519 | sllx %g3, %g1, %g2 | ||
2520 | @@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2521 | bne,pn %XCC, 90f | ||
2522 | sub %o0, %o1, %o3 | ||
2523 | |||
2524 | -1: EX_LD(LOAD(lduw, %o1, %g1)) | ||
2525 | +1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0) | ||
2526 | subcc %o2, 4, %o2 | ||
2527 | - EX_ST(STORE(stw, %g1, %o1 + %o3)) | ||
2528 | + EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4) | ||
2529 | bgu,pt %XCC, 1b | ||
2530 | add %o1, 4, %o1 | ||
2531 | |||
2532 | @@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2533 | mov EX_RETVAL(%o4), %o0 | ||
2534 | |||
2535 | .align 32 | ||
2536 | -90: EX_LD(LOAD(ldub, %o1, %g1)) | ||
2537 | +90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0) | ||
2538 | subcc %o2, 1, %o2 | ||
2539 | - EX_ST(STORE(stb, %g1, %o1 + %o3)) | ||
2540 | + EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1) | ||
2541 | bgu,pt %XCC, 90b | ||
2542 | add %o1, 1, %o1 | ||
2543 | retl | ||
2544 | diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S | ||
2545 | index 88ad73d86fe4..db73010a1af8 100644 | ||
2546 | --- a/arch/sparc/lib/U3copy_from_user.S | ||
2547 | +++ b/arch/sparc/lib/U3copy_from_user.S | ||
2548 | @@ -3,19 +3,19 @@ | ||
2549 | * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) | ||
2550 | */ | ||
2551 | |||
2552 | -#define EX_LD(x) \ | ||
2553 | +#define EX_LD(x,y) \ | ||
2554 | 98: x; \ | ||
2555 | .section __ex_table,"a";\ | ||
2556 | .align 4; \ | ||
2557 | - .word 98b, __retl_one; \ | ||
2558 | + .word 98b, y; \ | ||
2559 | .text; \ | ||
2560 | .align 4; | ||
2561 | |||
2562 | -#define EX_LD_FP(x) \ | ||
2563 | +#define EX_LD_FP(x,y) \ | ||
2564 | 98: x; \ | ||
2565 | .section __ex_table,"a";\ | ||
2566 | .align 4; \ | ||
2567 | - .word 98b, __retl_one_fp;\ | ||
2568 | + .word 98b, y##_fp; \ | ||
2569 | .text; \ | ||
2570 | .align 4; | ||
2571 | |||
2572 | diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S | ||
2573 | index 845139d75537..c4ee858e352a 100644 | ||
2574 | --- a/arch/sparc/lib/U3copy_to_user.S | ||
2575 | +++ b/arch/sparc/lib/U3copy_to_user.S | ||
2576 | @@ -3,19 +3,19 @@ | ||
2577 | * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) | ||
2578 | */ | ||
2579 | |||
2580 | -#define EX_ST(x) \ | ||
2581 | +#define EX_ST(x,y) \ | ||
2582 | 98: x; \ | ||
2583 | .section __ex_table,"a";\ | ||
2584 | .align 4; \ | ||
2585 | - .word 98b, __retl_one; \ | ||
2586 | + .word 98b, y; \ | ||
2587 | .text; \ | ||
2588 | .align 4; | ||
2589 | |||
2590 | -#define EX_ST_FP(x) \ | ||
2591 | +#define EX_ST_FP(x,y) \ | ||
2592 | 98: x; \ | ||
2593 | .section __ex_table,"a";\ | ||
2594 | .align 4; \ | ||
2595 | - .word 98b, __retl_one_fp;\ | ||
2596 | + .word 98b, y##_fp; \ | ||
2597 | .text; \ | ||
2598 | .align 4; | ||
2599 | |||
2600 | diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S | ||
2601 | index 491ee69e4995..54f98706b03b 100644 | ||
2602 | --- a/arch/sparc/lib/U3memcpy.S | ||
2603 | +++ b/arch/sparc/lib/U3memcpy.S | ||
2604 | @@ -4,6 +4,7 @@ | ||
2605 | */ | ||
2606 | |||
2607 | #ifdef __KERNEL__ | ||
2608 | +#include <linux/linkage.h> | ||
2609 | #include <asm/visasm.h> | ||
2610 | #include <asm/asi.h> | ||
2611 | #define GLOBAL_SPARE %g7 | ||
2612 | @@ -22,21 +23,17 @@ | ||
2613 | #endif | ||
2614 | |||
2615 | #ifndef EX_LD | ||
2616 | -#define EX_LD(x) x | ||
2617 | +#define EX_LD(x,y) x | ||
2618 | #endif | ||
2619 | #ifndef EX_LD_FP | ||
2620 | -#define EX_LD_FP(x) x | ||
2621 | +#define EX_LD_FP(x,y) x | ||
2622 | #endif | ||
2623 | |||
2624 | #ifndef EX_ST | ||
2625 | -#define EX_ST(x) x | ||
2626 | +#define EX_ST(x,y) x | ||
2627 | #endif | ||
2628 | #ifndef EX_ST_FP | ||
2629 | -#define EX_ST_FP(x) x | ||
2630 | -#endif | ||
2631 | - | ||
2632 | -#ifndef EX_RETVAL | ||
2633 | -#define EX_RETVAL(x) x | ||
2634 | +#define EX_ST_FP(x,y) x | ||
2635 | #endif | ||
2636 | |||
2637 | #ifndef LOAD | ||
2638 | @@ -77,6 +74,87 @@ | ||
2639 | */ | ||
2640 | |||
2641 | .text | ||
2642 | +#ifndef EX_RETVAL | ||
2643 | +#define EX_RETVAL(x) x | ||
2644 | +__restore_fp: | ||
2645 | + VISExitHalf | ||
2646 | + retl | ||
2647 | + nop | ||
2648 | +ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) | ||
2649 | + add %g1, 1, %g1 | ||
2650 | + add %g2, %g1, %g2 | ||
2651 | + ba,pt %xcc, __restore_fp | ||
2652 | + add %o2, %g2, %o0 | ||
2653 | +ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) | ||
2654 | +ENTRY(U3_retl_o2_plus_g2_fp) | ||
2655 | + ba,pt %xcc, __restore_fp | ||
2656 | + add %o2, %g2, %o0 | ||
2657 | +ENDPROC(U3_retl_o2_plus_g2_fp) | ||
2658 | +ENTRY(U3_retl_o2_plus_g2_plus_8_fp) | ||
2659 | + add %g2, 8, %g2 | ||
2660 | + ba,pt %xcc, __restore_fp | ||
2661 | + add %o2, %g2, %o0 | ||
2662 | +ENDPROC(U3_retl_o2_plus_g2_plus_8_fp) | ||
2663 | +ENTRY(U3_retl_o2) | ||
2664 | + retl | ||
2665 | + mov %o2, %o0 | ||
2666 | +ENDPROC(U3_retl_o2) | ||
2667 | +ENTRY(U3_retl_o2_plus_1) | ||
2668 | + retl | ||
2669 | + add %o2, 1, %o0 | ||
2670 | +ENDPROC(U3_retl_o2_plus_1) | ||
2671 | +ENTRY(U3_retl_o2_plus_4) | ||
2672 | + retl | ||
2673 | + add %o2, 4, %o0 | ||
2674 | +ENDPROC(U3_retl_o2_plus_4) | ||
2675 | +ENTRY(U3_retl_o2_plus_8) | ||
2676 | + retl | ||
2677 | + add %o2, 8, %o0 | ||
2678 | +ENDPROC(U3_retl_o2_plus_8) | ||
2679 | +ENTRY(U3_retl_o2_plus_g1_plus_1) | ||
2680 | + add %g1, 1, %g1 | ||
2681 | + retl | ||
2682 | + add %o2, %g1, %o0 | ||
2683 | +ENDPROC(U3_retl_o2_plus_g1_plus_1) | ||
2684 | +ENTRY(U3_retl_o2_fp) | ||
2685 | + ba,pt %xcc, __restore_fp | ||
2686 | + mov %o2, %o0 | ||
2687 | +ENDPROC(U3_retl_o2_fp) | ||
2688 | +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) | ||
2689 | + sll %o3, 6, %o3 | ||
2690 | + add %o3, 0x80, %o3 | ||
2691 | + ba,pt %xcc, __restore_fp | ||
2692 | + add %o2, %o3, %o0 | ||
2693 | +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) | ||
2694 | +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) | ||
2695 | + sll %o3, 6, %o3 | ||
2696 | + add %o3, 0x40, %o3 | ||
2697 | + ba,pt %xcc, __restore_fp | ||
2698 | + add %o2, %o3, %o0 | ||
2699 | +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) | ||
2700 | +ENTRY(U3_retl_o2_plus_GS_plus_0x10) | ||
2701 | + add GLOBAL_SPARE, 0x10, GLOBAL_SPARE | ||
2702 | + retl | ||
2703 | + add %o2, GLOBAL_SPARE, %o0 | ||
2704 | +ENDPROC(U3_retl_o2_plus_GS_plus_0x10) | ||
2705 | +ENTRY(U3_retl_o2_plus_GS_plus_0x08) | ||
2706 | + add GLOBAL_SPARE, 0x08, GLOBAL_SPARE | ||
2707 | + retl | ||
2708 | + add %o2, GLOBAL_SPARE, %o0 | ||
2709 | +ENDPROC(U3_retl_o2_plus_GS_plus_0x08) | ||
2710 | +ENTRY(U3_retl_o2_and_7_plus_GS) /* fault fixup: return (%o2 & 7) + GLOBAL_SPARE */ | ||
2711 | + and %o2, 7, %o2 | ||
2712 | + retl | ||
2713 | + add %o2, GLOBAL_SPARE, %o0 /* result goes in %o0, like every other U3_retl_* handler */ | ||
2714 | +ENDPROC(U3_retl_o2_and_7_plus_GS) | ||
2715 | +ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) /* fault fixup: return (%o2 & 7) + GLOBAL_SPARE + 8 */ | ||
2716 | + add GLOBAL_SPARE, 8, GLOBAL_SPARE | ||
2717 | + and %o2, 7, %o2 | ||
2718 | + retl | ||
2719 | + add %o2, GLOBAL_SPARE, %o0 /* result goes in %o0, like every other U3_retl_* handler */ | ||
2720 | +ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) | ||
2721 | +#endif | ||
2722 | + | ||
2723 | .align 64 | ||
2724 | |||
2725 | /* The cheetah's flexible spine, oversized liver, enlarged heart, | ||
2726 | @@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2727 | and %g2, 0x38, %g2 | ||
2728 | |||
2729 | 1: subcc %g1, 0x1, %g1 | ||
2730 | - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) | ||
2731 | - EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) | ||
2732 | + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1) | ||
2733 | + EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1) | ||
2734 | bgu,pt %XCC, 1b | ||
2735 | add %o1, 0x1, %o1 | ||
2736 | |||
2737 | @@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2738 | be,pt %icc, 3f | ||
2739 | alignaddr %o1, %g0, %o1 | ||
2740 | |||
2741 | - EX_LD_FP(LOAD(ldd, %o1, %f4)) | ||
2742 | -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) | ||
2743 | + EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2) | ||
2744 | +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2) | ||
2745 | add %o1, 0x8, %o1 | ||
2746 | subcc %g2, 0x8, %g2 | ||
2747 | faligndata %f4, %f6, %f0 | ||
2748 | - EX_ST_FP(STORE(std, %f0, %o0)) | ||
2749 | + EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8) | ||
2750 | be,pn %icc, 3f | ||
2751 | add %o0, 0x8, %o0 | ||
2752 | |||
2753 | - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) | ||
2754 | + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2) | ||
2755 | add %o1, 0x8, %o1 | ||
2756 | subcc %g2, 0x8, %g2 | ||
2757 | faligndata %f6, %f4, %f2 | ||
2758 | - EX_ST_FP(STORE(std, %f2, %o0)) | ||
2759 | + EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8) | ||
2760 | bne,pt %icc, 1b | ||
2761 | add %o0, 0x8, %o0 | ||
2762 | |||
2763 | @@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2764 | LOAD(prefetch, %o1 + 0x080, #one_read) | ||
2765 | LOAD(prefetch, %o1 + 0x0c0, #one_read) | ||
2766 | LOAD(prefetch, %o1 + 0x100, #one_read) | ||
2767 | - EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0)) | ||
2768 | + EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2) | ||
2769 | LOAD(prefetch, %o1 + 0x140, #one_read) | ||
2770 | - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) | ||
2771 | + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2) | ||
2772 | LOAD(prefetch, %o1 + 0x180, #one_read) | ||
2773 | - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) | ||
2774 | + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2) | ||
2775 | LOAD(prefetch, %o1 + 0x1c0, #one_read) | ||
2776 | faligndata %f0, %f2, %f16 | ||
2777 | - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) | ||
2778 | + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2) | ||
2779 | faligndata %f2, %f4, %f18 | ||
2780 | - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) | ||
2781 | + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2) | ||
2782 | faligndata %f4, %f6, %f20 | ||
2783 | - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) | ||
2784 | + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2) | ||
2785 | faligndata %f6, %f8, %f22 | ||
2786 | |||
2787 | - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) | ||
2788 | + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2) | ||
2789 | faligndata %f8, %f10, %f24 | ||
2790 | - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) | ||
2791 | + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2) | ||
2792 | faligndata %f10, %f12, %f26 | ||
2793 | - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) | ||
2794 | + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2) | ||
2795 | |||
2796 | subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE | ||
2797 | add %o1, 0x40, %o1 | ||
2798 | @@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2799 | |||
2800 | .align 64 | ||
2801 | 1: | ||
2802 | - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) | ||
2803 | + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) | ||
2804 | faligndata %f12, %f14, %f28 | ||
2805 | - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) | ||
2806 | + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) | ||
2807 | faligndata %f14, %f0, %f30 | ||
2808 | - EX_ST_FP(STORE_BLK(%f16, %o0)) | ||
2809 | - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) | ||
2810 | + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) | ||
2811 | + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) | ||
2812 | faligndata %f0, %f2, %f16 | ||
2813 | add %o0, 0x40, %o0 | ||
2814 | |||
2815 | - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) | ||
2816 | + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) | ||
2817 | faligndata %f2, %f4, %f18 | ||
2818 | - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) | ||
2819 | + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) | ||
2820 | faligndata %f4, %f6, %f20 | ||
2821 | - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) | ||
2822 | + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) | ||
2823 | subcc %o3, 0x01, %o3 | ||
2824 | faligndata %f6, %f8, %f22 | ||
2825 | - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) | ||
2826 | + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80) | ||
2827 | |||
2828 | faligndata %f8, %f10, %f24 | ||
2829 | - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) | ||
2830 | + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80) | ||
2831 | LOAD(prefetch, %o1 + 0x1c0, #one_read) | ||
2832 | faligndata %f10, %f12, %f26 | ||
2833 | bg,pt %XCC, 1b | ||
2834 | @@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2835 | |||
2836 | /* Finally we copy the last full 64-byte block. */ | ||
2837 | 2: | ||
2838 | - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) | ||
2839 | + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) | ||
2840 | faligndata %f12, %f14, %f28 | ||
2841 | - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) | ||
2842 | + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) | ||
2843 | faligndata %f14, %f0, %f30 | ||
2844 | - EX_ST_FP(STORE_BLK(%f16, %o0)) | ||
2845 | - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) | ||
2846 | + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) | ||
2847 | + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) | ||
2848 | faligndata %f0, %f2, %f16 | ||
2849 | - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) | ||
2850 | + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) | ||
2851 | faligndata %f2, %f4, %f18 | ||
2852 | - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) | ||
2853 | + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) | ||
2854 | faligndata %f4, %f6, %f20 | ||
2855 | - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) | ||
2856 | + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) | ||
2857 | faligndata %f6, %f8, %f22 | ||
2858 | - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) | ||
2859 | + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40) | ||
2860 | faligndata %f8, %f10, %f24 | ||
2861 | cmp %g1, 0 | ||
2862 | be,pt %XCC, 1f | ||
2863 | add %o0, 0x40, %o0 | ||
2864 | - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) | ||
2865 | + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40) | ||
2866 | 1: faligndata %f10, %f12, %f26 | ||
2867 | faligndata %f12, %f14, %f28 | ||
2868 | faligndata %f14, %f0, %f30 | ||
2869 | - EX_ST_FP(STORE_BLK(%f16, %o0)) | ||
2870 | + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40) | ||
2871 | add %o0, 0x40, %o0 | ||
2872 | add %o1, 0x40, %o1 | ||
2873 | membar #Sync | ||
2874 | @@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2875 | |||
2876 | sub %o2, %g2, %o2 | ||
2877 | be,a,pt %XCC, 1f | ||
2878 | - EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0)) | ||
2879 | + EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2) | ||
2880 | |||
2881 | -1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2)) | ||
2882 | +1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2) | ||
2883 | add %o1, 0x8, %o1 | ||
2884 | subcc %g2, 0x8, %g2 | ||
2885 | faligndata %f0, %f2, %f8 | ||
2886 | - EX_ST_FP(STORE(std, %f8, %o0)) | ||
2887 | + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) | ||
2888 | be,pn %XCC, 2f | ||
2889 | add %o0, 0x8, %o0 | ||
2890 | - EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0)) | ||
2891 | + EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2) | ||
2892 | add %o1, 0x8, %o1 | ||
2893 | subcc %g2, 0x8, %g2 | ||
2894 | faligndata %f2, %f0, %f8 | ||
2895 | - EX_ST_FP(STORE(std, %f8, %o0)) | ||
2896 | + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) | ||
2897 | bne,pn %XCC, 1b | ||
2898 | add %o0, 0x8, %o0 | ||
2899 | |||
2900 | @@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2901 | andcc %o2, 0x8, %g0 | ||
2902 | be,pt %icc, 1f | ||
2903 | nop | ||
2904 | - EX_LD(LOAD(ldx, %o1, %o5)) | ||
2905 | - EX_ST(STORE(stx, %o5, %o1 + %o3)) | ||
2906 | + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2) | ||
2907 | + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2) | ||
2908 | add %o1, 0x8, %o1 | ||
2909 | + sub %o2, 8, %o2 | ||
2910 | |||
2911 | 1: andcc %o2, 0x4, %g0 | ||
2912 | be,pt %icc, 1f | ||
2913 | nop | ||
2914 | - EX_LD(LOAD(lduw, %o1, %o5)) | ||
2915 | - EX_ST(STORE(stw, %o5, %o1 + %o3)) | ||
2916 | + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2) | ||
2917 | + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2) | ||
2918 | add %o1, 0x4, %o1 | ||
2919 | + sub %o2, 4, %o2 | ||
2920 | |||
2921 | 1: andcc %o2, 0x2, %g0 | ||
2922 | be,pt %icc, 1f | ||
2923 | nop | ||
2924 | - EX_LD(LOAD(lduh, %o1, %o5)) | ||
2925 | - EX_ST(STORE(sth, %o5, %o1 + %o3)) | ||
2926 | + EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2) | ||
2927 | + EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2) | ||
2928 | add %o1, 0x2, %o1 | ||
2929 | + sub %o2, 2, %o2 | ||
2930 | |||
2931 | 1: andcc %o2, 0x1, %g0 | ||
2932 | be,pt %icc, 85f | ||
2933 | nop | ||
2934 | - EX_LD(LOAD(ldub, %o1, %o5)) | ||
2935 | + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2) | ||
2936 | ba,pt %xcc, 85f | ||
2937 | - EX_ST(STORE(stb, %o5, %o1 + %o3)) | ||
2938 | + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2) | ||
2939 | |||
2940 | .align 64 | ||
2941 | 70: /* 16 < len <= 64 */ | ||
2942 | @@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2943 | andn %o2, 0xf, GLOBAL_SPARE | ||
2944 | and %o2, 0xf, %o2 | ||
2945 | 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE | ||
2946 | - EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) | ||
2947 | - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) | ||
2948 | - EX_ST(STORE(stx, %o5, %o1 + %o3)) | ||
2949 | + EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10) | ||
2950 | + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10) | ||
2951 | + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10) | ||
2952 | add %o1, 0x8, %o1 | ||
2953 | - EX_ST(STORE(stx, %g1, %o1 + %o3)) | ||
2954 | + EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08) | ||
2955 | bgu,pt %XCC, 1b | ||
2956 | add %o1, 0x8, %o1 | ||
2957 | 73: andcc %o2, 0x8, %g0 | ||
2958 | be,pt %XCC, 1f | ||
2959 | nop | ||
2960 | sub %o2, 0x8, %o2 | ||
2961 | - EX_LD(LOAD(ldx, %o1, %o5)) | ||
2962 | - EX_ST(STORE(stx, %o5, %o1 + %o3)) | ||
2963 | + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8) | ||
2964 | + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8) | ||
2965 | add %o1, 0x8, %o1 | ||
2966 | 1: andcc %o2, 0x4, %g0 | ||
2967 | be,pt %XCC, 1f | ||
2968 | nop | ||
2969 | sub %o2, 0x4, %o2 | ||
2970 | - EX_LD(LOAD(lduw, %o1, %o5)) | ||
2971 | - EX_ST(STORE(stw, %o5, %o1 + %o3)) | ||
2972 | + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4) | ||
2973 | + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4) | ||
2974 | add %o1, 0x4, %o1 | ||
2975 | 1: cmp %o2, 0 | ||
2976 | be,pt %XCC, 85f | ||
2977 | @@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2978 | sub %o2, %g1, %o2 | ||
2979 | |||
2980 | 1: subcc %g1, 1, %g1 | ||
2981 | - EX_LD(LOAD(ldub, %o1, %o5)) | ||
2982 | - EX_ST(STORE(stb, %o5, %o1 + %o3)) | ||
2983 | + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1) | ||
2984 | + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1) | ||
2985 | bgu,pt %icc, 1b | ||
2986 | add %o1, 1, %o1 | ||
2987 | |||
2988 | @@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
2989 | |||
2990 | 8: mov 64, %o3 | ||
2991 | andn %o1, 0x7, %o1 | ||
2992 | - EX_LD(LOAD(ldx, %o1, %g2)) | ||
2993 | + EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2) | ||
2994 | sub %o3, %g1, %o3 | ||
2995 | andn %o2, 0x7, GLOBAL_SPARE | ||
2996 | sllx %g2, %g1, %g2 | ||
2997 | -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) | ||
2998 | +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS) | ||
2999 | subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE | ||
3000 | add %o1, 0x8, %o1 | ||
3001 | srlx %g3, %o3, %o5 | ||
3002 | or %o5, %g2, %o5 | ||
3003 | - EX_ST(STORE(stx, %o5, %o0)) | ||
3004 | + EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8) | ||
3005 | add %o0, 0x8, %o0 | ||
3006 | bgu,pt %icc, 1b | ||
3007 | sllx %g3, %g1, %g2 | ||
3008 | @@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
3009 | |||
3010 | 1: | ||
3011 | subcc %o2, 4, %o2 | ||
3012 | - EX_LD(LOAD(lduw, %o1, %g1)) | ||
3013 | - EX_ST(STORE(stw, %g1, %o1 + %o3)) | ||
3014 | + EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4) | ||
3015 | + EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4) | ||
3016 | bgu,pt %XCC, 1b | ||
3017 | add %o1, 4, %o1 | ||
3018 | |||
3019 | @@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
3020 | .align 32 | ||
3021 | 90: | ||
3022 | subcc %o2, 1, %o2 | ||
3023 | - EX_LD(LOAD(ldub, %o1, %g1)) | ||
3024 | - EX_ST(STORE(stb, %g1, %o1 + %o3)) | ||
3025 | + EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1) | ||
3026 | + EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1) | ||
3027 | bgu,pt %XCC, 90b | ||
3028 | add %o1, 1, %o1 | ||
3029 | retl | ||
3030 | diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S | ||
3031 | index 302c0e60dc2c..4c89b486fa0d 100644 | ||
3032 | --- a/arch/sparc/lib/copy_in_user.S | ||
3033 | +++ b/arch/sparc/lib/copy_in_user.S | ||
3034 | @@ -8,18 +8,33 @@ | ||
3035 | |||
3036 | #define XCC xcc | ||
3037 | |||
3038 | -#define EX(x,y) \ | ||
3039 | +#define EX(x,y,z) \ | ||
3040 | 98: x,y; \ | ||
3041 | .section __ex_table,"a";\ | ||
3042 | .align 4; \ | ||
3043 | - .word 98b, __retl_one; \ | ||
3044 | + .word 98b, z; \ | ||
3045 | .text; \ | ||
3046 | .align 4; | ||
3047 | |||
3048 | +#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8) | ||
3049 | +#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4) | ||
3050 | +#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1) | ||
3051 | + | ||
3052 | .register %g2,#scratch | ||
3053 | .register %g3,#scratch | ||
3054 | |||
3055 | .text | ||
3056 | +__retl_o4_plus_8: | ||
3057 | + add %o4, %o2, %o4 | ||
3058 | + retl | ||
3059 | + add %o4, 8, %o0 | ||
3060 | +__retl_o2_plus_4: | ||
3061 | + retl | ||
3062 | + add %o2, 4, %o0 | ||
3063 | +__retl_o2_plus_1: | ||
3064 | + retl | ||
3065 | + add %o2, 1, %o0 | ||
3066 | + | ||
3067 | .align 32 | ||
3068 | |||
3069 | /* Don't try to get too fancy here, just nice and | ||
3070 | @@ -44,8 +59,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ | ||
3071 | andn %o2, 0x7, %o4 | ||
3072 | and %o2, 0x7, %o2 | ||
3073 | 1: subcc %o4, 0x8, %o4 | ||
3074 | - EX(ldxa [%o1] %asi, %o5) | ||
3075 | - EX(stxa %o5, [%o0] %asi) | ||
3076 | + EX_O4(ldxa [%o1] %asi, %o5) | ||
3077 | + EX_O4(stxa %o5, [%o0] %asi) | ||
3078 | add %o1, 0x8, %o1 | ||
3079 | bgu,pt %XCC, 1b | ||
3080 | add %o0, 0x8, %o0 | ||
3081 | @@ -53,8 +68,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ | ||
3082 | be,pt %XCC, 1f | ||
3083 | nop | ||
3084 | sub %o2, 0x4, %o2 | ||
3085 | - EX(lduwa [%o1] %asi, %o5) | ||
3086 | - EX(stwa %o5, [%o0] %asi) | ||
3087 | + EX_O2_4(lduwa [%o1] %asi, %o5) | ||
3088 | + EX_O2_4(stwa %o5, [%o0] %asi) | ||
3089 | add %o1, 0x4, %o1 | ||
3090 | add %o0, 0x4, %o0 | ||
3091 | 1: cmp %o2, 0 | ||
3092 | @@ -70,8 +85,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ | ||
3093 | |||
3094 | 82: | ||
3095 | subcc %o2, 4, %o2 | ||
3096 | - EX(lduwa [%o1] %asi, %g1) | ||
3097 | - EX(stwa %g1, [%o0] %asi) | ||
3098 | + EX_O2_4(lduwa [%o1] %asi, %g1) | ||
3099 | + EX_O2_4(stwa %g1, [%o0] %asi) | ||
3100 | add %o1, 4, %o1 | ||
3101 | bgu,pt %XCC, 82b | ||
3102 | add %o0, 4, %o0 | ||
3103 | @@ -82,8 +97,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ | ||
3104 | .align 32 | ||
3105 | 90: | ||
3106 | subcc %o2, 1, %o2 | ||
3107 | - EX(lduba [%o1] %asi, %g1) | ||
3108 | - EX(stba %g1, [%o0] %asi) | ||
3109 | + EX_O2_1(lduba [%o1] %asi, %g1) | ||
3110 | + EX_O2_1(stba %g1, [%o0] %asi) | ||
3111 | add %o1, 1, %o1 | ||
3112 | bgu,pt %XCC, 90b | ||
3113 | add %o0, 1, %o0 | ||
3114 | diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c | ||
3115 | deleted file mode 100644 | ||
3116 | index ac96ae236709..000000000000 | ||
3117 | --- a/arch/sparc/lib/user_fixup.c | ||
3118 | +++ /dev/null | ||
3119 | @@ -1,71 +0,0 @@ | ||
3120 | -/* user_fixup.c: Fix up user copy faults. | ||
3121 | - * | ||
3122 | - * Copyright (C) 2004 David S. Miller <davem@redhat.com> | ||
3123 | - */ | ||
3124 | - | ||
3125 | -#include <linux/compiler.h> | ||
3126 | -#include <linux/kernel.h> | ||
3127 | -#include <linux/string.h> | ||
3128 | -#include <linux/errno.h> | ||
3129 | -#include <linux/module.h> | ||
3130 | - | ||
3131 | -#include <asm/uaccess.h> | ||
3132 | - | ||
3133 | -/* Calculating the exact fault address when using | ||
3134 | - * block loads and stores can be very complicated. | ||
3135 | - * | ||
3136 | - * Instead of trying to be clever and handling all | ||
3137 | - * of the cases, just fix things up simply here. | ||
3138 | - */ | ||
3139 | - | ||
3140 | -static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset) | ||
3141 | -{ | ||
3142 | - unsigned long fault_addr = current_thread_info()->fault_address; | ||
3143 | - unsigned long end = start + size; | ||
3144 | - | ||
3145 | - if (fault_addr < start || fault_addr >= end) { | ||
3146 | - *offset = 0; | ||
3147 | - } else { | ||
3148 | - *offset = fault_addr - start; | ||
3149 | - size = end - fault_addr; | ||
3150 | - } | ||
3151 | - return size; | ||
3152 | -} | ||
3153 | - | ||
3154 | -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) | ||
3155 | -{ | ||
3156 | - unsigned long offset; | ||
3157 | - | ||
3158 | - size = compute_size((unsigned long) from, size, &offset); | ||
3159 | - if (likely(size)) | ||
3160 | - memset(to + offset, 0, size); | ||
3161 | - | ||
3162 | - return size; | ||
3163 | -} | ||
3164 | -EXPORT_SYMBOL(copy_from_user_fixup); | ||
3165 | - | ||
3166 | -unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) | ||
3167 | -{ | ||
3168 | - unsigned long offset; | ||
3169 | - | ||
3170 | - return compute_size((unsigned long) to, size, &offset); | ||
3171 | -} | ||
3172 | -EXPORT_SYMBOL(copy_to_user_fixup); | ||
3173 | - | ||
3174 | -unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) | ||
3175 | -{ | ||
3176 | - unsigned long fault_addr = current_thread_info()->fault_address; | ||
3177 | - unsigned long start = (unsigned long) to; | ||
3178 | - unsigned long end = start + size; | ||
3179 | - | ||
3180 | - if (fault_addr >= start && fault_addr < end) | ||
3181 | - return end - fault_addr; | ||
3182 | - | ||
3183 | - start = (unsigned long) from; | ||
3184 | - end = start + size; | ||
3185 | - if (fault_addr >= start && fault_addr < end) | ||
3186 | - return end - fault_addr; | ||
3187 | - | ||
3188 | - return size; | ||
3189 | -} | ||
3190 | -EXPORT_SYMBOL(copy_in_user_fixup); | ||
3191 | diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c | ||
3192 | index dbabe5713a15..e15f33715103 100644 | ||
3193 | --- a/arch/sparc/mm/fault_64.c | ||
3194 | +++ b/arch/sparc/mm/fault_64.c | ||
3195 | @@ -479,14 +479,14 @@ good_area: | ||
3196 | up_read(&mm->mmap_sem); | ||
3197 | |||
3198 | mm_rss = get_mm_rss(mm); | ||
3199 | -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) | ||
3200 | - mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); | ||
3201 | +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) | ||
3202 | + mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE)); | ||
3203 | #endif | ||
3204 | if (unlikely(mm_rss > | ||
3205 | mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) | ||
3206 | tsb_grow(mm, MM_TSB_BASE, mm_rss); | ||
3207 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) | ||
3208 | - mm_rss = mm->context.huge_pte_count; | ||
3209 | + mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; | ||
3210 | if (unlikely(mm_rss > | ||
3211 | mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { | ||
3212 | if (mm->context.tsb_block[MM_TSB_HUGE].tsb) | ||
3213 | diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c | ||
3214 | index 364d093f46c6..da1142401bf4 100644 | ||
3215 | --- a/arch/sparc/mm/hugetlbpage.c | ||
3216 | +++ b/arch/sparc/mm/hugetlbpage.c | ||
3217 | @@ -180,7 +180,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | ||
3218 | unsigned long nptes; | ||
3219 | |||
3220 | if (!pte_present(*ptep) && pte_present(entry)) | ||
3221 | - mm->context.huge_pte_count++; | ||
3222 | + mm->context.hugetlb_pte_count++; | ||
3223 | |||
3224 | addr &= HPAGE_MASK; | ||
3225 | |||
3226 | @@ -212,7 +212,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | ||
3227 | |||
3228 | entry = *ptep; | ||
3229 | if (pte_present(entry)) | ||
3230 | - mm->context.huge_pte_count--; | ||
3231 | + mm->context.hugetlb_pte_count--; | ||
3232 | |||
3233 | addr &= HPAGE_MASK; | ||
3234 | nptes = 1 << HUGETLB_PAGE_ORDER; | ||
3235 | diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c | ||
3236 | index 3c4b8975fa76..a5331c336b2a 100644 | ||
3237 | --- a/arch/sparc/mm/init_64.c | ||
3238 | +++ b/arch/sparc/mm/init_64.c | ||
3239 | @@ -346,7 +346,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * | ||
3240 | spin_lock_irqsave(&mm->context.lock, flags); | ||
3241 | |||
3242 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) | ||
3243 | - if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) | ||
3244 | + if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) && | ||
3245 | + is_hugetlb_pte(pte)) | ||
3246 | __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, | ||
3247 | address, pte_val(pte)); | ||
3248 | else | ||
3249 | diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c | ||
3250 | index f81cd9736700..3659d37b4d81 100644 | ||
3251 | --- a/arch/sparc/mm/tlb.c | ||
3252 | +++ b/arch/sparc/mm/tlb.c | ||
3253 | @@ -175,9 +175,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||
3254 | |||
3255 | if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { | ||
3256 | if (pmd_val(pmd) & _PAGE_PMD_HUGE) | ||
3257 | - mm->context.huge_pte_count++; | ||
3258 | + mm->context.thp_pte_count++; | ||
3259 | else | ||
3260 | - mm->context.huge_pte_count--; | ||
3261 | + mm->context.thp_pte_count--; | ||
3262 | |||
3263 | /* Do not try to allocate the TSB hash table if we | ||
3264 | * don't have one already. We have various locks held | ||
3265 | diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c | ||
3266 | index a0604a493a36..9cdeca0fa955 100644 | ||
3267 | --- a/arch/sparc/mm/tsb.c | ||
3268 | +++ b/arch/sparc/mm/tsb.c | ||
3269 | @@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr) | ||
3270 | return (tag == (vaddr >> 22)); | ||
3271 | } | ||
3272 | |||
3273 | +static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end) | ||
3274 | +{ | ||
3275 | + unsigned long idx; | ||
3276 | + | ||
3277 | + for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) { | ||
3278 | + struct tsb *ent = &swapper_tsb[idx]; | ||
3279 | + unsigned long match = idx << 13; | ||
3280 | + | ||
3281 | + match |= (ent->tag << 22); | ||
3282 | + if (match >= start && match < end) | ||
3283 | + ent->tag = (1UL << TSB_TAG_INVALID_BIT); | ||
3284 | + } | ||
3285 | +} | ||
3286 | + | ||
3287 | /* TSB flushes need only occur on the processor initiating the address | ||
3288 | * space modification, not on each cpu the address space has run on. | ||
3289 | * Only the TLB flush needs that treatment. | ||
3290 | @@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) | ||
3291 | { | ||
3292 | unsigned long v; | ||
3293 | |||
3294 | + if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES) | ||
3295 | + return flush_tsb_kernel_range_scan(start, end); | ||
3296 | + | ||
3297 | for (v = start; v < end; v += PAGE_SIZE) { | ||
3298 | unsigned long hash = tsb_hash(v, PAGE_SHIFT, | ||
3299 | KERNEL_TSB_NENTRIES); | ||
3300 | @@ -470,7 +487,7 @@ retry_tsb_alloc: | ||
3301 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | ||
3302 | { | ||
3303 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) | ||
3304 | - unsigned long huge_pte_count; | ||
3305 | + unsigned long total_huge_pte_count; | ||
3306 | #endif | ||
3307 | unsigned int i; | ||
3308 | |||
3309 | @@ -479,12 +496,14 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | ||
3310 | mm->context.sparc64_ctx_val = 0UL; | ||
3311 | |||
3312 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) | ||
3313 | - /* We reset it to zero because the fork() page copying | ||
3314 | + /* We reset them to zero because the fork() page copying | ||
3315 | * will re-increment the counters as the parent PTEs are | ||
3316 | * copied into the child address space. | ||
3317 | */ | ||
3318 | - huge_pte_count = mm->context.huge_pte_count; | ||
3319 | - mm->context.huge_pte_count = 0; | ||
3320 | + total_huge_pte_count = mm->context.hugetlb_pte_count + | ||
3321 | + mm->context.thp_pte_count; | ||
3322 | + mm->context.hugetlb_pte_count = 0; | ||
3323 | + mm->context.thp_pte_count = 0; | ||
3324 | #endif | ||
3325 | |||
3326 | /* copy_mm() copies over the parent's mm_struct before calling | ||
3327 | @@ -500,8 +519,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | ||
3328 | tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); | ||
3329 | |||
3330 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) | ||
3331 | - if (unlikely(huge_pte_count)) | ||
3332 | - tsb_grow(mm, MM_TSB_HUGE, huge_pte_count); | ||
3333 | + if (unlikely(total_huge_pte_count)) | ||
3334 | + tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count); | ||
3335 | #endif | ||
3336 | |||
3337 | if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) | ||
3338 | diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S | ||
3339 | index b4f4733abc6e..5d2fd6cd3189 100644 | ||
3340 | --- a/arch/sparc/mm/ultra.S | ||
3341 | +++ b/arch/sparc/mm/ultra.S | ||
3342 | @@ -30,7 +30,7 @@ | ||
3343 | .text | ||
3344 | .align 32 | ||
3345 | .globl __flush_tlb_mm | ||
3346 | -__flush_tlb_mm: /* 18 insns */ | ||
3347 | +__flush_tlb_mm: /* 19 insns */ | ||
3348 | /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ | ||
3349 | ldxa [%o1] ASI_DMMU, %g2 | ||
3350 | cmp %g2, %o0 | ||
3351 | @@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */ | ||
3352 | |||
3353 | .align 32 | ||
3354 | .globl __flush_tlb_pending | ||
3355 | -__flush_tlb_pending: /* 26 insns */ | ||
3356 | +__flush_tlb_pending: /* 27 insns */ | ||
3357 | /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ | ||
3358 | rdpr %pstate, %g7 | ||
3359 | sllx %o1, 3, %o1 | ||
3360 | @@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */ | ||
3361 | |||
3362 | .align 32 | ||
3363 | .globl __flush_tlb_kernel_range | ||
3364 | -__flush_tlb_kernel_range: /* 16 insns */ | ||
3365 | +__flush_tlb_kernel_range: /* 31 insns */ | ||
3366 | /* %o0=start, %o1=end */ | ||
3367 | cmp %o0, %o1 | ||
3368 | be,pn %xcc, 2f | ||
3369 | + sub %o1, %o0, %o3 | ||
3370 | + srlx %o3, 18, %o4 | ||
3371 | + brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow | ||
3372 | sethi %hi(PAGE_SIZE), %o4 | ||
3373 | - sub %o1, %o0, %o3 | ||
3374 | sub %o3, %o4, %o3 | ||
3375 | or %o0, 0x20, %o0 ! Nucleus | ||
3376 | 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP | ||
3377 | @@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */ | ||
3378 | retl | ||
3379 | nop | ||
3380 | nop | ||
3381 | + nop | ||
3382 | + nop | ||
3383 | + nop | ||
3384 | + nop | ||
3385 | + nop | ||
3386 | + nop | ||
3387 | + nop | ||
3388 | + nop | ||
3389 | + nop | ||
3390 | + nop | ||
3391 | + nop | ||
3392 | + nop | ||
3393 | + nop | ||
3394 | + | ||
3395 | +__spitfire_flush_tlb_kernel_range_slow: | ||
3396 | + mov 63 * 8, %o4 | ||
3397 | +1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3 | ||
3398 | + andcc %o3, 0x40, %g0 /* _PAGE_L_4U */ | ||
3399 | + bne,pn %xcc, 2f | ||
3400 | + mov TLB_TAG_ACCESS, %o3 | ||
3401 | + stxa %g0, [%o3] ASI_IMMU | ||
3402 | + stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS | ||
3403 | + membar #Sync | ||
3404 | +2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3 | ||
3405 | + andcc %o3, 0x40, %g0 | ||
3406 | + bne,pn %xcc, 2f | ||
3407 | + mov TLB_TAG_ACCESS, %o3 | ||
3408 | + stxa %g0, [%o3] ASI_DMMU | ||
3409 | + stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS | ||
3410 | + membar #Sync | ||
3411 | +2: sub %o4, 8, %o4 | ||
3412 | + brgez,pt %o4, 1b | ||
3413 | + nop | ||
3414 | + retl | ||
3415 | + nop | ||
3416 | |||
3417 | __spitfire_flush_tlb_mm_slow: | ||
3418 | rdpr %pstate, %g1 | ||
3419 | @@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */ | ||
3420 | retl | ||
3421 | wrpr %g7, 0x0, %pstate | ||
3422 | |||
3423 | +__cheetah_flush_tlb_kernel_range: /* 31 insns */ | ||
3424 | + /* %o0=start, %o1=end */ | ||
3425 | + cmp %o0, %o1 | ||
3426 | + be,pn %xcc, 2f | ||
3427 | + sub %o1, %o0, %o3 | ||
3428 | + srlx %o3, 18, %o4 | ||
3429 | + brnz,pn %o4, 3f | ||
3430 | + sethi %hi(PAGE_SIZE), %o4 | ||
3431 | + sub %o3, %o4, %o3 | ||
3432 | + or %o0, 0x20, %o0 ! Nucleus | ||
3433 | +1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP | ||
3434 | + stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP | ||
3435 | + membar #Sync | ||
3436 | + brnz,pt %o3, 1b | ||
3437 | + sub %o3, %o4, %o3 | ||
3438 | +2: sethi %hi(KERNBASE), %o3 | ||
3439 | + flush %o3 | ||
3440 | + retl | ||
3441 | + nop | ||
3442 | +3: mov 0x80, %o4 | ||
3443 | + stxa %g0, [%o4] ASI_DMMU_DEMAP | ||
3444 | + membar #Sync | ||
3445 | + stxa %g0, [%o4] ASI_IMMU_DEMAP | ||
3446 | + membar #Sync | ||
3447 | + retl | ||
3448 | + nop | ||
3449 | + nop | ||
3450 | + nop | ||
3451 | + nop | ||
3452 | + nop | ||
3453 | + nop | ||
3454 | + nop | ||
3455 | + nop | ||
3456 | + | ||
3457 | #ifdef DCACHE_ALIASING_POSSIBLE | ||
3458 | __cheetah_flush_dcache_page: /* 11 insns */ | ||
3459 | sethi %hi(PAGE_OFFSET), %g1 | ||
3460 | @@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error: | ||
3461 | ret | ||
3462 | restore | ||
3463 | |||
3464 | -__hypervisor_flush_tlb_mm: /* 10 insns */ | ||
3465 | +__hypervisor_flush_tlb_mm: /* 19 insns */ | ||
3466 | mov %o0, %o2 /* ARG2: mmu context */ | ||
3467 | mov 0, %o0 /* ARG0: CPU lists unimplemented */ | ||
3468 | mov 0, %o1 /* ARG1: CPU lists unimplemented */ | ||
3469 | mov HV_MMU_ALL, %o3 /* ARG3: flags */ | ||
3470 | mov HV_FAST_MMU_DEMAP_CTX, %o5 | ||
3471 | ta HV_FAST_TRAP | ||
3472 | - brnz,pn %o0, __hypervisor_tlb_tl0_error | ||
3473 | + brnz,pn %o0, 1f | ||
3474 | mov HV_FAST_MMU_DEMAP_CTX, %o1 | ||
3475 | retl | ||
3476 | nop | ||
3477 | +1: sethi %hi(__hypervisor_tlb_tl0_error), %o5 | ||
3478 | + jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0 | ||
3479 | + nop | ||
3480 | + nop | ||
3481 | + nop | ||
3482 | + nop | ||
3483 | + nop | ||
3484 | + nop | ||
3485 | + nop | ||
3486 | |||
3487 | -__hypervisor_flush_tlb_page: /* 11 insns */ | ||
3488 | +__hypervisor_flush_tlb_page: /* 22 insns */ | ||
3489 | /* %o0 = context, %o1 = vaddr */ | ||
3490 | mov %o0, %g2 | ||
3491 | mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ | ||
3492 | @@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */ | ||
3493 | srlx %o0, PAGE_SHIFT, %o0 | ||
3494 | sllx %o0, PAGE_SHIFT, %o0 | ||
3495 | ta HV_MMU_UNMAP_ADDR_TRAP | ||
3496 | - brnz,pn %o0, __hypervisor_tlb_tl0_error | ||
3497 | + brnz,pn %o0, 1f | ||
3498 | mov HV_MMU_UNMAP_ADDR_TRAP, %o1 | ||
3499 | retl | ||
3500 | nop | ||
3501 | +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 | ||
3502 | + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 | ||
3503 | + nop | ||
3504 | + nop | ||
3505 | + nop | ||
3506 | + nop | ||
3507 | + nop | ||
3508 | + nop | ||
3509 | + nop | ||
3510 | + nop | ||
3511 | + nop | ||
3512 | |||
3513 | -__hypervisor_flush_tlb_pending: /* 16 insns */ | ||
3514 | +__hypervisor_flush_tlb_pending: /* 27 insns */ | ||
3515 | /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ | ||
3516 | sllx %o1, 3, %g1 | ||
3517 | mov %o2, %g2 | ||
3518 | @@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */ | ||
3519 | srlx %o0, PAGE_SHIFT, %o0 | ||
3520 | sllx %o0, PAGE_SHIFT, %o0 | ||
3521 | ta HV_MMU_UNMAP_ADDR_TRAP | ||
3522 | - brnz,pn %o0, __hypervisor_tlb_tl0_error | ||
3523 | + brnz,pn %o0, 1f | ||
3524 | mov HV_MMU_UNMAP_ADDR_TRAP, %o1 | ||
3525 | brnz,pt %g1, 1b | ||
3526 | nop | ||
3527 | retl | ||
3528 | nop | ||
3529 | +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 | ||
3530 | + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 | ||
3531 | + nop | ||
3532 | + nop | ||
3533 | + nop | ||
3534 | + nop | ||
3535 | + nop | ||
3536 | + nop | ||
3537 | + nop | ||
3538 | + nop | ||
3539 | + nop | ||
3540 | |||
3541 | -__hypervisor_flush_tlb_kernel_range: /* 16 insns */ | ||
3542 | +__hypervisor_flush_tlb_kernel_range: /* 31 insns */ | ||
3543 | /* %o0=start, %o1=end */ | ||
3544 | cmp %o0, %o1 | ||
3545 | be,pn %xcc, 2f | ||
3546 | - sethi %hi(PAGE_SIZE), %g3 | ||
3547 | - mov %o0, %g1 | ||
3548 | - sub %o1, %g1, %g2 | ||
3549 | + sub %o1, %o0, %g2 | ||
3550 | + srlx %g2, 18, %g3 | ||
3551 | + brnz,pn %g3, 4f | ||
3552 | + mov %o0, %g1 | ||
3553 | + sethi %hi(PAGE_SIZE), %g3 | ||
3554 | sub %g2, %g3, %g2 | ||
3555 | 1: add %g1, %g2, %o0 /* ARG0: virtual address */ | ||
3556 | mov 0, %o1 /* ARG1: mmu context */ | ||
3557 | mov HV_MMU_ALL, %o2 /* ARG2: flags */ | ||
3558 | ta HV_MMU_UNMAP_ADDR_TRAP | ||
3559 | - brnz,pn %o0, __hypervisor_tlb_tl0_error | ||
3560 | + brnz,pn %o0, 3f | ||
3561 | mov HV_MMU_UNMAP_ADDR_TRAP, %o1 | ||
3562 | brnz,pt %g2, 1b | ||
3563 | sub %g2, %g3, %g2 | ||
3564 | 2: retl | ||
3565 | nop | ||
3566 | +3: sethi %hi(__hypervisor_tlb_tl0_error), %o2 | ||
3567 | + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 | ||
3568 | + nop | ||
3569 | +4: mov 0, %o0 /* ARG0: CPU lists unimplemented */ | ||
3570 | + mov 0, %o1 /* ARG1: CPU lists unimplemented */ | ||
3571 | + mov 0, %o2 /* ARG2: mmu context == nucleus */ | ||
3572 | + mov HV_MMU_ALL, %o3 /* ARG3: flags */ | ||
3573 | + mov HV_FAST_MMU_DEMAP_CTX, %o5 | ||
3574 | + ta HV_FAST_TRAP | ||
3575 | + brnz,pn %o0, 3b | ||
3576 | + mov HV_FAST_MMU_DEMAP_CTX, %o1 | ||
3577 | + retl | ||
3578 | + nop | ||
3579 | |||
3580 | #ifdef DCACHE_ALIASING_POSSIBLE | ||
3581 | /* XXX Niagara and friends have an 8K cache, so no aliasing is | ||
3582 | @@ -394,43 +511,6 @@ tlb_patch_one: | ||
3583 | retl | ||
3584 | nop | ||
3585 | |||
3586 | - .globl cheetah_patch_cachetlbops | ||
3587 | -cheetah_patch_cachetlbops: | ||
3588 | - save %sp, -128, %sp | ||
3589 | - | ||
3590 | - sethi %hi(__flush_tlb_mm), %o0 | ||
3591 | - or %o0, %lo(__flush_tlb_mm), %o0 | ||
3592 | - sethi %hi(__cheetah_flush_tlb_mm), %o1 | ||
3593 | - or %o1, %lo(__cheetah_flush_tlb_mm), %o1 | ||
3594 | - call tlb_patch_one | ||
3595 | - mov 19, %o2 | ||
3596 | - | ||
3597 | - sethi %hi(__flush_tlb_page), %o0 | ||
3598 | - or %o0, %lo(__flush_tlb_page), %o0 | ||
3599 | - sethi %hi(__cheetah_flush_tlb_page), %o1 | ||
3600 | - or %o1, %lo(__cheetah_flush_tlb_page), %o1 | ||
3601 | - call tlb_patch_one | ||
3602 | - mov 22, %o2 | ||
3603 | - | ||
3604 | - sethi %hi(__flush_tlb_pending), %o0 | ||
3605 | - or %o0, %lo(__flush_tlb_pending), %o0 | ||
3606 | - sethi %hi(__cheetah_flush_tlb_pending), %o1 | ||
3607 | - or %o1, %lo(__cheetah_flush_tlb_pending), %o1 | ||
3608 | - call tlb_patch_one | ||
3609 | - mov 27, %o2 | ||
3610 | - | ||
3611 | -#ifdef DCACHE_ALIASING_POSSIBLE | ||
3612 | - sethi %hi(__flush_dcache_page), %o0 | ||
3613 | - or %o0, %lo(__flush_dcache_page), %o0 | ||
3614 | - sethi %hi(__cheetah_flush_dcache_page), %o1 | ||
3615 | - or %o1, %lo(__cheetah_flush_dcache_page), %o1 | ||
3616 | - call tlb_patch_one | ||
3617 | - mov 11, %o2 | ||
3618 | -#endif /* DCACHE_ALIASING_POSSIBLE */ | ||
3619 | - | ||
3620 | - ret | ||
3621 | - restore | ||
3622 | - | ||
3623 | #ifdef CONFIG_SMP | ||
3624 | /* These are all called by the slaves of a cross call, at | ||
3625 | * trap level 1, with interrupts fully disabled. | ||
3626 | @@ -447,7 +527,7 @@ cheetah_patch_cachetlbops: | ||
3627 | */ | ||
3628 | .align 32 | ||
3629 | .globl xcall_flush_tlb_mm | ||
3630 | -xcall_flush_tlb_mm: /* 21 insns */ | ||
3631 | +xcall_flush_tlb_mm: /* 24 insns */ | ||
3632 | mov PRIMARY_CONTEXT, %g2 | ||
3633 | ldxa [%g2] ASI_DMMU, %g3 | ||
3634 | srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 | ||
3635 | @@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */ | ||
3636 | nop | ||
3637 | nop | ||
3638 | nop | ||
3639 | + nop | ||
3640 | + nop | ||
3641 | + nop | ||
3642 | |||
3643 | .globl xcall_flush_tlb_page | ||
3644 | -xcall_flush_tlb_page: /* 17 insns */ | ||
3645 | +xcall_flush_tlb_page: /* 20 insns */ | ||
3646 | /* %g5=context, %g1=vaddr */ | ||
3647 | mov PRIMARY_CONTEXT, %g4 | ||
3648 | ldxa [%g4] ASI_DMMU, %g2 | ||
3649 | @@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */ | ||
3650 | retry | ||
3651 | nop | ||
3652 | nop | ||
3653 | + nop | ||
3654 | + nop | ||
3655 | + nop | ||
3656 | |||
3657 | .globl xcall_flush_tlb_kernel_range | ||
3658 | -xcall_flush_tlb_kernel_range: /* 25 insns */ | ||
3659 | +xcall_flush_tlb_kernel_range: /* 44 insns */ | ||
3660 | sethi %hi(PAGE_SIZE - 1), %g2 | ||
3661 | or %g2, %lo(PAGE_SIZE - 1), %g2 | ||
3662 | andn %g1, %g2, %g1 | ||
3663 | andn %g7, %g2, %g7 | ||
3664 | sub %g7, %g1, %g3 | ||
3665 | - add %g2, 1, %g2 | ||
3666 | + srlx %g3, 18, %g2 | ||
3667 | + brnz,pn %g2, 2f | ||
3668 | + add %g2, 1, %g2 | ||
3669 | sub %g3, %g2, %g3 | ||
3670 | or %g1, 0x20, %g1 ! Nucleus | ||
3671 | 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP | ||
3672 | @@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */ | ||
3673 | brnz,pt %g3, 1b | ||
3674 | sub %g3, %g2, %g3 | ||
3675 | retry | ||
3676 | - nop | ||
3677 | - nop | ||
3678 | +2: mov 63 * 8, %g1 | ||
3679 | +1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2 | ||
3680 | + andcc %g2, 0x40, %g0 /* _PAGE_L_4U */ | ||
3681 | + bne,pn %xcc, 2f | ||
3682 | + mov TLB_TAG_ACCESS, %g2 | ||
3683 | + stxa %g0, [%g2] ASI_IMMU | ||
3684 | + stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS | ||
3685 | + membar #Sync | ||
3686 | +2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2 | ||
3687 | + andcc %g2, 0x40, %g0 | ||
3688 | + bne,pn %xcc, 2f | ||
3689 | + mov TLB_TAG_ACCESS, %g2 | ||
3690 | + stxa %g0, [%g2] ASI_DMMU | ||
3691 | + stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS | ||
3692 | + membar #Sync | ||
3693 | +2: sub %g1, 8, %g1 | ||
3694 | + brgez,pt %g1, 1b | ||
3695 | + nop | ||
3696 | + retry | ||
3697 | nop | ||
3698 | nop | ||
3699 | nop | ||
3700 | @@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4: | ||
3701 | |||
3702 | retry | ||
3703 | |||
3704 | +__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */ | ||
3705 | + sethi %hi(PAGE_SIZE - 1), %g2 | ||
3706 | + or %g2, %lo(PAGE_SIZE - 1), %g2 | ||
3707 | + andn %g1, %g2, %g1 | ||
3708 | + andn %g7, %g2, %g7 | ||
3709 | + sub %g7, %g1, %g3 | ||
3710 | + srlx %g3, 18, %g2 | ||
3711 | + brnz,pn %g2, 2f | ||
3712 | + add %g2, 1, %g2 | ||
3713 | + sub %g3, %g2, %g3 | ||
3714 | + or %g1, 0x20, %g1 ! Nucleus | ||
3715 | +1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP | ||
3716 | + stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP | ||
3717 | + membar #Sync | ||
3718 | + brnz,pt %g3, 1b | ||
3719 | + sub %g3, %g2, %g3 | ||
3720 | + retry | ||
3721 | +2: mov 0x80, %g2 | ||
3722 | + stxa %g0, [%g2] ASI_DMMU_DEMAP | ||
3723 | + membar #Sync | ||
3724 | + stxa %g0, [%g2] ASI_IMMU_DEMAP | ||
3725 | + membar #Sync | ||
3726 | + retry | ||
3727 | + nop | ||
3728 | + nop | ||
3729 | + nop | ||
3730 | + nop | ||
3731 | + nop | ||
3732 | + nop | ||
3733 | + nop | ||
3734 | + nop | ||
3735 | + nop | ||
3736 | + nop | ||
3737 | + nop | ||
3738 | + nop | ||
3739 | + nop | ||
3740 | + nop | ||
3741 | + nop | ||
3742 | + nop | ||
3743 | + nop | ||
3744 | + nop | ||
3745 | + nop | ||
3746 | + nop | ||
3747 | + nop | ||
3748 | + nop | ||
3749 | + | ||
3750 | #ifdef DCACHE_ALIASING_POSSIBLE | ||
3751 | .align 32 | ||
3752 | .globl xcall_flush_dcache_page_cheetah | ||
3753 | @@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error: | ||
3754 | ba,a,pt %xcc, rtrap | ||
3755 | |||
3756 | .globl __hypervisor_xcall_flush_tlb_mm | ||
3757 | -__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ | ||
3758 | +__hypervisor_xcall_flush_tlb_mm: /* 24 insns */ | ||
3759 | /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ | ||
3760 | mov %o0, %g2 | ||
3761 | mov %o1, %g3 | ||
3762 | @@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ | ||
3763 | mov HV_FAST_MMU_DEMAP_CTX, %o5 | ||
3764 | ta HV_FAST_TRAP | ||
3765 | mov HV_FAST_MMU_DEMAP_CTX, %g6 | ||
3766 | - brnz,pn %o0, __hypervisor_tlb_xcall_error | ||
3767 | + brnz,pn %o0, 1f | ||
3768 | mov %o0, %g5 | ||
3769 | mov %g2, %o0 | ||
3770 | mov %g3, %o1 | ||
3771 | @@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ | ||
3772 | mov %g7, %o5 | ||
3773 | membar #Sync | ||
3774 | retry | ||
3775 | +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 | ||
3776 | + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 | ||
3777 | + nop | ||
3778 | |||
3779 | .globl __hypervisor_xcall_flush_tlb_page | ||
3780 | -__hypervisor_xcall_flush_tlb_page: /* 17 insns */ | ||
3781 | +__hypervisor_xcall_flush_tlb_page: /* 20 insns */ | ||
3782 | /* %g5=ctx, %g1=vaddr */ | ||
3783 | mov %o0, %g2 | ||
3784 | mov %o1, %g3 | ||
3785 | @@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */ | ||
3786 | sllx %o0, PAGE_SHIFT, %o0 | ||
3787 | ta HV_MMU_UNMAP_ADDR_TRAP | ||
3788 | mov HV_MMU_UNMAP_ADDR_TRAP, %g6 | ||
3789 | - brnz,a,pn %o0, __hypervisor_tlb_xcall_error | ||
3790 | + brnz,a,pn %o0, 1f | ||
3791 | mov %o0, %g5 | ||
3792 | mov %g2, %o0 | ||
3793 | mov %g3, %o1 | ||
3794 | mov %g4, %o2 | ||
3795 | membar #Sync | ||
3796 | retry | ||
3797 | +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 | ||
3798 | + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 | ||
3799 | + nop | ||
3800 | |||
3801 | .globl __hypervisor_xcall_flush_tlb_kernel_range | ||
3802 | -__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ | ||
3803 | +__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */ | ||
3804 | /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ | ||
3805 | sethi %hi(PAGE_SIZE - 1), %g2 | ||
3806 | or %g2, %lo(PAGE_SIZE - 1), %g2 | ||
3807 | andn %g1, %g2, %g1 | ||
3808 | andn %g7, %g2, %g7 | ||
3809 | sub %g7, %g1, %g3 | ||
3810 | + srlx %g3, 18, %g7 | ||
3811 | add %g2, 1, %g2 | ||
3812 | sub %g3, %g2, %g3 | ||
3813 | mov %o0, %g2 | ||
3814 | mov %o1, %g4 | ||
3815 | - mov %o2, %g7 | ||
3816 | + brnz,pn %g7, 2f | ||
3817 | + mov %o2, %g7 | ||
3818 | 1: add %g1, %g3, %o0 /* ARG0: virtual address */ | ||
3819 | mov 0, %o1 /* ARG1: mmu context */ | ||
3820 | mov HV_MMU_ALL, %o2 /* ARG2: flags */ | ||
3821 | ta HV_MMU_UNMAP_ADDR_TRAP | ||
3822 | mov HV_MMU_UNMAP_ADDR_TRAP, %g6 | ||
3823 | - brnz,pn %o0, __hypervisor_tlb_xcall_error | ||
3824 | + brnz,pn %o0, 1f | ||
3825 | mov %o0, %g5 | ||
3826 | sethi %hi(PAGE_SIZE), %o2 | ||
3827 | brnz,pt %g3, 1b | ||
3828 | sub %g3, %o2, %g3 | ||
3829 | - mov %g2, %o0 | ||
3830 | +5: mov %g2, %o0 | ||
3831 | mov %g4, %o1 | ||
3832 | mov %g7, %o2 | ||
3833 | membar #Sync | ||
3834 | retry | ||
3835 | +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 | ||
3836 | + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 | ||
3837 | + nop | ||
3838 | +2: mov %o3, %g1 | ||
3839 | + mov %o5, %g3 | ||
3840 | + mov 0, %o0 /* ARG0: CPU lists unimplemented */ | ||
3841 | + mov 0, %o1 /* ARG1: CPU lists unimplemented */ | ||
3842 | + mov 0, %o2 /* ARG2: mmu context == nucleus */ | ||
3843 | + mov HV_MMU_ALL, %o3 /* ARG3: flags */ | ||
3844 | + mov HV_FAST_MMU_DEMAP_CTX, %o5 | ||
3845 | + ta HV_FAST_TRAP | ||
3846 | + mov %g1, %o3 | ||
3847 | + brz,pt %o0, 5b | ||
3848 | + mov %g3, %o5 | ||
3849 | + mov HV_FAST_MMU_DEMAP_CTX, %g6 | ||
3850 | + ba,pt %xcc, 1b | ||
3851 | + clr %g5 | ||
3852 | |||
3853 | /* These just get rescheduled to PIL vectors. */ | ||
3854 | .globl xcall_call_function | ||
3855 | @@ -809,6 +985,58 @@ xcall_kgdb_capture: | ||
3856 | |||
3857 | #endif /* CONFIG_SMP */ | ||
3858 | |||
3859 | + .globl cheetah_patch_cachetlbops | ||
3860 | +cheetah_patch_cachetlbops: | ||
3861 | + save %sp, -128, %sp | ||
3862 | + | ||
3863 | + sethi %hi(__flush_tlb_mm), %o0 | ||
3864 | + or %o0, %lo(__flush_tlb_mm), %o0 | ||
3865 | + sethi %hi(__cheetah_flush_tlb_mm), %o1 | ||
3866 | + or %o1, %lo(__cheetah_flush_tlb_mm), %o1 | ||
3867 | + call tlb_patch_one | ||
3868 | + mov 19, %o2 | ||
3869 | + | ||
3870 | + sethi %hi(__flush_tlb_page), %o0 | ||
3871 | + or %o0, %lo(__flush_tlb_page), %o0 | ||
3872 | + sethi %hi(__cheetah_flush_tlb_page), %o1 | ||
3873 | + or %o1, %lo(__cheetah_flush_tlb_page), %o1 | ||
3874 | + call tlb_patch_one | ||
3875 | + mov 22, %o2 | ||
3876 | + | ||
3877 | + sethi %hi(__flush_tlb_pending), %o0 | ||
3878 | + or %o0, %lo(__flush_tlb_pending), %o0 | ||
3879 | + sethi %hi(__cheetah_flush_tlb_pending), %o1 | ||
3880 | + or %o1, %lo(__cheetah_flush_tlb_pending), %o1 | ||
3881 | + call tlb_patch_one | ||
3882 | + mov 27, %o2 | ||
3883 | + | ||
3884 | + sethi %hi(__flush_tlb_kernel_range), %o0 | ||
3885 | + or %o0, %lo(__flush_tlb_kernel_range), %o0 | ||
3886 | + sethi %hi(__cheetah_flush_tlb_kernel_range), %o1 | ||
3887 | + or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1 | ||
3888 | + call tlb_patch_one | ||
3889 | + mov 31, %o2 | ||
3890 | + | ||
3891 | +#ifdef DCACHE_ALIASING_POSSIBLE | ||
3892 | + sethi %hi(__flush_dcache_page), %o0 | ||
3893 | + or %o0, %lo(__flush_dcache_page), %o0 | ||
3894 | + sethi %hi(__cheetah_flush_dcache_page), %o1 | ||
3895 | + or %o1, %lo(__cheetah_flush_dcache_page), %o1 | ||
3896 | + call tlb_patch_one | ||
3897 | + mov 11, %o2 | ||
3898 | +#endif /* DCACHE_ALIASING_POSSIBLE */ | ||
3899 | + | ||
3900 | +#ifdef CONFIG_SMP | ||
3901 | + sethi %hi(xcall_flush_tlb_kernel_range), %o0 | ||
3902 | + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 | ||
3903 | + sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1 | ||
3904 | + or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1 | ||
3905 | + call tlb_patch_one | ||
3906 | + mov 44, %o2 | ||
3907 | +#endif /* CONFIG_SMP */ | ||
3908 | + | ||
3909 | + ret | ||
3910 | + restore | ||
3911 | |||
3912 | .globl hypervisor_patch_cachetlbops | ||
3913 | hypervisor_patch_cachetlbops: | ||
3914 | @@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops: | ||
3915 | sethi %hi(__hypervisor_flush_tlb_mm), %o1 | ||
3916 | or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 | ||
3917 | call tlb_patch_one | ||
3918 | - mov 10, %o2 | ||
3919 | + mov 19, %o2 | ||
3920 | |||
3921 | sethi %hi(__flush_tlb_page), %o0 | ||
3922 | or %o0, %lo(__flush_tlb_page), %o0 | ||
3923 | sethi %hi(__hypervisor_flush_tlb_page), %o1 | ||
3924 | or %o1, %lo(__hypervisor_flush_tlb_page), %o1 | ||
3925 | call tlb_patch_one | ||
3926 | - mov 11, %o2 | ||
3927 | + mov 22, %o2 | ||
3928 | |||
3929 | sethi %hi(__flush_tlb_pending), %o0 | ||
3930 | or %o0, %lo(__flush_tlb_pending), %o0 | ||
3931 | sethi %hi(__hypervisor_flush_tlb_pending), %o1 | ||
3932 | or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 | ||
3933 | call tlb_patch_one | ||
3934 | - mov 16, %o2 | ||
3935 | + mov 27, %o2 | ||
3936 | |||
3937 | sethi %hi(__flush_tlb_kernel_range), %o0 | ||
3938 | or %o0, %lo(__flush_tlb_kernel_range), %o0 | ||
3939 | sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 | ||
3940 | or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 | ||
3941 | call tlb_patch_one | ||
3942 | - mov 16, %o2 | ||
3943 | + mov 31, %o2 | ||
3944 | |||
3945 | #ifdef DCACHE_ALIASING_POSSIBLE | ||
3946 | sethi %hi(__flush_dcache_page), %o0 | ||
3947 | @@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops: | ||
3948 | sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 | ||
3949 | or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 | ||
3950 | call tlb_patch_one | ||
3951 | - mov 21, %o2 | ||
3952 | + mov 24, %o2 | ||
3953 | |||
3954 | sethi %hi(xcall_flush_tlb_page), %o0 | ||
3955 | or %o0, %lo(xcall_flush_tlb_page), %o0 | ||
3956 | sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 | ||
3957 | or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 | ||
3958 | call tlb_patch_one | ||
3959 | - mov 17, %o2 | ||
3960 | + mov 20, %o2 | ||
3961 | |||
3962 | sethi %hi(xcall_flush_tlb_kernel_range), %o0 | ||
3963 | or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 | ||
3964 | sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 | ||
3965 | or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 | ||
3966 | call tlb_patch_one | ||
3967 | - mov 25, %o2 | ||
3968 | + mov 44, %o2 | ||
3969 | #endif /* CONFIG_SMP */ | ||
3970 | |||
3971 | ret | ||
3972 | diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c | ||
3973 | index c32f5d32f811..b56c9c581359 100644 | ||
3974 | --- a/drivers/net/ethernet/broadcom/bgmac.c | ||
3975 | +++ b/drivers/net/ethernet/broadcom/bgmac.c | ||
3976 | @@ -314,6 +314,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac, | ||
3977 | u32 ctl; | ||
3978 | |||
3979 | ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL); | ||
3980 | + | ||
3981 | + /* preserve ONLY bits 16-17 from current hardware value */ | ||
3982 | + ctl &= BGMAC_DMA_RX_ADDREXT_MASK; | ||
3983 | + | ||
3984 | if (bgmac->core->id.rev >= 4) { | ||
3985 | ctl &= ~BGMAC_DMA_RX_BL_MASK; | ||
3986 | ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT; | ||
3987 | @@ -324,7 +328,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac, | ||
3988 | ctl &= ~BGMAC_DMA_RX_PT_MASK; | ||
3989 | ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT; | ||
3990 | } | ||
3991 | - ctl &= BGMAC_DMA_RX_ADDREXT_MASK; | ||
3992 | ctl |= BGMAC_DMA_RX_ENABLE; | ||
3993 | ctl |= BGMAC_DMA_RX_PARITY_DISABLE; | ||
3994 | ctl |= BGMAC_DMA_RX_OVERFLOW_CONT; | ||
3995 | diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c | ||
3996 | index ca0d3802f2af..4e603d060e80 100644 | ||
3997 | --- a/drivers/tty/serial/sunhv.c | ||
3998 | +++ b/drivers/tty/serial/sunhv.c | ||
3999 | @@ -490,12 +490,6 @@ static void sunhv_console_write_bychar(struct console *con, const char *s, unsig | ||
4000 | locked = spin_trylock_irqsave(&port->lock, flags); | ||
4001 | else | ||
4002 | spin_lock_irqsave(&port->lock, flags); | ||
4003 | - if (port->sysrq) { | ||
4004 | - locked = 0; | ||
4005 | - } else if (oops_in_progress) { | ||
4006 | - locked = spin_trylock(&port->lock); | ||
4007 | - } else | ||
4008 | - spin_lock(&port->lock); | ||
4009 | |||
4010 | for (i = 0; i < n; i++) { | ||
4011 | if (*s == '\n') | ||
4012 | diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c | ||
4013 | index 629e3c865072..9bee25cfa0be 100644 | ||
4014 | --- a/drivers/tty/tty_ldisc.c | ||
4015 | +++ b/drivers/tty/tty_ldisc.c | ||
4016 | @@ -417,6 +417,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush); | ||
4017 | * they are not on hot paths so a little discipline won't do | ||
4018 | * any harm. | ||
4019 | * | ||
4020 | + * The line discipline-related tty_struct fields are reset to | ||
4021 | + * prevent the ldisc driver from re-using stale information for | ||
4022 | + * the new ldisc instance. | ||
4023 | + * | ||
4024 | * Locking: takes termios_rwsem | ||
4025 | */ | ||
4026 | |||
4027 | @@ -425,6 +429,9 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) | ||
4028 | down_write(&tty->termios_rwsem); | ||
4029 | tty->termios.c_line = num; | ||
4030 | up_write(&tty->termios_rwsem); | ||
4031 | + | ||
4032 | + tty->disc_data = NULL; | ||
4033 | + tty->receive_room = 0; | ||
4034 | } | ||
4035 | |||
4036 | /** | ||
4037 | diff --git a/include/linux/filter.h b/include/linux/filter.h | ||
4038 | index 5110d4211866..ccb98b459c59 100644 | ||
4039 | --- a/include/linux/filter.h | ||
4040 | +++ b/include/linux/filter.h | ||
4041 | @@ -421,7 +421,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) | ||
4042 | } | ||
4043 | #endif /* CONFIG_DEBUG_SET_MODULE_RONX */ | ||
4044 | |||
4045 | -int sk_filter(struct sock *sk, struct sk_buff *skb); | ||
4046 | +int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); | ||
4047 | +static inline int sk_filter(struct sock *sk, struct sk_buff *skb) | ||
4048 | +{ | ||
4049 | + return sk_filter_trim_cap(sk, skb, 1); | ||
4050 | +} | ||
4051 | |||
4052 | int bpf_prog_select_runtime(struct bpf_prog *fp); | ||
4053 | void bpf_prog_free(struct bpf_prog *fp); | ||
4054 | diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h | ||
4055 | index ff788b665277..9c2c044153f6 100644 | ||
4056 | --- a/include/net/ip6_tunnel.h | ||
4057 | +++ b/include/net/ip6_tunnel.h | ||
4058 | @@ -86,6 +86,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, | ||
4059 | struct net_device_stats *stats = &dev->stats; | ||
4060 | int pkt_len, err; | ||
4061 | |||
4062 | + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); | ||
4063 | pkt_len = skb->len - skb_inner_network_offset(skb); | ||
4064 | err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); | ||
4065 | |||
4066 | diff --git a/include/net/tcp.h b/include/net/tcp.h | ||
4067 | index 9c3ab544d3a8..e9d7a8ef9a6d 100644 | ||
4068 | --- a/include/net/tcp.h | ||
4069 | +++ b/include/net/tcp.h | ||
4070 | @@ -1156,6 +1156,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) | ||
4071 | } | ||
4072 | |||
4073 | bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); | ||
4074 | +int tcp_filter(struct sock *sk, struct sk_buff *skb); | ||
4075 | |||
4076 | #undef STATE_TRACE | ||
4077 | |||
4078 | diff --git a/net/core/dev.c b/net/core/dev.c | ||
4079 | index b3fa4b86ab4c..9ca749c81b6c 100644 | ||
4080 | --- a/net/core/dev.c | ||
4081 | +++ b/net/core/dev.c | ||
4082 | @@ -2462,7 +2462,7 @@ int skb_checksum_help(struct sk_buff *skb) | ||
4083 | goto out; | ||
4084 | } | ||
4085 | |||
4086 | - *(__sum16 *)(skb->data + offset) = csum_fold(csum); | ||
4087 | + *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; | ||
4088 | out_set_summed: | ||
4089 | skb->ip_summed = CHECKSUM_NONE; | ||
4090 | out: | ||
4091 | diff --git a/net/core/filter.c b/net/core/filter.c | ||
4092 | index 75e9b2b2336d..e94355452166 100644 | ||
4093 | --- a/net/core/filter.c | ||
4094 | +++ b/net/core/filter.c | ||
4095 | @@ -52,9 +52,10 @@ | ||
4096 | #include <net/dst.h> | ||
4097 | |||
4098 | /** | ||
4099 | - * sk_filter - run a packet through a socket filter | ||
4100 | + * sk_filter_trim_cap - run a packet through a socket filter | ||
4101 | * @sk: sock associated with &sk_buff | ||
4102 | * @skb: buffer to filter | ||
4103 | + * @cap: limit on how short the eBPF program may trim the packet | ||
4104 | * | ||
4105 | * Run the eBPF program and then cut skb->data to correct size returned by | ||
4106 | * the program. If pkt_len is 0 we toss packet. If skb->len is smaller | ||
4107 | @@ -63,7 +64,7 @@ | ||
4108 | * be accepted or -EPERM if the packet should be tossed. | ||
4109 | * | ||
4110 | */ | ||
4111 | -int sk_filter(struct sock *sk, struct sk_buff *skb) | ||
4112 | +int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) | ||
4113 | { | ||
4114 | int err; | ||
4115 | struct sk_filter *filter; | ||
4116 | @@ -84,14 +85,13 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) | ||
4117 | filter = rcu_dereference(sk->sk_filter); | ||
4118 | if (filter) { | ||
4119 | unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); | ||
4120 | - | ||
4121 | - err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; | ||
4122 | + err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM; | ||
4123 | } | ||
4124 | rcu_read_unlock(); | ||
4125 | |||
4126 | return err; | ||
4127 | } | ||
4128 | -EXPORT_SYMBOL(sk_filter); | ||
4129 | +EXPORT_SYMBOL(sk_filter_trim_cap); | ||
4130 | |||
4131 | static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) | ||
4132 | { | ||
4133 | diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c | ||
4134 | index 4ab6ead3d8ee..9aba9e93c0a2 100644 | ||
4135 | --- a/net/core/flow_dissector.c | ||
4136 | +++ b/net/core/flow_dissector.c | ||
4137 | @@ -131,7 +131,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, | ||
4138 | struct flow_dissector_key_tags *key_tags; | ||
4139 | struct flow_dissector_key_keyid *key_keyid; | ||
4140 | u8 ip_proto = 0; | ||
4141 | - bool ret = false; | ||
4142 | + bool ret; | ||
4143 | |||
4144 | if (!data) { | ||
4145 | data = skb->data; | ||
4146 | @@ -492,12 +492,17 @@ ip_proto_again: | ||
4147 | out_good: | ||
4148 | ret = true; | ||
4149 | |||
4150 | -out_bad: | ||
4151 | + key_control->thoff = (u16)nhoff; | ||
4152 | +out: | ||
4153 | key_basic->n_proto = proto; | ||
4154 | key_basic->ip_proto = ip_proto; | ||
4155 | - key_control->thoff = (u16)nhoff; | ||
4156 | |||
4157 | return ret; | ||
4158 | + | ||
4159 | +out_bad: | ||
4160 | + ret = false; | ||
4161 | + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); | ||
4162 | + goto out; | ||
4163 | } | ||
4164 | EXPORT_SYMBOL(__skb_flow_dissect); | ||
4165 | |||
4166 | diff --git a/net/core/sock.c b/net/core/sock.c | ||
4167 | index 0d91f7dca751..88f017854509 100644 | ||
4168 | --- a/net/core/sock.c | ||
4169 | +++ b/net/core/sock.c | ||
4170 | @@ -1562,6 +1562,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) | ||
4171 | } | ||
4172 | |||
4173 | newsk->sk_err = 0; | ||
4174 | + newsk->sk_err_soft = 0; | ||
4175 | newsk->sk_priority = 0; | ||
4176 | newsk->sk_incoming_cpu = raw_smp_processor_id(); | ||
4177 | atomic64_set(&newsk->sk_cookie, 0); | ||
4178 | diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c | ||
4179 | index 8be8f27bfacc..861e1fa25d5e 100644 | ||
4180 | --- a/net/dccp/ipv4.c | ||
4181 | +++ b/net/dccp/ipv4.c | ||
4182 | @@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) | ||
4183 | { | ||
4184 | const struct iphdr *iph = (struct iphdr *)skb->data; | ||
4185 | const u8 offset = iph->ihl << 2; | ||
4186 | - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); | ||
4187 | + const struct dccp_hdr *dh; | ||
4188 | struct dccp_sock *dp; | ||
4189 | struct inet_sock *inet; | ||
4190 | const int type = icmp_hdr(skb)->type; | ||
4191 | @@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) | ||
4192 | int err; | ||
4193 | struct net *net = dev_net(skb->dev); | ||
4194 | |||
4195 | - if (skb->len < offset + sizeof(*dh) || | ||
4196 | - skb->len < offset + __dccp_basic_hdr_len(dh)) { | ||
4197 | - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); | ||
4198 | - return; | ||
4199 | - } | ||
4200 | + /* Only need dccph_dport & dccph_sport which are the first | ||
4201 | + * 4 bytes in dccp header. | ||
4202 | + * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. | ||
4203 | + */ | ||
4204 | + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); | ||
4205 | + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); | ||
4206 | + dh = (struct dccp_hdr *)(skb->data + offset); | ||
4207 | |||
4208 | sk = __inet_lookup_established(net, &dccp_hashinfo, | ||
4209 | iph->daddr, dh->dccph_dport, | ||
4210 | diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c | ||
4211 | index b8608b71a66d..27c4e81efa24 100644 | ||
4212 | --- a/net/dccp/ipv6.c | ||
4213 | +++ b/net/dccp/ipv6.c | ||
4214 | @@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | ||
4215 | u8 type, u8 code, int offset, __be32 info) | ||
4216 | { | ||
4217 | const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; | ||
4218 | - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); | ||
4219 | + const struct dccp_hdr *dh; | ||
4220 | struct dccp_sock *dp; | ||
4221 | struct ipv6_pinfo *np; | ||
4222 | struct sock *sk; | ||
4223 | @@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | ||
4224 | __u64 seq; | ||
4225 | struct net *net = dev_net(skb->dev); | ||
4226 | |||
4227 | - if (skb->len < offset + sizeof(*dh) || | ||
4228 | - skb->len < offset + __dccp_basic_hdr_len(dh)) { | ||
4229 | - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), | ||
4230 | - ICMP6_MIB_INERRORS); | ||
4231 | - return; | ||
4232 | - } | ||
4233 | + /* Only need dccph_dport & dccph_sport which are the first | ||
4234 | + * 4 bytes in dccp header. | ||
4235 | + * Our caller (icmpv6_notify()) already pulled 8 bytes for us. | ||
4236 | + */ | ||
4237 | + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); | ||
4238 | + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); | ||
4239 | + dh = (struct dccp_hdr *)(skb->data + offset); | ||
4240 | |||
4241 | sk = __inet6_lookup_established(net, &dccp_hashinfo, | ||
4242 | &hdr->daddr, dh->dccph_dport, | ||
4243 | @@ -947,6 +948,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { | ||
4244 | .getsockopt = ipv6_getsockopt, | ||
4245 | .addr2sockaddr = inet6_csk_addr2sockaddr, | ||
4246 | .sockaddr_len = sizeof(struct sockaddr_in6), | ||
4247 | + .bind_conflict = inet6_csk_bind_conflict, | ||
4248 | #ifdef CONFIG_COMPAT | ||
4249 | .compat_setsockopt = compat_ipv6_setsockopt, | ||
4250 | .compat_getsockopt = compat_ipv6_getsockopt, | ||
4251 | diff --git a/net/dccp/proto.c b/net/dccp/proto.c | ||
4252 | index 41e65804ddf5..9fe25bf63296 100644 | ||
4253 | --- a/net/dccp/proto.c | ||
4254 | +++ b/net/dccp/proto.c | ||
4255 | @@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout) | ||
4256 | __kfree_skb(skb); | ||
4257 | } | ||
4258 | |||
4259 | + /* If socket has been already reset kill it. */ | ||
4260 | + if (sk->sk_state == DCCP_CLOSED) | ||
4261 | + goto adjudge_to_death; | ||
4262 | + | ||
4263 | if (data_was_unread) { | ||
4264 | /* Unread data was tossed, send an appropriate Reset Code */ | ||
4265 | DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); | ||
4266 | diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c | ||
4267 | index e5a3ff210fec..7c52afb98c42 100644 | ||
4268 | --- a/net/ipv4/fib_trie.c | ||
4269 | +++ b/net/ipv4/fib_trie.c | ||
4270 | @@ -2456,22 +2456,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, | ||
4271 | struct key_vector *l, **tp = &iter->tnode; | ||
4272 | t_key key; | ||
4273 | |||
4274 | - /* use cache location of next-to-find key */ | ||
4275 | + /* use cached location of previously found key */ | ||
4276 | if (iter->pos > 0 && pos >= iter->pos) { | ||
4277 | - pos -= iter->pos; | ||
4278 | key = iter->key; | ||
4279 | } else { | ||
4280 | - iter->pos = 0; | ||
4281 | + iter->pos = 1; | ||
4282 | key = 0; | ||
4283 | } | ||
4284 | |||
4285 | - while ((l = leaf_walk_rcu(tp, key)) != NULL) { | ||
4286 | + pos -= iter->pos; | ||
4287 | + | ||
4288 | + while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { | ||
4289 | key = l->key + 1; | ||
4290 | iter->pos++; | ||
4291 | - | ||
4292 | - if (--pos <= 0) | ||
4293 | - break; | ||
4294 | - | ||
4295 | l = NULL; | ||
4296 | |||
4297 | /* handle unlikely case of a key wrap */ | ||
4298 | @@ -2480,7 +2477,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, | ||
4299 | } | ||
4300 | |||
4301 | if (l) | ||
4302 | - iter->key = key; /* remember it */ | ||
4303 | + iter->key = l->key; /* remember it */ | ||
4304 | else | ||
4305 | iter->pos = 0; /* forget it */ | ||
4306 | |||
4307 | @@ -2508,7 +2505,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) | ||
4308 | return fib_route_get_idx(iter, *pos); | ||
4309 | |||
4310 | iter->pos = 0; | ||
4311 | - iter->key = 0; | ||
4312 | + iter->key = KEY_MAX; | ||
4313 | |||
4314 | return SEQ_START_TOKEN; | ||
4315 | } | ||
4316 | @@ -2517,7 +2514,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
4317 | { | ||
4318 | struct fib_route_iter *iter = seq->private; | ||
4319 | struct key_vector *l = NULL; | ||
4320 | - t_key key = iter->key; | ||
4321 | + t_key key = iter->key + 1; | ||
4322 | |||
4323 | ++*pos; | ||
4324 | |||
4325 | @@ -2526,7 +2523,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
4326 | l = leaf_walk_rcu(&iter->tnode, key); | ||
4327 | |||
4328 | if (l) { | ||
4329 | - iter->key = l->key + 1; | ||
4330 | + iter->key = l->key; | ||
4331 | iter->pos++; | ||
4332 | } else { | ||
4333 | iter->pos = 0; | ||
4334 | diff --git a/net/ipv4/route.c b/net/ipv4/route.c | ||
4335 | index 8533a75a9328..7ceb8a574a50 100644 | ||
4336 | --- a/net/ipv4/route.c | ||
4337 | +++ b/net/ipv4/route.c | ||
4338 | @@ -747,7 +747,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow | ||
4339 | goto reject_redirect; | ||
4340 | } | ||
4341 | |||
4342 | - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); | ||
4343 | + n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); | ||
4344 | + if (!n) | ||
4345 | + n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); | ||
4346 | if (!IS_ERR(n)) { | ||
4347 | if (!(n->nud_state & NUD_VALID)) { | ||
4348 | neigh_event_send(n, NULL); | ||
4349 | diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c | ||
4350 | index 036a76ba2ac2..69daa81736f6 100644 | ||
4351 | --- a/net/ipv4/tcp.c | ||
4352 | +++ b/net/ipv4/tcp.c | ||
4353 | @@ -1212,7 +1212,7 @@ new_segment: | ||
4354 | |||
4355 | if (!skb_can_coalesce(skb, i, pfrag->page, | ||
4356 | pfrag->offset)) { | ||
4357 | - if (i == sysctl_max_skb_frags || !sg) { | ||
4358 | + if (i >= sysctl_max_skb_frags || !sg) { | ||
4359 | tcp_mark_push(tp, skb); | ||
4360 | goto new_segment; | ||
4361 | } | ||
4362 | diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c | ||
4363 | index 7e538f71f5fb..55d7da1d2ce9 100644 | ||
4364 | --- a/net/ipv4/tcp_dctcp.c | ||
4365 | +++ b/net/ipv4/tcp_dctcp.c | ||
4366 | @@ -56,6 +56,7 @@ struct dctcp { | ||
4367 | u32 next_seq; | ||
4368 | u32 ce_state; | ||
4369 | u32 delayed_ack_reserved; | ||
4370 | + u32 loss_cwnd; | ||
4371 | }; | ||
4372 | |||
4373 | static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ | ||
4374 | @@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk) | ||
4375 | ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); | ||
4376 | |||
4377 | ca->delayed_ack_reserved = 0; | ||
4378 | + ca->loss_cwnd = 0; | ||
4379 | ca->ce_state = 0; | ||
4380 | |||
4381 | dctcp_reset(tp, ca); | ||
4382 | @@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk) | ||
4383 | |||
4384 | static u32 dctcp_ssthresh(struct sock *sk) | ||
4385 | { | ||
4386 | - const struct dctcp *ca = inet_csk_ca(sk); | ||
4387 | + struct dctcp *ca = inet_csk_ca(sk); | ||
4388 | struct tcp_sock *tp = tcp_sk(sk); | ||
4389 | |||
4390 | + ca->loss_cwnd = tp->snd_cwnd; | ||
4391 | return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); | ||
4392 | } | ||
4393 | |||
4394 | @@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, | ||
4395 | return 0; | ||
4396 | } | ||
4397 | |||
4398 | +static u32 dctcp_cwnd_undo(struct sock *sk) | ||
4399 | +{ | ||
4400 | + const struct dctcp *ca = inet_csk_ca(sk); | ||
4401 | + | ||
4402 | + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); | ||
4403 | +} | ||
4404 | + | ||
4405 | static struct tcp_congestion_ops dctcp __read_mostly = { | ||
4406 | .init = dctcp_init, | ||
4407 | .in_ack_event = dctcp_update_alpha, | ||
4408 | .cwnd_event = dctcp_cwnd_event, | ||
4409 | .ssthresh = dctcp_ssthresh, | ||
4410 | .cong_avoid = tcp_reno_cong_avoid, | ||
4411 | + .undo_cwnd = dctcp_cwnd_undo, | ||
4412 | .set_state = dctcp_state, | ||
4413 | .get_info = dctcp_get_info, | ||
4414 | .flags = TCP_CONG_NEEDS_ECN, | ||
4415 | diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c | ||
4416 | index b5853cac3269..b58a38eea059 100644 | ||
4417 | --- a/net/ipv4/tcp_ipv4.c | ||
4418 | +++ b/net/ipv4/tcp_ipv4.c | ||
4419 | @@ -1533,6 +1533,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) | ||
4420 | } | ||
4421 | EXPORT_SYMBOL(tcp_prequeue); | ||
4422 | |||
4423 | +int tcp_filter(struct sock *sk, struct sk_buff *skb) | ||
4424 | +{ | ||
4425 | + struct tcphdr *th = (struct tcphdr *)skb->data; | ||
4426 | + unsigned int eaten = skb->len; | ||
4427 | + int err; | ||
4428 | + | ||
4429 | + err = sk_filter_trim_cap(sk, skb, th->doff * 4); | ||
4430 | + if (!err) { | ||
4431 | + eaten -= skb->len; | ||
4432 | + TCP_SKB_CB(skb)->end_seq -= eaten; | ||
4433 | + } | ||
4434 | + return err; | ||
4435 | +} | ||
4436 | +EXPORT_SYMBOL(tcp_filter); | ||
4437 | + | ||
4438 | /* | ||
4439 | * From tcp_input.c | ||
4440 | */ | ||
4441 | @@ -1638,8 +1653,10 @@ process: | ||
4442 | |||
4443 | nf_reset(skb); | ||
4444 | |||
4445 | - if (sk_filter(sk, skb)) | ||
4446 | + if (tcp_filter(sk, skb)) | ||
4447 | goto discard_and_relse; | ||
4448 | + th = (const struct tcphdr *)skb->data; | ||
4449 | + iph = ip_hdr(skb); | ||
4450 | |||
4451 | skb->dev = NULL; | ||
4452 | |||
4453 | diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c | ||
4454 | index fbd521fdae53..5f581616bf6a 100644 | ||
4455 | --- a/net/ipv6/tcp_ipv6.c | ||
4456 | +++ b/net/ipv6/tcp_ipv6.c | ||
4457 | @@ -1214,7 +1214,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) | ||
4458 | if (skb->protocol == htons(ETH_P_IP)) | ||
4459 | return tcp_v4_do_rcv(sk, skb); | ||
4460 | |||
4461 | - if (sk_filter(sk, skb)) | ||
4462 | + if (tcp_filter(sk, skb)) | ||
4463 | goto discard; | ||
4464 | |||
4465 | /* | ||
4466 | @@ -1438,8 +1438,10 @@ process: | ||
4467 | if (tcp_v6_inbound_md5_hash(sk, skb)) | ||
4468 | goto discard_and_relse; | ||
4469 | |||
4470 | - if (sk_filter(sk, skb)) | ||
4471 | + if (tcp_filter(sk, skb)) | ||
4472 | goto discard_and_relse; | ||
4473 | + th = (const struct tcphdr *)skb->data; | ||
4474 | + hdr = ipv6_hdr(skb); | ||
4475 | |||
4476 | skb->dev = NULL; | ||
4477 | |||
4478 | diff --git a/net/sctp/socket.c b/net/sctp/socket.c | ||
4479 | index 402817be3873..b5fd4ab56156 100644 | ||
4480 | --- a/net/sctp/socket.c | ||
4481 | +++ b/net/sctp/socket.c | ||
4482 | @@ -1212,9 +1212,12 @@ static int __sctp_connect(struct sock *sk, | ||
4483 | |||
4484 | timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); | ||
4485 | |||
4486 | - err = sctp_wait_for_connect(asoc, &timeo); | ||
4487 | - if ((err == 0 || err == -EINPROGRESS) && assoc_id) | ||
4488 | + if (assoc_id) | ||
4489 | *assoc_id = asoc->assoc_id; | ||
4490 | + err = sctp_wait_for_connect(asoc, &timeo); | ||
4491 | + /* Note: the asoc may be freed after the return of | ||
4492 | + * sctp_wait_for_connect. | ||
4493 | + */ | ||
4494 | |||
4495 | /* Don't free association on exit. */ | ||
4496 | asoc = NULL; | ||
4497 | diff --git a/net/socket.c b/net/socket.c | ||
4498 | index 263b334ec5e4..0090225eeb1e 100644 | ||
4499 | --- a/net/socket.c | ||
4500 | +++ b/net/socket.c | ||
4501 | @@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, | ||
4502 | if (err) | ||
4503 | break; | ||
4504 | ++datagrams; | ||
4505 | + if (msg_data_left(&msg_sys)) | ||
4506 | + break; | ||
4507 | } | ||
4508 | |||
4509 | fput_light(sock->file, fput_needed); |