Annotation of /trunk/kernel-alx/patches-4.9/0120-4.9.21-all-fixes.patch
Parent Directory | Revision Log
Revision 2956 -
(hide annotations)
(download)
Mon Jul 24 12:03:46 2017 UTC (7 years, 2 months ago) by niro
File size: 148261 byte(s)
-added patches-4.9
1 | niro | 2956 | diff --git a/Makefile b/Makefile |
2 | index 44960184701a..1523557bd61f 100644 | ||
3 | --- a/Makefile | ||
4 | +++ b/Makefile | ||
5 | @@ -1,6 +1,6 @@ | ||
6 | VERSION = 4 | ||
7 | PATCHLEVEL = 9 | ||
8 | -SUBLEVEL = 20 | ||
9 | +SUBLEVEL = 21 | ||
10 | EXTRAVERSION = | ||
11 | NAME = Roaring Lionus | ||
12 | |||
13 | diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi | ||
14 | index ae4b3880616d..4616452ce74d 100644 | ||
15 | --- a/arch/arm/boot/dts/bcm5301x.dtsi | ||
16 | +++ b/arch/arm/boot/dts/bcm5301x.dtsi | ||
17 | @@ -66,14 +66,14 @@ | ||
18 | timer@20200 { | ||
19 | compatible = "arm,cortex-a9-global-timer"; | ||
20 | reg = <0x20200 0x100>; | ||
21 | - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; | ||
22 | + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; | ||
23 | clocks = <&periph_clk>; | ||
24 | }; | ||
25 | |||
26 | local-timer@20600 { | ||
27 | compatible = "arm,cortex-a9-twd-timer"; | ||
28 | reg = <0x20600 0x100>; | ||
29 | - interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>; | ||
30 | + interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>; | ||
31 | clocks = <&periph_clk>; | ||
32 | }; | ||
33 | |||
34 | diff --git a/arch/arm/mach-bcm/bcm_5301x.c b/arch/arm/mach-bcm/bcm_5301x.c | ||
35 | index c8830a2b0d60..fe067f6cebb6 100644 | ||
36 | --- a/arch/arm/mach-bcm/bcm_5301x.c | ||
37 | +++ b/arch/arm/mach-bcm/bcm_5301x.c | ||
38 | @@ -9,14 +9,42 @@ | ||
39 | #include <asm/hardware/cache-l2x0.h> | ||
40 | |||
41 | #include <asm/mach/arch.h> | ||
42 | +#include <asm/siginfo.h> | ||
43 | +#include <asm/signal.h> | ||
44 | + | ||
45 | +#define FSR_EXTERNAL (1 << 12) | ||
46 | +#define FSR_READ (0 << 10) | ||
47 | +#define FSR_IMPRECISE 0x0406 | ||
48 | |||
49 | static const char *const bcm5301x_dt_compat[] __initconst = { | ||
50 | "brcm,bcm4708", | ||
51 | NULL, | ||
52 | }; | ||
53 | |||
54 | +static int bcm5301x_abort_handler(unsigned long addr, unsigned int fsr, | ||
55 | + struct pt_regs *regs) | ||
56 | +{ | ||
57 | + /* | ||
58 | + * We want to ignore aborts forwarded from the PCIe bus that are | ||
59 | + * expected and shouldn't really be passed by the PCIe controller. | ||
60 | + * The biggest disadvantage is the same FSR code may be reported when | ||
61 | + * reading a non-existent APB register and we shouldn't ignore that. | ||
62 | + */ | ||
63 | + if (fsr == (FSR_EXTERNAL | FSR_READ | FSR_IMPRECISE)) | ||
64 | + return 0; | ||
65 | + | ||
66 | + return 1; | ||
67 | +} | ||
68 | + | ||
69 | +static void __init bcm5301x_init_early(void) | ||
70 | +{ | ||
71 | + hook_fault_code(16 + 6, bcm5301x_abort_handler, SIGBUS, BUS_OBJERR, | ||
72 | + "imprecise external abort"); | ||
73 | +} | ||
74 | + | ||
75 | DT_MACHINE_START(BCM5301X, "BCM5301X") | ||
76 | .l2c_aux_val = 0, | ||
77 | .l2c_aux_mask = ~0, | ||
78 | .dt_compat = bcm5301x_dt_compat, | ||
79 | + .init_early = bcm5301x_init_early, | ||
80 | MACHINE_END | ||
81 | diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c | ||
82 | index 8ac0e5994ed2..0ddf3698b85d 100644 | ||
83 | --- a/arch/mips/lantiq/irq.c | ||
84 | +++ b/arch/mips/lantiq/irq.c | ||
85 | @@ -269,6 +269,11 @@ static void ltq_hw5_irqdispatch(void) | ||
86 | DEFINE_HWx_IRQDISPATCH(5) | ||
87 | #endif | ||
88 | |||
89 | +static void ltq_hw_irq_handler(struct irq_desc *desc) | ||
90 | +{ | ||
91 | + ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2); | ||
92 | +} | ||
93 | + | ||
94 | #ifdef CONFIG_MIPS_MT_SMP | ||
95 | void __init arch_init_ipiirq(int irq, struct irqaction *action) | ||
96 | { | ||
97 | @@ -313,23 +318,19 @@ static struct irqaction irq_call = { | ||
98 | asmlinkage void plat_irq_dispatch(void) | ||
99 | { | ||
100 | unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; | ||
101 | - unsigned int i; | ||
102 | - | ||
103 | - if ((MIPS_CPU_TIMER_IRQ == 7) && (pending & CAUSEF_IP7)) { | ||
104 | - do_IRQ(MIPS_CPU_TIMER_IRQ); | ||
105 | - goto out; | ||
106 | - } else { | ||
107 | - for (i = 0; i < MAX_IM; i++) { | ||
108 | - if (pending & (CAUSEF_IP2 << i)) { | ||
109 | - ltq_hw_irqdispatch(i); | ||
110 | - goto out; | ||
111 | - } | ||
112 | - } | ||
113 | + int irq; | ||
114 | + | ||
115 | + if (!pending) { | ||
116 | + spurious_interrupt(); | ||
117 | + return; | ||
118 | } | ||
119 | - pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status()); | ||
120 | |||
121 | -out: | ||
122 | - return; | ||
123 | + pending >>= CAUSEB_IP; | ||
124 | + while (pending) { | ||
125 | + irq = fls(pending) - 1; | ||
126 | + do_IRQ(MIPS_CPU_IRQ_BASE + irq); | ||
127 | + pending &= ~BIT(irq); | ||
128 | + } | ||
129 | } | ||
130 | |||
131 | static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) | ||
132 | @@ -354,11 +355,6 @@ static const struct irq_domain_ops irq_domain_ops = { | ||
133 | .map = icu_map, | ||
134 | }; | ||
135 | |||
136 | -static struct irqaction cascade = { | ||
137 | - .handler = no_action, | ||
138 | - .name = "cascade", | ||
139 | -}; | ||
140 | - | ||
141 | int __init icu_of_init(struct device_node *node, struct device_node *parent) | ||
142 | { | ||
143 | struct device_node *eiu_node; | ||
144 | @@ -390,7 +386,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) | ||
145 | mips_cpu_irq_init(); | ||
146 | |||
147 | for (i = 0; i < MAX_IM; i++) | ||
148 | - setup_irq(i + 2, &cascade); | ||
149 | + irq_set_chained_handler(i + 2, ltq_hw_irq_handler); | ||
150 | |||
151 | if (cpu_has_vint) { | ||
152 | pr_info("Setting up vectored interrupts\n"); | ||
153 | diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h | ||
154 | index 9a2aee1b90fc..7fcf5128996a 100644 | ||
155 | --- a/arch/parisc/include/asm/uaccess.h | ||
156 | +++ b/arch/parisc/include/asm/uaccess.h | ||
157 | @@ -68,6 +68,15 @@ struct exception_table_entry { | ||
158 | ".previous\n" | ||
159 | |||
160 | /* | ||
161 | + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry | ||
162 | + * (with lowest bit set) for which the fault handler in fixup_exception() will | ||
163 | + * load -EFAULT into %r8 for a read or write fault, and zero the target | ||
164 | + * register in case of a read fault in get_user(). | ||
165 | + */ | ||
166 | +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ | ||
167 | + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) | ||
168 | + | ||
169 | +/* | ||
170 | * The page fault handler stores, in a per-cpu area, the following information | ||
171 | * if a fixup routine is available. | ||
172 | */ | ||
173 | @@ -94,7 +103,7 @@ struct exception_data { | ||
174 | #define __get_user(x, ptr) \ | ||
175 | ({ \ | ||
176 | register long __gu_err __asm__ ("r8") = 0; \ | ||
177 | - register long __gu_val __asm__ ("r9") = 0; \ | ||
178 | + register long __gu_val; \ | ||
179 | \ | ||
180 | load_sr2(); \ | ||
181 | switch (sizeof(*(ptr))) { \ | ||
182 | @@ -110,22 +119,23 @@ struct exception_data { | ||
183 | }) | ||
184 | |||
185 | #define __get_user_asm(ldx, ptr) \ | ||
186 | - __asm__("\n1:\t" ldx "\t0(%%sr2,%2),%0\n\t" \ | ||
187 | - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\ | ||
188 | + __asm__("1: " ldx " 0(%%sr2,%2),%0\n" \ | ||
189 | + "9:\n" \ | ||
190 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ | ||
191 | : "=r"(__gu_val), "=r"(__gu_err) \ | ||
192 | - : "r"(ptr), "1"(__gu_err) \ | ||
193 | - : "r1"); | ||
194 | + : "r"(ptr), "1"(__gu_err)); | ||
195 | |||
196 | #if !defined(CONFIG_64BIT) | ||
197 | |||
198 | #define __get_user_asm64(ptr) \ | ||
199 | - __asm__("\n1:\tldw 0(%%sr2,%2),%0" \ | ||
200 | - "\n2:\tldw 4(%%sr2,%2),%R0\n\t" \ | ||
201 | - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_2)\ | ||
202 | - ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_get_user_skip_1)\ | ||
203 | + __asm__(" copy %%r0,%R0\n" \ | ||
204 | + "1: ldw 0(%%sr2,%2),%0\n" \ | ||
205 | + "2: ldw 4(%%sr2,%2),%R0\n" \ | ||
206 | + "9:\n" \ | ||
207 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ | ||
208 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ | ||
209 | : "=r"(__gu_val), "=r"(__gu_err) \ | ||
210 | - : "r"(ptr), "1"(__gu_err) \ | ||
211 | - : "r1"); | ||
212 | + : "r"(ptr), "1"(__gu_err)); | ||
213 | |||
214 | #endif /* !defined(CONFIG_64BIT) */ | ||
215 | |||
216 | @@ -151,32 +161,31 @@ struct exception_data { | ||
217 | * The "__put_user/kernel_asm()" macros tell gcc they read from memory | ||
218 | * instead of writing. This is because they do not write to any memory | ||
219 | * gcc knows about, so there are no aliasing issues. These macros must | ||
220 | - * also be aware that "fixup_put_user_skip_[12]" are executed in the | ||
221 | - * context of the fault, and any registers used there must be listed | ||
222 | - * as clobbers. In this case only "r1" is used by the current routines. | ||
223 | - * r8/r9 are already listed as err/val. | ||
224 | + * also be aware that fixups are executed in the context of the fault, | ||
225 | + * and any registers used there must be listed as clobbers. | ||
226 | + * r8 is already listed as err. | ||
227 | */ | ||
228 | |||
229 | #define __put_user_asm(stx, x, ptr) \ | ||
230 | __asm__ __volatile__ ( \ | ||
231 | - "\n1:\t" stx "\t%2,0(%%sr2,%1)\n\t" \ | ||
232 | - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_1)\ | ||
233 | + "1: " stx " %2,0(%%sr2,%1)\n" \ | ||
234 | + "9:\n" \ | ||
235 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ | ||
236 | : "=r"(__pu_err) \ | ||
237 | - : "r"(ptr), "r"(x), "0"(__pu_err) \ | ||
238 | - : "r1") | ||
239 | + : "r"(ptr), "r"(x), "0"(__pu_err)) | ||
240 | |||
241 | |||
242 | #if !defined(CONFIG_64BIT) | ||
243 | |||
244 | #define __put_user_asm64(__val, ptr) do { \ | ||
245 | __asm__ __volatile__ ( \ | ||
246 | - "\n1:\tstw %2,0(%%sr2,%1)" \ | ||
247 | - "\n2:\tstw %R2,4(%%sr2,%1)\n\t" \ | ||
248 | - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_2)\ | ||
249 | - ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_put_user_skip_1)\ | ||
250 | + "1: stw %2,0(%%sr2,%1)\n" \ | ||
251 | + "2: stw %R2,4(%%sr2,%1)\n" \ | ||
252 | + "9:\n" \ | ||
253 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ | ||
254 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ | ||
255 | : "=r"(__pu_err) \ | ||
256 | - : "r"(ptr), "r"(__val), "0"(__pu_err) \ | ||
257 | - : "r1"); \ | ||
258 | + : "r"(ptr), "r"(__val), "0"(__pu_err)); \ | ||
259 | } while (0) | ||
260 | |||
261 | #endif /* !defined(CONFIG_64BIT) */ | ||
262 | diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c | ||
263 | index 3cad8aadc69e..4e6f0d93154f 100644 | ||
264 | --- a/arch/parisc/kernel/parisc_ksyms.c | ||
265 | +++ b/arch/parisc/kernel/parisc_ksyms.c | ||
266 | @@ -47,16 +47,6 @@ EXPORT_SYMBOL(__cmpxchg_u64); | ||
267 | EXPORT_SYMBOL(lclear_user); | ||
268 | EXPORT_SYMBOL(lstrnlen_user); | ||
269 | |||
270 | -/* Global fixups - defined as int to avoid creation of function pointers */ | ||
271 | -extern int fixup_get_user_skip_1; | ||
272 | -extern int fixup_get_user_skip_2; | ||
273 | -extern int fixup_put_user_skip_1; | ||
274 | -extern int fixup_put_user_skip_2; | ||
275 | -EXPORT_SYMBOL(fixup_get_user_skip_1); | ||
276 | -EXPORT_SYMBOL(fixup_get_user_skip_2); | ||
277 | -EXPORT_SYMBOL(fixup_put_user_skip_1); | ||
278 | -EXPORT_SYMBOL(fixup_put_user_skip_2); | ||
279 | - | ||
280 | #ifndef CONFIG_64BIT | ||
281 | /* Needed so insmod can set dp value */ | ||
282 | extern int $global$; | ||
283 | diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c | ||
284 | index e81afc378850..e7ffde2758fc 100644 | ||
285 | --- a/arch/parisc/kernel/process.c | ||
286 | +++ b/arch/parisc/kernel/process.c | ||
287 | @@ -140,6 +140,8 @@ void machine_power_off(void) | ||
288 | printk(KERN_EMERG "System shut down completed.\n" | ||
289 | "Please power this system off now."); | ||
290 | |||
291 | + /* prevent soft lockup/stalled CPU messages for endless loop. */ | ||
292 | + rcu_sysrq_start(); | ||
293 | for (;;); | ||
294 | } | ||
295 | |||
296 | diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile | ||
297 | index 8fa92b8d839a..f2dac4d73b1b 100644 | ||
298 | --- a/arch/parisc/lib/Makefile | ||
299 | +++ b/arch/parisc/lib/Makefile | ||
300 | @@ -2,7 +2,7 @@ | ||
301 | # Makefile for parisc-specific library files | ||
302 | # | ||
303 | |||
304 | -lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \ | ||
305 | +lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ | ||
306 | ucmpdi2.o delay.o | ||
307 | |||
308 | obj-y := iomap.o | ||
309 | diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S | ||
310 | deleted file mode 100644 | ||
311 | index a5b72f22c7a6..000000000000 | ||
312 | --- a/arch/parisc/lib/fixup.S | ||
313 | +++ /dev/null | ||
314 | @@ -1,98 +0,0 @@ | ||
315 | -/* | ||
316 | - * Linux/PA-RISC Project (http://www.parisc-linux.org/) | ||
317 | - * | ||
318 | - * Copyright (C) 2004 Randolph Chung <tausq@debian.org> | ||
319 | - * | ||
320 | - * This program is free software; you can redistribute it and/or modify | ||
321 | - * it under the terms of the GNU General Public License as published by | ||
322 | - * the Free Software Foundation; either version 2, or (at your option) | ||
323 | - * any later version. | ||
324 | - * | ||
325 | - * This program is distributed in the hope that it will be useful, | ||
326 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
327 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
328 | - * GNU General Public License for more details. | ||
329 | - * | ||
330 | - * You should have received a copy of the GNU General Public License | ||
331 | - * along with this program; if not, write to the Free Software | ||
332 | - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
333 | - * | ||
334 | - * Fixup routines for kernel exception handling. | ||
335 | - */ | ||
336 | -#include <asm/asm-offsets.h> | ||
337 | -#include <asm/assembly.h> | ||
338 | -#include <asm/errno.h> | ||
339 | -#include <linux/linkage.h> | ||
340 | - | ||
341 | -#ifdef CONFIG_SMP | ||
342 | - .macro get_fault_ip t1 t2 | ||
343 | - loadgp | ||
344 | - addil LT%__per_cpu_offset,%r27 | ||
345 | - LDREG RT%__per_cpu_offset(%r1),\t1 | ||
346 | - /* t2 = smp_processor_id() */ | ||
347 | - mfctl 30,\t2 | ||
348 | - ldw TI_CPU(\t2),\t2 | ||
349 | -#ifdef CONFIG_64BIT | ||
350 | - extrd,u \t2,63,32,\t2 | ||
351 | -#endif | ||
352 | - /* t2 = &__per_cpu_offset[smp_processor_id()]; */ | ||
353 | - LDREGX \t2(\t1),\t2 | ||
354 | - addil LT%exception_data,%r27 | ||
355 | - LDREG RT%exception_data(%r1),\t1 | ||
356 | - /* t1 = this_cpu_ptr(&exception_data) */ | ||
357 | - add,l \t1,\t2,\t1 | ||
358 | - /* %r27 = t1->fault_gp - restore gp */ | ||
359 | - LDREG EXCDATA_GP(\t1), %r27 | ||
360 | - /* t1 = t1->fault_ip */ | ||
361 | - LDREG EXCDATA_IP(\t1), \t1 | ||
362 | - .endm | ||
363 | -#else | ||
364 | - .macro get_fault_ip t1 t2 | ||
365 | - loadgp | ||
366 | - /* t1 = this_cpu_ptr(&exception_data) */ | ||
367 | - addil LT%exception_data,%r27 | ||
368 | - LDREG RT%exception_data(%r1),\t2 | ||
369 | - /* %r27 = t2->fault_gp - restore gp */ | ||
370 | - LDREG EXCDATA_GP(\t2), %r27 | ||
371 | - /* t1 = t2->fault_ip */ | ||
372 | - LDREG EXCDATA_IP(\t2), \t1 | ||
373 | - .endm | ||
374 | -#endif | ||
375 | - | ||
376 | - .level LEVEL | ||
377 | - | ||
378 | - .text | ||
379 | - .section .fixup, "ax" | ||
380 | - | ||
381 | - /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */ | ||
382 | -ENTRY_CFI(fixup_get_user_skip_1) | ||
383 | - get_fault_ip %r1,%r8 | ||
384 | - ldo 4(%r1), %r1 | ||
385 | - ldi -EFAULT, %r8 | ||
386 | - bv %r0(%r1) | ||
387 | - copy %r0, %r9 | ||
388 | -ENDPROC_CFI(fixup_get_user_skip_1) | ||
389 | - | ||
390 | -ENTRY_CFI(fixup_get_user_skip_2) | ||
391 | - get_fault_ip %r1,%r8 | ||
392 | - ldo 8(%r1), %r1 | ||
393 | - ldi -EFAULT, %r8 | ||
394 | - bv %r0(%r1) | ||
395 | - copy %r0, %r9 | ||
396 | -ENDPROC_CFI(fixup_get_user_skip_2) | ||
397 | - | ||
398 | - /* put_user() fixups, store -EFAULT in r8 */ | ||
399 | -ENTRY_CFI(fixup_put_user_skip_1) | ||
400 | - get_fault_ip %r1,%r8 | ||
401 | - ldo 4(%r1), %r1 | ||
402 | - bv %r0(%r1) | ||
403 | - ldi -EFAULT, %r8 | ||
404 | -ENDPROC_CFI(fixup_put_user_skip_1) | ||
405 | - | ||
406 | -ENTRY_CFI(fixup_put_user_skip_2) | ||
407 | - get_fault_ip %r1,%r8 | ||
408 | - ldo 8(%r1), %r1 | ||
409 | - bv %r0(%r1) | ||
410 | - ldi -EFAULT, %r8 | ||
411 | -ENDPROC_CFI(fixup_put_user_skip_2) | ||
412 | - | ||
413 | diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S | ||
414 | index 56845de6b5df..f01188c044ee 100644 | ||
415 | --- a/arch/parisc/lib/lusercopy.S | ||
416 | +++ b/arch/parisc/lib/lusercopy.S | ||
417 | @@ -5,6 +5,8 @@ | ||
418 | * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org> | ||
419 | * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr> | ||
420 | * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org> | ||
421 | + * Copyright (C) 2017 Helge Deller <deller@gmx.de> | ||
422 | + * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net> | ||
423 | * | ||
424 | * | ||
425 | * This program is free software; you can redistribute it and/or modify | ||
426 | @@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user) | ||
427 | |||
428 | .procend | ||
429 | |||
430 | + | ||
431 | + | ||
432 | +/* | ||
433 | + * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) | ||
434 | + * | ||
435 | + * Inputs: | ||
436 | + * - sr1 already contains space of source region | ||
437 | + * - sr2 already contains space of destination region | ||
438 | + * | ||
439 | + * Returns: | ||
440 | + * - number of bytes that could not be copied. | ||
441 | + * On success, this will be zero. | ||
442 | + * | ||
443 | + * This code is based on a C-implementation of a copy routine written by | ||
444 | + * Randolph Chung, which in turn was derived from the glibc. | ||
445 | + * | ||
446 | + * Several strategies are used to try to get the best performance for various | ||
447 | + * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes | ||
448 | + * at a time using general registers. Unaligned copies are handled either by | ||
449 | + * aligning the destination and then using shift-and-write method, or in a few | ||
450 | + * cases by falling back to a byte-at-a-time copy. | ||
451 | + * | ||
452 | + * Testing with various alignments and buffer sizes shows that this code is | ||
453 | + * often >10x faster than a simple byte-at-a-time copy, even for strangely | ||
454 | + * aligned operands. It is interesting to note that the glibc version of memcpy | ||
455 | + * (written in C) is actually quite fast already. This routine is able to beat | ||
456 | + * it by 30-40% for aligned copies because of the loop unrolling, but in some | ||
457 | + * cases the glibc version is still slightly faster. This lends more | ||
458 | + * credibility that gcc can generate very good code as long as we are careful. | ||
459 | + * | ||
460 | + * Possible optimizations: | ||
461 | + * - add cache prefetching | ||
462 | + * - try not to use the post-increment address modifiers; they may create | ||
463 | + * additional interlocks. Assumption is that those were only efficient on old | ||
464 | + * machines (pre PA8000 processors) | ||
465 | + */ | ||
466 | + | ||
467 | + dst = arg0 | ||
468 | + src = arg1 | ||
469 | + len = arg2 | ||
470 | + end = arg3 | ||
471 | + t1 = r19 | ||
472 | + t2 = r20 | ||
473 | + t3 = r21 | ||
474 | + t4 = r22 | ||
475 | + srcspc = sr1 | ||
476 | + dstspc = sr2 | ||
477 | + | ||
478 | + t0 = r1 | ||
479 | + a1 = t1 | ||
480 | + a2 = t2 | ||
481 | + a3 = t3 | ||
482 | + a0 = t4 | ||
483 | + | ||
484 | + save_src = ret0 | ||
485 | + save_dst = ret1 | ||
486 | + save_len = r31 | ||
487 | + | ||
488 | +ENTRY_CFI(pa_memcpy) | ||
489 | + .proc | ||
490 | + .callinfo NO_CALLS | ||
491 | + .entry | ||
492 | + | ||
493 | + /* Last destination address */ | ||
494 | + add dst,len,end | ||
495 | + | ||
496 | + /* short copy with less than 16 bytes? */ | ||
497 | + cmpib,>>=,n 15,len,.Lbyte_loop | ||
498 | + | ||
499 | + /* same alignment? */ | ||
500 | + xor src,dst,t0 | ||
501 | + extru t0,31,2,t1 | ||
502 | + cmpib,<>,n 0,t1,.Lunaligned_copy | ||
503 | + | ||
504 | +#ifdef CONFIG_64BIT | ||
505 | + /* only do 64-bit copies if we can get aligned. */ | ||
506 | + extru t0,31,3,t1 | ||
507 | + cmpib,<>,n 0,t1,.Lalign_loop32 | ||
508 | + | ||
509 | + /* loop until we are 64-bit aligned */ | ||
510 | +.Lalign_loop64: | ||
511 | + extru dst,31,3,t1 | ||
512 | + cmpib,=,n 0,t1,.Lcopy_loop_16 | ||
513 | +20: ldb,ma 1(srcspc,src),t1 | ||
514 | +21: stb,ma t1,1(dstspc,dst) | ||
515 | + b .Lalign_loop64 | ||
516 | + ldo -1(len),len | ||
517 | + | ||
518 | + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) | ||
519 | + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) | ||
520 | + | ||
521 | + ldi 31,t0 | ||
522 | +.Lcopy_loop_16: | ||
523 | + cmpb,COND(>>=),n t0,len,.Lword_loop | ||
524 | + | ||
525 | +10: ldd 0(srcspc,src),t1 | ||
526 | +11: ldd 8(srcspc,src),t2 | ||
527 | + ldo 16(src),src | ||
528 | +12: std,ma t1,8(dstspc,dst) | ||
529 | +13: std,ma t2,8(dstspc,dst) | ||
530 | +14: ldd 0(srcspc,src),t1 | ||
531 | +15: ldd 8(srcspc,src),t2 | ||
532 | + ldo 16(src),src | ||
533 | +16: std,ma t1,8(dstspc,dst) | ||
534 | +17: std,ma t2,8(dstspc,dst) | ||
535 | + | ||
536 | + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) | ||
537 | + ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault) | ||
538 | + ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done) | ||
539 | + ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done) | ||
540 | + ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done) | ||
541 | + ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault) | ||
542 | + ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done) | ||
543 | + ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done) | ||
544 | + | ||
545 | + b .Lcopy_loop_16 | ||
546 | + ldo -32(len),len | ||
547 | + | ||
548 | +.Lword_loop: | ||
549 | + cmpib,COND(>>=),n 3,len,.Lbyte_loop | ||
550 | +20: ldw,ma 4(srcspc,src),t1 | ||
551 | +21: stw,ma t1,4(dstspc,dst) | ||
552 | + b .Lword_loop | ||
553 | + ldo -4(len),len | ||
554 | + | ||
555 | + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) | ||
556 | + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) | ||
557 | + | ||
558 | +#endif /* CONFIG_64BIT */ | ||
559 | + | ||
560 | + /* loop until we are 32-bit aligned */ | ||
561 | +.Lalign_loop32: | ||
562 | + extru dst,31,2,t1 | ||
563 | + cmpib,=,n 0,t1,.Lcopy_loop_4 | ||
564 | +20: ldb,ma 1(srcspc,src),t1 | ||
565 | +21: stb,ma t1,1(dstspc,dst) | ||
566 | + b .Lalign_loop32 | ||
567 | + ldo -1(len),len | ||
568 | + | ||
569 | + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) | ||
570 | + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) | ||
571 | + | ||
572 | + | ||
573 | +.Lcopy_loop_4: | ||
574 | + cmpib,COND(>>=),n 15,len,.Lbyte_loop | ||
575 | + | ||
576 | +10: ldw 0(srcspc,src),t1 | ||
577 | +11: ldw 4(srcspc,src),t2 | ||
578 | +12: stw,ma t1,4(dstspc,dst) | ||
579 | +13: stw,ma t2,4(dstspc,dst) | ||
580 | +14: ldw 8(srcspc,src),t1 | ||
581 | +15: ldw 12(srcspc,src),t2 | ||
582 | + ldo 16(src),src | ||
583 | +16: stw,ma t1,4(dstspc,dst) | ||
584 | +17: stw,ma t2,4(dstspc,dst) | ||
585 | + | ||
586 | + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) | ||
587 | + ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault) | ||
588 | + ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done) | ||
589 | + ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done) | ||
590 | + ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done) | ||
591 | + ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault) | ||
592 | + ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done) | ||
593 | + ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done) | ||
594 | + | ||
595 | + b .Lcopy_loop_4 | ||
596 | + ldo -16(len),len | ||
597 | + | ||
598 | +.Lbyte_loop: | ||
599 | + cmpclr,COND(<>) len,%r0,%r0 | ||
600 | + b,n .Lcopy_done | ||
601 | +20: ldb 0(srcspc,src),t1 | ||
602 | + ldo 1(src),src | ||
603 | +21: stb,ma t1,1(dstspc,dst) | ||
604 | + b .Lbyte_loop | ||
605 | + ldo -1(len),len | ||
606 | + | ||
607 | + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) | ||
608 | + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) | ||
609 | + | ||
610 | +.Lcopy_done: | ||
611 | + bv %r0(%r2) | ||
612 | + sub end,dst,ret0 | ||
613 | + | ||
614 | + | ||
615 | + /* src and dst are not aligned the same way. */ | ||
616 | + /* need to go the hard way */ | ||
617 | +.Lunaligned_copy: | ||
618 | + /* align until dst is 32bit-word-aligned */ | ||
619 | + extru dst,31,2,t1 | ||
620 | + cmpib,COND(=),n 0,t1,.Lcopy_dstaligned | ||
621 | +20: ldb 0(srcspc,src),t1 | ||
622 | + ldo 1(src),src | ||
623 | +21: stb,ma t1,1(dstspc,dst) | ||
624 | + b .Lunaligned_copy | ||
625 | + ldo -1(len),len | ||
626 | + | ||
627 | + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) | ||
628 | + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) | ||
629 | + | ||
630 | +.Lcopy_dstaligned: | ||
631 | + | ||
632 | + /* store src, dst and len in safe place */ | ||
633 | + copy src,save_src | ||
634 | + copy dst,save_dst | ||
635 | + copy len,save_len | ||
636 | + | ||
637 | + /* len now needs to give the number of words to copy */ | ||
638 | + SHRREG len,2,len | ||
639 | + | ||
640 | + /* | ||
641 | + * Copy from a not-aligned src to an aligned dst using shifts. | ||
642 | + * Handles 4 words per loop. | ||
643 | + */ | ||
644 | + | ||
645 | + depw,z src,28,2,t0 | ||
646 | + subi 32,t0,t0 | ||
647 | + mtsar t0 | ||
648 | + extru len,31,2,t0 | ||
649 | + cmpib,= 2,t0,.Lcase2 | ||
650 | + /* Make src aligned by rounding it down. */ | ||
651 | + depi 0,31,2,src | ||
652 | + | ||
653 | + cmpiclr,<> 3,t0,%r0 | ||
654 | + b,n .Lcase3 | ||
655 | + cmpiclr,<> 1,t0,%r0 | ||
656 | + b,n .Lcase1 | ||
657 | +.Lcase0: | ||
658 | + cmpb,= %r0,len,.Lcda_finish | ||
659 | + nop | ||
660 | + | ||
661 | +1: ldw,ma 4(srcspc,src), a3 | ||
662 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
663 | +1: ldw,ma 4(srcspc,src), a0 | ||
664 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
665 | + b,n .Ldo3 | ||
666 | +.Lcase1: | ||
667 | +1: ldw,ma 4(srcspc,src), a2 | ||
668 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
669 | +1: ldw,ma 4(srcspc,src), a3 | ||
670 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
671 | + ldo -1(len),len | ||
672 | + cmpb,=,n %r0,len,.Ldo0 | ||
673 | +.Ldo4: | ||
674 | +1: ldw,ma 4(srcspc,src), a0 | ||
675 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
676 | + shrpw a2, a3, %sar, t0 | ||
677 | +1: stw,ma t0, 4(dstspc,dst) | ||
678 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) | ||
679 | +.Ldo3: | ||
680 | +1: ldw,ma 4(srcspc,src), a1 | ||
681 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
682 | + shrpw a3, a0, %sar, t0 | ||
683 | +1: stw,ma t0, 4(dstspc,dst) | ||
684 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) | ||
685 | +.Ldo2: | ||
686 | +1: ldw,ma 4(srcspc,src), a2 | ||
687 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
688 | + shrpw a0, a1, %sar, t0 | ||
689 | +1: stw,ma t0, 4(dstspc,dst) | ||
690 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) | ||
691 | +.Ldo1: | ||
692 | +1: ldw,ma 4(srcspc,src), a3 | ||
693 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
694 | + shrpw a1, a2, %sar, t0 | ||
695 | +1: stw,ma t0, 4(dstspc,dst) | ||
696 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) | ||
697 | + ldo -4(len),len | ||
698 | + cmpb,<> %r0,len,.Ldo4 | ||
699 | + nop | ||
700 | +.Ldo0: | ||
701 | + shrpw a2, a3, %sar, t0 | ||
702 | +1: stw,ma t0, 4(dstspc,dst) | ||
703 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) | ||
704 | + | ||
705 | +.Lcda_rdfault: | ||
706 | +.Lcda_finish: | ||
707 | + /* calculate new src, dst and len and jump to byte-copy loop */ | ||
708 | + sub dst,save_dst,t0 | ||
709 | + add save_src,t0,src | ||
710 | + b .Lbyte_loop | ||
711 | + sub save_len,t0,len | ||
712 | + | ||
713 | +.Lcase3: | ||
714 | +1: ldw,ma 4(srcspc,src), a0 | ||
715 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
716 | +1: ldw,ma 4(srcspc,src), a1 | ||
717 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
718 | + b .Ldo2 | ||
719 | + ldo 1(len),len | ||
720 | +.Lcase2: | ||
721 | +1: ldw,ma 4(srcspc,src), a1 | ||
722 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
723 | +1: ldw,ma 4(srcspc,src), a2 | ||
724 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) | ||
725 | + b .Ldo1 | ||
726 | + ldo 2(len),len | ||
727 | + | ||
728 | + | ||
729 | + /* fault exception fixup handlers: */ | ||
730 | +#ifdef CONFIG_64BIT | ||
731 | +.Lcopy16_fault: | ||
732 | +10: b .Lcopy_done | ||
733 | + std,ma t1,8(dstspc,dst) | ||
734 | + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) | ||
735 | +#endif | ||
736 | + | ||
737 | +.Lcopy8_fault: | ||
738 | +10: b .Lcopy_done | ||
739 | + stw,ma t1,4(dstspc,dst) | ||
740 | + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) | ||
741 | + | ||
742 | + .exit | ||
743 | +ENDPROC_CFI(pa_memcpy) | ||
744 | + .procend | ||
745 | + | ||
746 | .end | ||
747 | diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c | ||
748 | index f82ff10ed974..b3d47ec1d80a 100644 | ||
749 | --- a/arch/parisc/lib/memcpy.c | ||
750 | +++ b/arch/parisc/lib/memcpy.c | ||
751 | @@ -2,7 +2,7 @@ | ||
752 | * Optimized memory copy routines. | ||
753 | * | ||
754 | * Copyright (C) 2004 Randolph Chung <tausq@debian.org> | ||
755 | - * Copyright (C) 2013 Helge Deller <deller@gmx.de> | ||
756 | + * Copyright (C) 2013-2017 Helge Deller <deller@gmx.de> | ||
757 | * | ||
758 | * This program is free software; you can redistribute it and/or modify | ||
759 | * it under the terms of the GNU General Public License as published by | ||
760 | @@ -21,474 +21,21 @@ | ||
761 | * Portions derived from the GNU C Library | ||
762 | * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc. | ||
763 | * | ||
764 | - * Several strategies are tried to try to get the best performance for various | ||
765 | - * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using | ||
766 | - * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using | ||
767 | - * general registers. Unaligned copies are handled either by aligning the | ||
768 | - * destination and then using shift-and-write method, or in a few cases by | ||
769 | - * falling back to a byte-at-a-time copy. | ||
770 | - * | ||
771 | - * I chose to implement this in C because it is easier to maintain and debug, | ||
772 | - * and in my experiments it appears that the C code generated by gcc (3.3/3.4 | ||
773 | - * at the time of writing) is fairly optimal. Unfortunately some of the | ||
774 | - * semantics of the copy routine (exception handling) is difficult to express | ||
775 | - * in C, so we have to play some tricks to get it to work. | ||
776 | - * | ||
777 | - * All the loads and stores are done via explicit asm() code in order to use | ||
778 | - * the right space registers. | ||
779 | - * | ||
780 | - * Testing with various alignments and buffer sizes shows that this code is | ||
781 | - * often >10x faster than a simple byte-at-a-time copy, even for strangely | ||
782 | - * aligned operands. It is interesting to note that the glibc version | ||
783 | - * of memcpy (written in C) is actually quite fast already. This routine is | ||
784 | - * able to beat it by 30-40% for aligned copies because of the loop unrolling, | ||
785 | - * but in some cases the glibc version is still slightly faster. This lends | ||
786 | - * more credibility that gcc can generate very good code as long as we are | ||
787 | - * careful. | ||
788 | - * | ||
789 | - * TODO: | ||
790 | - * - cache prefetching needs more experimentation to get optimal settings | ||
791 | - * - try not to use the post-increment address modifiers; they create additional | ||
792 | - * interlocks | ||
793 | - * - replace byte-copy loops with stybs sequences | ||
794 | */ | ||
795 | |||
796 | -#ifdef __KERNEL__ | ||
797 | #include <linux/module.h> | ||
798 | #include <linux/compiler.h> | ||
799 | #include <linux/uaccess.h> | ||
800 | -#define s_space "%%sr1" | ||
801 | -#define d_space "%%sr2" | ||
802 | -#else | ||
803 | -#include "memcpy.h" | ||
804 | -#define s_space "%%sr0" | ||
805 | -#define d_space "%%sr0" | ||
806 | -#define pa_memcpy new2_copy | ||
807 | -#endif | ||
808 | |||
809 | DECLARE_PER_CPU(struct exception_data, exception_data); | ||
810 | |||
811 | -#define preserve_branch(label) do { \ | ||
812 | - volatile int dummy = 0; \ | ||
813 | - /* The following branch is never taken, it's just here to */ \ | ||
814 | - /* prevent gcc from optimizing away our exception code. */ \ | ||
815 | - if (unlikely(dummy != dummy)) \ | ||
816 | - goto label; \ | ||
817 | -} while (0) | ||
818 | - | ||
819 | #define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3)) | ||
820 | #define get_kernel_space() (0) | ||
821 | |||
822 | -#define MERGE(w0, sh_1, w1, sh_2) ({ \ | ||
823 | - unsigned int _r; \ | ||
824 | - asm volatile ( \ | ||
825 | - "mtsar %3\n" \ | ||
826 | - "shrpw %1, %2, %%sar, %0\n" \ | ||
827 | - : "=r"(_r) \ | ||
828 | - : "r"(w0), "r"(w1), "r"(sh_2) \ | ||
829 | - ); \ | ||
830 | - _r; \ | ||
831 | -}) | ||
832 | -#define THRESHOLD 16 | ||
833 | - | ||
834 | -#ifdef DEBUG_MEMCPY | ||
835 | -#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0) | ||
836 | -#else | ||
837 | -#define DPRINTF(fmt, args...) | ||
838 | -#endif | ||
839 | - | ||
840 | -#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \ | ||
841 | - __asm__ __volatile__ ( \ | ||
842 | - "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \ | ||
843 | - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ | ||
844 | - : _tt(_t), "+r"(_a) \ | ||
845 | - : \ | ||
846 | - : "r8") | ||
847 | - | ||
848 | -#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \ | ||
849 | - __asm__ __volatile__ ( \ | ||
850 | - "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \ | ||
851 | - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ | ||
852 | - : "+r"(_a) \ | ||
853 | - : _tt(_t) \ | ||
854 | - : "r8") | ||
855 | - | ||
856 | -#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e) | ||
857 | -#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e) | ||
858 | -#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e) | ||
859 | -#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e) | ||
860 | -#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e) | ||
861 | -#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e) | ||
862 | - | ||
863 | -#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \ | ||
864 | - __asm__ __volatile__ ( \ | ||
865 | - "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t" \ | ||
866 | - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ | ||
867 | - : _tt(_t) \ | ||
868 | - : "r"(_a) \ | ||
869 | - : "r8") | ||
870 | - | ||
871 | -#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \ | ||
872 | - __asm__ __volatile__ ( \ | ||
873 | - "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" \ | ||
874 | - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ | ||
875 | - : \ | ||
876 | - : _tt(_t), "r"(_a) \ | ||
877 | - : "r8") | ||
878 | - | ||
879 | -#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e) | ||
880 | -#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e) | ||
881 | - | ||
882 | -#ifdef CONFIG_PREFETCH | ||
883 | -static inline void prefetch_src(const void *addr) | ||
884 | -{ | ||
885 | - __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr)); | ||
886 | -} | ||
887 | - | ||
888 | -static inline void prefetch_dst(const void *addr) | ||
889 | -{ | ||
890 | - __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr)); | ||
891 | -} | ||
892 | -#else | ||
893 | -#define prefetch_src(addr) do { } while(0) | ||
894 | -#define prefetch_dst(addr) do { } while(0) | ||
895 | -#endif | ||
896 | - | ||
897 | -#define PA_MEMCPY_OK 0 | ||
898 | -#define PA_MEMCPY_LOAD_ERROR 1 | ||
899 | -#define PA_MEMCPY_STORE_ERROR 2 | ||
900 | - | ||
901 | -/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words | ||
902 | - * per loop. This code is derived from glibc. | ||
903 | - */ | ||
904 | -static noinline unsigned long copy_dstaligned(unsigned long dst, | ||
905 | - unsigned long src, unsigned long len) | ||
906 | -{ | ||
907 | - /* gcc complains that a2 and a3 may be uninitialized, but actually | ||
908 | - * they cannot be. Initialize a2/a3 to shut gcc up. | ||
909 | - */ | ||
910 | - register unsigned int a0, a1, a2 = 0, a3 = 0; | ||
911 | - int sh_1, sh_2; | ||
912 | - | ||
913 | - /* prefetch_src((const void *)src); */ | ||
914 | - | ||
915 | - /* Calculate how to shift a word read at the memory operation | ||
916 | - aligned srcp to make it aligned for copy. */ | ||
917 | - sh_1 = 8 * (src % sizeof(unsigned int)); | ||
918 | - sh_2 = 8 * sizeof(unsigned int) - sh_1; | ||
919 | - | ||
920 | - /* Make src aligned by rounding it down. */ | ||
921 | - src &= -sizeof(unsigned int); | ||
922 | - | ||
923 | - switch (len % 4) | ||
924 | - { | ||
925 | - case 2: | ||
926 | - /* a1 = ((unsigned int *) src)[0]; | ||
927 | - a2 = ((unsigned int *) src)[1]; */ | ||
928 | - ldw(s_space, 0, src, a1, cda_ldw_exc); | ||
929 | - ldw(s_space, 4, src, a2, cda_ldw_exc); | ||
930 | - src -= 1 * sizeof(unsigned int); | ||
931 | - dst -= 3 * sizeof(unsigned int); | ||
932 | - len += 2; | ||
933 | - goto do1; | ||
934 | - case 3: | ||
935 | - /* a0 = ((unsigned int *) src)[0]; | ||
936 | - a1 = ((unsigned int *) src)[1]; */ | ||
937 | - ldw(s_space, 0, src, a0, cda_ldw_exc); | ||
938 | - ldw(s_space, 4, src, a1, cda_ldw_exc); | ||
939 | - src -= 0 * sizeof(unsigned int); | ||
940 | - dst -= 2 * sizeof(unsigned int); | ||
941 | - len += 1; | ||
942 | - goto do2; | ||
943 | - case 0: | ||
944 | - if (len == 0) | ||
945 | - return PA_MEMCPY_OK; | ||
946 | - /* a3 = ((unsigned int *) src)[0]; | ||
947 | - a0 = ((unsigned int *) src)[1]; */ | ||
948 | - ldw(s_space, 0, src, a3, cda_ldw_exc); | ||
949 | - ldw(s_space, 4, src, a0, cda_ldw_exc); | ||
950 | - src -=-1 * sizeof(unsigned int); | ||
951 | - dst -= 1 * sizeof(unsigned int); | ||
952 | - len += 0; | ||
953 | - goto do3; | ||
954 | - case 1: | ||
955 | - /* a2 = ((unsigned int *) src)[0]; | ||
956 | - a3 = ((unsigned int *) src)[1]; */ | ||
957 | - ldw(s_space, 0, src, a2, cda_ldw_exc); | ||
958 | - ldw(s_space, 4, src, a3, cda_ldw_exc); | ||
959 | - src -=-2 * sizeof(unsigned int); | ||
960 | - dst -= 0 * sizeof(unsigned int); | ||
961 | - len -= 1; | ||
962 | - if (len == 0) | ||
963 | - goto do0; | ||
964 | - goto do4; /* No-op. */ | ||
965 | - } | ||
966 | - | ||
967 | - do | ||
968 | - { | ||
969 | - /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */ | ||
970 | -do4: | ||
971 | - /* a0 = ((unsigned int *) src)[0]; */ | ||
972 | - ldw(s_space, 0, src, a0, cda_ldw_exc); | ||
973 | - /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ | ||
974 | - stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); | ||
975 | -do3: | ||
976 | - /* a1 = ((unsigned int *) src)[1]; */ | ||
977 | - ldw(s_space, 4, src, a1, cda_ldw_exc); | ||
978 | - /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */ | ||
979 | - stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc); | ||
980 | -do2: | ||
981 | - /* a2 = ((unsigned int *) src)[2]; */ | ||
982 | - ldw(s_space, 8, src, a2, cda_ldw_exc); | ||
983 | - /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */ | ||
984 | - stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc); | ||
985 | -do1: | ||
986 | - /* a3 = ((unsigned int *) src)[3]; */ | ||
987 | - ldw(s_space, 12, src, a3, cda_ldw_exc); | ||
988 | - /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */ | ||
989 | - stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc); | ||
990 | - | ||
991 | - src += 4 * sizeof(unsigned int); | ||
992 | - dst += 4 * sizeof(unsigned int); | ||
993 | - len -= 4; | ||
994 | - } | ||
995 | - while (len != 0); | ||
996 | - | ||
997 | -do0: | ||
998 | - /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ | ||
999 | - stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); | ||
1000 | - | ||
1001 | - preserve_branch(handle_load_error); | ||
1002 | - preserve_branch(handle_store_error); | ||
1003 | - | ||
1004 | - return PA_MEMCPY_OK; | ||
1005 | - | ||
1006 | -handle_load_error: | ||
1007 | - __asm__ __volatile__ ("cda_ldw_exc:\n"); | ||
1008 | - return PA_MEMCPY_LOAD_ERROR; | ||
1009 | - | ||
1010 | -handle_store_error: | ||
1011 | - __asm__ __volatile__ ("cda_stw_exc:\n"); | ||
1012 | - return PA_MEMCPY_STORE_ERROR; | ||
1013 | -} | ||
1014 | - | ||
1015 | - | ||
1016 | -/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR. | ||
1017 | - * In case of an access fault the faulty address can be read from the per_cpu | ||
1018 | - * exception data struct. */ | ||
1019 | -static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp, | ||
1020 | - unsigned long len) | ||
1021 | -{ | ||
1022 | - register unsigned long src, dst, t1, t2, t3; | ||
1023 | - register unsigned char *pcs, *pcd; | ||
1024 | - register unsigned int *pws, *pwd; | ||
1025 | - register double *pds, *pdd; | ||
1026 | - unsigned long ret; | ||
1027 | - | ||
1028 | - src = (unsigned long)srcp; | ||
1029 | - dst = (unsigned long)dstp; | ||
1030 | - pcs = (unsigned char *)srcp; | ||
1031 | - pcd = (unsigned char *)dstp; | ||
1032 | - | ||
1033 | - /* prefetch_src((const void *)srcp); */ | ||
1034 | - | ||
1035 | - if (len < THRESHOLD) | ||
1036 | - goto byte_copy; | ||
1037 | - | ||
1038 | - /* Check alignment */ | ||
1039 | - t1 = (src ^ dst); | ||
1040 | - if (unlikely(t1 & (sizeof(double)-1))) | ||
1041 | - goto unaligned_copy; | ||
1042 | - | ||
1043 | - /* src and dst have same alignment. */ | ||
1044 | - | ||
1045 | - /* Copy bytes till we are double-aligned. */ | ||
1046 | - t2 = src & (sizeof(double) - 1); | ||
1047 | - if (unlikely(t2 != 0)) { | ||
1048 | - t2 = sizeof(double) - t2; | ||
1049 | - while (t2 && len) { | ||
1050 | - /* *pcd++ = *pcs++; */ | ||
1051 | - ldbma(s_space, pcs, t3, pmc_load_exc); | ||
1052 | - len--; | ||
1053 | - stbma(d_space, t3, pcd, pmc_store_exc); | ||
1054 | - t2--; | ||
1055 | - } | ||
1056 | - } | ||
1057 | - | ||
1058 | - pds = (double *)pcs; | ||
1059 | - pdd = (double *)pcd; | ||
1060 | - | ||
1061 | -#if 0 | ||
1062 | - /* Copy 8 doubles at a time */ | ||
1063 | - while (len >= 8*sizeof(double)) { | ||
1064 | - register double r1, r2, r3, r4, r5, r6, r7, r8; | ||
1065 | - /* prefetch_src((char *)pds + L1_CACHE_BYTES); */ | ||
1066 | - flddma(s_space, pds, r1, pmc_load_exc); | ||
1067 | - flddma(s_space, pds, r2, pmc_load_exc); | ||
1068 | - flddma(s_space, pds, r3, pmc_load_exc); | ||
1069 | - flddma(s_space, pds, r4, pmc_load_exc); | ||
1070 | - fstdma(d_space, r1, pdd, pmc_store_exc); | ||
1071 | - fstdma(d_space, r2, pdd, pmc_store_exc); | ||
1072 | - fstdma(d_space, r3, pdd, pmc_store_exc); | ||
1073 | - fstdma(d_space, r4, pdd, pmc_store_exc); | ||
1074 | - | ||
1075 | -#if 0 | ||
1076 | - if (L1_CACHE_BYTES <= 32) | ||
1077 | - prefetch_src((char *)pds + L1_CACHE_BYTES); | ||
1078 | -#endif | ||
1079 | - flddma(s_space, pds, r5, pmc_load_exc); | ||
1080 | - flddma(s_space, pds, r6, pmc_load_exc); | ||
1081 | - flddma(s_space, pds, r7, pmc_load_exc); | ||
1082 | - flddma(s_space, pds, r8, pmc_load_exc); | ||
1083 | - fstdma(d_space, r5, pdd, pmc_store_exc); | ||
1084 | - fstdma(d_space, r6, pdd, pmc_store_exc); | ||
1085 | - fstdma(d_space, r7, pdd, pmc_store_exc); | ||
1086 | - fstdma(d_space, r8, pdd, pmc_store_exc); | ||
1087 | - len -= 8*sizeof(double); | ||
1088 | - } | ||
1089 | -#endif | ||
1090 | - | ||
1091 | - pws = (unsigned int *)pds; | ||
1092 | - pwd = (unsigned int *)pdd; | ||
1093 | - | ||
1094 | -word_copy: | ||
1095 | - while (len >= 8*sizeof(unsigned int)) { | ||
1096 | - register unsigned int r1,r2,r3,r4,r5,r6,r7,r8; | ||
1097 | - /* prefetch_src((char *)pws + L1_CACHE_BYTES); */ | ||
1098 | - ldwma(s_space, pws, r1, pmc_load_exc); | ||
1099 | - ldwma(s_space, pws, r2, pmc_load_exc); | ||
1100 | - ldwma(s_space, pws, r3, pmc_load_exc); | ||
1101 | - ldwma(s_space, pws, r4, pmc_load_exc); | ||
1102 | - stwma(d_space, r1, pwd, pmc_store_exc); | ||
1103 | - stwma(d_space, r2, pwd, pmc_store_exc); | ||
1104 | - stwma(d_space, r3, pwd, pmc_store_exc); | ||
1105 | - stwma(d_space, r4, pwd, pmc_store_exc); | ||
1106 | - | ||
1107 | - ldwma(s_space, pws, r5, pmc_load_exc); | ||
1108 | - ldwma(s_space, pws, r6, pmc_load_exc); | ||
1109 | - ldwma(s_space, pws, r7, pmc_load_exc); | ||
1110 | - ldwma(s_space, pws, r8, pmc_load_exc); | ||
1111 | - stwma(d_space, r5, pwd, pmc_store_exc); | ||
1112 | - stwma(d_space, r6, pwd, pmc_store_exc); | ||
1113 | - stwma(d_space, r7, pwd, pmc_store_exc); | ||
1114 | - stwma(d_space, r8, pwd, pmc_store_exc); | ||
1115 | - len -= 8*sizeof(unsigned int); | ||
1116 | - } | ||
1117 | - | ||
1118 | - while (len >= 4*sizeof(unsigned int)) { | ||
1119 | - register unsigned int r1,r2,r3,r4; | ||
1120 | - ldwma(s_space, pws, r1, pmc_load_exc); | ||
1121 | - ldwma(s_space, pws, r2, pmc_load_exc); | ||
1122 | - ldwma(s_space, pws, r3, pmc_load_exc); | ||
1123 | - ldwma(s_space, pws, r4, pmc_load_exc); | ||
1124 | - stwma(d_space, r1, pwd, pmc_store_exc); | ||
1125 | - stwma(d_space, r2, pwd, pmc_store_exc); | ||
1126 | - stwma(d_space, r3, pwd, pmc_store_exc); | ||
1127 | - stwma(d_space, r4, pwd, pmc_store_exc); | ||
1128 | - len -= 4*sizeof(unsigned int); | ||
1129 | - } | ||
1130 | - | ||
1131 | - pcs = (unsigned char *)pws; | ||
1132 | - pcd = (unsigned char *)pwd; | ||
1133 | - | ||
1134 | -byte_copy: | ||
1135 | - while (len) { | ||
1136 | - /* *pcd++ = *pcs++; */ | ||
1137 | - ldbma(s_space, pcs, t3, pmc_load_exc); | ||
1138 | - stbma(d_space, t3, pcd, pmc_store_exc); | ||
1139 | - len--; | ||
1140 | - } | ||
1141 | - | ||
1142 | - return PA_MEMCPY_OK; | ||
1143 | - | ||
1144 | -unaligned_copy: | ||
1145 | - /* possibly we are aligned on a word, but not on a double... */ | ||
1146 | - if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) { | ||
1147 | - t2 = src & (sizeof(unsigned int) - 1); | ||
1148 | - | ||
1149 | - if (unlikely(t2 != 0)) { | ||
1150 | - t2 = sizeof(unsigned int) - t2; | ||
1151 | - while (t2) { | ||
1152 | - /* *pcd++ = *pcs++; */ | ||
1153 | - ldbma(s_space, pcs, t3, pmc_load_exc); | ||
1154 | - stbma(d_space, t3, pcd, pmc_store_exc); | ||
1155 | - len--; | ||
1156 | - t2--; | ||
1157 | - } | ||
1158 | - } | ||
1159 | - | ||
1160 | - pws = (unsigned int *)pcs; | ||
1161 | - pwd = (unsigned int *)pcd; | ||
1162 | - goto word_copy; | ||
1163 | - } | ||
1164 | - | ||
1165 | - /* Align the destination. */ | ||
1166 | - if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) { | ||
1167 | - t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1)); | ||
1168 | - while (t2) { | ||
1169 | - /* *pcd++ = *pcs++; */ | ||
1170 | - ldbma(s_space, pcs, t3, pmc_load_exc); | ||
1171 | - stbma(d_space, t3, pcd, pmc_store_exc); | ||
1172 | - len--; | ||
1173 | - t2--; | ||
1174 | - } | ||
1175 | - dst = (unsigned long)pcd; | ||
1176 | - src = (unsigned long)pcs; | ||
1177 | - } | ||
1178 | - | ||
1179 | - ret = copy_dstaligned(dst, src, len / sizeof(unsigned int)); | ||
1180 | - if (ret) | ||
1181 | - return ret; | ||
1182 | - | ||
1183 | - pcs += (len & -sizeof(unsigned int)); | ||
1184 | - pcd += (len & -sizeof(unsigned int)); | ||
1185 | - len %= sizeof(unsigned int); | ||
1186 | - | ||
1187 | - preserve_branch(handle_load_error); | ||
1188 | - preserve_branch(handle_store_error); | ||
1189 | - | ||
1190 | - goto byte_copy; | ||
1191 | - | ||
1192 | -handle_load_error: | ||
1193 | - __asm__ __volatile__ ("pmc_load_exc:\n"); | ||
1194 | - return PA_MEMCPY_LOAD_ERROR; | ||
1195 | - | ||
1196 | -handle_store_error: | ||
1197 | - __asm__ __volatile__ ("pmc_store_exc:\n"); | ||
1198 | - return PA_MEMCPY_STORE_ERROR; | ||
1199 | -} | ||
1200 | - | ||
1201 | - | ||
1202 | /* Returns 0 for success, otherwise, returns number of bytes not transferred. */ | ||
1203 | -static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) | ||
1204 | -{ | ||
1205 | - unsigned long ret, fault_addr, reference; | ||
1206 | - struct exception_data *d; | ||
1207 | - | ||
1208 | - ret = pa_memcpy_internal(dstp, srcp, len); | ||
1209 | - if (likely(ret == PA_MEMCPY_OK)) | ||
1210 | - return 0; | ||
1211 | - | ||
1212 | - /* if a load or store fault occured we can get the faulty addr */ | ||
1213 | - d = this_cpu_ptr(&exception_data); | ||
1214 | - fault_addr = d->fault_addr; | ||
1215 | - | ||
1216 | - /* error in load or store? */ | ||
1217 | - if (ret == PA_MEMCPY_LOAD_ERROR) | ||
1218 | - reference = (unsigned long) srcp; | ||
1219 | - else | ||
1220 | - reference = (unsigned long) dstp; | ||
1221 | +extern unsigned long pa_memcpy(void *dst, const void *src, | ||
1222 | + unsigned long len); | ||
1223 | |||
1224 | - DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n", | ||
1225 | - ret, len, fault_addr, reference); | ||
1226 | - | ||
1227 | - if (fault_addr >= reference) | ||
1228 | - return len - (fault_addr - reference); | ||
1229 | - else | ||
1230 | - return len; | ||
1231 | -} | ||
1232 | - | ||
1233 | -#ifdef __KERNEL__ | ||
1234 | unsigned long __copy_to_user(void __user *dst, const void *src, | ||
1235 | unsigned long len) | ||
1236 | { | ||
1237 | @@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size) | ||
1238 | |||
1239 | return __probe_kernel_read(dst, src, size); | ||
1240 | } | ||
1241 | - | ||
1242 | -#endif | ||
1243 | diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c | ||
1244 | index 1a0b4f63f0e9..040c48fc5391 100644 | ||
1245 | --- a/arch/parisc/mm/fault.c | ||
1246 | +++ b/arch/parisc/mm/fault.c | ||
1247 | @@ -149,6 +149,23 @@ int fixup_exception(struct pt_regs *regs) | ||
1248 | d->fault_space = regs->isr; | ||
1249 | d->fault_addr = regs->ior; | ||
1250 | |||
1251 | + /* | ||
1252 | + * Fix up get_user() and put_user(). | ||
1253 | + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant | ||
1254 | + * bit in the relative address of the fixup routine to indicate | ||
1255 | + * that %r8 should be loaded with -EFAULT to report a userspace | ||
1256 | + * access error. | ||
1257 | + */ | ||
1258 | + if (fix->fixup & 1) { | ||
1259 | + regs->gr[8] = -EFAULT; | ||
1260 | + | ||
1261 | + /* zero target register for get_user() */ | ||
1262 | + if (parisc_acctyp(0, regs->iir) == VM_READ) { | ||
1263 | + int treg = regs->iir & 0x1f; | ||
1264 | + regs->gr[treg] = 0; | ||
1265 | + } | ||
1266 | + } | ||
1267 | + | ||
1268 | regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup; | ||
1269 | regs->iaoq[0] &= ~3; | ||
1270 | /* | ||
1271 | diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S | ||
1272 | index 779782f58324..9a53a06e5a3e 100644 | ||
1273 | --- a/arch/x86/lib/memcpy_64.S | ||
1274 | +++ b/arch/x86/lib/memcpy_64.S | ||
1275 | @@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) | ||
1276 | _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) | ||
1277 | _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) | ||
1278 | _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) | ||
1279 | - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) | ||
1280 | + _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) | ||
1281 | _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) | ||
1282 | _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) | ||
1283 | _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) | ||
1284 | diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c | ||
1285 | index 887e57182716..aed206475aa7 100644 | ||
1286 | --- a/arch/x86/mm/kaslr.c | ||
1287 | +++ b/arch/x86/mm/kaslr.c | ||
1288 | @@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; | ||
1289 | #if defined(CONFIG_X86_ESPFIX64) | ||
1290 | static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; | ||
1291 | #elif defined(CONFIG_EFI) | ||
1292 | -static const unsigned long vaddr_end = EFI_VA_START; | ||
1293 | +static const unsigned long vaddr_end = EFI_VA_END; | ||
1294 | #else | ||
1295 | static const unsigned long vaddr_end = __START_KERNEL_map; | ||
1296 | #endif | ||
1297 | @@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void) | ||
1298 | */ | ||
1299 | BUILD_BUG_ON(vaddr_start >= vaddr_end); | ||
1300 | BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && | ||
1301 | - vaddr_end >= EFI_VA_START); | ||
1302 | + vaddr_end >= EFI_VA_END); | ||
1303 | BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || | ||
1304 | IS_ENABLED(CONFIG_EFI)) && | ||
1305 | vaddr_end >= __START_KERNEL_map); | ||
1306 | diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c | ||
1307 | index f8960fca0827..9f21b0c5945d 100644 | ||
1308 | --- a/arch/x86/xen/setup.c | ||
1309 | +++ b/arch/x86/xen/setup.c | ||
1310 | @@ -713,10 +713,9 @@ static void __init xen_reserve_xen_mfnlist(void) | ||
1311 | size = PFN_PHYS(xen_start_info->nr_p2m_frames); | ||
1312 | } | ||
1313 | |||
1314 | - if (!xen_is_e820_reserved(start, size)) { | ||
1315 | - memblock_reserve(start, size); | ||
1316 | + memblock_reserve(start, size); | ||
1317 | + if (!xen_is_e820_reserved(start, size)) | ||
1318 | return; | ||
1319 | - } | ||
1320 | |||
1321 | #ifdef CONFIG_X86_32 | ||
1322 | /* | ||
1323 | @@ -727,6 +726,7 @@ static void __init xen_reserve_xen_mfnlist(void) | ||
1324 | BUG(); | ||
1325 | #else | ||
1326 | xen_relocate_p2m(); | ||
1327 | + memblock_free(start, size); | ||
1328 | #endif | ||
1329 | } | ||
1330 | |||
1331 | diff --git a/block/bio.c b/block/bio.c | ||
1332 | index db85c5753a76..655c9016052a 100644 | ||
1333 | --- a/block/bio.c | ||
1334 | +++ b/block/bio.c | ||
1335 | @@ -372,10 +372,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs) | ||
1336 | bio_list_init(&punt); | ||
1337 | bio_list_init(&nopunt); | ||
1338 | |||
1339 | - while ((bio = bio_list_pop(current->bio_list))) | ||
1340 | + while ((bio = bio_list_pop(¤t->bio_list[0]))) | ||
1341 | bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); | ||
1342 | + current->bio_list[0] = nopunt; | ||
1343 | |||
1344 | - *current->bio_list = nopunt; | ||
1345 | + bio_list_init(&nopunt); | ||
1346 | + while ((bio = bio_list_pop(¤t->bio_list[1]))) | ||
1347 | + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); | ||
1348 | + current->bio_list[1] = nopunt; | ||
1349 | |||
1350 | spin_lock(&bs->rescue_lock); | ||
1351 | bio_list_merge(&bs->rescue_list, &punt); | ||
1352 | @@ -462,7 +466,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | ||
1353 | * we retry with the original gfp_flags. | ||
1354 | */ | ||
1355 | |||
1356 | - if (current->bio_list && !bio_list_empty(current->bio_list)) | ||
1357 | + if (current->bio_list && | ||
1358 | + (!bio_list_empty(¤t->bio_list[0]) || | ||
1359 | + !bio_list_empty(¤t->bio_list[1]))) | ||
1360 | gfp_mask &= ~__GFP_DIRECT_RECLAIM; | ||
1361 | |||
1362 | p = mempool_alloc(bs->bio_pool, gfp_mask); | ||
1363 | diff --git a/block/blk-core.c b/block/blk-core.c | ||
1364 | index 14d7c0740dc0..d1f2801ce836 100644 | ||
1365 | --- a/block/blk-core.c | ||
1366 | +++ b/block/blk-core.c | ||
1367 | @@ -1994,7 +1994,14 @@ generic_make_request_checks(struct bio *bio) | ||
1368 | */ | ||
1369 | blk_qc_t generic_make_request(struct bio *bio) | ||
1370 | { | ||
1371 | - struct bio_list bio_list_on_stack; | ||
1372 | + /* | ||
1373 | + * bio_list_on_stack[0] contains bios submitted by the current | ||
1374 | + * make_request_fn. | ||
1375 | + * bio_list_on_stack[1] contains bios that were submitted before | ||
1376 | + * the current make_request_fn, but that haven't been processed | ||
1377 | + * yet. | ||
1378 | + */ | ||
1379 | + struct bio_list bio_list_on_stack[2]; | ||
1380 | blk_qc_t ret = BLK_QC_T_NONE; | ||
1381 | |||
1382 | if (!generic_make_request_checks(bio)) | ||
1383 | @@ -2011,7 +2018,7 @@ blk_qc_t generic_make_request(struct bio *bio) | ||
1384 | * should be added at the tail | ||
1385 | */ | ||
1386 | if (current->bio_list) { | ||
1387 | - bio_list_add(current->bio_list, bio); | ||
1388 | + bio_list_add(¤t->bio_list[0], bio); | ||
1389 | goto out; | ||
1390 | } | ||
1391 | |||
1392 | @@ -2030,23 +2037,39 @@ blk_qc_t generic_make_request(struct bio *bio) | ||
1393 | * bio_list, and call into ->make_request() again. | ||
1394 | */ | ||
1395 | BUG_ON(bio->bi_next); | ||
1396 | - bio_list_init(&bio_list_on_stack); | ||
1397 | - current->bio_list = &bio_list_on_stack; | ||
1398 | + bio_list_init(&bio_list_on_stack[0]); | ||
1399 | + current->bio_list = bio_list_on_stack; | ||
1400 | do { | ||
1401 | struct request_queue *q = bdev_get_queue(bio->bi_bdev); | ||
1402 | |||
1403 | if (likely(blk_queue_enter(q, false) == 0)) { | ||
1404 | + struct bio_list lower, same; | ||
1405 | + | ||
1406 | + /* Create a fresh bio_list for all subordinate requests */ | ||
1407 | + bio_list_on_stack[1] = bio_list_on_stack[0]; | ||
1408 | + bio_list_init(&bio_list_on_stack[0]); | ||
1409 | ret = q->make_request_fn(q, bio); | ||
1410 | |||
1411 | blk_queue_exit(q); | ||
1412 | |||
1413 | - bio = bio_list_pop(current->bio_list); | ||
1414 | + /* sort new bios into those for a lower level | ||
1415 | + * and those for the same level | ||
1416 | + */ | ||
1417 | + bio_list_init(&lower); | ||
1418 | + bio_list_init(&same); | ||
1419 | + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) | ||
1420 | + if (q == bdev_get_queue(bio->bi_bdev)) | ||
1421 | + bio_list_add(&same, bio); | ||
1422 | + else | ||
1423 | + bio_list_add(&lower, bio); | ||
1424 | + /* now assemble so we handle the lowest level first */ | ||
1425 | + bio_list_merge(&bio_list_on_stack[0], &lower); | ||
1426 | + bio_list_merge(&bio_list_on_stack[0], &same); | ||
1427 | + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); | ||
1428 | } else { | ||
1429 | - struct bio *bio_next = bio_list_pop(current->bio_list); | ||
1430 | - | ||
1431 | bio_io_error(bio); | ||
1432 | - bio = bio_next; | ||
1433 | } | ||
1434 | + bio = bio_list_pop(&bio_list_on_stack[0]); | ||
1435 | } while (bio); | ||
1436 | current->bio_list = NULL; /* deactivate */ | ||
1437 | |||
1438 | diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile | ||
1439 | index 9ed087853dee..4c5678cfa9c4 100644 | ||
1440 | --- a/drivers/acpi/Makefile | ||
1441 | +++ b/drivers/acpi/Makefile | ||
1442 | @@ -2,7 +2,6 @@ | ||
1443 | # Makefile for the Linux ACPI interpreter | ||
1444 | # | ||
1445 | |||
1446 | -ccflags-y := -Os | ||
1447 | ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT | ||
1448 | |||
1449 | # | ||
1450 | diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c | ||
1451 | index b4c1a6a51da4..03250e1f1103 100644 | ||
1452 | --- a/drivers/acpi/acpi_platform.c | ||
1453 | +++ b/drivers/acpi/acpi_platform.c | ||
1454 | @@ -25,9 +25,11 @@ | ||
1455 | ACPI_MODULE_NAME("platform"); | ||
1456 | |||
1457 | static const struct acpi_device_id forbidden_id_list[] = { | ||
1458 | - {"PNP0000", 0}, /* PIC */ | ||
1459 | - {"PNP0100", 0}, /* Timer */ | ||
1460 | - {"PNP0200", 0}, /* AT DMA Controller */ | ||
1461 | + {"PNP0000", 0}, /* PIC */ | ||
1462 | + {"PNP0100", 0}, /* Timer */ | ||
1463 | + {"PNP0200", 0}, /* AT DMA Controller */ | ||
1464 | + {"ACPI0009", 0}, /* IOxAPIC */ | ||
1465 | + {"ACPI000A", 0}, /* IOAPIC */ | ||
1466 | {"", 0}, | ||
1467 | }; | ||
1468 | |||
1469 | diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c | ||
1470 | index b1254f885fed..b87d27859141 100644 | ||
1471 | --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c | ||
1472 | +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c | ||
1473 | @@ -1299,6 +1299,8 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, | ||
1474 | goto out_pm_put; | ||
1475 | } | ||
1476 | |||
1477 | + mutex_lock(&gpu->lock); | ||
1478 | + | ||
1479 | fence = etnaviv_gpu_fence_alloc(gpu); | ||
1480 | if (!fence) { | ||
1481 | event_free(gpu, event); | ||
1482 | @@ -1306,8 +1308,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, | ||
1483 | goto out_pm_put; | ||
1484 | } | ||
1485 | |||
1486 | - mutex_lock(&gpu->lock); | ||
1487 | - | ||
1488 | gpu->event[event].fence = fence; | ||
1489 | submit->fence = fence->seqno; | ||
1490 | gpu->active_fence = submit->fence; | ||
1491 | diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c | ||
1492 | index 3de5e6e21662..4ce04e06d9ac 100644 | ||
1493 | --- a/drivers/gpu/drm/radeon/radeon_ttm.c | ||
1494 | +++ b/drivers/gpu/drm/radeon/radeon_ttm.c | ||
1495 | @@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, | ||
1496 | rbo->placement.num_busy_placement = 0; | ||
1497 | for (i = 0; i < rbo->placement.num_placement; i++) { | ||
1498 | if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) { | ||
1499 | - if (rbo->placements[0].fpfn < fpfn) | ||
1500 | - rbo->placements[0].fpfn = fpfn; | ||
1501 | + if (rbo->placements[i].fpfn < fpfn) | ||
1502 | + rbo->placements[i].fpfn = fpfn; | ||
1503 | } else { | ||
1504 | rbo->placement.busy_placement = | ||
1505 | &rbo->placements[i]; | ||
1506 | diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c | ||
1507 | index 7aadce1f7e7a..c7e6c9839c9a 100644 | ||
1508 | --- a/drivers/gpu/drm/vc4/vc4_crtc.c | ||
1509 | +++ b/drivers/gpu/drm/vc4/vc4_crtc.c | ||
1510 | @@ -842,6 +842,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc, | ||
1511 | drm_atomic_helper_crtc_destroy_state(crtc, state); | ||
1512 | } | ||
1513 | |||
1514 | +static void | ||
1515 | +vc4_crtc_reset(struct drm_crtc *crtc) | ||
1516 | +{ | ||
1517 | + if (crtc->state) | ||
1518 | + __drm_atomic_helper_crtc_destroy_state(crtc->state); | ||
1519 | + | ||
1520 | + crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); | ||
1521 | + if (crtc->state) | ||
1522 | + crtc->state->crtc = crtc; | ||
1523 | +} | ||
1524 | + | ||
1525 | static const struct drm_crtc_funcs vc4_crtc_funcs = { | ||
1526 | .set_config = drm_atomic_helper_set_config, | ||
1527 | .destroy = vc4_crtc_destroy, | ||
1528 | @@ -849,7 +860,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { | ||
1529 | .set_property = NULL, | ||
1530 | .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ | ||
1531 | .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ | ||
1532 | - .reset = drm_atomic_helper_crtc_reset, | ||
1533 | + .reset = vc4_crtc_reset, | ||
1534 | .atomic_duplicate_state = vc4_crtc_duplicate_state, | ||
1535 | .atomic_destroy_state = vc4_crtc_destroy_state, | ||
1536 | .gamma_set = vc4_crtc_gamma_set, | ||
1537 | diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c | ||
1538 | index 5e7a5648e708..0c535d0f3b95 100644 | ||
1539 | --- a/drivers/hid/wacom_sys.c | ||
1540 | +++ b/drivers/hid/wacom_sys.c | ||
1541 | @@ -2017,6 +2017,14 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) | ||
1542 | |||
1543 | wacom_update_name(wacom, wireless ? " (WL)" : ""); | ||
1544 | |||
1545 | + /* pen only Bamboo neither support touch nor pad */ | ||
1546 | + if ((features->type == BAMBOO_PEN) && | ||
1547 | + ((features->device_type & WACOM_DEVICETYPE_TOUCH) || | ||
1548 | + (features->device_type & WACOM_DEVICETYPE_PAD))) { | ||
1549 | + error = -ENODEV; | ||
1550 | + goto fail; | ||
1551 | + } | ||
1552 | + | ||
1553 | error = wacom_add_shared_data(hdev); | ||
1554 | if (error) | ||
1555 | goto fail; | ||
1556 | @@ -2064,14 +2072,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) | ||
1557 | goto fail_quirks; | ||
1558 | } | ||
1559 | |||
1560 | - /* pen only Bamboo neither support touch nor pad */ | ||
1561 | - if ((features->type == BAMBOO_PEN) && | ||
1562 | - ((features->device_type & WACOM_DEVICETYPE_TOUCH) || | ||
1563 | - (features->device_type & WACOM_DEVICETYPE_PAD))) { | ||
1564 | - error = -ENODEV; | ||
1565 | - goto fail_quirks; | ||
1566 | - } | ||
1567 | - | ||
1568 | if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR) | ||
1569 | error = hid_hw_open(hdev); | ||
1570 | |||
1571 | diff --git a/drivers/md/dm.c b/drivers/md/dm.c | ||
1572 | index 628ba001bb3c..e66f4040d84b 100644 | ||
1573 | --- a/drivers/md/dm.c | ||
1574 | +++ b/drivers/md/dm.c | ||
1575 | @@ -986,26 +986,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) | ||
1576 | struct dm_offload *o = container_of(cb, struct dm_offload, cb); | ||
1577 | struct bio_list list; | ||
1578 | struct bio *bio; | ||
1579 | + int i; | ||
1580 | |||
1581 | INIT_LIST_HEAD(&o->cb.list); | ||
1582 | |||
1583 | if (unlikely(!current->bio_list)) | ||
1584 | return; | ||
1585 | |||
1586 | - list = *current->bio_list; | ||
1587 | - bio_list_init(current->bio_list); | ||
1588 | - | ||
1589 | - while ((bio = bio_list_pop(&list))) { | ||
1590 | - struct bio_set *bs = bio->bi_pool; | ||
1591 | - if (unlikely(!bs) || bs == fs_bio_set) { | ||
1592 | - bio_list_add(current->bio_list, bio); | ||
1593 | - continue; | ||
1594 | + for (i = 0; i < 2; i++) { | ||
1595 | + list = current->bio_list[i]; | ||
1596 | + bio_list_init(&current->bio_list[i]); | ||
1597 | + | ||
1598 | + while ((bio = bio_list_pop(&list))) { | ||
1599 | + struct bio_set *bs = bio->bi_pool; | ||
1600 | + if (unlikely(!bs) || bs == fs_bio_set) { | ||
1601 | + bio_list_add(&current->bio_list[i], bio); | ||
1602 | + continue; | ||
1603 | + } | ||
1604 | + | ||
1605 | + spin_lock(&bs->rescue_lock); | ||
1606 | + bio_list_add(&bs->rescue_list, bio); | ||
1607 | + queue_work(bs->rescue_workqueue, &bs->rescue_work); | ||
1608 | + spin_unlock(&bs->rescue_lock); | ||
1609 | } | ||
1610 | - | ||
1611 | - spin_lock(&bs->rescue_lock); | ||
1612 | - bio_list_add(&bs->rescue_list, bio); | ||
1613 | - queue_work(bs->rescue_workqueue, &bs->rescue_work); | ||
1614 | - spin_unlock(&bs->rescue_lock); | ||
1615 | } | ||
1616 | } | ||
1617 | |||
1618 | diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c | ||
1619 | index 55b5e0e77b17..4c4aab02e311 100644 | ||
1620 | --- a/drivers/md/raid10.c | ||
1621 | +++ b/drivers/md/raid10.c | ||
1622 | @@ -941,7 +941,8 @@ static void wait_barrier(struct r10conf *conf) | ||
1623 | !conf->barrier || | ||
1624 | (atomic_read(&conf->nr_pending) && | ||
1625 | current->bio_list && | ||
1626 | - !bio_list_empty(current->bio_list)), | ||
1627 | + (!bio_list_empty(&current->bio_list[0]) || | ||
1628 | + !bio_list_empty(&current->bio_list[1]))), | ||
1629 | conf->resync_lock); | ||
1630 | conf->nr_waiting--; | ||
1631 | if (!conf->nr_waiting) | ||
1632 | diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c | ||
1633 | index 387ae1cbf698..a8b430ff117b 100644 | ||
1634 | --- a/drivers/mmc/host/sdhci-of-at91.c | ||
1635 | +++ b/drivers/mmc/host/sdhci-of-at91.c | ||
1636 | @@ -29,6 +29,8 @@ | ||
1637 | |||
1638 | #include "sdhci-pltfm.h" | ||
1639 | |||
1640 | +#define SDMMC_MC1R 0x204 | ||
1641 | +#define SDMMC_MC1R_DDR BIT(3) | ||
1642 | #define SDMMC_CACR 0x230 | ||
1643 | #define SDMMC_CACR_CAPWREN BIT(0) | ||
1644 | #define SDMMC_CACR_KEY (0x46 << 8) | ||
1645 | @@ -103,11 +105,18 @@ static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode, | ||
1646 | sdhci_set_power_noreg(host, mode, vdd); | ||
1647 | } | ||
1648 | |||
1649 | +void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing) | ||
1650 | +{ | ||
1651 | + if (timing == MMC_TIMING_MMC_DDR52) | ||
1652 | + sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R); | ||
1653 | + sdhci_set_uhs_signaling(host, timing); | ||
1654 | +} | ||
1655 | + | ||
1656 | static const struct sdhci_ops sdhci_at91_sama5d2_ops = { | ||
1657 | .set_clock = sdhci_at91_set_clock, | ||
1658 | .set_bus_width = sdhci_set_bus_width, | ||
1659 | .reset = sdhci_reset, | ||
1660 | - .set_uhs_signaling = sdhci_set_uhs_signaling, | ||
1661 | + .set_uhs_signaling = sdhci_at91_set_uhs_signaling, | ||
1662 | .set_power = sdhci_at91_set_power, | ||
1663 | }; | ||
1664 | |||
1665 | diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c | ||
1666 | index a983ba0349fb..7d275e72903a 100644 | ||
1667 | --- a/drivers/mmc/host/sdhci.c | ||
1668 | +++ b/drivers/mmc/host/sdhci.c | ||
1669 | @@ -1823,6 +1823,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) | ||
1670 | struct sdhci_host *host = mmc_priv(mmc); | ||
1671 | unsigned long flags; | ||
1672 | |||
1673 | + if (enable) | ||
1674 | + pm_runtime_get_noresume(host->mmc->parent); | ||
1675 | + | ||
1676 | spin_lock_irqsave(&host->lock, flags); | ||
1677 | if (enable) | ||
1678 | host->flags |= SDHCI_SDIO_IRQ_ENABLED; | ||
1679 | @@ -1831,6 +1834,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) | ||
1680 | |||
1681 | sdhci_enable_sdio_irq_nolock(host, enable); | ||
1682 | spin_unlock_irqrestore(&host->lock, flags); | ||
1683 | + | ||
1684 | + if (!enable) | ||
1685 | + pm_runtime_put_noidle(host->mmc->parent); | ||
1686 | } | ||
1687 | |||
1688 | static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc, | ||
1689 | diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c | ||
1690 | index da10b484bd25..bde769b11e3b 100644 | ||
1691 | --- a/drivers/nvme/host/core.c | ||
1692 | +++ b/drivers/nvme/host/core.c | ||
1693 | @@ -2057,9 +2057,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) | ||
1694 | * Revalidating a dead namespace sets capacity to 0. This will | ||
1695 | * end buffered writers dirtying pages that can't be synced. | ||
1696 | */ | ||
1697 | - if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags)) | ||
1698 | - revalidate_disk(ns->disk); | ||
1699 | - | ||
1700 | + if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) | ||
1701 | + continue; | ||
1702 | + revalidate_disk(ns->disk); | ||
1703 | blk_set_queue_dying(ns->queue); | ||
1704 | blk_mq_abort_requeue_list(ns->queue); | ||
1705 | blk_mq_start_stopped_hw_queues(ns->queue, true); | ||
1706 | diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c | ||
1707 | index 5e52034ab010..8a9c186898c7 100644 | ||
1708 | --- a/drivers/nvme/host/pci.c | ||
1709 | +++ b/drivers/nvme/host/pci.c | ||
1710 | @@ -1983,8 +1983,10 @@ static void nvme_remove(struct pci_dev *pdev) | ||
1711 | |||
1712 | pci_set_drvdata(pdev, NULL); | ||
1713 | |||
1714 | - if (!pci_device_is_present(pdev)) | ||
1715 | + if (!pci_device_is_present(pdev)) { | ||
1716 | nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); | ||
1717 | + nvme_dev_disable(dev, false); | ||
1718 | + } | ||
1719 | |||
1720 | flush_work(&dev->reset_work); | ||
1721 | nvme_uninit_ctrl(&dev->ctrl); | ||
1722 | diff --git a/drivers/pci/host/pcie-iproc-bcma.c b/drivers/pci/host/pcie-iproc-bcma.c | ||
1723 | index 8ce089043a27..46ca8ed031fe 100644 | ||
1724 | --- a/drivers/pci/host/pcie-iproc-bcma.c | ||
1725 | +++ b/drivers/pci/host/pcie-iproc-bcma.c | ||
1726 | @@ -44,8 +44,7 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev) | ||
1727 | { | ||
1728 | struct device *dev = &bdev->dev; | ||
1729 | struct iproc_pcie *pcie; | ||
1730 | - LIST_HEAD(res); | ||
1731 | - struct resource res_mem; | ||
1732 | + LIST_HEAD(resources); | ||
1733 | int ret; | ||
1734 | |||
1735 | pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); | ||
1736 | @@ -62,22 +61,23 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev) | ||
1737 | |||
1738 | pcie->base_addr = bdev->addr; | ||
1739 | |||
1740 | - res_mem.start = bdev->addr_s[0]; | ||
1741 | - res_mem.end = bdev->addr_s[0] + SZ_128M - 1; | ||
1742 | - res_mem.name = "PCIe MEM space"; | ||
1743 | - res_mem.flags = IORESOURCE_MEM; | ||
1744 | - pci_add_resource(&res, &res_mem); | ||
1745 | + pcie->mem.start = bdev->addr_s[0]; | ||
1746 | + pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1; | ||
1747 | + pcie->mem.name = "PCIe MEM space"; | ||
1748 | + pcie->mem.flags = IORESOURCE_MEM; | ||
1749 | + pci_add_resource(&resources, &pcie->mem); | ||
1750 | |||
1751 | pcie->map_irq = iproc_pcie_bcma_map_irq; | ||
1752 | |||
1753 | - ret = iproc_pcie_setup(pcie, &res); | ||
1754 | - if (ret) | ||
1755 | + ret = iproc_pcie_setup(pcie, &resources); | ||
1756 | + if (ret) { | ||
1757 | dev_err(dev, "PCIe controller setup failed\n"); | ||
1758 | - | ||
1759 | - pci_free_resource_list(&res); | ||
1760 | + pci_free_resource_list(&resources); | ||
1761 | + return ret; | ||
1762 | + } | ||
1763 | |||
1764 | bcma_set_drvdata(bdev, pcie); | ||
1765 | - return ret; | ||
1766 | + return 0; | ||
1767 | } | ||
1768 | |||
1769 | static void iproc_pcie_bcma_remove(struct bcma_device *bdev) | ||
1770 | diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c | ||
1771 | index a3de087976b3..7dcaddcd2f16 100644 | ||
1772 | --- a/drivers/pci/host/pcie-iproc-platform.c | ||
1773 | +++ b/drivers/pci/host/pcie-iproc-platform.c | ||
1774 | @@ -46,7 +46,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) | ||
1775 | struct device_node *np = dev->of_node; | ||
1776 | struct resource reg; | ||
1777 | resource_size_t iobase = 0; | ||
1778 | - LIST_HEAD(res); | ||
1779 | + LIST_HEAD(resources); | ||
1780 | int ret; | ||
1781 | |||
1782 | of_id = of_match_device(iproc_pcie_of_match_table, dev); | ||
1783 | @@ -108,23 +108,24 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) | ||
1784 | pcie->phy = NULL; | ||
1785 | } | ||
1786 | |||
1787 | - ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase); | ||
1788 | + ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &resources, | ||
1789 | + &iobase); | ||
1790 | if (ret) { | ||
1791 | - dev_err(dev, | ||
1792 | - "unable to get PCI host bridge resources\n"); | ||
1793 | + dev_err(dev, "unable to get PCI host bridge resources\n"); | ||
1794 | return ret; | ||
1795 | } | ||
1796 | |||
1797 | pcie->map_irq = of_irq_parse_and_map_pci; | ||
1798 | |||
1799 | - ret = iproc_pcie_setup(pcie, &res); | ||
1800 | - if (ret) | ||
1801 | + ret = iproc_pcie_setup(pcie, &resources); | ||
1802 | + if (ret) { | ||
1803 | dev_err(dev, "PCIe controller setup failed\n"); | ||
1804 | - | ||
1805 | - pci_free_resource_list(&res); | ||
1806 | + pci_free_resource_list(&resources); | ||
1807 | + return ret; | ||
1808 | + } | ||
1809 | |||
1810 | platform_set_drvdata(pdev, pcie); | ||
1811 | - return ret; | ||
1812 | + return 0; | ||
1813 | } | ||
1814 | |||
1815 | static int iproc_pcie_pltfm_remove(struct platform_device *pdev) | ||
1816 | diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h | ||
1817 | index e84d93c53c7b..fa4226742bcd 100644 | ||
1818 | --- a/drivers/pci/host/pcie-iproc.h | ||
1819 | +++ b/drivers/pci/host/pcie-iproc.h | ||
1820 | @@ -68,6 +68,7 @@ struct iproc_pcie { | ||
1821 | #ifdef CONFIG_ARM | ||
1822 | struct pci_sys_data sysdata; | ||
1823 | #endif | ||
1824 | + struct resource mem; | ||
1825 | struct pci_bus *root_bus; | ||
1826 | struct phy *phy; | ||
1827 | int (*map_irq)(const struct pci_dev *, u8, u8); | ||
1828 | diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c | ||
1829 | index 7bb20684e9fa..d3145799b92f 100644 | ||
1830 | --- a/drivers/scsi/device_handler/scsi_dh_alua.c | ||
1831 | +++ b/drivers/scsi/device_handler/scsi_dh_alua.c | ||
1832 | @@ -113,7 +113,7 @@ struct alua_queue_data { | ||
1833 | #define ALUA_POLICY_SWITCH_ALL 1 | ||
1834 | |||
1835 | static void alua_rtpg_work(struct work_struct *work); | ||
1836 | -static void alua_rtpg_queue(struct alua_port_group *pg, | ||
1837 | +static bool alua_rtpg_queue(struct alua_port_group *pg, | ||
1838 | struct scsi_device *sdev, | ||
1839 | struct alua_queue_data *qdata, bool force); | ||
1840 | static void alua_check(struct scsi_device *sdev, bool force); | ||
1841 | @@ -862,7 +862,13 @@ static void alua_rtpg_work(struct work_struct *work) | ||
1842 | kref_put(&pg->kref, release_port_group); | ||
1843 | } | ||
1844 | |||
1845 | -static void alua_rtpg_queue(struct alua_port_group *pg, | ||
1846 | +/** | ||
1847 | + * alua_rtpg_queue() - cause RTPG to be submitted asynchronously | ||
1848 | + * | ||
1849 | + * Returns true if and only if alua_rtpg_work() will be called asynchronously. | ||
1850 | + * That function is responsible for calling @qdata->fn(). | ||
1851 | + */ | ||
1852 | +static bool alua_rtpg_queue(struct alua_port_group *pg, | ||
1853 | struct scsi_device *sdev, | ||
1854 | struct alua_queue_data *qdata, bool force) | ||
1855 | { | ||
1856 | @@ -870,8 +876,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg, | ||
1857 | unsigned long flags; | ||
1858 | struct workqueue_struct *alua_wq = kaluad_wq; | ||
1859 | |||
1860 | - if (!pg) | ||
1861 | - return; | ||
1862 | + if (!pg || scsi_device_get(sdev)) | ||
1863 | + return false; | ||
1864 | |||
1865 | spin_lock_irqsave(&pg->lock, flags); | ||
1866 | if (qdata) { | ||
1867 | @@ -884,14 +890,12 @@ static void alua_rtpg_queue(struct alua_port_group *pg, | ||
1868 | pg->flags |= ALUA_PG_RUN_RTPG; | ||
1869 | kref_get(&pg->kref); | ||
1870 | pg->rtpg_sdev = sdev; | ||
1871 | - scsi_device_get(sdev); | ||
1872 | start_queue = 1; | ||
1873 | } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { | ||
1874 | pg->flags |= ALUA_PG_RUN_RTPG; | ||
1875 | /* Do not queue if the worker is already running */ | ||
1876 | if (!(pg->flags & ALUA_PG_RUNNING)) { | ||
1877 | kref_get(&pg->kref); | ||
1878 | - sdev = NULL; | ||
1879 | start_queue = 1; | ||
1880 | } | ||
1881 | } | ||
1882 | @@ -900,13 +904,17 @@ static void alua_rtpg_queue(struct alua_port_group *pg, | ||
1883 | alua_wq = kaluad_sync_wq; | ||
1884 | spin_unlock_irqrestore(&pg->lock, flags); | ||
1885 | |||
1886 | - if (start_queue && | ||
1887 | - !queue_delayed_work(alua_wq, &pg->rtpg_work, | ||
1888 | - msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) { | ||
1889 | - if (sdev) | ||
1890 | - scsi_device_put(sdev); | ||
1891 | - kref_put(&pg->kref, release_port_group); | ||
1892 | + if (start_queue) { | ||
1893 | + if (queue_delayed_work(alua_wq, &pg->rtpg_work, | ||
1894 | + msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) | ||
1895 | + sdev = NULL; | ||
1896 | + else | ||
1897 | + kref_put(&pg->kref, release_port_group); | ||
1898 | } | ||
1899 | + if (sdev) | ||
1900 | + scsi_device_put(sdev); | ||
1901 | + | ||
1902 | + return true; | ||
1903 | } | ||
1904 | |||
1905 | /* | ||
1906 | @@ -1007,11 +1015,13 @@ static int alua_activate(struct scsi_device *sdev, | ||
1907 | mutex_unlock(&h->init_mutex); | ||
1908 | goto out; | ||
1909 | } | ||
1910 | - fn = NULL; | ||
1911 | rcu_read_unlock(); | ||
1912 | mutex_unlock(&h->init_mutex); | ||
1913 | |||
1914 | - alua_rtpg_queue(pg, sdev, qdata, true); | ||
1915 | + if (alua_rtpg_queue(pg, sdev, qdata, true)) | ||
1916 | + fn = NULL; | ||
1917 | + else | ||
1918 | + err = SCSI_DH_DEV_OFFLINED; | ||
1919 | kref_put(&pg->kref, release_port_group); | ||
1920 | out: | ||
1921 | if (fn) | ||
1922 | diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c | ||
1923 | index 763f012fdeca..87f5e694dbed 100644 | ||
1924 | --- a/drivers/scsi/libsas/sas_ata.c | ||
1925 | +++ b/drivers/scsi/libsas/sas_ata.c | ||
1926 | @@ -221,7 +221,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) | ||
1927 | task->num_scatter = qc->n_elem; | ||
1928 | } else { | ||
1929 | for_each_sg(qc->sg, sg, qc->n_elem, si) | ||
1930 | - xfer += sg->length; | ||
1931 | + xfer += sg_dma_len(sg); | ||
1932 | |||
1933 | task->total_xfer_len = xfer; | ||
1934 | task->num_scatter = si; | ||
1935 | diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c | ||
1936 | index fe7469c901f7..ad33238cef17 100644 | ||
1937 | --- a/drivers/scsi/qla2xxx/qla_attr.c | ||
1938 | +++ b/drivers/scsi/qla2xxx/qla_attr.c | ||
1939 | @@ -2153,8 +2153,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) | ||
1940 | "Timer for the VP[%d] has stopped\n", vha->vp_idx); | ||
1941 | } | ||
1942 | |||
1943 | - BUG_ON(atomic_read(&vha->vref_count)); | ||
1944 | - | ||
1945 | qla2x00_free_fcports(vha); | ||
1946 | |||
1947 | mutex_lock(&ha->vport_lock); | ||
1948 | diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h | ||
1949 | index 73b12e41d992..8e63a7b90277 100644 | ||
1950 | --- a/drivers/scsi/qla2xxx/qla_def.h | ||
1951 | +++ b/drivers/scsi/qla2xxx/qla_def.h | ||
1952 | @@ -3742,6 +3742,7 @@ typedef struct scsi_qla_host { | ||
1953 | struct qla8044_reset_template reset_tmplt; | ||
1954 | struct qla_tgt_counters tgt_counters; | ||
1955 | uint16_t bbcr; | ||
1956 | + wait_queue_head_t vref_waitq; | ||
1957 | } scsi_qla_host_t; | ||
1958 | |||
1959 | struct qla27xx_image_status { | ||
1960 | @@ -3780,6 +3781,7 @@ struct qla_tgt_vp_map { | ||
1961 | mb(); \ | ||
1962 | if (__vha->flags.delete_progress) { \ | ||
1963 | atomic_dec(&__vha->vref_count); \ | ||
1964 | + wake_up(&__vha->vref_waitq); \ | ||
1965 | __bail = 1; \ | ||
1966 | } else { \ | ||
1967 | __bail = 0; \ | ||
1968 | @@ -3788,6 +3790,7 @@ struct qla_tgt_vp_map { | ||
1969 | |||
1970 | #define QLA_VHA_MARK_NOT_BUSY(__vha) do { \ | ||
1971 | atomic_dec(&__vha->vref_count); \ | ||
1972 | + wake_up(&__vha->vref_waitq); \ | ||
1973 | } while (0) | ||
1974 | |||
1975 | /* | ||
1976 | diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c | ||
1977 | index 5b09296b46a3..8f12f6baa6b8 100644 | ||
1978 | --- a/drivers/scsi/qla2xxx/qla_init.c | ||
1979 | +++ b/drivers/scsi/qla2xxx/qla_init.c | ||
1980 | @@ -4356,6 +4356,7 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha) | ||
1981 | } | ||
1982 | } | ||
1983 | atomic_dec(&vha->vref_count); | ||
1984 | + wake_up(&vha->vref_waitq); | ||
1985 | } | ||
1986 | spin_unlock_irqrestore(&ha->vport_slock, flags); | ||
1987 | } | ||
1988 | diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c | ||
1989 | index cf7ba52bae66..3dfb54abc874 100644 | ||
1990 | --- a/drivers/scsi/qla2xxx/qla_mid.c | ||
1991 | +++ b/drivers/scsi/qla2xxx/qla_mid.c | ||
1992 | @@ -74,13 +74,14 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) | ||
1993 | * ensures no active vp_list traversal while the vport is removed | ||
1994 | * from the queue) | ||
1995 | */ | ||
1996 | - spin_lock_irqsave(&ha->vport_slock, flags); | ||
1997 | - while (atomic_read(&vha->vref_count)) { | ||
1998 | - spin_unlock_irqrestore(&ha->vport_slock, flags); | ||
1999 | - | ||
2000 | - msleep(500); | ||
2001 | + wait_event_timeout(vha->vref_waitq, atomic_read(&vha->vref_count), | ||
2002 | + 10*HZ); | ||
2003 | |||
2004 | - spin_lock_irqsave(&ha->vport_slock, flags); | ||
2005 | + spin_lock_irqsave(&ha->vport_slock, flags); | ||
2006 | + if (atomic_read(&vha->vref_count)) { | ||
2007 | + ql_dbg(ql_dbg_vport, vha, 0xfffa, | ||
2008 | + "vha->vref_count=%u timeout\n", vha->vref_count.counter); | ||
2009 | + vha->vref_count = (atomic_t)ATOMIC_INIT(0); | ||
2010 | } | ||
2011 | list_del(&vha->list); | ||
2012 | qlt_update_vp_map(vha, RESET_VP_IDX); | ||
2013 | @@ -269,6 +270,7 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) | ||
2014 | |||
2015 | spin_lock_irqsave(&ha->vport_slock, flags); | ||
2016 | atomic_dec(&vha->vref_count); | ||
2017 | + wake_up(&vha->vref_waitq); | ||
2018 | } | ||
2019 | i++; | ||
2020 | } | ||
2021 | diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c | ||
2022 | index bea819e5336d..4f361d8d84be 100644 | ||
2023 | --- a/drivers/scsi/qla2xxx/qla_os.c | ||
2024 | +++ b/drivers/scsi/qla2xxx/qla_os.c | ||
2025 | @@ -4045,6 +4045,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, | ||
2026 | |||
2027 | spin_lock_init(&vha->work_lock); | ||
2028 | spin_lock_init(&vha->cmd_list_lock); | ||
2029 | + init_waitqueue_head(&vha->vref_waitq); | ||
2030 | |||
2031 | sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no); | ||
2032 | ql_dbg(ql_dbg_init, vha, 0x0041, | ||
2033 | diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c | ||
2034 | index 121de0aaa6ad..f753df25ba34 100644 | ||
2035 | --- a/drivers/scsi/sg.c | ||
2036 | +++ b/drivers/scsi/sg.c | ||
2037 | @@ -998,6 +998,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) | ||
2038 | result = get_user(val, ip); | ||
2039 | if (result) | ||
2040 | return result; | ||
2041 | + if (val > SG_MAX_CDB_SIZE) | ||
2042 | + return -ENOMEM; | ||
2043 | sfp->next_cmd_len = (val > 0) ? val : 0; | ||
2044 | return 0; | ||
2045 | case SG_GET_VERSION_NUM: | ||
2046 | diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c | ||
2047 | index fabbe76203bb..4d079cdaa7a3 100644 | ||
2048 | --- a/drivers/tty/serial/atmel_serial.c | ||
2049 | +++ b/drivers/tty/serial/atmel_serial.c | ||
2050 | @@ -1938,6 +1938,11 @@ static void atmel_flush_buffer(struct uart_port *port) | ||
2051 | atmel_uart_writel(port, ATMEL_PDC_TCR, 0); | ||
2052 | atmel_port->pdc_tx.ofs = 0; | ||
2053 | } | ||
2054 | + /* | ||
2055 | + * in uart_flush_buffer(), the xmit circular buffer has just | ||
2056 | + * been cleared, so we have to reset tx_len accordingly. | ||
2057 | + */ | ||
2058 | + atmel_port->tx_len = 0; | ||
2059 | } | ||
2060 | |||
2061 | /* | ||
2062 | @@ -2471,6 +2476,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count) | ||
2063 | pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN; | ||
2064 | atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS); | ||
2065 | |||
2066 | + /* Make sure that tx path is actually able to send characters */ | ||
2067 | + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN); | ||
2068 | + | ||
2069 | uart_console_write(port, s, count, atmel_console_putchar); | ||
2070 | |||
2071 | /* | ||
2072 | diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c | ||
2073 | index 770454e0dfa3..07390f8c3681 100644 | ||
2074 | --- a/drivers/tty/serial/mxs-auart.c | ||
2075 | +++ b/drivers/tty/serial/mxs-auart.c | ||
2076 | @@ -1085,7 +1085,7 @@ static void mxs_auart_settermios(struct uart_port *u, | ||
2077 | AUART_LINECTRL_BAUD_DIV_MAX); | ||
2078 | baud_max = u->uartclk * 32 / AUART_LINECTRL_BAUD_DIV_MIN; | ||
2079 | baud = uart_get_baud_rate(u, termios, old, baud_min, baud_max); | ||
2080 | - div = u->uartclk * 32 / baud; | ||
2081 | + div = DIV_ROUND_CLOSEST(u->uartclk * 32, baud); | ||
2082 | } | ||
2083 | |||
2084 | ctrl |= AUART_LINECTRL_BAUD_DIVFRAC(div & 0x3F); | ||
2085 | diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c | ||
2086 | index 479e223f9cff..f029aad67183 100644 | ||
2087 | --- a/drivers/usb/core/hcd.c | ||
2088 | +++ b/drivers/usb/core/hcd.c | ||
2089 | @@ -520,8 +520,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) | ||
2090 | */ | ||
2091 | tbuf_size = max_t(u16, sizeof(struct usb_hub_descriptor), wLength); | ||
2092 | tbuf = kzalloc(tbuf_size, GFP_KERNEL); | ||
2093 | - if (!tbuf) | ||
2094 | - return -ENOMEM; | ||
2095 | + if (!tbuf) { | ||
2096 | + status = -ENOMEM; | ||
2097 | + goto err_alloc; | ||
2098 | + } | ||
2099 | |||
2100 | bufp = tbuf; | ||
2101 | |||
2102 | @@ -734,6 +736,7 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) | ||
2103 | } | ||
2104 | |||
2105 | kfree(tbuf); | ||
2106 | + err_alloc: | ||
2107 | |||
2108 | /* any errors get returned through the urb completion */ | ||
2109 | spin_lock_irq(&hcd_root_hub_lock); | ||
2110 | diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c | ||
2111 | index 1536aeb0abab..4e894d301c88 100644 | ||
2112 | --- a/fs/nfs/nfs4proc.c | ||
2113 | +++ b/fs/nfs/nfs4proc.c | ||
2114 | @@ -2532,17 +2532,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) | ||
2115 | } | ||
2116 | |||
2117 | nfs4_stateid_copy(&stateid, &delegation->stateid); | ||
2118 | - if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { | ||
2119 | + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) || | ||
2120 | + !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, | ||
2121 | + &delegation->flags)) { | ||
2122 | rcu_read_unlock(); | ||
2123 | nfs_finish_clear_delegation_stateid(state, &stateid); | ||
2124 | return; | ||
2125 | } | ||
2126 | |||
2127 | - if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) { | ||
2128 | - rcu_read_unlock(); | ||
2129 | - return; | ||
2130 | - } | ||
2131 | - | ||
2132 | cred = get_rpccred(delegation->cred); | ||
2133 | rcu_read_unlock(); | ||
2134 | status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); | ||
2135 | diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c | ||
2136 | index 010aff5c5a79..536009e50387 100644 | ||
2137 | --- a/fs/nfsd/nfsproc.c | ||
2138 | +++ b/fs/nfsd/nfsproc.c | ||
2139 | @@ -790,6 +790,7 @@ nfserrno (int errno) | ||
2140 | { nfserr_serverfault, -ESERVERFAULT }, | ||
2141 | { nfserr_serverfault, -ENFILE }, | ||
2142 | { nfserr_io, -EUCLEAN }, | ||
2143 | + { nfserr_perm, -ENOKEY }, | ||
2144 | }; | ||
2145 | int i; | ||
2146 | |||
2147 | diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c | ||
2148 | index d346d42c54d1..33db69be4832 100644 | ||
2149 | --- a/fs/xfs/libxfs/xfs_ag_resv.c | ||
2150 | +++ b/fs/xfs/libxfs/xfs_ag_resv.c | ||
2151 | @@ -39,6 +39,7 @@ | ||
2152 | #include "xfs_rmap_btree.h" | ||
2153 | #include "xfs_btree.h" | ||
2154 | #include "xfs_refcount_btree.h" | ||
2155 | +#include "xfs_ialloc_btree.h" | ||
2156 | |||
2157 | /* | ||
2158 | * Per-AG Block Reservations | ||
2159 | @@ -200,22 +201,30 @@ __xfs_ag_resv_init( | ||
2160 | struct xfs_mount *mp = pag->pag_mount; | ||
2161 | struct xfs_ag_resv *resv; | ||
2162 | int error; | ||
2163 | + xfs_extlen_t reserved; | ||
2164 | |||
2165 | - resv = xfs_perag_resv(pag, type); | ||
2166 | if (used > ask) | ||
2167 | ask = used; | ||
2168 | - resv->ar_asked = ask; | ||
2169 | - resv->ar_reserved = resv->ar_orig_reserved = ask - used; | ||
2170 | - mp->m_ag_max_usable -= ask; | ||
2171 | + reserved = ask - used; | ||
2172 | |||
2173 | - trace_xfs_ag_resv_init(pag, type, ask); | ||
2174 | - | ||
2175 | - error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true); | ||
2176 | - if (error) | ||
2177 | + error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); | ||
2178 | + if (error) { | ||
2179 | trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, | ||
2180 | error, _RET_IP_); | ||
2181 | + xfs_warn(mp, | ||
2182 | +"Per-AG reservation for AG %u failed. Filesystem may run out of space.", | ||
2183 | + pag->pag_agno); | ||
2184 | + return error; | ||
2185 | + } | ||
2186 | |||
2187 | - return error; | ||
2188 | + mp->m_ag_max_usable -= ask; | ||
2189 | + | ||
2190 | + resv = xfs_perag_resv(pag, type); | ||
2191 | + resv->ar_asked = ask; | ||
2192 | + resv->ar_reserved = resv->ar_orig_reserved = reserved; | ||
2193 | + | ||
2194 | + trace_xfs_ag_resv_init(pag, type, ask); | ||
2195 | + return 0; | ||
2196 | } | ||
2197 | |||
2198 | /* Create a per-AG block reservation. */ | ||
2199 | @@ -223,6 +232,8 @@ int | ||
2200 | xfs_ag_resv_init( | ||
2201 | struct xfs_perag *pag) | ||
2202 | { | ||
2203 | + struct xfs_mount *mp = pag->pag_mount; | ||
2204 | + xfs_agnumber_t agno = pag->pag_agno; | ||
2205 | xfs_extlen_t ask; | ||
2206 | xfs_extlen_t used; | ||
2207 | int error = 0; | ||
2208 | @@ -231,23 +242,45 @@ xfs_ag_resv_init( | ||
2209 | if (pag->pag_meta_resv.ar_asked == 0) { | ||
2210 | ask = used = 0; | ||
2211 | |||
2212 | - error = xfs_refcountbt_calc_reserves(pag->pag_mount, | ||
2213 | - pag->pag_agno, &ask, &used); | ||
2214 | + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); | ||
2215 | if (error) | ||
2216 | goto out; | ||
2217 | |||
2218 | - error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, | ||
2219 | - ask, used); | ||
2220 | + error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); | ||
2221 | if (error) | ||
2222 | goto out; | ||
2223 | + | ||
2224 | + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, | ||
2225 | + ask, used); | ||
2226 | + if (error) { | ||
2227 | + /* | ||
2228 | + * Because we didn't have per-AG reservations when the | ||
2229 | + * finobt feature was added we might not be able to | ||
2230 | + * reserve all needed blocks. Warn and fall back to the | ||
2231 | + * old and potentially buggy code in that case, but | ||
2232 | + * ensure we do have the reservation for the refcountbt. | ||
2233 | + */ | ||
2234 | + ask = used = 0; | ||
2235 | + | ||
2236 | + mp->m_inotbt_nores = true; | ||
2237 | + | ||
2238 | + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, | ||
2239 | + &used); | ||
2240 | + if (error) | ||
2241 | + goto out; | ||
2242 | + | ||
2243 | + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, | ||
2244 | + ask, used); | ||
2245 | + if (error) | ||
2246 | + goto out; | ||
2247 | + } | ||
2248 | } | ||
2249 | |||
2250 | /* Create the AGFL metadata reservation */ | ||
2251 | if (pag->pag_agfl_resv.ar_asked == 0) { | ||
2252 | ask = used = 0; | ||
2253 | |||
2254 | - error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, | ||
2255 | - &ask, &used); | ||
2256 | + error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); | ||
2257 | if (error) | ||
2258 | goto out; | ||
2259 | |||
2260 | @@ -256,9 +289,16 @@ xfs_ag_resv_init( | ||
2261 | goto out; | ||
2262 | } | ||
2263 | |||
2264 | +#ifdef DEBUG | ||
2265 | + /* need to read in the AGF for the ASSERT below to work */ | ||
2266 | + error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); | ||
2267 | + if (error) | ||
2268 | + return error; | ||
2269 | + | ||
2270 | ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + | ||
2271 | xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= | ||
2272 | pag->pagf_freeblks + pag->pagf_flcount); | ||
2273 | +#endif | ||
2274 | out: | ||
2275 | return error; | ||
2276 | } | ||
2277 | diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c | ||
2278 | index f52fd63fce19..5a508b011e27 100644 | ||
2279 | --- a/fs/xfs/libxfs/xfs_bmap.c | ||
2280 | +++ b/fs/xfs/libxfs/xfs_bmap.c | ||
2281 | @@ -769,8 +769,8 @@ xfs_bmap_extents_to_btree( | ||
2282 | args.type = XFS_ALLOCTYPE_START_BNO; | ||
2283 | args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); | ||
2284 | } else if (dfops->dop_low) { | ||
2285 | -try_another_ag: | ||
2286 | args.type = XFS_ALLOCTYPE_START_BNO; | ||
2287 | +try_another_ag: | ||
2288 | args.fsbno = *firstblock; | ||
2289 | } else { | ||
2290 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | ||
2291 | @@ -796,17 +796,19 @@ xfs_bmap_extents_to_btree( | ||
2292 | if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && | ||
2293 | args.fsbno == NULLFSBLOCK && | ||
2294 | args.type == XFS_ALLOCTYPE_NEAR_BNO) { | ||
2295 | - dfops->dop_low = true; | ||
2296 | + args.type = XFS_ALLOCTYPE_FIRST_AG; | ||
2297 | goto try_another_ag; | ||
2298 | } | ||
2299 | + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { | ||
2300 | + xfs_iroot_realloc(ip, -1, whichfork); | ||
2301 | + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
2302 | + return -ENOSPC; | ||
2303 | + } | ||
2304 | /* | ||
2305 | * Allocation can't fail, the space was reserved. | ||
2306 | */ | ||
2307 | - ASSERT(args.fsbno != NULLFSBLOCK); | ||
2308 | ASSERT(*firstblock == NULLFSBLOCK || | ||
2309 | - args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || | ||
2310 | - (dfops->dop_low && | ||
2311 | - args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); | ||
2312 | + args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); | ||
2313 | *firstblock = cur->bc_private.b.firstblock = args.fsbno; | ||
2314 | cur->bc_private.b.allocated++; | ||
2315 | ip->i_d.di_nblocks++; | ||
2316 | @@ -1278,7 +1280,6 @@ xfs_bmap_read_extents( | ||
2317 | /* REFERENCED */ | ||
2318 | xfs_extnum_t room; /* number of entries there's room for */ | ||
2319 | |||
2320 | - bno = NULLFSBLOCK; | ||
2321 | mp = ip->i_mount; | ||
2322 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
2323 | exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : | ||
2324 | @@ -1291,9 +1292,7 @@ xfs_bmap_read_extents( | ||
2325 | ASSERT(level > 0); | ||
2326 | pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); | ||
2327 | bno = be64_to_cpu(*pp); | ||
2328 | - ASSERT(bno != NULLFSBLOCK); | ||
2329 | - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); | ||
2330 | - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); | ||
2331 | + | ||
2332 | /* | ||
2333 | * Go down the tree until leaf level is reached, following the first | ||
2334 | * pointer (leftmost) at each level. | ||
2335 | @@ -1955,6 +1954,7 @@ xfs_bmap_add_extent_delay_real( | ||
2336 | */ | ||
2337 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
2338 | xfs_bmbt_set_startblock(ep, new->br_startblock); | ||
2339 | + xfs_bmbt_set_state(ep, new->br_state); | ||
2340 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
2341 | |||
2342 | (*nextents)++; | ||
2343 | @@ -2293,6 +2293,7 @@ STATIC int /* error */ | ||
2344 | xfs_bmap_add_extent_unwritten_real( | ||
2345 | struct xfs_trans *tp, | ||
2346 | xfs_inode_t *ip, /* incore inode pointer */ | ||
2347 | + int whichfork, | ||
2348 | xfs_extnum_t *idx, /* extent number to update/insert */ | ||
2349 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | ||
2350 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
2351 | @@ -2312,12 +2313,14 @@ xfs_bmap_add_extent_unwritten_real( | ||
2352 | /* left is 0, right is 1, prev is 2 */ | ||
2353 | int rval=0; /* return value (logging flags) */ | ||
2354 | int state = 0;/* state bits, accessed thru macros */ | ||
2355 | - struct xfs_mount *mp = tp->t_mountp; | ||
2356 | + struct xfs_mount *mp = ip->i_mount; | ||
2357 | |||
2358 | *logflagsp = 0; | ||
2359 | |||
2360 | cur = *curp; | ||
2361 | - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | ||
2362 | + ifp = XFS_IFORK_PTR(ip, whichfork); | ||
2363 | + if (whichfork == XFS_COW_FORK) | ||
2364 | + state |= BMAP_COWFORK; | ||
2365 | |||
2366 | ASSERT(*idx >= 0); | ||
2367 | ASSERT(*idx <= xfs_iext_count(ifp)); | ||
2368 | @@ -2376,7 +2379,7 @@ xfs_bmap_add_extent_unwritten_real( | ||
2369 | * Don't set contiguous if the combined extent would be too large. | ||
2370 | * Also check for all-three-contiguous being too large. | ||
2371 | */ | ||
2372 | - if (*idx < xfs_iext_count(&ip->i_df) - 1) { | ||
2373 | + if (*idx < xfs_iext_count(ifp) - 1) { | ||
2374 | state |= BMAP_RIGHT_VALID; | ||
2375 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); | ||
2376 | if (isnullstartblock(RIGHT.br_startblock)) | ||
2377 | @@ -2416,7 +2419,8 @@ xfs_bmap_add_extent_unwritten_real( | ||
2378 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2379 | |||
2380 | xfs_iext_remove(ip, *idx + 1, 2, state); | ||
2381 | - ip->i_d.di_nextents -= 2; | ||
2382 | + XFS_IFORK_NEXT_SET(ip, whichfork, | ||
2383 | + XFS_IFORK_NEXTENTS(ip, whichfork) - 2); | ||
2384 | if (cur == NULL) | ||
2385 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2386 | else { | ||
2387 | @@ -2459,7 +2463,8 @@ xfs_bmap_add_extent_unwritten_real( | ||
2388 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2389 | |||
2390 | xfs_iext_remove(ip, *idx + 1, 1, state); | ||
2391 | - ip->i_d.di_nextents--; | ||
2392 | + XFS_IFORK_NEXT_SET(ip, whichfork, | ||
2393 | + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | ||
2394 | if (cur == NULL) | ||
2395 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2396 | else { | ||
2397 | @@ -2494,7 +2499,8 @@ xfs_bmap_add_extent_unwritten_real( | ||
2398 | xfs_bmbt_set_state(ep, newext); | ||
2399 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2400 | xfs_iext_remove(ip, *idx + 1, 1, state); | ||
2401 | - ip->i_d.di_nextents--; | ||
2402 | + XFS_IFORK_NEXT_SET(ip, whichfork, | ||
2403 | + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | ||
2404 | if (cur == NULL) | ||
2405 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2406 | else { | ||
2407 | @@ -2606,7 +2612,8 @@ xfs_bmap_add_extent_unwritten_real( | ||
2408 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2409 | |||
2410 | xfs_iext_insert(ip, *idx, 1, new, state); | ||
2411 | - ip->i_d.di_nextents++; | ||
2412 | + XFS_IFORK_NEXT_SET(ip, whichfork, | ||
2413 | + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); | ||
2414 | if (cur == NULL) | ||
2415 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2416 | else { | ||
2417 | @@ -2684,7 +2691,8 @@ xfs_bmap_add_extent_unwritten_real( | ||
2418 | ++*idx; | ||
2419 | xfs_iext_insert(ip, *idx, 1, new, state); | ||
2420 | |||
2421 | - ip->i_d.di_nextents++; | ||
2422 | + XFS_IFORK_NEXT_SET(ip, whichfork, | ||
2423 | + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); | ||
2424 | if (cur == NULL) | ||
2425 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2426 | else { | ||
2427 | @@ -2732,7 +2740,8 @@ xfs_bmap_add_extent_unwritten_real( | ||
2428 | ++*idx; | ||
2429 | xfs_iext_insert(ip, *idx, 2, &r[0], state); | ||
2430 | |||
2431 | - ip->i_d.di_nextents += 2; | ||
2432 | + XFS_IFORK_NEXT_SET(ip, whichfork, | ||
2433 | + XFS_IFORK_NEXTENTS(ip, whichfork) + 2); | ||
2434 | if (cur == NULL) | ||
2435 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2436 | else { | ||
2437 | @@ -2786,17 +2795,17 @@ xfs_bmap_add_extent_unwritten_real( | ||
2438 | } | ||
2439 | |||
2440 | /* update reverse mappings */ | ||
2441 | - error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new); | ||
2442 | + error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new); | ||
2443 | if (error) | ||
2444 | goto done; | ||
2445 | |||
2446 | /* convert to a btree if necessary */ | ||
2447 | - if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { | ||
2448 | + if (xfs_bmap_needs_btree(ip, whichfork)) { | ||
2449 | int tmp_logflags; /* partial log flag return val */ | ||
2450 | |||
2451 | ASSERT(cur == NULL); | ||
2452 | error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, | ||
2453 | - 0, &tmp_logflags, XFS_DATA_FORK); | ||
2454 | + 0, &tmp_logflags, whichfork); | ||
2455 | *logflagsp |= tmp_logflags; | ||
2456 | if (error) | ||
2457 | goto done; | ||
2458 | @@ -2808,7 +2817,7 @@ xfs_bmap_add_extent_unwritten_real( | ||
2459 | *curp = cur; | ||
2460 | } | ||
2461 | |||
2462 | - xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); | ||
2463 | + xfs_bmap_check_leaf_extents(*curp, ip, whichfork); | ||
2464 | done: | ||
2465 | *logflagsp |= rval; | ||
2466 | return error; | ||
2467 | @@ -2900,7 +2909,8 @@ xfs_bmap_add_extent_hole_delay( | ||
2468 | oldlen = startblockval(left.br_startblock) + | ||
2469 | startblockval(new->br_startblock) + | ||
2470 | startblockval(right.br_startblock); | ||
2471 | - newlen = xfs_bmap_worst_indlen(ip, temp); | ||
2472 | + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | ||
2473 | + oldlen); | ||
2474 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), | ||
2475 | nullstartblock((int)newlen)); | ||
2476 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2477 | @@ -2921,7 +2931,8 @@ xfs_bmap_add_extent_hole_delay( | ||
2478 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); | ||
2479 | oldlen = startblockval(left.br_startblock) + | ||
2480 | startblockval(new->br_startblock); | ||
2481 | - newlen = xfs_bmap_worst_indlen(ip, temp); | ||
2482 | + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | ||
2483 | + oldlen); | ||
2484 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), | ||
2485 | nullstartblock((int)newlen)); | ||
2486 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2487 | @@ -2937,7 +2948,8 @@ xfs_bmap_add_extent_hole_delay( | ||
2488 | temp = new->br_blockcount + right.br_blockcount; | ||
2489 | oldlen = startblockval(new->br_startblock) + | ||
2490 | startblockval(right.br_startblock); | ||
2491 | - newlen = xfs_bmap_worst_indlen(ip, temp); | ||
2492 | + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | ||
2493 | + oldlen); | ||
2494 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), | ||
2495 | new->br_startoff, | ||
2496 | nullstartblock((int)newlen), temp, right.br_state); | ||
2497 | @@ -3913,17 +3925,13 @@ xfs_bmap_btalloc( | ||
2498 | * the first block that was allocated. | ||
2499 | */ | ||
2500 | ASSERT(*ap->firstblock == NULLFSBLOCK || | ||
2501 | - XFS_FSB_TO_AGNO(mp, *ap->firstblock) == | ||
2502 | - XFS_FSB_TO_AGNO(mp, args.fsbno) || | ||
2503 | - (ap->dfops->dop_low && | ||
2504 | - XFS_FSB_TO_AGNO(mp, *ap->firstblock) < | ||
2505 | - XFS_FSB_TO_AGNO(mp, args.fsbno))); | ||
2506 | + XFS_FSB_TO_AGNO(mp, *ap->firstblock) <= | ||
2507 | + XFS_FSB_TO_AGNO(mp, args.fsbno)); | ||
2508 | |||
2509 | ap->blkno = args.fsbno; | ||
2510 | if (*ap->firstblock == NULLFSBLOCK) | ||
2511 | *ap->firstblock = args.fsbno; | ||
2512 | - ASSERT(nullfb || fb_agno == args.agno || | ||
2513 | - (ap->dfops->dop_low && fb_agno < args.agno)); | ||
2514 | + ASSERT(nullfb || fb_agno <= args.agno); | ||
2515 | ap->length = args.len; | ||
2516 | if (!(ap->flags & XFS_BMAPI_COWFORK)) | ||
2517 | ap->ip->i_d.di_nblocks += args.len; | ||
2518 | @@ -4249,6 +4257,19 @@ xfs_bmapi_read( | ||
2519 | return 0; | ||
2520 | } | ||
2521 | |||
2522 | +/* | ||
2523 | + * Add a delayed allocation extent to an inode. Blocks are reserved from the | ||
2524 | + * global pool and the extent inserted into the inode in-core extent tree. | ||
2525 | + * | ||
2526 | + * On entry, got refers to the first extent beyond the offset of the extent to | ||
2527 | + * allocate or eof is specified if no such extent exists. On return, got refers | ||
2528 | + * to the extent record that was inserted to the inode fork. | ||
2529 | + * | ||
2530 | + * Note that the allocated extent may have been merged with contiguous extents | ||
2531 | + * during insertion into the inode fork. Thus, got does not reflect the current | ||
2532 | + * state of the inode fork on return. If necessary, the caller can use lastx to | ||
2533 | + * look up the updated record in the inode fork. | ||
2534 | + */ | ||
2535 | int | ||
2536 | xfs_bmapi_reserve_delalloc( | ||
2537 | struct xfs_inode *ip, | ||
2538 | @@ -4335,13 +4356,8 @@ xfs_bmapi_reserve_delalloc( | ||
2539 | got->br_startblock = nullstartblock(indlen); | ||
2540 | got->br_blockcount = alen; | ||
2541 | got->br_state = XFS_EXT_NORM; | ||
2542 | - xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); | ||
2543 | |||
2544 | - /* | ||
2545 | - * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay | ||
2546 | - * might have merged it into one of the neighbouring ones. | ||
2547 | - */ | ||
2548 | - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); | ||
2549 | + xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); | ||
2550 | |||
2551 | /* | ||
2552 | * Tag the inode if blocks were preallocated. Note that COW fork | ||
2553 | @@ -4353,10 +4369,6 @@ xfs_bmapi_reserve_delalloc( | ||
2554 | if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) | ||
2555 | xfs_inode_set_cowblocks_tag(ip); | ||
2556 | |||
2557 | - ASSERT(got->br_startoff <= aoff); | ||
2558 | - ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); | ||
2559 | - ASSERT(isnullstartblock(got->br_startblock)); | ||
2560 | - ASSERT(got->br_state == XFS_EXT_NORM); | ||
2561 | return 0; | ||
2562 | |||
2563 | out_unreserve_blocks: | ||
2564 | @@ -4461,10 +4473,16 @@ xfs_bmapi_allocate( | ||
2565 | bma->got.br_state = XFS_EXT_NORM; | ||
2566 | |||
2567 | /* | ||
2568 | - * A wasdelay extent has been initialized, so shouldn't be flagged | ||
2569 | - * as unwritten. | ||
2570 | + * In the data fork, a wasdelay extent has been initialized, so | ||
2571 | + * shouldn't be flagged as unwritten. | ||
2572 | + * | ||
2573 | + * For the cow fork, however, we convert delalloc reservations | ||
2574 | + * (extents allocated for speculative preallocation) to | ||
2575 | + * allocated unwritten extents, and only convert the unwritten | ||
2576 | + * extents to real extents when we're about to write the data. | ||
2577 | */ | ||
2578 | - if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && | ||
2579 | + if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) && | ||
2580 | + (bma->flags & XFS_BMAPI_PREALLOC) && | ||
2581 | xfs_sb_version_hasextflgbit(&mp->m_sb)) | ||
2582 | bma->got.br_state = XFS_EXT_UNWRITTEN; | ||
2583 | |||
2584 | @@ -4515,8 +4533,6 @@ xfs_bmapi_convert_unwritten( | ||
2585 | (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) | ||
2586 | return 0; | ||
2587 | |||
2588 | - ASSERT(whichfork != XFS_COW_FORK); | ||
2589 | - | ||
2590 | /* | ||
2591 | * Modify (by adding) the state flag, if writing. | ||
2592 | */ | ||
2593 | @@ -4541,8 +4557,8 @@ xfs_bmapi_convert_unwritten( | ||
2594 | return error; | ||
2595 | } | ||
2596 | |||
2597 | - error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, | ||
2598 | - &bma->cur, mval, bma->firstblock, bma->dfops, | ||
2599 | + error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, | ||
2600 | + &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops, | ||
2601 | &tmp_logflags); | ||
2602 | /* | ||
2603 | * Log the inode core unconditionally in the unwritten extent conversion | ||
2604 | @@ -4551,8 +4567,12 @@ xfs_bmapi_convert_unwritten( | ||
2605 | * in the transaction for the sake of fsync(), even if nothing has | ||
2606 | * changed, because fsync() will not force the log for this transaction | ||
2607 | * unless it sees the inode pinned. | ||
2608 | + * | ||
2609 | + * Note: If we're only converting cow fork extents, there aren't | ||
2610 | + * any on-disk updates to make, so we don't need to log anything. | ||
2611 | */ | ||
2612 | - bma->logflags |= tmp_logflags | XFS_ILOG_CORE; | ||
2613 | + if (whichfork != XFS_COW_FORK) | ||
2614 | + bma->logflags |= tmp_logflags | XFS_ILOG_CORE; | ||
2615 | if (error) | ||
2616 | return error; | ||
2617 | |||
2618 | @@ -4626,15 +4646,15 @@ xfs_bmapi_write( | ||
2619 | ASSERT(*nmap >= 1); | ||
2620 | ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); | ||
2621 | ASSERT(!(flags & XFS_BMAPI_IGSTATE)); | ||
2622 | - ASSERT(tp != NULL); | ||
2623 | + ASSERT(tp != NULL || | ||
2624 | + (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) == | ||
2625 | + (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)); | ||
2626 | ASSERT(len > 0); | ||
2627 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); | ||
2628 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
2629 | ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK); | ||
2630 | ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP)); | ||
2631 | ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP)); | ||
2632 | - ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK); | ||
2633 | - ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK); | ||
2634 | |||
2635 | /* zeroing is for currently only for data extents, not metadata */ | ||
2636 | ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != | ||
2637 | @@ -4840,13 +4860,9 @@ xfs_bmapi_write( | ||
2638 | if (bma.cur) { | ||
2639 | if (!error) { | ||
2640 | ASSERT(*firstblock == NULLFSBLOCK || | ||
2641 | - XFS_FSB_TO_AGNO(mp, *firstblock) == | ||
2642 | + XFS_FSB_TO_AGNO(mp, *firstblock) <= | ||
2643 | XFS_FSB_TO_AGNO(mp, | ||
2644 | - bma.cur->bc_private.b.firstblock) || | ||
2645 | - (dfops->dop_low && | ||
2646 | - XFS_FSB_TO_AGNO(mp, *firstblock) < | ||
2647 | - XFS_FSB_TO_AGNO(mp, | ||
2648 | - bma.cur->bc_private.b.firstblock))); | ||
2649 | + bma.cur->bc_private.b.firstblock)); | ||
2650 | *firstblock = bma.cur->bc_private.b.firstblock; | ||
2651 | } | ||
2652 | xfs_btree_del_cursor(bma.cur, | ||
2653 | @@ -4881,34 +4897,59 @@ xfs_bmap_split_indlen( | ||
2654 | xfs_filblks_t len2 = *indlen2; | ||
2655 | xfs_filblks_t nres = len1 + len2; /* new total res. */ | ||
2656 | xfs_filblks_t stolen = 0; | ||
2657 | + xfs_filblks_t resfactor; | ||
2658 | |||
2659 | /* | ||
2660 | * Steal as many blocks as we can to try and satisfy the worst case | ||
2661 | * indlen for both new extents. | ||
2662 | */ | ||
2663 | - while (nres > ores && avail) { | ||
2664 | - nres--; | ||
2665 | - avail--; | ||
2666 | - stolen++; | ||
2667 | - } | ||
2668 | + if (ores < nres && avail) | ||
2669 | + stolen = XFS_FILBLKS_MIN(nres - ores, avail); | ||
2670 | + ores += stolen; | ||
2671 | + | ||
2672 | + /* nothing else to do if we've satisfied the new reservation */ | ||
2673 | + if (ores >= nres) | ||
2674 | + return stolen; | ||
2675 | + | ||
2676 | + /* | ||
2677 | + * We can't meet the total required reservation for the two extents. | ||
2678 | + * Calculate the percent of the overall shortage between both extents | ||
2679 | + * and apply this percentage to each of the requested indlen values. | ||
2680 | + * This distributes the shortage fairly and reduces the chances that one | ||
2681 | + * of the two extents is left with nothing when extents are repeatedly | ||
2682 | + * split. | ||
2683 | + */ | ||
2684 | + resfactor = (ores * 100); | ||
2685 | + do_div(resfactor, nres); | ||
2686 | + len1 *= resfactor; | ||
2687 | + do_div(len1, 100); | ||
2688 | + len2 *= resfactor; | ||
2689 | + do_div(len2, 100); | ||
2690 | + ASSERT(len1 + len2 <= ores); | ||
2691 | + ASSERT(len1 < *indlen1 && len2 < *indlen2); | ||
2692 | |||
2693 | /* | ||
2694 | - * The only blocks available are those reserved for the original | ||
2695 | - * extent and what we can steal from the extent being removed. | ||
2696 | - * If this still isn't enough to satisfy the combined | ||
2697 | - * requirements for the two new extents, skim blocks off of each | ||
2698 | - * of the new reservations until they match what is available. | ||
2699 | + * Hand out the remainder to each extent. If one of the two reservations | ||
2700 | + * is zero, we want to make sure that one gets a block first. The loop | ||
2701 | + * below starts with len1, so hand len2 a block right off the bat if it | ||
2702 | + * is zero. | ||
2703 | */ | ||
2704 | - while (nres > ores) { | ||
2705 | - if (len1) { | ||
2706 | - len1--; | ||
2707 | - nres--; | ||
2708 | + ores -= (len1 + len2); | ||
2709 | + ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores); | ||
2710 | + if (ores && !len2 && *indlen2) { | ||
2711 | + len2++; | ||
2712 | + ores--; | ||
2713 | + } | ||
2714 | + while (ores) { | ||
2715 | + if (len1 < *indlen1) { | ||
2716 | + len1++; | ||
2717 | + ores--; | ||
2718 | } | ||
2719 | - if (nres == ores) | ||
2720 | + if (!ores) | ||
2721 | break; | ||
2722 | - if (len2) { | ||
2723 | - len2--; | ||
2724 | - nres--; | ||
2725 | + if (len2 < *indlen2) { | ||
2726 | + len2++; | ||
2727 | + ores--; | ||
2728 | } | ||
2729 | } | ||
2730 | |||
2731 | @@ -5656,8 +5697,8 @@ __xfs_bunmapi( | ||
2732 | } | ||
2733 | del.br_state = XFS_EXT_UNWRITTEN; | ||
2734 | error = xfs_bmap_add_extent_unwritten_real(tp, ip, | ||
2735 | - &lastx, &cur, &del, firstblock, dfops, | ||
2736 | - &logflags); | ||
2737 | + whichfork, &lastx, &cur, &del, | ||
2738 | + firstblock, dfops, &logflags); | ||
2739 | if (error) | ||
2740 | goto error0; | ||
2741 | goto nodelete; | ||
2742 | @@ -5714,8 +5755,9 @@ __xfs_bunmapi( | ||
2743 | prev.br_state = XFS_EXT_UNWRITTEN; | ||
2744 | lastx--; | ||
2745 | error = xfs_bmap_add_extent_unwritten_real(tp, | ||
2746 | - ip, &lastx, &cur, &prev, | ||
2747 | - firstblock, dfops, &logflags); | ||
2748 | + ip, whichfork, &lastx, &cur, | ||
2749 | + &prev, firstblock, dfops, | ||
2750 | + &logflags); | ||
2751 | if (error) | ||
2752 | goto error0; | ||
2753 | goto nodelete; | ||
2754 | @@ -5723,8 +5765,9 @@ __xfs_bunmapi( | ||
2755 | ASSERT(del.br_state == XFS_EXT_NORM); | ||
2756 | del.br_state = XFS_EXT_UNWRITTEN; | ||
2757 | error = xfs_bmap_add_extent_unwritten_real(tp, | ||
2758 | - ip, &lastx, &cur, &del, | ||
2759 | - firstblock, dfops, &logflags); | ||
2760 | + ip, whichfork, &lastx, &cur, | ||
2761 | + &del, firstblock, dfops, | ||
2762 | + &logflags); | ||
2763 | if (error) | ||
2764 | goto error0; | ||
2765 | goto nodelete; | ||
2766 | diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c | ||
2767 | index f76c1693ff01..5c3918678bb6 100644 | ||
2768 | --- a/fs/xfs/libxfs/xfs_bmap_btree.c | ||
2769 | +++ b/fs/xfs/libxfs/xfs_bmap_btree.c | ||
2770 | @@ -453,8 +453,8 @@ xfs_bmbt_alloc_block( | ||
2771 | |||
2772 | if (args.fsbno == NULLFSBLOCK) { | ||
2773 | args.fsbno = be64_to_cpu(start->l); | ||
2774 | -try_another_ag: | ||
2775 | args.type = XFS_ALLOCTYPE_START_BNO; | ||
2776 | +try_another_ag: | ||
2777 | /* | ||
2778 | * Make sure there is sufficient room left in the AG to | ||
2779 | * complete a full tree split for an extent insert. If | ||
2780 | @@ -494,8 +494,8 @@ xfs_bmbt_alloc_block( | ||
2781 | if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && | ||
2782 | args.fsbno == NULLFSBLOCK && | ||
2783 | args.type == XFS_ALLOCTYPE_NEAR_BNO) { | ||
2784 | - cur->bc_private.b.dfops->dop_low = true; | ||
2785 | args.fsbno = cur->bc_private.b.firstblock; | ||
2786 | + args.type = XFS_ALLOCTYPE_FIRST_AG; | ||
2787 | goto try_another_ag; | ||
2788 | } | ||
2789 | |||
2790 | @@ -512,7 +512,7 @@ xfs_bmbt_alloc_block( | ||
2791 | goto error0; | ||
2792 | cur->bc_private.b.dfops->dop_low = true; | ||
2793 | } | ||
2794 | - if (args.fsbno == NULLFSBLOCK) { | ||
2795 | + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { | ||
2796 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2797 | *stat = 0; | ||
2798 | return 0; | ||
2799 | diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c | ||
2800 | index 21e6a6ab6b9a..2849d3fa3d0b 100644 | ||
2801 | --- a/fs/xfs/libxfs/xfs_btree.c | ||
2802 | +++ b/fs/xfs/libxfs/xfs_btree.c | ||
2803 | @@ -810,7 +810,8 @@ xfs_btree_read_bufl( | ||
2804 | xfs_daddr_t d; /* real disk block address */ | ||
2805 | int error; | ||
2806 | |||
2807 | - ASSERT(fsbno != NULLFSBLOCK); | ||
2808 | + if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) | ||
2809 | + return -EFSCORRUPTED; | ||
2810 | d = XFS_FSB_TO_DADDR(mp, fsbno); | ||
2811 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, | ||
2812 | mp->m_bsize, lock, &bp, ops); | ||
2813 | diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h | ||
2814 | index c2b01d1c79ee..3b0fc1afada5 100644 | ||
2815 | --- a/fs/xfs/libxfs/xfs_btree.h | ||
2816 | +++ b/fs/xfs/libxfs/xfs_btree.h | ||
2817 | @@ -491,7 +491,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) | ||
2818 | #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) | ||
2819 | |||
2820 | #define XFS_FSB_SANITY_CHECK(mp,fsb) \ | ||
2821 | - (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ | ||
2822 | + (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ | ||
2823 | XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) | ||
2824 | |||
2825 | /* | ||
2826 | diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c | ||
2827 | index f2dc1a950c85..1bdf2888295b 100644 | ||
2828 | --- a/fs/xfs/libxfs/xfs_da_btree.c | ||
2829 | +++ b/fs/xfs/libxfs/xfs_da_btree.c | ||
2830 | @@ -2633,7 +2633,7 @@ xfs_da_read_buf( | ||
2831 | /* | ||
2832 | * Readahead the dir/attr block. | ||
2833 | */ | ||
2834 | -xfs_daddr_t | ||
2835 | +int | ||
2836 | xfs_da_reada_buf( | ||
2837 | struct xfs_inode *dp, | ||
2838 | xfs_dablk_t bno, | ||
2839 | @@ -2664,7 +2664,5 @@ xfs_da_reada_buf( | ||
2840 | if (mapp != &map) | ||
2841 | kmem_free(mapp); | ||
2842 | |||
2843 | - if (error) | ||
2844 | - return -1; | ||
2845 | - return mappedbno; | ||
2846 | + return error; | ||
2847 | } | ||
2848 | diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h | ||
2849 | index 98c75cbe6ac2..4e29cb6a3627 100644 | ||
2850 | --- a/fs/xfs/libxfs/xfs_da_btree.h | ||
2851 | +++ b/fs/xfs/libxfs/xfs_da_btree.h | ||
2852 | @@ -201,7 +201,7 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, | ||
2853 | xfs_dablk_t bno, xfs_daddr_t mappedbno, | ||
2854 | struct xfs_buf **bpp, int whichfork, | ||
2855 | const struct xfs_buf_ops *ops); | ||
2856 | -xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, | ||
2857 | +int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, | ||
2858 | xfs_daddr_t mapped_bno, int whichfork, | ||
2859 | const struct xfs_buf_ops *ops); | ||
2860 | int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, | ||
2861 | diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c | ||
2862 | index 75a557432d0f..bbd1238852b3 100644 | ||
2863 | --- a/fs/xfs/libxfs/xfs_dir2_node.c | ||
2864 | +++ b/fs/xfs/libxfs/xfs_dir2_node.c | ||
2865 | @@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { | ||
2866 | .verify_write = xfs_dir3_free_write_verify, | ||
2867 | }; | ||
2868 | |||
2869 | +/* Everything ok in the free block header? */ | ||
2870 | +static bool | ||
2871 | +xfs_dir3_free_header_check( | ||
2872 | + struct xfs_inode *dp, | ||
2873 | + xfs_dablk_t fbno, | ||
2874 | + struct xfs_buf *bp) | ||
2875 | +{ | ||
2876 | + struct xfs_mount *mp = dp->i_mount; | ||
2877 | + unsigned int firstdb; | ||
2878 | + int maxbests; | ||
2879 | + | ||
2880 | + maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo); | ||
2881 | + firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) - | ||
2882 | + xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) * | ||
2883 | + maxbests; | ||
2884 | + if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
2885 | + struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; | ||
2886 | + | ||
2887 | + if (be32_to_cpu(hdr3->firstdb) != firstdb) | ||
2888 | + return false; | ||
2889 | + if (be32_to_cpu(hdr3->nvalid) > maxbests) | ||
2890 | + return false; | ||
2891 | + if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) | ||
2892 | + return false; | ||
2893 | + } else { | ||
2894 | + struct xfs_dir2_free_hdr *hdr = bp->b_addr; | ||
2895 | + | ||
2896 | + if (be32_to_cpu(hdr->firstdb) != firstdb) | ||
2897 | + return false; | ||
2898 | + if (be32_to_cpu(hdr->nvalid) > maxbests) | ||
2899 | + return false; | ||
2900 | + if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) | ||
2901 | + return false; | ||
2902 | + } | ||
2903 | + return true; | ||
2904 | +} | ||
2905 | |||
2906 | static int | ||
2907 | __xfs_dir3_free_read( | ||
2908 | @@ -168,11 +204,22 @@ __xfs_dir3_free_read( | ||
2909 | |||
2910 | err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, | ||
2911 | XFS_DATA_FORK, &xfs_dir3_free_buf_ops); | ||
2912 | + if (err || !*bpp) | ||
2913 | + return err; | ||
2914 | + | ||
2915 | + /* Check things that we can't do in the verifier. */ | ||
2916 | + if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { | ||
2917 | + xfs_buf_ioerror(*bpp, -EFSCORRUPTED); | ||
2918 | + xfs_verifier_error(*bpp); | ||
2919 | + xfs_trans_brelse(tp, *bpp); | ||
2920 | + return -EFSCORRUPTED; | ||
2921 | + } | ||
2922 | |||
2923 | /* try read returns without an error or *bpp if it lands in a hole */ | ||
2924 | - if (!err && tp && *bpp) | ||
2925 | + if (tp) | ||
2926 | xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF); | ||
2927 | - return err; | ||
2928 | + | ||
2929 | + return 0; | ||
2930 | } | ||
2931 | |||
2932 | int | ||
2933 | diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c | ||
2934 | index d45c03779dae..a2818f6e8598 100644 | ||
2935 | --- a/fs/xfs/libxfs/xfs_ialloc.c | ||
2936 | +++ b/fs/xfs/libxfs/xfs_ialloc.c | ||
2937 | @@ -51,8 +51,7 @@ xfs_ialloc_cluster_alignment( | ||
2938 | struct xfs_mount *mp) | ||
2939 | { | ||
2940 | if (xfs_sb_version_hasalign(&mp->m_sb) && | ||
2941 | - mp->m_sb.sb_inoalignmt >= | ||
2942 | - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) | ||
2943 | + mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) | ||
2944 | return mp->m_sb.sb_inoalignmt; | ||
2945 | return 1; | ||
2946 | } | ||
2947 | diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c | ||
2948 | index 6c6b95947e71..b9c351ff0422 100644 | ||
2949 | --- a/fs/xfs/libxfs/xfs_ialloc_btree.c | ||
2950 | +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c | ||
2951 | @@ -82,11 +82,12 @@ xfs_finobt_set_root( | ||
2952 | } | ||
2953 | |||
2954 | STATIC int | ||
2955 | -xfs_inobt_alloc_block( | ||
2956 | +__xfs_inobt_alloc_block( | ||
2957 | struct xfs_btree_cur *cur, | ||
2958 | union xfs_btree_ptr *start, | ||
2959 | union xfs_btree_ptr *new, | ||
2960 | - int *stat) | ||
2961 | + int *stat, | ||
2962 | + enum xfs_ag_resv_type resv) | ||
2963 | { | ||
2964 | xfs_alloc_arg_t args; /* block allocation args */ | ||
2965 | int error; /* error return value */ | ||
2966 | @@ -103,6 +104,7 @@ xfs_inobt_alloc_block( | ||
2967 | args.maxlen = 1; | ||
2968 | args.prod = 1; | ||
2969 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | ||
2970 | + args.resv = resv; | ||
2971 | |||
2972 | error = xfs_alloc_vextent(&args); | ||
2973 | if (error) { | ||
2974 | @@ -123,6 +125,27 @@ xfs_inobt_alloc_block( | ||
2975 | } | ||
2976 | |||
2977 | STATIC int | ||
2978 | +xfs_inobt_alloc_block( | ||
2979 | + struct xfs_btree_cur *cur, | ||
2980 | + union xfs_btree_ptr *start, | ||
2981 | + union xfs_btree_ptr *new, | ||
2982 | + int *stat) | ||
2983 | +{ | ||
2984 | + return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); | ||
2985 | +} | ||
2986 | + | ||
2987 | +STATIC int | ||
2988 | +xfs_finobt_alloc_block( | ||
2989 | + struct xfs_btree_cur *cur, | ||
2990 | + union xfs_btree_ptr *start, | ||
2991 | + union xfs_btree_ptr *new, | ||
2992 | + int *stat) | ||
2993 | +{ | ||
2994 | + return __xfs_inobt_alloc_block(cur, start, new, stat, | ||
2995 | + XFS_AG_RESV_METADATA); | ||
2996 | +} | ||
2997 | + | ||
2998 | +STATIC int | ||
2999 | xfs_inobt_free_block( | ||
3000 | struct xfs_btree_cur *cur, | ||
3001 | struct xfs_buf *bp) | ||
3002 | @@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { | ||
3003 | |||
3004 | .dup_cursor = xfs_inobt_dup_cursor, | ||
3005 | .set_root = xfs_finobt_set_root, | ||
3006 | - .alloc_block = xfs_inobt_alloc_block, | ||
3007 | + .alloc_block = xfs_finobt_alloc_block, | ||
3008 | .free_block = xfs_inobt_free_block, | ||
3009 | .get_minrecs = xfs_inobt_get_minrecs, | ||
3010 | .get_maxrecs = xfs_inobt_get_maxrecs, | ||
3011 | @@ -478,3 +501,64 @@ xfs_inobt_rec_check_count( | ||
3012 | return 0; | ||
3013 | } | ||
3014 | #endif /* DEBUG */ | ||
3015 | + | ||
3016 | +static xfs_extlen_t | ||
3017 | +xfs_inobt_max_size( | ||
3018 | + struct xfs_mount *mp) | ||
3019 | +{ | ||
3020 | + /* Bail out if we're uninitialized, which can happen in mkfs. */ | ||
3021 | + if (mp->m_inobt_mxr[0] == 0) | ||
3022 | + return 0; | ||
3023 | + | ||
3024 | + return xfs_btree_calc_size(mp, mp->m_inobt_mnr, | ||
3025 | + (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock / | ||
3026 | + XFS_INODES_PER_CHUNK); | ||
3027 | +} | ||
3028 | + | ||
3029 | +static int | ||
3030 | +xfs_inobt_count_blocks( | ||
3031 | + struct xfs_mount *mp, | ||
3032 | + xfs_agnumber_t agno, | ||
3033 | + xfs_btnum_t btnum, | ||
3034 | + xfs_extlen_t *tree_blocks) | ||
3035 | +{ | ||
3036 | + struct xfs_buf *agbp; | ||
3037 | + struct xfs_btree_cur *cur; | ||
3038 | + int error; | ||
3039 | + | ||
3040 | + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); | ||
3041 | + if (error) | ||
3042 | + return error; | ||
3043 | + | ||
3044 | + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum); | ||
3045 | + error = xfs_btree_count_blocks(cur, tree_blocks); | ||
3046 | + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
3047 | + xfs_buf_relse(agbp); | ||
3048 | + | ||
3049 | + return error; | ||
3050 | +} | ||
3051 | + | ||
3052 | +/* | ||
3053 | + * Figure out how many blocks to reserve and how many are used by this btree. | ||
3054 | + */ | ||
3055 | +int | ||
3056 | +xfs_finobt_calc_reserves( | ||
3057 | + struct xfs_mount *mp, | ||
3058 | + xfs_agnumber_t agno, | ||
3059 | + xfs_extlen_t *ask, | ||
3060 | + xfs_extlen_t *used) | ||
3061 | +{ | ||
3062 | + xfs_extlen_t tree_len = 0; | ||
3063 | + int error; | ||
3064 | + | ||
3065 | + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) | ||
3066 | + return 0; | ||
3067 | + | ||
3068 | + error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len); | ||
3069 | + if (error) | ||
3070 | + return error; | ||
3071 | + | ||
3072 | + *ask += xfs_inobt_max_size(mp); | ||
3073 | + *used += tree_len; | ||
3074 | + return 0; | ||
3075 | +} | ||
3076 | diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h | ||
3077 | index bd88453217ce..aa81e2e63f3f 100644 | ||
3078 | --- a/fs/xfs/libxfs/xfs_ialloc_btree.h | ||
3079 | +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h | ||
3080 | @@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *, | ||
3081 | #define xfs_inobt_rec_check_count(mp, rec) 0 | ||
3082 | #endif /* DEBUG */ | ||
3083 | |||
3084 | +int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
3085 | + xfs_extlen_t *ask, xfs_extlen_t *used); | ||
3086 | + | ||
3087 | #endif /* __XFS_IALLOC_BTREE_H__ */ | ||
3088 | diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c | ||
3089 | index 222e103356c6..25c1e078aef6 100644 | ||
3090 | --- a/fs/xfs/libxfs/xfs_inode_fork.c | ||
3091 | +++ b/fs/xfs/libxfs/xfs_inode_fork.c | ||
3092 | @@ -26,6 +26,7 @@ | ||
3093 | #include "xfs_inode.h" | ||
3094 | #include "xfs_trans.h" | ||
3095 | #include "xfs_inode_item.h" | ||
3096 | +#include "xfs_btree.h" | ||
3097 | #include "xfs_bmap_btree.h" | ||
3098 | #include "xfs_bmap.h" | ||
3099 | #include "xfs_error.h" | ||
3100 | @@ -429,11 +430,13 @@ xfs_iformat_btree( | ||
3101 | /* REFERENCED */ | ||
3102 | int nrecs; | ||
3103 | int size; | ||
3104 | + int level; | ||
3105 | |||
3106 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
3107 | dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); | ||
3108 | size = XFS_BMAP_BROOT_SPACE(mp, dfp); | ||
3109 | nrecs = be16_to_cpu(dfp->bb_numrecs); | ||
3110 | + level = be16_to_cpu(dfp->bb_level); | ||
3111 | |||
3112 | /* | ||
3113 | * blow out if -- fork has less extents than can fit in | ||
3114 | @@ -446,7 +449,8 @@ xfs_iformat_btree( | ||
3115 | XFS_IFORK_MAXEXT(ip, whichfork) || | ||
3116 | XFS_BMDR_SPACE_CALC(nrecs) > | ||
3117 | XFS_DFORK_SIZE(dip, mp, whichfork) || | ||
3118 | - XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { | ||
3119 | + XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || | ||
3120 | + level == 0 || level > XFS_BTREE_MAXLEVELS) { | ||
3121 | xfs_warn(mp, "corrupt inode %Lu (btree).", | ||
3122 | (unsigned long long) ip->i_ino); | ||
3123 | XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, | ||
3124 | @@ -497,15 +501,14 @@ xfs_iread_extents( | ||
3125 | * We know that the size is valid (it's checked in iformat_btree) | ||
3126 | */ | ||
3127 | ifp->if_bytes = ifp->if_real_bytes = 0; | ||
3128 | - ifp->if_flags |= XFS_IFEXTENTS; | ||
3129 | xfs_iext_add(ifp, 0, nextents); | ||
3130 | error = xfs_bmap_read_extents(tp, ip, whichfork); | ||
3131 | if (error) { | ||
3132 | xfs_iext_destroy(ifp); | ||
3133 | - ifp->if_flags &= ~XFS_IFEXTENTS; | ||
3134 | return error; | ||
3135 | } | ||
3136 | xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); | ||
3137 | + ifp->if_flags |= XFS_IFEXTENTS; | ||
3138 | return 0; | ||
3139 | } | ||
3140 | /* | ||
3141 | diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c | ||
3142 | index 06763f5cc701..0457abe4118a 100644 | ||
3143 | --- a/fs/xfs/xfs_aops.c | ||
3144 | +++ b/fs/xfs/xfs_aops.c | ||
3145 | @@ -279,54 +279,49 @@ xfs_end_io( | ||
3146 | struct xfs_ioend *ioend = | ||
3147 | container_of(work, struct xfs_ioend, io_work); | ||
3148 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | ||
3149 | + xfs_off_t offset = ioend->io_offset; | ||
3150 | + size_t size = ioend->io_size; | ||
3151 | int error = ioend->io_bio->bi_error; | ||
3152 | |||
3153 | /* | ||
3154 | - * Set an error if the mount has shut down and proceed with end I/O | ||
3155 | - * processing so it can perform whatever cleanups are necessary. | ||
3156 | + * Just clean up the in-memory structures if the fs has been shut down. | ||
3157 | */ | ||
3158 | - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
3159 | + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
3160 | error = -EIO; | ||
3161 | + goto done; | ||
3162 | + } | ||
3163 | |||
3164 | /* | ||
3165 | - * For a CoW extent, we need to move the mapping from the CoW fork | ||
3166 | - * to the data fork. If instead an error happened, just dump the | ||
3167 | - * new blocks. | ||
3168 | + * Clean up any COW blocks on an I/O error. | ||
3169 | */ | ||
3170 | - if (ioend->io_type == XFS_IO_COW) { | ||
3171 | - if (error) | ||
3172 | - goto done; | ||
3173 | - if (ioend->io_bio->bi_error) { | ||
3174 | - error = xfs_reflink_cancel_cow_range(ip, | ||
3175 | - ioend->io_offset, ioend->io_size); | ||
3176 | - goto done; | ||
3177 | + if (unlikely(error)) { | ||
3178 | + switch (ioend->io_type) { | ||
3179 | + case XFS_IO_COW: | ||
3180 | + xfs_reflink_cancel_cow_range(ip, offset, size, true); | ||
3181 | + break; | ||
3182 | } | ||
3183 | - error = xfs_reflink_end_cow(ip, ioend->io_offset, | ||
3184 | - ioend->io_size); | ||
3185 | - if (error) | ||
3186 | - goto done; | ||
3187 | + | ||
3188 | + goto done; | ||
3189 | } | ||
3190 | |||
3191 | /* | ||
3192 | - * For unwritten extents we need to issue transactions to convert a | ||
3193 | - * range to normal written extens after the data I/O has finished. | ||
3194 | - * Detecting and handling completion IO errors is done individually | ||
3195 | - * for each case as different cleanup operations need to be performed | ||
3196 | - * on error. | ||
3197 | + * Success: commit the COW or unwritten blocks if needed. | ||
3198 | */ | ||
3199 | - if (ioend->io_type == XFS_IO_UNWRITTEN) { | ||
3200 | - if (error) | ||
3201 | - goto done; | ||
3202 | - error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | ||
3203 | - ioend->io_size); | ||
3204 | - } else if (ioend->io_append_trans) { | ||
3205 | - error = xfs_setfilesize_ioend(ioend, error); | ||
3206 | - } else { | ||
3207 | - ASSERT(!xfs_ioend_is_append(ioend) || | ||
3208 | - ioend->io_type == XFS_IO_COW); | ||
3209 | + switch (ioend->io_type) { | ||
3210 | + case XFS_IO_COW: | ||
3211 | + error = xfs_reflink_end_cow(ip, offset, size); | ||
3212 | + break; | ||
3213 | + case XFS_IO_UNWRITTEN: | ||
3214 | + error = xfs_iomap_write_unwritten(ip, offset, size); | ||
3215 | + break; | ||
3216 | + default: | ||
3217 | + ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); | ||
3218 | + break; | ||
3219 | } | ||
3220 | |||
3221 | done: | ||
3222 | + if (ioend->io_append_trans) | ||
3223 | + error = xfs_setfilesize_ioend(ioend, error); | ||
3224 | xfs_destroy_ioend(ioend, error); | ||
3225 | } | ||
3226 | |||
3227 | @@ -486,6 +481,12 @@ xfs_submit_ioend( | ||
3228 | struct xfs_ioend *ioend, | ||
3229 | int status) | ||
3230 | { | ||
3231 | + /* Convert CoW extents to regular */ | ||
3232 | + if (!status && ioend->io_type == XFS_IO_COW) { | ||
3233 | + status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), | ||
3234 | + ioend->io_offset, ioend->io_size); | ||
3235 | + } | ||
3236 | + | ||
3237 | /* Reserve log space if we might write beyond the on-disk inode size. */ | ||
3238 | if (!status && | ||
3239 | ioend->io_type != XFS_IO_UNWRITTEN && | ||
3240 | @@ -1257,44 +1258,6 @@ xfs_map_trim_size( | ||
3241 | bh_result->b_size = mapping_size; | ||
3242 | } | ||
3243 | |||
3244 | -/* Bounce unaligned directio writes to the page cache. */ | ||
3245 | -static int | ||
3246 | -xfs_bounce_unaligned_dio_write( | ||
3247 | - struct xfs_inode *ip, | ||
3248 | - xfs_fileoff_t offset_fsb, | ||
3249 | - struct xfs_bmbt_irec *imap) | ||
3250 | -{ | ||
3251 | - struct xfs_bmbt_irec irec; | ||
3252 | - xfs_fileoff_t delta; | ||
3253 | - bool shared; | ||
3254 | - bool x; | ||
3255 | - int error; | ||
3256 | - | ||
3257 | - irec = *imap; | ||
3258 | - if (offset_fsb > irec.br_startoff) { | ||
3259 | - delta = offset_fsb - irec.br_startoff; | ||
3260 | - irec.br_blockcount -= delta; | ||
3261 | - irec.br_startblock += delta; | ||
3262 | - irec.br_startoff = offset_fsb; | ||
3263 | - } | ||
3264 | - error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x); | ||
3265 | - if (error) | ||
3266 | - return error; | ||
3267 | - | ||
3268 | - /* | ||
3269 | - * We're here because we're trying to do a directio write to a | ||
3270 | - * region that isn't aligned to a filesystem block. If any part | ||
3271 | - * of the extent is shared, fall back to buffered mode to handle | ||
3272 | - * the RMW. This is done by returning -EREMCHG ("remote addr | ||
3273 | - * changed"), which is caught further up the call stack. | ||
3274 | - */ | ||
3275 | - if (shared) { | ||
3276 | - trace_xfs_reflink_bounce_dio_write(ip, imap); | ||
3277 | - return -EREMCHG; | ||
3278 | - } | ||
3279 | - return 0; | ||
3280 | -} | ||
3281 | - | ||
3282 | STATIC int | ||
3283 | __xfs_get_blocks( | ||
3284 | struct inode *inode, | ||
3285 | @@ -1432,13 +1395,6 @@ __xfs_get_blocks( | ||
3286 | if (imap.br_startblock != HOLESTARTBLOCK && | ||
3287 | imap.br_startblock != DELAYSTARTBLOCK && | ||
3288 | (create || !ISUNWRITTEN(&imap))) { | ||
3289 | - if (create && direct && !is_cow) { | ||
3290 | - error = xfs_bounce_unaligned_dio_write(ip, offset_fsb, | ||
3291 | - &imap); | ||
3292 | - if (error) | ||
3293 | - return error; | ||
3294 | - } | ||
3295 | - | ||
3296 | xfs_map_buffer(inode, bh_result, &imap, offset); | ||
3297 | if (ISUNWRITTEN(&imap)) | ||
3298 | set_buffer_unwritten(bh_result); | ||
3299 | diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c | ||
3300 | index efb8ccd6bbf2..5c395e485170 100644 | ||
3301 | --- a/fs/xfs/xfs_bmap_util.c | ||
3302 | +++ b/fs/xfs/xfs_bmap_util.c | ||
3303 | @@ -917,17 +917,18 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) | ||
3304 | */ | ||
3305 | int | ||
3306 | xfs_free_eofblocks( | ||
3307 | - xfs_mount_t *mp, | ||
3308 | - xfs_inode_t *ip, | ||
3309 | - bool need_iolock) | ||
3310 | + struct xfs_inode *ip) | ||
3311 | { | ||
3312 | - xfs_trans_t *tp; | ||
3313 | - int error; | ||
3314 | - xfs_fileoff_t end_fsb; | ||
3315 | - xfs_fileoff_t last_fsb; | ||
3316 | - xfs_filblks_t map_len; | ||
3317 | - int nimaps; | ||
3318 | - xfs_bmbt_irec_t imap; | ||
3319 | + struct xfs_trans *tp; | ||
3320 | + int error; | ||
3321 | + xfs_fileoff_t end_fsb; | ||
3322 | + xfs_fileoff_t last_fsb; | ||
3323 | + xfs_filblks_t map_len; | ||
3324 | + int nimaps; | ||
3325 | + struct xfs_bmbt_irec imap; | ||
3326 | + struct xfs_mount *mp = ip->i_mount; | ||
3327 | + | ||
3328 | + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
3329 | |||
3330 | /* | ||
3331 | * Figure out if there are any blocks beyond the end | ||
3332 | @@ -944,6 +945,10 @@ xfs_free_eofblocks( | ||
3333 | error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); | ||
3334 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
3335 | |||
3336 | + /* | ||
3337 | + * If there are blocks after the end of file, truncate the file to its | ||
3338 | + * current size to free them up. | ||
3339 | + */ | ||
3340 | if (!error && (nimaps != 0) && | ||
3341 | (imap.br_startblock != HOLESTARTBLOCK || | ||
3342 | ip->i_delayed_blks)) { | ||
3343 | @@ -954,22 +959,13 @@ xfs_free_eofblocks( | ||
3344 | if (error) | ||
3345 | return error; | ||
3346 | |||
3347 | - /* | ||
3348 | - * There are blocks after the end of file. | ||
3349 | - * Free them up now by truncating the file to | ||
3350 | - * its current size. | ||
3351 | - */ | ||
3352 | - if (need_iolock) { | ||
3353 | - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) | ||
3354 | - return -EAGAIN; | ||
3355 | - } | ||
3356 | + /* wait on dio to ensure i_size has settled */ | ||
3357 | + inode_dio_wait(VFS_I(ip)); | ||
3358 | |||
3359 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, | ||
3360 | &tp); | ||
3361 | if (error) { | ||
3362 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); | ||
3363 | - if (need_iolock) | ||
3364 | - xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
3365 | return error; | ||
3366 | } | ||
3367 | |||
3368 | @@ -997,8 +993,6 @@ xfs_free_eofblocks( | ||
3369 | } | ||
3370 | |||
3371 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
3372 | - if (need_iolock) | ||
3373 | - xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
3374 | } | ||
3375 | return error; | ||
3376 | } | ||
3377 | @@ -1393,10 +1387,16 @@ xfs_shift_file_space( | ||
3378 | xfs_fileoff_t stop_fsb; | ||
3379 | xfs_fileoff_t next_fsb; | ||
3380 | xfs_fileoff_t shift_fsb; | ||
3381 | + uint resblks; | ||
3382 | |||
3383 | ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); | ||
3384 | |||
3385 | if (direction == SHIFT_LEFT) { | ||
3386 | + /* | ||
3387 | + * Reserve blocks to cover potential extent merges after left | ||
3388 | + * shift operations. | ||
3389 | + */ | ||
3390 | + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); | ||
3391 | next_fsb = XFS_B_TO_FSB(mp, offset + len); | ||
3392 | stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); | ||
3393 | } else { | ||
3394 | @@ -1404,6 +1404,7 @@ xfs_shift_file_space( | ||
3395 | * If right shift, delegate the work of initialization of | ||
3396 | * next_fsb to xfs_bmap_shift_extent as it has ilock held. | ||
3397 | */ | ||
3398 | + resblks = 0; | ||
3399 | next_fsb = NULLFSBLOCK; | ||
3400 | stop_fsb = XFS_B_TO_FSB(mp, offset); | ||
3401 | } | ||
3402 | @@ -1415,7 +1416,7 @@ xfs_shift_file_space( | ||
3403 | * into the accessible region of the file. | ||
3404 | */ | ||
3405 | if (xfs_can_free_eofblocks(ip, true)) { | ||
3406 | - error = xfs_free_eofblocks(mp, ip, false); | ||
3407 | + error = xfs_free_eofblocks(ip); | ||
3408 | if (error) | ||
3409 | return error; | ||
3410 | } | ||
3411 | @@ -1445,21 +1446,14 @@ xfs_shift_file_space( | ||
3412 | } | ||
3413 | |||
3414 | while (!error && !done) { | ||
3415 | - /* | ||
3416 | - * We would need to reserve permanent block for transaction. | ||
3417 | - * This will come into picture when after shifting extent into | ||
3418 | - * hole we found that adjacent extents can be merged which | ||
3419 | - * may lead to freeing of a block during record update. | ||
3420 | - */ | ||
3421 | - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, | ||
3422 | - XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); | ||
3423 | + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, | ||
3424 | + &tp); | ||
3425 | if (error) | ||
3426 | break; | ||
3427 | |||
3428 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
3429 | error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, | ||
3430 | - ip->i_gdquot, ip->i_pdquot, | ||
3431 | - XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, | ||
3432 | + ip->i_gdquot, ip->i_pdquot, resblks, 0, | ||
3433 | XFS_QMOPT_RES_REGBLKS); | ||
3434 | if (error) | ||
3435 | goto out_trans_cancel; | ||
3436 | diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h | ||
3437 | index 68a621a8e0c0..f1005393785c 100644 | ||
3438 | --- a/fs/xfs/xfs_bmap_util.h | ||
3439 | +++ b/fs/xfs/xfs_bmap_util.h | ||
3440 | @@ -63,8 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, | ||
3441 | |||
3442 | /* EOF block manipulation functions */ | ||
3443 | bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); | ||
3444 | -int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, | ||
3445 | - bool need_iolock); | ||
3446 | +int xfs_free_eofblocks(struct xfs_inode *ip); | ||
3447 | |||
3448 | int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, | ||
3449 | struct xfs_swapext *sx); | ||
3450 | diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c | ||
3451 | index 2975cb2319f4..0306168af332 100644 | ||
3452 | --- a/fs/xfs/xfs_buf_item.c | ||
3453 | +++ b/fs/xfs/xfs_buf_item.c | ||
3454 | @@ -1162,6 +1162,7 @@ xfs_buf_iodone_callbacks( | ||
3455 | */ | ||
3456 | bp->b_last_error = 0; | ||
3457 | bp->b_retries = 0; | ||
3458 | + bp->b_first_retry_time = 0; | ||
3459 | |||
3460 | xfs_buf_do_callbacks(bp); | ||
3461 | bp->b_fspriv = NULL; | ||
3462 | diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c | ||
3463 | index 162dc186cf04..29c2f997aedf 100644 | ||
3464 | --- a/fs/xfs/xfs_extent_busy.c | ||
3465 | +++ b/fs/xfs/xfs_extent_busy.c | ||
3466 | @@ -45,18 +45,7 @@ xfs_extent_busy_insert( | ||
3467 | struct rb_node **rbp; | ||
3468 | struct rb_node *parent = NULL; | ||
3469 | |||
3470 | - new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL); | ||
3471 | - if (!new) { | ||
3472 | - /* | ||
3473 | - * No Memory! Since it is now not possible to track the free | ||
3474 | - * block, make this a synchronous transaction to insure that | ||
3475 | - * the block is not reused before this transaction commits. | ||
3476 | - */ | ||
3477 | - trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len); | ||
3478 | - xfs_trans_set_sync(tp); | ||
3479 | - return; | ||
3480 | - } | ||
3481 | - | ||
3482 | + new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP); | ||
3483 | new->agno = agno; | ||
3484 | new->bno = bno; | ||
3485 | new->length = len; | ||
3486 | diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c | ||
3487 | index 9a5d64b5f35a..1209ad29e902 100644 | ||
3488 | --- a/fs/xfs/xfs_file.c | ||
3489 | +++ b/fs/xfs/xfs_file.c | ||
3490 | @@ -554,6 +554,15 @@ xfs_file_dio_aio_write( | ||
3491 | if ((iocb->ki_pos & mp->m_blockmask) || | ||
3492 | ((iocb->ki_pos + count) & mp->m_blockmask)) { | ||
3493 | unaligned_io = 1; | ||
3494 | + | ||
3495 | + /* | ||
3496 | + * We can't properly handle unaligned direct I/O to reflink | ||
3497 | + * files yet, as we can't unshare a partial block. | ||
3498 | + */ | ||
3499 | + if (xfs_is_reflink_inode(ip)) { | ||
3500 | + trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count); | ||
3501 | + return -EREMCHG; | ||
3502 | + } | ||
3503 | iolock = XFS_IOLOCK_EXCL; | ||
3504 | } else { | ||
3505 | iolock = XFS_IOLOCK_SHARED; | ||
3506 | @@ -675,8 +684,10 @@ xfs_file_buffered_aio_write( | ||
3507 | struct xfs_inode *ip = XFS_I(inode); | ||
3508 | ssize_t ret; | ||
3509 | int enospc = 0; | ||
3510 | - int iolock = XFS_IOLOCK_EXCL; | ||
3511 | + int iolock; | ||
3512 | |||
3513 | +write_retry: | ||
3514 | + iolock = XFS_IOLOCK_EXCL; | ||
3515 | xfs_rw_ilock(ip, iolock); | ||
3516 | |||
3517 | ret = xfs_file_aio_write_checks(iocb, from, &iolock); | ||
3518 | @@ -686,7 +697,6 @@ xfs_file_buffered_aio_write( | ||
3519 | /* We can write back this queue in page reclaim */ | ||
3520 | current->backing_dev_info = inode_to_bdi(inode); | ||
3521 | |||
3522 | -write_retry: | ||
3523 | trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); | ||
3524 | ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); | ||
3525 | if (likely(ret >= 0)) | ||
3526 | @@ -702,18 +712,21 @@ xfs_file_buffered_aio_write( | ||
3527 | * running at the same time. | ||
3528 | */ | ||
3529 | if (ret == -EDQUOT && !enospc) { | ||
3530 | + xfs_rw_iunlock(ip, iolock); | ||
3531 | enospc = xfs_inode_free_quota_eofblocks(ip); | ||
3532 | if (enospc) | ||
3533 | goto write_retry; | ||
3534 | enospc = xfs_inode_free_quota_cowblocks(ip); | ||
3535 | if (enospc) | ||
3536 | goto write_retry; | ||
3537 | + iolock = 0; | ||
3538 | } else if (ret == -ENOSPC && !enospc) { | ||
3539 | struct xfs_eofblocks eofb = {0}; | ||
3540 | |||
3541 | enospc = 1; | ||
3542 | xfs_flush_inodes(ip->i_mount); | ||
3543 | - eofb.eof_scan_owner = ip->i_ino; /* for locking */ | ||
3544 | + | ||
3545 | + xfs_rw_iunlock(ip, iolock); | ||
3546 | eofb.eof_flags = XFS_EOF_FLAGS_SYNC; | ||
3547 | xfs_icache_free_eofblocks(ip->i_mount, &eofb); | ||
3548 | goto write_retry; | ||
3549 | @@ -721,7 +734,8 @@ xfs_file_buffered_aio_write( | ||
3550 | |||
3551 | current->backing_dev_info = NULL; | ||
3552 | out: | ||
3553 | - xfs_rw_iunlock(ip, iolock); | ||
3554 | + if (iolock) | ||
3555 | + xfs_rw_iunlock(ip, iolock); | ||
3556 | return ret; | ||
3557 | } | ||
3558 | |||
3559 | @@ -987,9 +1001,9 @@ xfs_dir_open( | ||
3560 | */ | ||
3561 | mode = xfs_ilock_data_map_shared(ip); | ||
3562 | if (ip->i_d.di_nextents > 0) | ||
3563 | - xfs_dir3_data_readahead(ip, 0, -1); | ||
3564 | + error = xfs_dir3_data_readahead(ip, 0, -1); | ||
3565 | xfs_iunlock(ip, mode); | ||
3566 | - return 0; | ||
3567 | + return error; | ||
3568 | } | ||
3569 | |||
3570 | STATIC int | ||
3571 | diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c | ||
3572 | index 29cc9886a3cb..3fb1f3fb8efe 100644 | ||
3573 | --- a/fs/xfs/xfs_icache.c | ||
3574 | +++ b/fs/xfs/xfs_icache.c | ||
3575 | @@ -1324,13 +1324,10 @@ xfs_inode_free_eofblocks( | ||
3576 | int flags, | ||
3577 | void *args) | ||
3578 | { | ||
3579 | - int ret; | ||
3580 | + int ret = 0; | ||
3581 | struct xfs_eofblocks *eofb = args; | ||
3582 | - bool need_iolock = true; | ||
3583 | int match; | ||
3584 | |||
3585 | - ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); | ||
3586 | - | ||
3587 | if (!xfs_can_free_eofblocks(ip, false)) { | ||
3588 | /* inode could be preallocated or append-only */ | ||
3589 | trace_xfs_inode_free_eofblocks_invalid(ip); | ||
3590 | @@ -1358,21 +1355,19 @@ xfs_inode_free_eofblocks( | ||
3591 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && | ||
3592 | XFS_ISIZE(ip) < eofb->eof_min_file_size) | ||
3593 | return 0; | ||
3594 | - | ||
3595 | - /* | ||
3596 | - * A scan owner implies we already hold the iolock. Skip it in | ||
3597 | - * xfs_free_eofblocks() to avoid deadlock. This also eliminates | ||
3598 | - * the possibility of EAGAIN being returned. | ||
3599 | - */ | ||
3600 | - if (eofb->eof_scan_owner == ip->i_ino) | ||
3601 | - need_iolock = false; | ||
3602 | } | ||
3603 | |||
3604 | - ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock); | ||
3605 | - | ||
3606 | - /* don't revisit the inode if we're not waiting */ | ||
3607 | - if (ret == -EAGAIN && !(flags & SYNC_WAIT)) | ||
3608 | - ret = 0; | ||
3609 | + /* | ||
3610 | + * If the caller is waiting, return -EAGAIN to keep the background | ||
3611 | + * scanner moving and revisit the inode in a subsequent pass. | ||
3612 | + */ | ||
3613 | + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { | ||
3614 | + if (flags & SYNC_WAIT) | ||
3615 | + ret = -EAGAIN; | ||
3616 | + return ret; | ||
3617 | + } | ||
3618 | + ret = xfs_free_eofblocks(ip); | ||
3619 | + xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
3620 | |||
3621 | return ret; | ||
3622 | } | ||
3623 | @@ -1419,15 +1414,10 @@ __xfs_inode_free_quota_eofblocks( | ||
3624 | struct xfs_eofblocks eofb = {0}; | ||
3625 | struct xfs_dquot *dq; | ||
3626 | |||
3627 | - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
3628 | - | ||
3629 | /* | ||
3630 | - * Set the scan owner to avoid a potential livelock. Otherwise, the scan | ||
3631 | - * can repeatedly trylock on the inode we're currently processing. We | ||
3632 | - * run a sync scan to increase effectiveness and use the union filter to | ||
3633 | + * Run a sync scan to increase effectiveness and use the union filter to | ||
3634 | * cover all applicable quotas in a single scan. | ||
3635 | */ | ||
3636 | - eofb.eof_scan_owner = ip->i_ino; | ||
3637 | eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; | ||
3638 | |||
3639 | if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { | ||
3640 | @@ -1579,12 +1569,9 @@ xfs_inode_free_cowblocks( | ||
3641 | { | ||
3642 | int ret; | ||
3643 | struct xfs_eofblocks *eofb = args; | ||
3644 | - bool need_iolock = true; | ||
3645 | int match; | ||
3646 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); | ||
3647 | |||
3648 | - ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); | ||
3649 | - | ||
3650 | /* | ||
3651 | * Just clear the tag if we have an empty cow fork or none at all. It's | ||
3652 | * possible the inode was fully unshared since it was originally tagged. | ||
3653 | @@ -1617,28 +1604,16 @@ xfs_inode_free_cowblocks( | ||
3654 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && | ||
3655 | XFS_ISIZE(ip) < eofb->eof_min_file_size) | ||
3656 | return 0; | ||
3657 | - | ||
3658 | - /* | ||
3659 | - * A scan owner implies we already hold the iolock. Skip it in | ||
3660 | - * xfs_free_eofblocks() to avoid deadlock. This also eliminates | ||
3661 | - * the possibility of EAGAIN being returned. | ||
3662 | - */ | ||
3663 | - if (eofb->eof_scan_owner == ip->i_ino) | ||
3664 | - need_iolock = false; | ||
3665 | } | ||
3666 | |||
3667 | /* Free the CoW blocks */ | ||
3668 | - if (need_iolock) { | ||
3669 | - xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
3670 | - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); | ||
3671 | - } | ||
3672 | + xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
3673 | + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); | ||
3674 | |||
3675 | - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); | ||
3676 | + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); | ||
3677 | |||
3678 | - if (need_iolock) { | ||
3679 | - xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); | ||
3680 | - xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
3681 | - } | ||
3682 | + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); | ||
3683 | + xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
3684 | |||
3685 | return ret; | ||
3686 | } | ||
3687 | diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h | ||
3688 | index a1e02f4708ab..8a7c849b4dea 100644 | ||
3689 | --- a/fs/xfs/xfs_icache.h | ||
3690 | +++ b/fs/xfs/xfs_icache.h | ||
3691 | @@ -27,7 +27,6 @@ struct xfs_eofblocks { | ||
3692 | kgid_t eof_gid; | ||
3693 | prid_t eof_prid; | ||
3694 | __u64 eof_min_file_size; | ||
3695 | - xfs_ino_t eof_scan_owner; | ||
3696 | }; | ||
3697 | |||
3698 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ | ||
3699 | @@ -102,7 +101,6 @@ xfs_fs_eofblocks_from_user( | ||
3700 | dst->eof_flags = src->eof_flags; | ||
3701 | dst->eof_prid = src->eof_prid; | ||
3702 | dst->eof_min_file_size = src->eof_min_file_size; | ||
3703 | - dst->eof_scan_owner = NULLFSINO; | ||
3704 | |||
3705 | dst->eof_uid = INVALID_UID; | ||
3706 | if (src->eof_flags & XFS_EOF_FLAGS_UID) { | ||
3707 | diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c | ||
3708 | index 512ff13ed66a..e50636c9a89c 100644 | ||
3709 | --- a/fs/xfs/xfs_inode.c | ||
3710 | +++ b/fs/xfs/xfs_inode.c | ||
3711 | @@ -1624,7 +1624,7 @@ xfs_itruncate_extents( | ||
3712 | |||
3713 | /* Remove all pending CoW reservations. */ | ||
3714 | error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, | ||
3715 | - last_block); | ||
3716 | + last_block, true); | ||
3717 | if (error) | ||
3718 | goto out; | ||
3719 | |||
3720 | @@ -1701,32 +1701,34 @@ xfs_release( | ||
3721 | if (xfs_can_free_eofblocks(ip, false)) { | ||
3722 | |||
3723 | /* | ||
3724 | + * Check if the inode is being opened, written and closed | ||
3725 | + * frequently and we have delayed allocation blocks outstanding | ||
3726 | + * (e.g. streaming writes from the NFS server), truncating the | ||
3727 | + * blocks past EOF will cause fragmentation to occur. | ||
3728 | + * | ||
3729 | + * In this case don't do the truncation, but we have to be | ||
3730 | + * careful how we detect this case. Blocks beyond EOF show up as | ||
3731 | + * i_delayed_blks even when the inode is clean, so we need to | ||
3732 | + * truncate them away first before checking for a dirty release. | ||
3733 | + * Hence on the first dirty close we will still remove the | ||
3734 | + * speculative allocation, but after that we will leave it in | ||
3735 | + * place. | ||
3736 | + */ | ||
3737 | + if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) | ||
3738 | + return 0; | ||
3739 | + /* | ||
3740 | * If we can't get the iolock just skip truncating the blocks | ||
3741 | * past EOF because we could deadlock with the mmap_sem | ||
3742 | - * otherwise. We'll get another chance to drop them once the | ||
3743 | + * otherwise. We'll get another chance to drop them once the | ||
3744 | * last reference to the inode is dropped, so we'll never leak | ||
3745 | * blocks permanently. | ||
3746 | - * | ||
3747 | - * Further, check if the inode is being opened, written and | ||
3748 | - * closed frequently and we have delayed allocation blocks | ||
3749 | - * outstanding (e.g. streaming writes from the NFS server), | ||
3750 | - * truncating the blocks past EOF will cause fragmentation to | ||
3751 | - * occur. | ||
3752 | - * | ||
3753 | - * In this case don't do the truncation, either, but we have to | ||
3754 | - * be careful how we detect this case. Blocks beyond EOF show | ||
3755 | - * up as i_delayed_blks even when the inode is clean, so we | ||
3756 | - * need to truncate them away first before checking for a dirty | ||
3757 | - * release. Hence on the first dirty close we will still remove | ||
3758 | - * the speculative allocation, but after that we will leave it | ||
3759 | - * in place. | ||
3760 | */ | ||
3761 | - if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) | ||
3762 | - return 0; | ||
3763 | - | ||
3764 | - error = xfs_free_eofblocks(mp, ip, true); | ||
3765 | - if (error && error != -EAGAIN) | ||
3766 | - return error; | ||
3767 | + if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { | ||
3768 | + error = xfs_free_eofblocks(ip); | ||
3769 | + xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
3770 | + if (error) | ||
3771 | + return error; | ||
3772 | + } | ||
3773 | |||
3774 | /* delalloc blocks after truncation means it really is dirty */ | ||
3775 | if (ip->i_delayed_blks) | ||
3776 | @@ -1801,22 +1803,23 @@ xfs_inactive_ifree( | ||
3777 | int error; | ||
3778 | |||
3779 | /* | ||
3780 | - * The ifree transaction might need to allocate blocks for record | ||
3781 | - * insertion to the finobt. We don't want to fail here at ENOSPC, so | ||
3782 | - * allow ifree to dip into the reserved block pool if necessary. | ||
3783 | - * | ||
3784 | - * Freeing large sets of inodes generally means freeing inode chunks, | ||
3785 | - * directory and file data blocks, so this should be relatively safe. | ||
3786 | - * Only under severe circumstances should it be possible to free enough | ||
3787 | - * inodes to exhaust the reserve block pool via finobt expansion while | ||
3788 | - * at the same time not creating free space in the filesystem. | ||
3789 | + * We try to use a per-AG reservation for any block needed by the finobt | ||
3790 | + * tree, but as the finobt feature predates the per-AG reservation | ||
3791 | + * support a degraded file system might not have enough space for the | ||
3792 | + * reservation at mount time. In that case try to dip into the reserved | ||
3793 | + * pool and pray. | ||
3794 | * | ||
3795 | * Send a warning if the reservation does happen to fail, as the inode | ||
3796 | * now remains allocated and sits on the unlinked list until the fs is | ||
3797 | * repaired. | ||
3798 | */ | ||
3799 | - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, | ||
3800 | - XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); | ||
3801 | + if (unlikely(mp->m_inotbt_nores)) { | ||
3802 | + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, | ||
3803 | + XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, | ||
3804 | + &tp); | ||
3805 | + } else { | ||
3806 | + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp); | ||
3807 | + } | ||
3808 | if (error) { | ||
3809 | if (error == -ENOSPC) { | ||
3810 | xfs_warn_ratelimited(mp, | ||
3811 | @@ -1912,8 +1915,11 @@ xfs_inactive( | ||
3812 | * cache. Post-eof blocks must be freed, lest we end up with | ||
3813 | * broken free space accounting. | ||
3814 | */ | ||
3815 | - if (xfs_can_free_eofblocks(ip, true)) | ||
3816 | - xfs_free_eofblocks(mp, ip, false); | ||
3817 | + if (xfs_can_free_eofblocks(ip, true)) { | ||
3818 | + xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
3819 | + xfs_free_eofblocks(ip); | ||
3820 | + xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
3821 | + } | ||
3822 | |||
3823 | return; | ||
3824 | } | ||
3825 | diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c | ||
3826 | index e8889614cec3..360562484e7b 100644 | ||
3827 | --- a/fs/xfs/xfs_iomap.c | ||
3828 | +++ b/fs/xfs/xfs_iomap.c | ||
3829 | @@ -637,6 +637,11 @@ xfs_file_iomap_begin_delay( | ||
3830 | goto out_unlock; | ||
3831 | } | ||
3832 | |||
3833 | + /* | ||
3834 | + * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch | ||
3835 | + * them out if the write happens to fail. | ||
3836 | + */ | ||
3837 | + iomap->flags = IOMAP_F_NEW; | ||
3838 | trace_xfs_iomap_alloc(ip, offset, count, 0, &got); | ||
3839 | done: | ||
3840 | if (isnullstartblock(got.br_startblock)) | ||
3841 | @@ -685,7 +690,7 @@ xfs_iomap_write_allocate( | ||
3842 | int nres; | ||
3843 | |||
3844 | if (whichfork == XFS_COW_FORK) | ||
3845 | - flags |= XFS_BMAPI_COWFORK; | ||
3846 | + flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC; | ||
3847 | |||
3848 | /* | ||
3849 | * Make sure that the dquots are there. | ||
3850 | @@ -1061,7 +1066,8 @@ xfs_file_iomap_end_delalloc( | ||
3851 | struct xfs_inode *ip, | ||
3852 | loff_t offset, | ||
3853 | loff_t length, | ||
3854 | - ssize_t written) | ||
3855 | + ssize_t written, | ||
3856 | + struct iomap *iomap) | ||
3857 | { | ||
3858 | struct xfs_mount *mp = ip->i_mount; | ||
3859 | xfs_fileoff_t start_fsb; | ||
3860 | @@ -1080,14 +1086,14 @@ xfs_file_iomap_end_delalloc( | ||
3861 | end_fsb = XFS_B_TO_FSB(mp, offset + length); | ||
3862 | |||
3863 | /* | ||
3864 | - * Trim back delalloc blocks if we didn't manage to write the whole | ||
3865 | - * range reserved. | ||
3866 | + * Trim delalloc blocks if they were allocated by this write and we | ||
3867 | + * didn't manage to write the whole range. | ||
3868 | * | ||
3869 | * We don't need to care about racing delalloc as we hold i_mutex | ||
3870 | * across the reserve/allocate/unreserve calls. If there are delalloc | ||
3871 | * blocks in the range, they are ours. | ||
3872 | */ | ||
3873 | - if (start_fsb < end_fsb) { | ||
3874 | + if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { | ||
3875 | truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), | ||
3876 | XFS_FSB_TO_B(mp, end_fsb) - 1); | ||
3877 | |||
3878 | @@ -1117,7 +1123,7 @@ xfs_file_iomap_end( | ||
3879 | { | ||
3880 | if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) | ||
3881 | return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, | ||
3882 | - length, written); | ||
3883 | + length, written, iomap); | ||
3884 | return 0; | ||
3885 | } | ||
3886 | |||
3887 | diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c | ||
3888 | index b341f10cf481..13796f212f98 100644 | ||
3889 | --- a/fs/xfs/xfs_mount.c | ||
3890 | +++ b/fs/xfs/xfs_mount.c | ||
3891 | @@ -502,8 +502,7 @@ STATIC void | ||
3892 | xfs_set_inoalignment(xfs_mount_t *mp) | ||
3893 | { | ||
3894 | if (xfs_sb_version_hasalign(&mp->m_sb) && | ||
3895 | - mp->m_sb.sb_inoalignmt >= | ||
3896 | - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) | ||
3897 | + mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) | ||
3898 | mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; | ||
3899 | else | ||
3900 | mp->m_inoalign_mask = 0; | ||
3901 | diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h | ||
3902 | index 819b80b15bfb..1bf878b0492c 100644 | ||
3903 | --- a/fs/xfs/xfs_mount.h | ||
3904 | +++ b/fs/xfs/xfs_mount.h | ||
3905 | @@ -140,6 +140,7 @@ typedef struct xfs_mount { | ||
3906 | int m_fixedfsid[2]; /* unchanged for life of FS */ | ||
3907 | uint m_dmevmask; /* DMI events for this FS */ | ||
3908 | __uint64_t m_flags; /* global mount flags */ | ||
3909 | + bool m_inotbt_nores; /* no per-AG finobt resv. */ | ||
3910 | int m_ialloc_inos; /* inodes in inode allocation */ | ||
3911 | int m_ialloc_blks; /* blocks in inode allocation */ | ||
3912 | int m_ialloc_min_blks;/* min blocks in sparse inode | ||
3913 | diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c | ||
3914 | index 4d3f74e3c5e1..2252f163c38f 100644 | ||
3915 | --- a/fs/xfs/xfs_reflink.c | ||
3916 | +++ b/fs/xfs/xfs_reflink.c | ||
3917 | @@ -82,11 +82,22 @@ | ||
3918 | * mappings are a reservation against the free space in the filesystem; | ||
3919 | * adjacent mappings can also be combined into fewer larger mappings. | ||
3920 | * | ||
3921 | + * As an optimization, the CoW extent size hint (cowextsz) creates | ||
3922 | + * outsized aligned delalloc reservations in the hope of landing out of | ||
3923 | + * order nearby CoW writes in a single extent on disk, thereby reducing | ||
3924 | + * fragmentation and improving future performance. | ||
3925 | + * | ||
3926 | + * D: --RRRRRRSSSRRRRRRRR--- (data fork) | ||
3927 | + * C: ------DDDDDDD--------- (CoW fork) | ||
3928 | + * | ||
3929 | * When dirty pages are being written out (typically in writepage), the | ||
3930 | - * delalloc reservations are converted into real mappings by allocating | ||
3931 | - * blocks and replacing the delalloc mapping with real ones. A delalloc | ||
3932 | - * mapping can be replaced by several real ones if the free space is | ||
3933 | - * fragmented. | ||
3934 | + * delalloc reservations are converted into unwritten mappings by | ||
3935 | + * allocating blocks and replacing the delalloc mapping with real ones. | ||
3936 | + * A delalloc mapping can be replaced by several unwritten ones if the | ||
3937 | + * free space is fragmented. | ||
3938 | + * | ||
3939 | + * D: --RRRRRRSSSRRRRRRRR--- | ||
3940 | + * C: ------UUUUUUU--------- | ||
3941 | * | ||
3942 | * We want to adapt the delalloc mechanism for copy-on-write, since the | ||
3943 | * write paths are similar. The first two steps (creating the reservation | ||
3944 | @@ -101,13 +112,29 @@ | ||
3945 | * Block-aligned directio writes will use the same mechanism as buffered | ||
3946 | * writes. | ||
3947 | * | ||
3948 | + * Just prior to submitting the actual disk write requests, we convert | ||
3949 | + * the extents representing the range of the file actually being written | ||
3950 | + * (as opposed to extra pieces created for the cowextsize hint) to real | ||
3951 | + * extents. This will become important in the next step: | ||
3952 | + * | ||
3953 | + * D: --RRRRRRSSSRRRRRRRR--- | ||
3954 | + * C: ------UUrrUUU--------- | ||
3955 | + * | ||
3956 | * CoW remapping must be done after the data block write completes, | ||
3957 | * because we don't want to destroy the old data fork map until we're sure | ||
3958 | * the new block has been written. Since the new mappings are kept in a | ||
3959 | * separate fork, we can simply iterate these mappings to find the ones | ||
3960 | * that cover the file blocks that we just CoW'd. For each extent, simply | ||
3961 | * unmap the corresponding range in the data fork, map the new range into | ||
3962 | - * the data fork, and remove the extent from the CoW fork. | ||
3963 | + * the data fork, and remove the extent from the CoW fork. Because of | ||
3964 | + * the presence of the cowextsize hint, however, we must be careful | ||
3965 | + * only to remap the blocks that we've actually written out -- we must | ||
3966 | + * never remap delalloc reservations nor CoW staging blocks that have | ||
3967 | + * yet to be written. This corresponds exactly to the real extents in | ||
3968 | + * the CoW fork: | ||
3969 | + * | ||
3970 | + * D: --RRRRRRrrSRRRRRRRR--- | ||
3971 | + * C: ------UU--UUU--------- | ||
3972 | * | ||
3973 | * Since the remapping operation can be applied to an arbitrary file | ||
3974 | * range, we record the need for the remap step as a flag in the ioend | ||
3975 | @@ -296,6 +323,65 @@ xfs_reflink_reserve_cow( | ||
3976 | return 0; | ||
3977 | } | ||
3978 | |||
3979 | +/* Convert part of an unwritten CoW extent to a real one. */ | ||
3980 | +STATIC int | ||
3981 | +xfs_reflink_convert_cow_extent( | ||
3982 | + struct xfs_inode *ip, | ||
3983 | + struct xfs_bmbt_irec *imap, | ||
3984 | + xfs_fileoff_t offset_fsb, | ||
3985 | + xfs_filblks_t count_fsb, | ||
3986 | + struct xfs_defer_ops *dfops) | ||
3987 | +{ | ||
3988 | + struct xfs_bmbt_irec irec = *imap; | ||
3989 | + xfs_fsblock_t first_block; | ||
3990 | + int nimaps = 1; | ||
3991 | + | ||
3992 | + if (imap->br_state == XFS_EXT_NORM) | ||
3993 | + return 0; | ||
3994 | + | ||
3995 | + xfs_trim_extent(&irec, offset_fsb, count_fsb); | ||
3996 | + trace_xfs_reflink_convert_cow(ip, &irec); | ||
3997 | + if (irec.br_blockcount == 0) | ||
3998 | + return 0; | ||
3999 | + return xfs_bmapi_write(NULL, ip, irec.br_startoff, irec.br_blockcount, | ||
4000 | + XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block, | ||
4001 | + 0, &irec, &nimaps, dfops); | ||
4002 | +} | ||
4003 | + | ||
4004 | +/* Convert all of the unwritten CoW extents in a file's range to real ones. */ | ||
4005 | +int | ||
4006 | +xfs_reflink_convert_cow( | ||
4007 | + struct xfs_inode *ip, | ||
4008 | + xfs_off_t offset, | ||
4009 | + xfs_off_t count) | ||
4010 | +{ | ||
4011 | + struct xfs_bmbt_irec got; | ||
4012 | + struct xfs_defer_ops dfops; | ||
4013 | + struct xfs_mount *mp = ip->i_mount; | ||
4014 | + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); | ||
4015 | + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
4016 | + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); | ||
4017 | + xfs_extnum_t idx; | ||
4018 | + bool found; | ||
4019 | + int error = 0; | ||
4020 | + | ||
4021 | + xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
4022 | + | ||
4023 | + /* Convert all the extents to real from unwritten. */ | ||
4024 | + for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); | ||
4025 | + found && got.br_startoff < end_fsb; | ||
4026 | + found = xfs_iext_get_extent(ifp, ++idx, &got)) { | ||
4027 | + error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb, | ||
4028 | + end_fsb - offset_fsb, &dfops); | ||
4029 | + if (error) | ||
4030 | + break; | ||
4031 | + } | ||
4032 | + | ||
4033 | + /* Finish up. */ | ||
4034 | + xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
4035 | + return error; | ||
4036 | +} | ||
4037 | + | ||
4038 | /* Allocate all CoW reservations covering a range of blocks in a file. */ | ||
4039 | static int | ||
4040 | __xfs_reflink_allocate_cow( | ||
4041 | @@ -328,6 +414,7 @@ __xfs_reflink_allocate_cow( | ||
4042 | goto out_unlock; | ||
4043 | ASSERT(nimaps == 1); | ||
4044 | |||
4045 | + /* Make sure there's a CoW reservation for it. */ | ||
4046 | error = xfs_reflink_reserve_cow(ip, &imap, &shared); | ||
4047 | if (error) | ||
4048 | goto out_trans_cancel; | ||
4049 | @@ -337,14 +424,16 @@ __xfs_reflink_allocate_cow( | ||
4050 | goto out_trans_cancel; | ||
4051 | } | ||
4052 | |||
4053 | + /* Allocate the entire reservation as unwritten blocks. */ | ||
4054 | xfs_trans_ijoin(tp, ip, 0); | ||
4055 | error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, | ||
4056 | - XFS_BMAPI_COWFORK, &first_block, | ||
4057 | + XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block, | ||
4058 | XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), | ||
4059 | &imap, &nimaps, &dfops); | ||
4060 | if (error) | ||
4061 | goto out_trans_cancel; | ||
4062 | |||
4063 | + /* Finish up. */ | ||
4064 | error = xfs_defer_finish(&tp, &dfops, NULL); | ||
4065 | if (error) | ||
4066 | goto out_trans_cancel; | ||
4067 | @@ -389,11 +478,12 @@ xfs_reflink_allocate_cow_range( | ||
4068 | if (error) { | ||
4069 | trace_xfs_reflink_allocate_cow_range_error(ip, error, | ||
4070 | _RET_IP_); | ||
4071 | - break; | ||
4072 | + return error; | ||
4073 | } | ||
4074 | } | ||
4075 | |||
4076 | - return error; | ||
4077 | + /* Convert the CoW extents to regular. */ | ||
4078 | + return xfs_reflink_convert_cow(ip, offset, count); | ||
4079 | } | ||
4080 | |||
4081 | /* | ||
4082 | @@ -481,14 +571,18 @@ xfs_reflink_trim_irec_to_next_cow( | ||
4083 | } | ||
4084 | |||
4085 | /* | ||
4086 | - * Cancel all pending CoW reservations for some block range of an inode. | ||
4087 | + * Cancel CoW reservations for some block range of an inode. | ||
4088 | + * | ||
4089 | + * If cancel_real is true this function cancels all COW fork extents for the | ||
4090 | + * inode; if cancel_real is false, real extents are not cleared. | ||
4091 | */ | ||
4092 | int | ||
4093 | xfs_reflink_cancel_cow_blocks( | ||
4094 | struct xfs_inode *ip, | ||
4095 | struct xfs_trans **tpp, | ||
4096 | xfs_fileoff_t offset_fsb, | ||
4097 | - xfs_fileoff_t end_fsb) | ||
4098 | + xfs_fileoff_t end_fsb, | ||
4099 | + bool cancel_real) | ||
4100 | { | ||
4101 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); | ||
4102 | struct xfs_bmbt_irec got, prev, del; | ||
4103 | @@ -515,7 +609,7 @@ xfs_reflink_cancel_cow_blocks( | ||
4104 | &idx, &got, &del); | ||
4105 | if (error) | ||
4106 | break; | ||
4107 | - } else { | ||
4108 | + } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { | ||
4109 | xfs_trans_ijoin(*tpp, ip, 0); | ||
4110 | xfs_defer_init(&dfops, &firstfsb); | ||
4111 | |||
4112 | @@ -558,13 +652,17 @@ xfs_reflink_cancel_cow_blocks( | ||
4113 | } | ||
4114 | |||
4115 | /* | ||
4116 | - * Cancel all pending CoW reservations for some byte range of an inode. | ||
4117 | + * Cancel CoW reservations for some byte range of an inode. | ||
4118 | + * | ||
4119 | + * If cancel_real is true this function cancels all COW fork extents for the | ||
4120 | + * inode; if cancel_real is false, real extents are not cleared. | ||
4121 | */ | ||
4122 | int | ||
4123 | xfs_reflink_cancel_cow_range( | ||
4124 | struct xfs_inode *ip, | ||
4125 | xfs_off_t offset, | ||
4126 | - xfs_off_t count) | ||
4127 | + xfs_off_t count, | ||
4128 | + bool cancel_real) | ||
4129 | { | ||
4130 | struct xfs_trans *tp; | ||
4131 | xfs_fileoff_t offset_fsb; | ||
4132 | @@ -590,7 +688,8 @@ xfs_reflink_cancel_cow_range( | ||
4133 | xfs_trans_ijoin(tp, ip, 0); | ||
4134 | |||
4135 | /* Scrape out the old CoW reservations */ | ||
4136 | - error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); | ||
4137 | + error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, | ||
4138 | + cancel_real); | ||
4139 | if (error) | ||
4140 | goto out_cancel; | ||
4141 | |||
4142 | @@ -669,6 +768,16 @@ xfs_reflink_end_cow( | ||
4143 | |||
4144 | ASSERT(!isnullstartblock(got.br_startblock)); | ||
4145 | |||
4146 | + /* | ||
4147 | + * Don't remap unwritten extents; these are | ||
4148 | + * speculatively preallocated CoW extents that have been | ||
4149 | + * allocated but have not yet been involved in a write. | ||
4150 | + */ | ||
4151 | + if (got.br_state == XFS_EXT_UNWRITTEN) { | ||
4152 | + idx--; | ||
4153 | + goto next_extent; | ||
4154 | + } | ||
4155 | + | ||
4156 | /* Unmap the old blocks in the data fork. */ | ||
4157 | xfs_defer_init(&dfops, &firstfsb); | ||
4158 | rlen = del.br_blockcount; | ||
4159 | @@ -885,13 +994,14 @@ STATIC int | ||
4160 | xfs_reflink_update_dest( | ||
4161 | struct xfs_inode *dest, | ||
4162 | xfs_off_t newlen, | ||
4163 | - xfs_extlen_t cowextsize) | ||
4164 | + xfs_extlen_t cowextsize, | ||
4165 | + bool is_dedupe) | ||
4166 | { | ||
4167 | struct xfs_mount *mp = dest->i_mount; | ||
4168 | struct xfs_trans *tp; | ||
4169 | int error; | ||
4170 | |||
4171 | - if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) | ||
4172 | + if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) | ||
4173 | return 0; | ||
4174 | |||
4175 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); | ||
4176 | @@ -912,6 +1022,10 @@ xfs_reflink_update_dest( | ||
4177 | dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; | ||
4178 | } | ||
4179 | |||
4180 | + if (!is_dedupe) { | ||
4181 | + xfs_trans_ichgtime(tp, dest, | ||
4182 | + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
4183 | + } | ||
4184 | xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); | ||
4185 | |||
4186 | error = xfs_trans_commit(tp); | ||
4187 | @@ -1428,7 +1542,8 @@ xfs_reflink_remap_range( | ||
4188 | !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) | ||
4189 | cowextsize = src->i_d.di_cowextsize; | ||
4190 | |||
4191 | - ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); | ||
4192 | + ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, | ||
4193 | + is_dedupe); | ||
4194 | |||
4195 | out_unlock: | ||
4196 | xfs_iunlock(src, XFS_MMAPLOCK_EXCL); | ||
4197 | @@ -1580,7 +1695,7 @@ xfs_reflink_clear_inode_flag( | ||
4198 | * We didn't find any shared blocks so turn off the reflink flag. | ||
4199 | * First, get rid of any leftover CoW mappings. | ||
4200 | */ | ||
4201 | - error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); | ||
4202 | + error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true); | ||
4203 | if (error) | ||
4204 | return error; | ||
4205 | |||
4206 | diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h | ||
4207 | index 97ea9b487884..a57966fc7ddd 100644 | ||
4208 | --- a/fs/xfs/xfs_reflink.h | ||
4209 | +++ b/fs/xfs/xfs_reflink.h | ||
4210 | @@ -30,6 +30,8 @@ extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, | ||
4211 | struct xfs_bmbt_irec *imap, bool *shared); | ||
4212 | extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, | ||
4213 | xfs_off_t offset, xfs_off_t count); | ||
4214 | +extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset, | ||
4215 | + xfs_off_t count); | ||
4216 | extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, | ||
4217 | struct xfs_bmbt_irec *imap, bool *need_alloc); | ||
4218 | extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, | ||
4219 | @@ -37,9 +39,9 @@ extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, | ||
4220 | |||
4221 | extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, | ||
4222 | struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, | ||
4223 | - xfs_fileoff_t end_fsb); | ||
4224 | + xfs_fileoff_t end_fsb, bool cancel_real); | ||
4225 | extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, | ||
4226 | - xfs_off_t count); | ||
4227 | + xfs_off_t count, bool cancel_real); | ||
4228 | extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, | ||
4229 | xfs_off_t count); | ||
4230 | extern int xfs_reflink_recover_cow(struct xfs_mount *mp); | ||
4231 | diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c | ||
4232 | index ade4691e3f74..dbbd3f1fd2b7 100644 | ||
4233 | --- a/fs/xfs/xfs_super.c | ||
4234 | +++ b/fs/xfs/xfs_super.c | ||
4235 | @@ -948,7 +948,7 @@ xfs_fs_destroy_inode( | ||
4236 | XFS_STATS_INC(ip->i_mount, vn_remove); | ||
4237 | |||
4238 | if (xfs_is_reflink_inode(ip)) { | ||
4239 | - error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); | ||
4240 | + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); | ||
4241 | if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
4242 | xfs_warn(ip->i_mount, | ||
4243 | "Error %d while evicting CoW blocks for inode %llu.", | ||
4244 | diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h | ||
4245 | index 0907752be62d..828f383df121 100644 | ||
4246 | --- a/fs/xfs/xfs_trace.h | ||
4247 | +++ b/fs/xfs/xfs_trace.h | ||
4248 | @@ -3183,6 +3183,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, | ||
4249 | __field(xfs_fileoff_t, lblk) | ||
4250 | __field(xfs_extlen_t, len) | ||
4251 | __field(xfs_fsblock_t, pblk) | ||
4252 | + __field(int, state) | ||
4253 | ), | ||
4254 | TP_fast_assign( | ||
4255 | __entry->dev = VFS_I(ip)->i_sb->s_dev; | ||
4256 | @@ -3190,13 +3191,15 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, | ||
4257 | __entry->lblk = irec->br_startoff; | ||
4258 | __entry->len = irec->br_blockcount; | ||
4259 | __entry->pblk = irec->br_startblock; | ||
4260 | + __entry->state = irec->br_state; | ||
4261 | ), | ||
4262 | - TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu", | ||
4263 | + TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d", | ||
4264 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
4265 | __entry->ino, | ||
4266 | __entry->lblk, | ||
4267 | __entry->len, | ||
4268 | - __entry->pblk) | ||
4269 | + __entry->pblk, | ||
4270 | + __entry->state) | ||
4271 | ); | ||
4272 | #define DEFINE_INODE_IREC_EVENT(name) \ | ||
4273 | DEFINE_EVENT(xfs_inode_irec_class, name, \ | ||
4274 | @@ -3345,11 +3348,12 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); | ||
4275 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); | ||
4276 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); | ||
4277 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); | ||
4278 | +DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); | ||
4279 | |||
4280 | DEFINE_RW_EVENT(xfs_reflink_reserve_cow); | ||
4281 | DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); | ||
4282 | |||
4283 | -DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); | ||
4284 | +DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write); | ||
4285 | DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping); | ||
4286 | DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); | ||
4287 | |||
4288 | diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h | ||
4289 | index 01c0b9cc3915..8c58db2c09c6 100644 | ||
4290 | --- a/include/linux/kvm_host.h | ||
4291 | +++ b/include/linux/kvm_host.h | ||
4292 | @@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | ||
4293 | int len, void *val); | ||
4294 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||
4295 | int len, struct kvm_io_device *dev); | ||
4296 | -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||
4297 | - struct kvm_io_device *dev); | ||
4298 | +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||
4299 | + struct kvm_io_device *dev); | ||
4300 | struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||
4301 | gpa_t addr); | ||
4302 | |||
4303 | diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h | ||
4304 | index 254698856b8f..8b35bdbdc214 100644 | ||
4305 | --- a/include/linux/memcontrol.h | ||
4306 | +++ b/include/linux/memcontrol.h | ||
4307 | @@ -739,6 +739,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) | ||
4308 | return false; | ||
4309 | } | ||
4310 | |||
4311 | +static inline void mem_cgroup_update_page_stat(struct page *page, | ||
4312 | + enum mem_cgroup_stat_index idx, | ||
4313 | + int nr) | ||
4314 | +{ | ||
4315 | +} | ||
4316 | + | ||
4317 | static inline void mem_cgroup_inc_page_stat(struct page *page, | ||
4318 | enum mem_cgroup_stat_index idx) | ||
4319 | { | ||
4320 | diff --git a/kernel/padata.c b/kernel/padata.c | ||
4321 | index 7848f0566403..b4a3c0ae649b 100644 | ||
4322 | --- a/kernel/padata.c | ||
4323 | +++ b/kernel/padata.c | ||
4324 | @@ -190,19 +190,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) | ||
4325 | |||
4326 | reorder = &next_queue->reorder; | ||
4327 | |||
4328 | + spin_lock(&reorder->lock); | ||
4329 | if (!list_empty(&reorder->list)) { | ||
4330 | padata = list_entry(reorder->list.next, | ||
4331 | struct padata_priv, list); | ||
4332 | |||
4333 | - spin_lock(&reorder->lock); | ||
4334 | list_del_init(&padata->list); | ||
4335 | atomic_dec(&pd->reorder_objects); | ||
4336 | - spin_unlock(&reorder->lock); | ||
4337 | |||
4338 | pd->processed++; | ||
4339 | |||
4340 | + spin_unlock(&reorder->lock); | ||
4341 | goto out; | ||
4342 | } | ||
4343 | + spin_unlock(&reorder->lock); | ||
4344 | |||
4345 | if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) { | ||
4346 | padata = ERR_PTR(-ENODATA); | ||
4347 | diff --git a/lib/syscall.c b/lib/syscall.c | ||
4348 | index 63239e097b13..a72cd0996230 100644 | ||
4349 | --- a/lib/syscall.c | ||
4350 | +++ b/lib/syscall.c | ||
4351 | @@ -11,6 +11,7 @@ static int collect_syscall(struct task_struct *target, long *callno, | ||
4352 | |||
4353 | if (!try_get_task_stack(target)) { | ||
4354 | /* Task has no stack, so the task isn't in a syscall. */ | ||
4355 | + *sp = *pc = 0; | ||
4356 | *callno = -1; | ||
4357 | return 0; | ||
4358 | } | ||
4359 | diff --git a/mm/hugetlb.c b/mm/hugetlb.c | ||
4360 | index b6adedbafaf5..65c36acf8a6b 100644 | ||
4361 | --- a/mm/hugetlb.c | ||
4362 | +++ b/mm/hugetlb.c | ||
4363 | @@ -4471,6 +4471,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | ||
4364 | { | ||
4365 | struct page *page = NULL; | ||
4366 | spinlock_t *ptl; | ||
4367 | + pte_t pte; | ||
4368 | retry: | ||
4369 | ptl = pmd_lockptr(mm, pmd); | ||
4370 | spin_lock(ptl); | ||
4371 | @@ -4480,12 +4481,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | ||
4372 | */ | ||
4373 | if (!pmd_huge(*pmd)) | ||
4374 | goto out; | ||
4375 | - if (pmd_present(*pmd)) { | ||
4376 | + pte = huge_ptep_get((pte_t *)pmd); | ||
4377 | + if (pte_present(pte)) { | ||
4378 | page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); | ||
4379 | if (flags & FOLL_GET) | ||
4380 | get_page(page); | ||
4381 | } else { | ||
4382 | - if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) { | ||
4383 | + if (is_hugetlb_entry_migration(pte)) { | ||
4384 | spin_unlock(ptl); | ||
4385 | __migration_entry_wait(mm, (pte_t *)pmd, ptl); | ||
4386 | goto retry; | ||
4387 | diff --git a/mm/rmap.c b/mm/rmap.c | ||
4388 | index 1ef36404e7b2..cd37c1c7e21b 100644 | ||
4389 | --- a/mm/rmap.c | ||
4390 | +++ b/mm/rmap.c | ||
4391 | @@ -1295,7 +1295,7 @@ void page_add_file_rmap(struct page *page, bool compound) | ||
4392 | goto out; | ||
4393 | } | ||
4394 | __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr); | ||
4395 | - mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); | ||
4396 | + mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr); | ||
4397 | out: | ||
4398 | unlock_page_memcg(page); | ||
4399 | } | ||
4400 | @@ -1335,7 +1335,7 @@ static void page_remove_file_rmap(struct page *page, bool compound) | ||
4401 | * pte lock(a spinlock) is held, which implies preemption disabled. | ||
4402 | */ | ||
4403 | __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr); | ||
4404 | - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); | ||
4405 | + mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr); | ||
4406 | |||
4407 | if (unlikely(PageMlocked(page))) | ||
4408 | clear_page_mlock(page); | ||
4409 | diff --git a/mm/workingset.c b/mm/workingset.c | ||
4410 | index 33f6f4db32fd..4c4f05655e6e 100644 | ||
4411 | --- a/mm/workingset.c | ||
4412 | +++ b/mm/workingset.c | ||
4413 | @@ -492,7 +492,7 @@ static int __init workingset_init(void) | ||
4414 | pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", | ||
4415 | timestamp_bits, max_order, bucket_order); | ||
4416 | |||
4417 | - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key); | ||
4418 | + ret = __list_lru_init(&workingset_shadow_nodes, true, &shadow_nodes_key); | ||
4419 | if (ret) | ||
4420 | goto err; | ||
4421 | ret = register_shrinker(&workingset_shadow_shrinker); | ||
4422 | diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c | ||
4423 | index 2efb335deada..25a30be862e9 100644 | ||
4424 | --- a/net/ceph/messenger.c | ||
4425 | +++ b/net/ceph/messenger.c | ||
4426 | @@ -7,6 +7,7 @@ | ||
4427 | #include <linux/kthread.h> | ||
4428 | #include <linux/net.h> | ||
4429 | #include <linux/nsproxy.h> | ||
4430 | +#include <linux/sched.h> | ||
4431 | #include <linux/slab.h> | ||
4432 | #include <linux/socket.h> | ||
4433 | #include <linux/string.h> | ||
4434 | @@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con) | ||
4435 | { | ||
4436 | struct sockaddr_storage *paddr = &con->peer_addr.in_addr; | ||
4437 | struct socket *sock; | ||
4438 | + unsigned int noio_flag; | ||
4439 | int ret; | ||
4440 | |||
4441 | BUG_ON(con->sock); | ||
4442 | + | ||
4443 | + /* sock_create_kern() allocates with GFP_KERNEL */ | ||
4444 | + noio_flag = memalloc_noio_save(); | ||
4445 | ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, | ||
4446 | SOCK_STREAM, IPPROTO_TCP, &sock); | ||
4447 | + memalloc_noio_restore(noio_flag); | ||
4448 | if (ret) | ||
4449 | return ret; | ||
4450 | sock->sk->sk_allocation = GFP_NOFS; | ||
4451 | diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c | ||
4452 | index 3f4efcb85df5..3490d21ab9e7 100644 | ||
4453 | --- a/sound/core/seq/seq_fifo.c | ||
4454 | +++ b/sound/core/seq/seq_fifo.c | ||
4455 | @@ -265,6 +265,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize) | ||
4456 | /* NOTE: overflow flag is not cleared */ | ||
4457 | spin_unlock_irqrestore(&f->lock, flags); | ||
4458 | |||
4459 | + /* close the old pool and wait until all users are gone */ | ||
4460 | + snd_seq_pool_mark_closing(oldpool); | ||
4461 | + snd_use_lock_sync(&f->use_lock); | ||
4462 | + | ||
4463 | /* release cells in old pool */ | ||
4464 | for (cell = oldhead; cell; cell = next) { | ||
4465 | next = cell->next; | ||
4466 | diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c | ||
4467 | index 112caa2d3c14..bb1aad39d987 100644 | ||
4468 | --- a/sound/pci/hda/patch_realtek.c | ||
4469 | +++ b/sound/pci/hda/patch_realtek.c | ||
4470 | @@ -4846,6 +4846,7 @@ enum { | ||
4471 | ALC292_FIXUP_DISABLE_AAMIX, | ||
4472 | ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK, | ||
4473 | ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, | ||
4474 | + ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, | ||
4475 | ALC275_FIXUP_DELL_XPS, | ||
4476 | ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, | ||
4477 | ALC293_FIXUP_LENOVO_SPK_NOISE, | ||
4478 | @@ -5446,6 +5447,15 @@ static const struct hda_fixup alc269_fixups[] = { | ||
4479 | .chained = true, | ||
4480 | .chain_id = ALC269_FIXUP_HEADSET_MODE | ||
4481 | }, | ||
4482 | + [ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = { | ||
4483 | + .type = HDA_FIXUP_PINS, | ||
4484 | + .v.pins = (const struct hda_pintbl[]) { | ||
4485 | + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ | ||
4486 | + { } | ||
4487 | + }, | ||
4488 | + .chained = true, | ||
4489 | + .chain_id = ALC269_FIXUP_HEADSET_MODE | ||
4490 | + }, | ||
4491 | [ALC275_FIXUP_DELL_XPS] = { | ||
4492 | .type = HDA_FIXUP_VERBS, | ||
4493 | .v.verbs = (const struct hda_verb[]) { | ||
4494 | @@ -5518,7 +5528,7 @@ static const struct hda_fixup alc269_fixups[] = { | ||
4495 | .type = HDA_FIXUP_FUNC, | ||
4496 | .v.func = alc298_fixup_speaker_volume, | ||
4497 | .chained = true, | ||
4498 | - .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, | ||
4499 | + .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, | ||
4500 | }, | ||
4501 | [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = { | ||
4502 | .type = HDA_FIXUP_PINS, | ||
4503 | diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c | ||
4504 | index 89ac5f5a93eb..7ae46c2647d4 100644 | ||
4505 | --- a/sound/soc/atmel/atmel-classd.c | ||
4506 | +++ b/sound/soc/atmel/atmel-classd.c | ||
4507 | @@ -349,7 +349,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai, | ||
4508 | } | ||
4509 | |||
4510 | #define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8) | ||
4511 | -#define CLASSD_ACLK_RATE_12M288_MPY_8 (12228 * 1000 * 8) | ||
4512 | +#define CLASSD_ACLK_RATE_12M288_MPY_8 (12288 * 1000 * 8) | ||
4513 | |||
4514 | static struct { | ||
4515 | int rate; | ||
4516 | diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c | ||
4517 | index b5b1934d8550..bef8a4546c12 100644 | ||
4518 | --- a/sound/soc/intel/skylake/skl-topology.c | ||
4519 | +++ b/sound/soc/intel/skylake/skl-topology.c | ||
4520 | @@ -448,7 +448,7 @@ static int skl_tplg_set_module_init_data(struct snd_soc_dapm_widget *w) | ||
4521 | if (bc->set_params != SKL_PARAM_INIT) | ||
4522 | continue; | ||
4523 | |||
4524 | - mconfig->formats_config.caps = (u32 *)&bc->params; | ||
4525 | + mconfig->formats_config.caps = (u32 *)bc->params; | ||
4526 | mconfig->formats_config.caps_size = bc->size; | ||
4527 | |||
4528 | break; | ||
4529 | diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c | ||
4530 | index a29786dd9522..4d28a9ddbee0 100644 | ||
4531 | --- a/virt/kvm/eventfd.c | ||
4532 | +++ b/virt/kvm/eventfd.c | ||
4533 | @@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, | ||
4534 | continue; | ||
4535 | |||
4536 | kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); | ||
4537 | - kvm->buses[bus_idx]->ioeventfd_count--; | ||
4538 | + if (kvm->buses[bus_idx]) | ||
4539 | + kvm->buses[bus_idx]->ioeventfd_count--; | ||
4540 | ioeventfd_release(p); | ||
4541 | ret = 0; | ||
4542 | break; | ||
4543 | diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c | ||
4544 | index 7f9ee2929cfe..f4c6d4f6d2e8 100644 | ||
4545 | --- a/virt/kvm/kvm_main.c | ||
4546 | +++ b/virt/kvm/kvm_main.c | ||
4547 | @@ -720,8 +720,11 @@ static void kvm_destroy_vm(struct kvm *kvm) | ||
4548 | list_del(&kvm->vm_list); | ||
4549 | spin_unlock(&kvm_lock); | ||
4550 | kvm_free_irq_routing(kvm); | ||
4551 | - for (i = 0; i < KVM_NR_BUSES; i++) | ||
4552 | - kvm_io_bus_destroy(kvm->buses[i]); | ||
4553 | + for (i = 0; i < KVM_NR_BUSES; i++) { | ||
4554 | + if (kvm->buses[i]) | ||
4555 | + kvm_io_bus_destroy(kvm->buses[i]); | ||
4556 | + kvm->buses[i] = NULL; | ||
4557 | + } | ||
4558 | kvm_coalesced_mmio_free(kvm); | ||
4559 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | ||
4560 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); | ||
4561 | @@ -3463,6 +3466,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | ||
4562 | }; | ||
4563 | |||
4564 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); | ||
4565 | + if (!bus) | ||
4566 | + return -ENOMEM; | ||
4567 | r = __kvm_io_bus_write(vcpu, bus, &range, val); | ||
4568 | return r < 0 ? r : 0; | ||
4569 | } | ||
4570 | @@ -3480,6 +3485,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, | ||
4571 | }; | ||
4572 | |||
4573 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); | ||
4574 | + if (!bus) | ||
4575 | + return -ENOMEM; | ||
4576 | |||
4577 | /* First try the device referenced by cookie. */ | ||
4578 | if ((cookie >= 0) && (cookie < bus->dev_count) && | ||
4579 | @@ -3530,6 +3537,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | ||
4580 | }; | ||
4581 | |||
4582 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); | ||
4583 | + if (!bus) | ||
4584 | + return -ENOMEM; | ||
4585 | r = __kvm_io_bus_read(vcpu, bus, &range, val); | ||
4586 | return r < 0 ? r : 0; | ||
4587 | } | ||
4588 | @@ -3542,6 +3551,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||
4589 | struct kvm_io_bus *new_bus, *bus; | ||
4590 | |||
4591 | bus = kvm->buses[bus_idx]; | ||
4592 | + if (!bus) | ||
4593 | + return -ENOMEM; | ||
4594 | + | ||
4595 | /* exclude ioeventfd which is limited by maximum fd */ | ||
4596 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) | ||
4597 | return -ENOSPC; | ||
4598 | @@ -3561,37 +3573,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||
4599 | } | ||
4600 | |||
4601 | /* Caller must hold slots_lock. */ | ||
4602 | -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||
4603 | - struct kvm_io_device *dev) | ||
4604 | +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||
4605 | + struct kvm_io_device *dev) | ||
4606 | { | ||
4607 | - int i, r; | ||
4608 | + int i; | ||
4609 | struct kvm_io_bus *new_bus, *bus; | ||
4610 | |||
4611 | bus = kvm->buses[bus_idx]; | ||
4612 | - r = -ENOENT; | ||
4613 | + if (!bus) | ||
4614 | + return; | ||
4615 | + | ||
4616 | for (i = 0; i < bus->dev_count; i++) | ||
4617 | if (bus->range[i].dev == dev) { | ||
4618 | - r = 0; | ||
4619 | break; | ||
4620 | } | ||
4621 | |||
4622 | - if (r) | ||
4623 | - return r; | ||
4624 | + if (i == bus->dev_count) | ||
4625 | + return; | ||
4626 | |||
4627 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * | ||
4628 | sizeof(struct kvm_io_range)), GFP_KERNEL); | ||
4629 | - if (!new_bus) | ||
4630 | - return -ENOMEM; | ||
4631 | + if (!new_bus) { | ||
4632 | + pr_err("kvm: failed to shrink bus, removing it completely\n"); | ||
4633 | + goto broken; | ||
4634 | + } | ||
4635 | |||
4636 | memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); | ||
4637 | new_bus->dev_count--; | ||
4638 | memcpy(new_bus->range + i, bus->range + i + 1, | ||
4639 | (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); | ||
4640 | |||
4641 | +broken: | ||
4642 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | ||
4643 | synchronize_srcu_expedited(&kvm->srcu); | ||
4644 | kfree(bus); | ||
4645 | - return r; | ||
4646 | + return; | ||
4647 | } | ||
4648 | |||
4649 | struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||
4650 | @@ -3604,6 +3620,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||
4651 | srcu_idx = srcu_read_lock(&kvm->srcu); | ||
4652 | |||
4653 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | ||
4654 | + if (!bus) | ||
4655 | + goto out_unlock; | ||
4656 | |||
4657 | dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); | ||
4658 | if (dev_idx < 0) |