Contents of /trunk/kernel-alx-legacy/patches-4.9/0120-4.9.21-all-fixes.patch
Revision 3608
Fri Aug 14 07:34:29 2020 UTC (4 years, 1 month ago) by niro
File size: 148261 byte(s)
-added kernel-alx-legacy pkg
1 | diff --git a/Makefile b/Makefile |
2 | index 44960184701a..1523557bd61f 100644 |
3 | --- a/Makefile |
4 | +++ b/Makefile |
5 | @@ -1,6 +1,6 @@ |
6 | VERSION = 4 |
7 | PATCHLEVEL = 9 |
8 | -SUBLEVEL = 20 |
9 | +SUBLEVEL = 21 |
10 | EXTRAVERSION = |
11 | NAME = Roaring Lionus |
12 | |
13 | diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi |
14 | index ae4b3880616d..4616452ce74d 100644 |
15 | --- a/arch/arm/boot/dts/bcm5301x.dtsi |
16 | +++ b/arch/arm/boot/dts/bcm5301x.dtsi |
17 | @@ -66,14 +66,14 @@ |
18 | timer@20200 { |
19 | compatible = "arm,cortex-a9-global-timer"; |
20 | reg = <0x20200 0x100>; |
21 | - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; |
22 | + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; |
23 | clocks = <&periph_clk>; |
24 | }; |
25 | |
26 | local-timer@20600 { |
27 | compatible = "arm,cortex-a9-twd-timer"; |
28 | reg = <0x20600 0x100>; |
29 | - interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>; |
30 | + interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>; |
31 | clocks = <&periph_clk>; |
32 | }; |
33 | |
34 | diff --git a/arch/arm/mach-bcm/bcm_5301x.c b/arch/arm/mach-bcm/bcm_5301x.c |
35 | index c8830a2b0d60..fe067f6cebb6 100644 |
36 | --- a/arch/arm/mach-bcm/bcm_5301x.c |
37 | +++ b/arch/arm/mach-bcm/bcm_5301x.c |
38 | @@ -9,14 +9,42 @@ |
39 | #include <asm/hardware/cache-l2x0.h> |
40 | |
41 | #include <asm/mach/arch.h> |
42 | +#include <asm/siginfo.h> |
43 | +#include <asm/signal.h> |
44 | + |
45 | +#define FSR_EXTERNAL (1 << 12) |
46 | +#define FSR_READ (0 << 10) |
47 | +#define FSR_IMPRECISE 0x0406 |
48 | |
49 | static const char *const bcm5301x_dt_compat[] __initconst = { |
50 | "brcm,bcm4708", |
51 | NULL, |
52 | }; |
53 | |
54 | +static int bcm5301x_abort_handler(unsigned long addr, unsigned int fsr, |
55 | + struct pt_regs *regs) |
56 | +{ |
57 | + /* |
58 | + * We want to ignore aborts forwarded from the PCIe bus that are |
59 | + * expected and shouldn't really be passed by the PCIe controller. |
60 | + * The biggest disadvantage is the same FSR code may be reported when |
61 | + * reading non-existing APB register and we shouldn't ignore that. |
62 | + */ |
63 | + if (fsr == (FSR_EXTERNAL | FSR_READ | FSR_IMPRECISE)) |
64 | + return 0; |
65 | + |
66 | + return 1; |
67 | +} |
68 | + |
69 | +static void __init bcm5301x_init_early(void) |
70 | +{ |
71 | + hook_fault_code(16 + 6, bcm5301x_abort_handler, SIGBUS, BUS_OBJERR, |
72 | + "imprecise external abort"); |
73 | +} |
74 | + |
75 | DT_MACHINE_START(BCM5301X, "BCM5301X") |
76 | .l2c_aux_val = 0, |
77 | .l2c_aux_mask = ~0, |
78 | .dt_compat = bcm5301x_dt_compat, |
79 | + .init_early = bcm5301x_init_early, |
80 | MACHINE_END |
81 | diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c |
82 | index 8ac0e5994ed2..0ddf3698b85d 100644 |
83 | --- a/arch/mips/lantiq/irq.c |
84 | +++ b/arch/mips/lantiq/irq.c |
85 | @@ -269,6 +269,11 @@ static void ltq_hw5_irqdispatch(void) |
86 | DEFINE_HWx_IRQDISPATCH(5) |
87 | #endif |
88 | |
89 | +static void ltq_hw_irq_handler(struct irq_desc *desc) |
90 | +{ |
91 | + ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2); |
92 | +} |
93 | + |
94 | #ifdef CONFIG_MIPS_MT_SMP |
95 | void __init arch_init_ipiirq(int irq, struct irqaction *action) |
96 | { |
97 | @@ -313,23 +318,19 @@ static struct irqaction irq_call = { |
98 | asmlinkage void plat_irq_dispatch(void) |
99 | { |
100 | unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; |
101 | - unsigned int i; |
102 | - |
103 | - if ((MIPS_CPU_TIMER_IRQ == 7) && (pending & CAUSEF_IP7)) { |
104 | - do_IRQ(MIPS_CPU_TIMER_IRQ); |
105 | - goto out; |
106 | - } else { |
107 | - for (i = 0; i < MAX_IM; i++) { |
108 | - if (pending & (CAUSEF_IP2 << i)) { |
109 | - ltq_hw_irqdispatch(i); |
110 | - goto out; |
111 | - } |
112 | - } |
113 | + int irq; |
114 | + |
115 | + if (!pending) { |
116 | + spurious_interrupt(); |
117 | + return; |
118 | } |
119 | - pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status()); |
120 | |
121 | -out: |
122 | - return; |
123 | + pending >>= CAUSEB_IP; |
124 | + while (pending) { |
125 | + irq = fls(pending) - 1; |
126 | + do_IRQ(MIPS_CPU_IRQ_BASE + irq); |
127 | + pending &= ~BIT(irq); |
128 | + } |
129 | } |
130 | |
131 | static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) |
132 | @@ -354,11 +355,6 @@ static const struct irq_domain_ops irq_domain_ops = { |
133 | .map = icu_map, |
134 | }; |
135 | |
136 | -static struct irqaction cascade = { |
137 | - .handler = no_action, |
138 | - .name = "cascade", |
139 | -}; |
140 | - |
141 | int __init icu_of_init(struct device_node *node, struct device_node *parent) |
142 | { |
143 | struct device_node *eiu_node; |
144 | @@ -390,7 +386,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) |
145 | mips_cpu_irq_init(); |
146 | |
147 | for (i = 0; i < MAX_IM; i++) |
148 | - setup_irq(i + 2, &cascade); |
149 | + irq_set_chained_handler(i + 2, ltq_hw_irq_handler); |
150 | |
151 | if (cpu_has_vint) { |
152 | pr_info("Setting up vectored interrupts\n"); |
153 | diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h |
154 | index 9a2aee1b90fc..7fcf5128996a 100644 |
155 | --- a/arch/parisc/include/asm/uaccess.h |
156 | +++ b/arch/parisc/include/asm/uaccess.h |
157 | @@ -68,6 +68,15 @@ struct exception_table_entry { |
158 | ".previous\n" |
159 | |
160 | /* |
161 | + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry |
162 | + * (with lowest bit set) for which the fault handler in fixup_exception() will |
163 | + * load -EFAULT into %r8 for a read or write fault, and zeroes the target |
164 | + * register in case of a read fault in get_user(). |
165 | + */ |
166 | +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ |
167 | + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) |
168 | + |
169 | +/* |
170 | * The page fault handler stores, in a per-cpu area, the following information |
171 | * if a fixup routine is available. |
172 | */ |
173 | @@ -94,7 +103,7 @@ struct exception_data { |
174 | #define __get_user(x, ptr) \ |
175 | ({ \ |
176 | register long __gu_err __asm__ ("r8") = 0; \ |
177 | - register long __gu_val __asm__ ("r9") = 0; \ |
178 | + register long __gu_val; \ |
179 | \ |
180 | load_sr2(); \ |
181 | switch (sizeof(*(ptr))) { \ |
182 | @@ -110,22 +119,23 @@ struct exception_data { |
183 | }) |
184 | |
185 | #define __get_user_asm(ldx, ptr) \ |
186 | - __asm__("\n1:\t" ldx "\t0(%%sr2,%2),%0\n\t" \ |
187 | - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\ |
188 | + __asm__("1: " ldx " 0(%%sr2,%2),%0\n" \ |
189 | + "9:\n" \ |
190 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ |
191 | : "=r"(__gu_val), "=r"(__gu_err) \ |
192 | - : "r"(ptr), "1"(__gu_err) \ |
193 | - : "r1"); |
194 | + : "r"(ptr), "1"(__gu_err)); |
195 | |
196 | #if !defined(CONFIG_64BIT) |
197 | |
198 | #define __get_user_asm64(ptr) \ |
199 | - __asm__("\n1:\tldw 0(%%sr2,%2),%0" \ |
200 | - "\n2:\tldw 4(%%sr2,%2),%R0\n\t" \ |
201 | - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_2)\ |
202 | - ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_get_user_skip_1)\ |
203 | + __asm__(" copy %%r0,%R0\n" \ |
204 | + "1: ldw 0(%%sr2,%2),%0\n" \ |
205 | + "2: ldw 4(%%sr2,%2),%R0\n" \ |
206 | + "9:\n" \ |
207 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ |
208 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ |
209 | : "=r"(__gu_val), "=r"(__gu_err) \ |
210 | - : "r"(ptr), "1"(__gu_err) \ |
211 | - : "r1"); |
212 | + : "r"(ptr), "1"(__gu_err)); |
213 | |
214 | #endif /* !defined(CONFIG_64BIT) */ |
215 | |
216 | @@ -151,32 +161,31 @@ struct exception_data { |
217 | * The "__put_user/kernel_asm()" macros tell gcc they read from memory |
218 | * instead of writing. This is because they do not write to any memory |
219 | * gcc knows about, so there are no aliasing issues. These macros must |
220 | - * also be aware that "fixup_put_user_skip_[12]" are executed in the |
221 | - * context of the fault, and any registers used there must be listed |
222 | - * as clobbers. In this case only "r1" is used by the current routines. |
223 | - * r8/r9 are already listed as err/val. |
224 | + * also be aware that fixups are executed in the context of the fault, |
225 | + * and any registers used there must be listed as clobbers. |
226 | + * r8 is already listed as err. |
227 | */ |
228 | |
229 | #define __put_user_asm(stx, x, ptr) \ |
230 | __asm__ __volatile__ ( \ |
231 | - "\n1:\t" stx "\t%2,0(%%sr2,%1)\n\t" \ |
232 | - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_1)\ |
233 | + "1: " stx " %2,0(%%sr2,%1)\n" \ |
234 | + "9:\n" \ |
235 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ |
236 | : "=r"(__pu_err) \ |
237 | - : "r"(ptr), "r"(x), "0"(__pu_err) \ |
238 | - : "r1") |
239 | + : "r"(ptr), "r"(x), "0"(__pu_err)) |
240 | |
241 | |
242 | #if !defined(CONFIG_64BIT) |
243 | |
244 | #define __put_user_asm64(__val, ptr) do { \ |
245 | __asm__ __volatile__ ( \ |
246 | - "\n1:\tstw %2,0(%%sr2,%1)" \ |
247 | - "\n2:\tstw %R2,4(%%sr2,%1)\n\t" \ |
248 | - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_2)\ |
249 | - ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_put_user_skip_1)\ |
250 | + "1: stw %2,0(%%sr2,%1)\n" \ |
251 | + "2: stw %R2,4(%%sr2,%1)\n" \ |
252 | + "9:\n" \ |
253 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ |
254 | + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ |
255 | : "=r"(__pu_err) \ |
256 | - : "r"(ptr), "r"(__val), "0"(__pu_err) \ |
257 | - : "r1"); \ |
258 | + : "r"(ptr), "r"(__val), "0"(__pu_err)); \ |
259 | } while (0) |
260 | |
261 | #endif /* !defined(CONFIG_64BIT) */ |
262 | diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c |
263 | index 3cad8aadc69e..4e6f0d93154f 100644 |
264 | --- a/arch/parisc/kernel/parisc_ksyms.c |
265 | +++ b/arch/parisc/kernel/parisc_ksyms.c |
266 | @@ -47,16 +47,6 @@ EXPORT_SYMBOL(__cmpxchg_u64); |
267 | EXPORT_SYMBOL(lclear_user); |
268 | EXPORT_SYMBOL(lstrnlen_user); |
269 | |
270 | -/* Global fixups - defined as int to avoid creation of function pointers */ |
271 | -extern int fixup_get_user_skip_1; |
272 | -extern int fixup_get_user_skip_2; |
273 | -extern int fixup_put_user_skip_1; |
274 | -extern int fixup_put_user_skip_2; |
275 | -EXPORT_SYMBOL(fixup_get_user_skip_1); |
276 | -EXPORT_SYMBOL(fixup_get_user_skip_2); |
277 | -EXPORT_SYMBOL(fixup_put_user_skip_1); |
278 | -EXPORT_SYMBOL(fixup_put_user_skip_2); |
279 | - |
280 | #ifndef CONFIG_64BIT |
281 | /* Needed so insmod can set dp value */ |
282 | extern int $global$; |
283 | diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c |
284 | index e81afc378850..e7ffde2758fc 100644 |
285 | --- a/arch/parisc/kernel/process.c |
286 | +++ b/arch/parisc/kernel/process.c |
287 | @@ -140,6 +140,8 @@ void machine_power_off(void) |
288 | printk(KERN_EMERG "System shut down completed.\n" |
289 | "Please power this system off now."); |
290 | |
291 | + /* prevent soft lockup/stalled CPU messages for endless loop. */ |
292 | + rcu_sysrq_start(); |
293 | for (;;); |
294 | } |
295 | |
296 | diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile |
297 | index 8fa92b8d839a..f2dac4d73b1b 100644 |
298 | --- a/arch/parisc/lib/Makefile |
299 | +++ b/arch/parisc/lib/Makefile |
300 | @@ -2,7 +2,7 @@ |
301 | # Makefile for parisc-specific library files |
302 | # |
303 | |
304 | -lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \ |
305 | +lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ |
306 | ucmpdi2.o delay.o |
307 | |
308 | obj-y := iomap.o |
309 | diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S |
310 | deleted file mode 100644 |
311 | index a5b72f22c7a6..000000000000 |
312 | --- a/arch/parisc/lib/fixup.S |
313 | +++ /dev/null |
314 | @@ -1,98 +0,0 @@ |
315 | -/* |
316 | - * Linux/PA-RISC Project (http://www.parisc-linux.org/) |
317 | - * |
318 | - * Copyright (C) 2004 Randolph Chung <tausq@debian.org> |
319 | - * |
320 | - * This program is free software; you can redistribute it and/or modify |
321 | - * it under the terms of the GNU General Public License as published by |
322 | - * the Free Software Foundation; either version 2, or (at your option) |
323 | - * any later version. |
324 | - * |
325 | - * This program is distributed in the hope that it will be useful, |
326 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
327 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
328 | - * GNU General Public License for more details. |
329 | - * |
330 | - * You should have received a copy of the GNU General Public License |
331 | - * along with this program; if not, write to the Free Software |
332 | - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
333 | - * |
334 | - * Fixup routines for kernel exception handling. |
335 | - */ |
336 | -#include <asm/asm-offsets.h> |
337 | -#include <asm/assembly.h> |
338 | -#include <asm/errno.h> |
339 | -#include <linux/linkage.h> |
340 | - |
341 | -#ifdef CONFIG_SMP |
342 | - .macro get_fault_ip t1 t2 |
343 | - loadgp |
344 | - addil LT%__per_cpu_offset,%r27 |
345 | - LDREG RT%__per_cpu_offset(%r1),\t1 |
346 | - /* t2 = smp_processor_id() */ |
347 | - mfctl 30,\t2 |
348 | - ldw TI_CPU(\t2),\t2 |
349 | -#ifdef CONFIG_64BIT |
350 | - extrd,u \t2,63,32,\t2 |
351 | -#endif |
352 | - /* t2 = &__per_cpu_offset[smp_processor_id()]; */ |
353 | - LDREGX \t2(\t1),\t2 |
354 | - addil LT%exception_data,%r27 |
355 | - LDREG RT%exception_data(%r1),\t1 |
356 | - /* t1 = this_cpu_ptr(&exception_data) */ |
357 | - add,l \t1,\t2,\t1 |
358 | - /* %r27 = t1->fault_gp - restore gp */ |
359 | - LDREG EXCDATA_GP(\t1), %r27 |
360 | - /* t1 = t1->fault_ip */ |
361 | - LDREG EXCDATA_IP(\t1), \t1 |
362 | - .endm |
363 | -#else |
364 | - .macro get_fault_ip t1 t2 |
365 | - loadgp |
366 | - /* t1 = this_cpu_ptr(&exception_data) */ |
367 | - addil LT%exception_data,%r27 |
368 | - LDREG RT%exception_data(%r1),\t2 |
369 | - /* %r27 = t2->fault_gp - restore gp */ |
370 | - LDREG EXCDATA_GP(\t2), %r27 |
371 | - /* t1 = t2->fault_ip */ |
372 | - LDREG EXCDATA_IP(\t2), \t1 |
373 | - .endm |
374 | -#endif |
375 | - |
376 | - .level LEVEL |
377 | - |
378 | - .text |
379 | - .section .fixup, "ax" |
380 | - |
381 | - /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */ |
382 | -ENTRY_CFI(fixup_get_user_skip_1) |
383 | - get_fault_ip %r1,%r8 |
384 | - ldo 4(%r1), %r1 |
385 | - ldi -EFAULT, %r8 |
386 | - bv %r0(%r1) |
387 | - copy %r0, %r9 |
388 | -ENDPROC_CFI(fixup_get_user_skip_1) |
389 | - |
390 | -ENTRY_CFI(fixup_get_user_skip_2) |
391 | - get_fault_ip %r1,%r8 |
392 | - ldo 8(%r1), %r1 |
393 | - ldi -EFAULT, %r8 |
394 | - bv %r0(%r1) |
395 | - copy %r0, %r9 |
396 | -ENDPROC_CFI(fixup_get_user_skip_2) |
397 | - |
398 | - /* put_user() fixups, store -EFAULT in r8 */ |
399 | -ENTRY_CFI(fixup_put_user_skip_1) |
400 | - get_fault_ip %r1,%r8 |
401 | - ldo 4(%r1), %r1 |
402 | - bv %r0(%r1) |
403 | - ldi -EFAULT, %r8 |
404 | -ENDPROC_CFI(fixup_put_user_skip_1) |
405 | - |
406 | -ENTRY_CFI(fixup_put_user_skip_2) |
407 | - get_fault_ip %r1,%r8 |
408 | - ldo 8(%r1), %r1 |
409 | - bv %r0(%r1) |
410 | - ldi -EFAULT, %r8 |
411 | -ENDPROC_CFI(fixup_put_user_skip_2) |
412 | - |
413 | diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S |
414 | index 56845de6b5df..f01188c044ee 100644 |
415 | --- a/arch/parisc/lib/lusercopy.S |
416 | +++ b/arch/parisc/lib/lusercopy.S |
417 | @@ -5,6 +5,8 @@ |
418 | * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org> |
419 | * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr> |
420 | * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org> |
421 | + * Copyright (C) 2017 Helge Deller <deller@gmx.de> |
422 | + * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net> |
423 | * |
424 | * |
425 | * This program is free software; you can redistribute it and/or modify |
426 | @@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user) |
427 | |
428 | .procend |
429 | |
430 | + |
431 | + |
432 | +/* |
433 | + * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) |
434 | + * |
435 | + * Inputs: |
436 | + * - sr1 already contains space of source region |
437 | + * - sr2 already contains space of destination region |
438 | + * |
439 | + * Returns: |
440 | + * - number of bytes that could not be copied. |
441 | + * On success, this will be zero. |
442 | + * |
443 | + * This code is based on a C-implementation of a copy routine written by |
444 | + * Randolph Chung, which in turn was derived from the glibc. |
445 | + * |
446 | + * Several strategies are tried to try to get the best performance for various |
447 | + * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes |
448 | + * at a time using general registers. Unaligned copies are handled either by |
449 | + * aligning the destination and then using shift-and-write method, or in a few |
450 | + * cases by falling back to a byte-at-a-time copy. |
451 | + * |
452 | + * Testing with various alignments and buffer sizes shows that this code is |
453 | + * often >10x faster than a simple byte-at-a-time copy, even for strangely |
454 | + * aligned operands. It is interesting to note that the glibc version of memcpy |
455 | + * (written in C) is actually quite fast already. This routine is able to beat |
456 | + * it by 30-40% for aligned copies because of the loop unrolling, but in some |
457 | + * cases the glibc version is still slightly faster. This lends more |
458 | + * credibility that gcc can generate very good code as long as we are careful. |
459 | + * |
460 | + * Possible optimizations: |
461 | + * - add cache prefetching |
462 | + * - try not to use the post-increment address modifiers; they may create |
463 | + * additional interlocks. Assumption is that those were only efficient on old |
464 | + * machines (pre PA8000 processors) |
465 | + */ |
466 | + |
467 | + dst = arg0 |
468 | + src = arg1 |
469 | + len = arg2 |
470 | + end = arg3 |
471 | + t1 = r19 |
472 | + t2 = r20 |
473 | + t3 = r21 |
474 | + t4 = r22 |
475 | + srcspc = sr1 |
476 | + dstspc = sr2 |
477 | + |
478 | + t0 = r1 |
479 | + a1 = t1 |
480 | + a2 = t2 |
481 | + a3 = t3 |
482 | + a0 = t4 |
483 | + |
484 | + save_src = ret0 |
485 | + save_dst = ret1 |
486 | + save_len = r31 |
487 | + |
488 | +ENTRY_CFI(pa_memcpy) |
489 | + .proc |
490 | + .callinfo NO_CALLS |
491 | + .entry |
492 | + |
493 | + /* Last destination address */ |
494 | + add dst,len,end |
495 | + |
496 | + /* short copy with less than 16 bytes? */ |
497 | + cmpib,>>=,n 15,len,.Lbyte_loop |
498 | + |
499 | + /* same alignment? */ |
500 | + xor src,dst,t0 |
501 | + extru t0,31,2,t1 |
502 | + cmpib,<>,n 0,t1,.Lunaligned_copy |
503 | + |
504 | +#ifdef CONFIG_64BIT |
505 | + /* only do 64-bit copies if we can get aligned. */ |
506 | + extru t0,31,3,t1 |
507 | + cmpib,<>,n 0,t1,.Lalign_loop32 |
508 | + |
509 | + /* loop until we are 64-bit aligned */ |
510 | +.Lalign_loop64: |
511 | + extru dst,31,3,t1 |
512 | + cmpib,=,n 0,t1,.Lcopy_loop_16 |
513 | +20: ldb,ma 1(srcspc,src),t1 |
514 | +21: stb,ma t1,1(dstspc,dst) |
515 | + b .Lalign_loop64 |
516 | + ldo -1(len),len |
517 | + |
518 | + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) |
519 | + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) |
520 | + |
521 | + ldi 31,t0 |
522 | +.Lcopy_loop_16: |
523 | + cmpb,COND(>>=),n t0,len,.Lword_loop |
524 | + |
525 | +10: ldd 0(srcspc,src),t1 |
526 | +11: ldd 8(srcspc,src),t2 |
527 | + ldo 16(src),src |
528 | +12: std,ma t1,8(dstspc,dst) |
529 | +13: std,ma t2,8(dstspc,dst) |
530 | +14: ldd 0(srcspc,src),t1 |
531 | +15: ldd 8(srcspc,src),t2 |
532 | + ldo 16(src),src |
533 | +16: std,ma t1,8(dstspc,dst) |
534 | +17: std,ma t2,8(dstspc,dst) |
535 | + |
536 | + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) |
537 | + ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault) |
538 | + ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done) |
539 | + ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done) |
540 | + ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done) |
541 | + ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault) |
542 | + ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done) |
543 | + ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done) |
544 | + |
545 | + b .Lcopy_loop_16 |
546 | + ldo -32(len),len |
547 | + |
548 | +.Lword_loop: |
549 | + cmpib,COND(>>=),n 3,len,.Lbyte_loop |
550 | +20: ldw,ma 4(srcspc,src),t1 |
551 | +21: stw,ma t1,4(dstspc,dst) |
552 | + b .Lword_loop |
553 | + ldo -4(len),len |
554 | + |
555 | + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) |
556 | + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) |
557 | + |
558 | +#endif /* CONFIG_64BIT */ |
559 | + |
560 | + /* loop until we are 32-bit aligned */ |
561 | +.Lalign_loop32: |
562 | + extru dst,31,2,t1 |
563 | + cmpib,=,n 0,t1,.Lcopy_loop_4 |
564 | +20: ldb,ma 1(srcspc,src),t1 |
565 | +21: stb,ma t1,1(dstspc,dst) |
566 | + b .Lalign_loop32 |
567 | + ldo -1(len),len |
568 | + |
569 | + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) |
570 | + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) |
571 | + |
572 | + |
573 | +.Lcopy_loop_4: |
574 | + cmpib,COND(>>=),n 15,len,.Lbyte_loop |
575 | + |
576 | +10: ldw 0(srcspc,src),t1 |
577 | +11: ldw 4(srcspc,src),t2 |
578 | +12: stw,ma t1,4(dstspc,dst) |
579 | +13: stw,ma t2,4(dstspc,dst) |
580 | +14: ldw 8(srcspc,src),t1 |
581 | +15: ldw 12(srcspc,src),t2 |
582 | + ldo 16(src),src |
583 | +16: stw,ma t1,4(dstspc,dst) |
584 | +17: stw,ma t2,4(dstspc,dst) |
585 | + |
586 | + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) |
587 | + ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault) |
588 | + ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done) |
589 | + ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done) |
590 | + ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done) |
591 | + ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault) |
592 | + ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done) |
593 | + ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done) |
594 | + |
595 | + b .Lcopy_loop_4 |
596 | + ldo -16(len),len |
597 | + |
598 | +.Lbyte_loop: |
599 | + cmpclr,COND(<>) len,%r0,%r0 |
600 | + b,n .Lcopy_done |
601 | +20: ldb 0(srcspc,src),t1 |
602 | + ldo 1(src),src |
603 | +21: stb,ma t1,1(dstspc,dst) |
604 | + b .Lbyte_loop |
605 | + ldo -1(len),len |
606 | + |
607 | + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) |
608 | + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) |
609 | + |
610 | +.Lcopy_done: |
611 | + bv %r0(%r2) |
612 | + sub end,dst,ret0 |
613 | + |
614 | + |
615 | + /* src and dst are not aligned the same way. */ |
616 | + /* need to go the hard way */ |
617 | +.Lunaligned_copy: |
618 | + /* align until dst is 32bit-word-aligned */ |
619 | + extru dst,31,2,t1 |
620 | + cmpib,COND(=),n 0,t1,.Lcopy_dstaligned |
621 | +20: ldb 0(srcspc,src),t1 |
622 | + ldo 1(src),src |
623 | +21: stb,ma t1,1(dstspc,dst) |
624 | + b .Lunaligned_copy |
625 | + ldo -1(len),len |
626 | + |
627 | + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) |
628 | + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) |
629 | + |
630 | +.Lcopy_dstaligned: |
631 | + |
632 | + /* store src, dst and len in safe place */ |
633 | + copy src,save_src |
634 | + copy dst,save_dst |
635 | + copy len,save_len |
636 | + |
637 | + /* len now needs give number of words to copy */ |
638 | + SHRREG len,2,len |
639 | + |
640 | + /* |
641 | + * Copy from a not-aligned src to an aligned dst using shifts. |
642 | + * Handles 4 words per loop. |
643 | + */ |
644 | + |
645 | + depw,z src,28,2,t0 |
646 | + subi 32,t0,t0 |
647 | + mtsar t0 |
648 | + extru len,31,2,t0 |
649 | + cmpib,= 2,t0,.Lcase2 |
650 | + /* Make src aligned by rounding it down. */ |
651 | + depi 0,31,2,src |
652 | + |
653 | + cmpiclr,<> 3,t0,%r0 |
654 | + b,n .Lcase3 |
655 | + cmpiclr,<> 1,t0,%r0 |
656 | + b,n .Lcase1 |
657 | +.Lcase0: |
658 | + cmpb,= %r0,len,.Lcda_finish |
659 | + nop |
660 | + |
661 | +1: ldw,ma 4(srcspc,src), a3 |
662 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
663 | +1: ldw,ma 4(srcspc,src), a0 |
664 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
665 | + b,n .Ldo3 |
666 | +.Lcase1: |
667 | +1: ldw,ma 4(srcspc,src), a2 |
668 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
669 | +1: ldw,ma 4(srcspc,src), a3 |
670 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
671 | + ldo -1(len),len |
672 | + cmpb,=,n %r0,len,.Ldo0 |
673 | +.Ldo4: |
674 | +1: ldw,ma 4(srcspc,src), a0 |
675 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
676 | + shrpw a2, a3, %sar, t0 |
677 | +1: stw,ma t0, 4(dstspc,dst) |
678 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) |
679 | +.Ldo3: |
680 | +1: ldw,ma 4(srcspc,src), a1 |
681 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
682 | + shrpw a3, a0, %sar, t0 |
683 | +1: stw,ma t0, 4(dstspc,dst) |
684 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) |
685 | +.Ldo2: |
686 | +1: ldw,ma 4(srcspc,src), a2 |
687 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
688 | + shrpw a0, a1, %sar, t0 |
689 | +1: stw,ma t0, 4(dstspc,dst) |
690 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) |
691 | +.Ldo1: |
692 | +1: ldw,ma 4(srcspc,src), a3 |
693 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
694 | + shrpw a1, a2, %sar, t0 |
695 | +1: stw,ma t0, 4(dstspc,dst) |
696 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) |
697 | + ldo -4(len),len |
698 | + cmpb,<> %r0,len,.Ldo4 |
699 | + nop |
700 | +.Ldo0: |
701 | + shrpw a2, a3, %sar, t0 |
702 | +1: stw,ma t0, 4(dstspc,dst) |
703 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) |
704 | + |
705 | +.Lcda_rdfault: |
706 | +.Lcda_finish: |
707 | + /* calculate new src, dst and len and jump to byte-copy loop */ |
708 | + sub dst,save_dst,t0 |
709 | + add save_src,t0,src |
710 | + b .Lbyte_loop |
711 | + sub save_len,t0,len |
712 | + |
713 | +.Lcase3: |
714 | +1: ldw,ma 4(srcspc,src), a0 |
715 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
716 | +1: ldw,ma 4(srcspc,src), a1 |
717 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
718 | + b .Ldo2 |
719 | + ldo 1(len),len |
720 | +.Lcase2: |
721 | +1: ldw,ma 4(srcspc,src), a1 |
722 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
723 | +1: ldw,ma 4(srcspc,src), a2 |
724 | + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) |
725 | + b .Ldo1 |
726 | + ldo 2(len),len |
727 | + |
728 | + |
729 | + /* fault exception fixup handlers: */ |
730 | +#ifdef CONFIG_64BIT |
731 | +.Lcopy16_fault: |
732 | +10: b .Lcopy_done |
733 | + std,ma t1,8(dstspc,dst) |
734 | + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) |
735 | +#endif |
736 | + |
737 | +.Lcopy8_fault: |
738 | +10: b .Lcopy_done |
739 | + stw,ma t1,4(dstspc,dst) |
740 | + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) |
741 | + |
742 | + .exit |
743 | +ENDPROC_CFI(pa_memcpy) |
744 | + .procend |
745 | + |
746 | .end |
747 | diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c |
748 | index f82ff10ed974..b3d47ec1d80a 100644 |
749 | --- a/arch/parisc/lib/memcpy.c |
750 | +++ b/arch/parisc/lib/memcpy.c |
751 | @@ -2,7 +2,7 @@ |
752 | * Optimized memory copy routines. |
753 | * |
754 | * Copyright (C) 2004 Randolph Chung <tausq@debian.org> |
755 | - * Copyright (C) 2013 Helge Deller <deller@gmx.de> |
756 | + * Copyright (C) 2013-2017 Helge Deller <deller@gmx.de> |
757 | * |
758 | * This program is free software; you can redistribute it and/or modify |
759 | * it under the terms of the GNU General Public License as published by |
760 | @@ -21,474 +21,21 @@ |
761 | * Portions derived from the GNU C Library |
762 | * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc. |
763 | * |
764 | - * Several strategies are tried to try to get the best performance for various |
765 | - * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using |
766 | - * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using |
767 | - * general registers. Unaligned copies are handled either by aligning the |
768 | - * destination and then using shift-and-write method, or in a few cases by |
769 | - * falling back to a byte-at-a-time copy. |
770 | - * |
771 | - * I chose to implement this in C because it is easier to maintain and debug, |
772 | - * and in my experiments it appears that the C code generated by gcc (3.3/3.4 |
773 | - * at the time of writing) is fairly optimal. Unfortunately some of the |
774 | - * semantics of the copy routine (exception handling) is difficult to express |
775 | - * in C, so we have to play some tricks to get it to work. |
776 | - * |
777 | - * All the loads and stores are done via explicit asm() code in order to use |
778 | - * the right space registers. |
779 | - * |
780 | - * Testing with various alignments and buffer sizes shows that this code is |
781 | - * often >10x faster than a simple byte-at-a-time copy, even for strangely |
782 | - * aligned operands. It is interesting to note that the glibc version |
783 | - * of memcpy (written in C) is actually quite fast already. This routine is |
784 | - * able to beat it by 30-40% for aligned copies because of the loop unrolling, |
785 | - * but in some cases the glibc version is still slightly faster. This lends |
786 | - * more credibility that gcc can generate very good code as long as we are |
787 | - * careful. |
788 | - * |
789 | - * TODO: |
790 | - * - cache prefetching needs more experimentation to get optimal settings |
791 | - * - try not to use the post-increment address modifiers; they create additional |
792 | - * interlocks |
793 | - * - replace byte-copy loops with stybs sequences |
794 | */ |
795 | |
796 | -#ifdef __KERNEL__ |
797 | #include <linux/module.h> |
798 | #include <linux/compiler.h> |
799 | #include <linux/uaccess.h> |
800 | -#define s_space "%%sr1" |
801 | -#define d_space "%%sr2" |
802 | -#else |
803 | -#include "memcpy.h" |
804 | -#define s_space "%%sr0" |
805 | -#define d_space "%%sr0" |
806 | -#define pa_memcpy new2_copy |
807 | -#endif |
808 | |
809 | DECLARE_PER_CPU(struct exception_data, exception_data); |
810 | |
811 | -#define preserve_branch(label) do { \ |
812 | - volatile int dummy = 0; \ |
813 | - /* The following branch is never taken, it's just here to */ \ |
814 | - /* prevent gcc from optimizing away our exception code. */ \ |
815 | - if (unlikely(dummy != dummy)) \ |
816 | - goto label; \ |
817 | -} while (0) |
818 | - |
819 | #define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3)) |
820 | #define get_kernel_space() (0) |
821 | |
822 | -#define MERGE(w0, sh_1, w1, sh_2) ({ \ |
823 | - unsigned int _r; \ |
824 | - asm volatile ( \ |
825 | - "mtsar %3\n" \ |
826 | - "shrpw %1, %2, %%sar, %0\n" \ |
827 | - : "=r"(_r) \ |
828 | - : "r"(w0), "r"(w1), "r"(sh_2) \ |
829 | - ); \ |
830 | - _r; \ |
831 | -}) |
832 | -#define THRESHOLD 16 |
833 | - |
834 | -#ifdef DEBUG_MEMCPY |
835 | -#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0) |
836 | -#else |
837 | -#define DPRINTF(fmt, args...) |
838 | -#endif |
839 | - |
840 | -#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \ |
841 | - __asm__ __volatile__ ( \ |
842 | - "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \ |
843 | - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ |
844 | - : _tt(_t), "+r"(_a) \ |
845 | - : \ |
846 | - : "r8") |
847 | - |
848 | -#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \ |
849 | - __asm__ __volatile__ ( \ |
850 | - "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \ |
851 | - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ |
852 | - : "+r"(_a) \ |
853 | - : _tt(_t) \ |
854 | - : "r8") |
855 | - |
856 | -#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e) |
857 | -#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e) |
858 | -#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e) |
859 | -#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e) |
860 | -#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e) |
861 | -#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e) |
862 | - |
863 | -#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \ |
864 | - __asm__ __volatile__ ( \ |
865 | - "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t" \ |
866 | - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ |
867 | - : _tt(_t) \ |
868 | - : "r"(_a) \ |
869 | - : "r8") |
870 | - |
871 | -#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \ |
872 | - __asm__ __volatile__ ( \ |
873 | - "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" \ |
874 | - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ |
875 | - : \ |
876 | - : _tt(_t), "r"(_a) \ |
877 | - : "r8") |
878 | - |
879 | -#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e) |
880 | -#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e) |
881 | - |
882 | -#ifdef CONFIG_PREFETCH |
883 | -static inline void prefetch_src(const void *addr) |
884 | -{ |
885 | - __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr)); |
886 | -} |
887 | - |
888 | -static inline void prefetch_dst(const void *addr) |
889 | -{ |
890 | - __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr)); |
891 | -} |
892 | -#else |
893 | -#define prefetch_src(addr) do { } while(0) |
894 | -#define prefetch_dst(addr) do { } while(0) |
895 | -#endif |
896 | - |
897 | -#define PA_MEMCPY_OK 0 |
898 | -#define PA_MEMCPY_LOAD_ERROR 1 |
899 | -#define PA_MEMCPY_STORE_ERROR 2 |
900 | - |
901 | -/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words |
902 | - * per loop. This code is derived from glibc. |
903 | - */ |
904 | -static noinline unsigned long copy_dstaligned(unsigned long dst, |
905 | - unsigned long src, unsigned long len) |
906 | -{ |
907 | - /* gcc complains that a2 and a3 may be uninitialized, but actually |
908 | - * they cannot be. Initialize a2/a3 to shut gcc up. |
909 | - */ |
910 | - register unsigned int a0, a1, a2 = 0, a3 = 0; |
911 | - int sh_1, sh_2; |
912 | - |
913 | - /* prefetch_src((const void *)src); */ |
914 | - |
915 | - /* Calculate how to shift a word read at the memory operation |
916 | - aligned srcp to make it aligned for copy. */ |
917 | - sh_1 = 8 * (src % sizeof(unsigned int)); |
918 | - sh_2 = 8 * sizeof(unsigned int) - sh_1; |
919 | - |
920 | - /* Make src aligned by rounding it down. */ |
921 | - src &= -sizeof(unsigned int); |
922 | - |
923 | - switch (len % 4) |
924 | - { |
925 | - case 2: |
926 | - /* a1 = ((unsigned int *) src)[0]; |
927 | - a2 = ((unsigned int *) src)[1]; */ |
928 | - ldw(s_space, 0, src, a1, cda_ldw_exc); |
929 | - ldw(s_space, 4, src, a2, cda_ldw_exc); |
930 | - src -= 1 * sizeof(unsigned int); |
931 | - dst -= 3 * sizeof(unsigned int); |
932 | - len += 2; |
933 | - goto do1; |
934 | - case 3: |
935 | - /* a0 = ((unsigned int *) src)[0]; |
936 | - a1 = ((unsigned int *) src)[1]; */ |
937 | - ldw(s_space, 0, src, a0, cda_ldw_exc); |
938 | - ldw(s_space, 4, src, a1, cda_ldw_exc); |
939 | - src -= 0 * sizeof(unsigned int); |
940 | - dst -= 2 * sizeof(unsigned int); |
941 | - len += 1; |
942 | - goto do2; |
943 | - case 0: |
944 | - if (len == 0) |
945 | - return PA_MEMCPY_OK; |
946 | - /* a3 = ((unsigned int *) src)[0]; |
947 | - a0 = ((unsigned int *) src)[1]; */ |
948 | - ldw(s_space, 0, src, a3, cda_ldw_exc); |
949 | - ldw(s_space, 4, src, a0, cda_ldw_exc); |
950 | - src -=-1 * sizeof(unsigned int); |
951 | - dst -= 1 * sizeof(unsigned int); |
952 | - len += 0; |
953 | - goto do3; |
954 | - case 1: |
955 | - /* a2 = ((unsigned int *) src)[0]; |
956 | - a3 = ((unsigned int *) src)[1]; */ |
957 | - ldw(s_space, 0, src, a2, cda_ldw_exc); |
958 | - ldw(s_space, 4, src, a3, cda_ldw_exc); |
959 | - src -=-2 * sizeof(unsigned int); |
960 | - dst -= 0 * sizeof(unsigned int); |
961 | - len -= 1; |
962 | - if (len == 0) |
963 | - goto do0; |
964 | - goto do4; /* No-op. */ |
965 | - } |
966 | - |
967 | - do |
968 | - { |
969 | - /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */ |
970 | -do4: |
971 | - /* a0 = ((unsigned int *) src)[0]; */ |
972 | - ldw(s_space, 0, src, a0, cda_ldw_exc); |
973 | - /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ |
974 | - stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); |
975 | -do3: |
976 | - /* a1 = ((unsigned int *) src)[1]; */ |
977 | - ldw(s_space, 4, src, a1, cda_ldw_exc); |
978 | - /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */ |
979 | - stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc); |
980 | -do2: |
981 | - /* a2 = ((unsigned int *) src)[2]; */ |
982 | - ldw(s_space, 8, src, a2, cda_ldw_exc); |
983 | - /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */ |
984 | - stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc); |
985 | -do1: |
986 | - /* a3 = ((unsigned int *) src)[3]; */ |
987 | - ldw(s_space, 12, src, a3, cda_ldw_exc); |
988 | - /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */ |
989 | - stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc); |
990 | - |
991 | - src += 4 * sizeof(unsigned int); |
992 | - dst += 4 * sizeof(unsigned int); |
993 | - len -= 4; |
994 | - } |
995 | - while (len != 0); |
996 | - |
997 | -do0: |
998 | - /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ |
999 | - stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); |
1000 | - |
1001 | - preserve_branch(handle_load_error); |
1002 | - preserve_branch(handle_store_error); |
1003 | - |
1004 | - return PA_MEMCPY_OK; |
1005 | - |
1006 | -handle_load_error: |
1007 | - __asm__ __volatile__ ("cda_ldw_exc:\n"); |
1008 | - return PA_MEMCPY_LOAD_ERROR; |
1009 | - |
1010 | -handle_store_error: |
1011 | - __asm__ __volatile__ ("cda_stw_exc:\n"); |
1012 | - return PA_MEMCPY_STORE_ERROR; |
1013 | -} |
1014 | - |
1015 | - |
1016 | -/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR. |
1017 | - * In case of an access fault the faulty address can be read from the per_cpu |
1018 | - * exception data struct. */ |
1019 | -static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp, |
1020 | - unsigned long len) |
1021 | -{ |
1022 | - register unsigned long src, dst, t1, t2, t3; |
1023 | - register unsigned char *pcs, *pcd; |
1024 | - register unsigned int *pws, *pwd; |
1025 | - register double *pds, *pdd; |
1026 | - unsigned long ret; |
1027 | - |
1028 | - src = (unsigned long)srcp; |
1029 | - dst = (unsigned long)dstp; |
1030 | - pcs = (unsigned char *)srcp; |
1031 | - pcd = (unsigned char *)dstp; |
1032 | - |
1033 | - /* prefetch_src((const void *)srcp); */ |
1034 | - |
1035 | - if (len < THRESHOLD) |
1036 | - goto byte_copy; |
1037 | - |
1038 | - /* Check alignment */ |
1039 | - t1 = (src ^ dst); |
1040 | - if (unlikely(t1 & (sizeof(double)-1))) |
1041 | - goto unaligned_copy; |
1042 | - |
1043 | - /* src and dst have same alignment. */ |
1044 | - |
1045 | - /* Copy bytes till we are double-aligned. */ |
1046 | - t2 = src & (sizeof(double) - 1); |
1047 | - if (unlikely(t2 != 0)) { |
1048 | - t2 = sizeof(double) - t2; |
1049 | - while (t2 && len) { |
1050 | - /* *pcd++ = *pcs++; */ |
1051 | - ldbma(s_space, pcs, t3, pmc_load_exc); |
1052 | - len--; |
1053 | - stbma(d_space, t3, pcd, pmc_store_exc); |
1054 | - t2--; |
1055 | - } |
1056 | - } |
1057 | - |
1058 | - pds = (double *)pcs; |
1059 | - pdd = (double *)pcd; |
1060 | - |
1061 | -#if 0 |
1062 | - /* Copy 8 doubles at a time */ |
1063 | - while (len >= 8*sizeof(double)) { |
1064 | - register double r1, r2, r3, r4, r5, r6, r7, r8; |
1065 | - /* prefetch_src((char *)pds + L1_CACHE_BYTES); */ |
1066 | - flddma(s_space, pds, r1, pmc_load_exc); |
1067 | - flddma(s_space, pds, r2, pmc_load_exc); |
1068 | - flddma(s_space, pds, r3, pmc_load_exc); |
1069 | - flddma(s_space, pds, r4, pmc_load_exc); |
1070 | - fstdma(d_space, r1, pdd, pmc_store_exc); |
1071 | - fstdma(d_space, r2, pdd, pmc_store_exc); |
1072 | - fstdma(d_space, r3, pdd, pmc_store_exc); |
1073 | - fstdma(d_space, r4, pdd, pmc_store_exc); |
1074 | - |
1075 | -#if 0 |
1076 | - if (L1_CACHE_BYTES <= 32) |
1077 | - prefetch_src((char *)pds + L1_CACHE_BYTES); |
1078 | -#endif |
1079 | - flddma(s_space, pds, r5, pmc_load_exc); |
1080 | - flddma(s_space, pds, r6, pmc_load_exc); |
1081 | - flddma(s_space, pds, r7, pmc_load_exc); |
1082 | - flddma(s_space, pds, r8, pmc_load_exc); |
1083 | - fstdma(d_space, r5, pdd, pmc_store_exc); |
1084 | - fstdma(d_space, r6, pdd, pmc_store_exc); |
1085 | - fstdma(d_space, r7, pdd, pmc_store_exc); |
1086 | - fstdma(d_space, r8, pdd, pmc_store_exc); |
1087 | - len -= 8*sizeof(double); |
1088 | - } |
1089 | -#endif |
1090 | - |
1091 | - pws = (unsigned int *)pds; |
1092 | - pwd = (unsigned int *)pdd; |
1093 | - |
1094 | -word_copy: |
1095 | - while (len >= 8*sizeof(unsigned int)) { |
1096 | - register unsigned int r1,r2,r3,r4,r5,r6,r7,r8; |
1097 | - /* prefetch_src((char *)pws + L1_CACHE_BYTES); */ |
1098 | - ldwma(s_space, pws, r1, pmc_load_exc); |
1099 | - ldwma(s_space, pws, r2, pmc_load_exc); |
1100 | - ldwma(s_space, pws, r3, pmc_load_exc); |
1101 | - ldwma(s_space, pws, r4, pmc_load_exc); |
1102 | - stwma(d_space, r1, pwd, pmc_store_exc); |
1103 | - stwma(d_space, r2, pwd, pmc_store_exc); |
1104 | - stwma(d_space, r3, pwd, pmc_store_exc); |
1105 | - stwma(d_space, r4, pwd, pmc_store_exc); |
1106 | - |
1107 | - ldwma(s_space, pws, r5, pmc_load_exc); |
1108 | - ldwma(s_space, pws, r6, pmc_load_exc); |
1109 | - ldwma(s_space, pws, r7, pmc_load_exc); |
1110 | - ldwma(s_space, pws, r8, pmc_load_exc); |
1111 | - stwma(d_space, r5, pwd, pmc_store_exc); |
1112 | - stwma(d_space, r6, pwd, pmc_store_exc); |
1113 | - stwma(d_space, r7, pwd, pmc_store_exc); |
1114 | - stwma(d_space, r8, pwd, pmc_store_exc); |
1115 | - len -= 8*sizeof(unsigned int); |
1116 | - } |
1117 | - |
1118 | - while (len >= 4*sizeof(unsigned int)) { |
1119 | - register unsigned int r1,r2,r3,r4; |
1120 | - ldwma(s_space, pws, r1, pmc_load_exc); |
1121 | - ldwma(s_space, pws, r2, pmc_load_exc); |
1122 | - ldwma(s_space, pws, r3, pmc_load_exc); |
1123 | - ldwma(s_space, pws, r4, pmc_load_exc); |
1124 | - stwma(d_space, r1, pwd, pmc_store_exc); |
1125 | - stwma(d_space, r2, pwd, pmc_store_exc); |
1126 | - stwma(d_space, r3, pwd, pmc_store_exc); |
1127 | - stwma(d_space, r4, pwd, pmc_store_exc); |
1128 | - len -= 4*sizeof(unsigned int); |
1129 | - } |
1130 | - |
1131 | - pcs = (unsigned char *)pws; |
1132 | - pcd = (unsigned char *)pwd; |
1133 | - |
1134 | -byte_copy: |
1135 | - while (len) { |
1136 | - /* *pcd++ = *pcs++; */ |
1137 | - ldbma(s_space, pcs, t3, pmc_load_exc); |
1138 | - stbma(d_space, t3, pcd, pmc_store_exc); |
1139 | - len--; |
1140 | - } |
1141 | - |
1142 | - return PA_MEMCPY_OK; |
1143 | - |
1144 | -unaligned_copy: |
1145 | - /* possibly we are aligned on a word, but not on a double... */ |
1146 | - if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) { |
1147 | - t2 = src & (sizeof(unsigned int) - 1); |
1148 | - |
1149 | - if (unlikely(t2 != 0)) { |
1150 | - t2 = sizeof(unsigned int) - t2; |
1151 | - while (t2) { |
1152 | - /* *pcd++ = *pcs++; */ |
1153 | - ldbma(s_space, pcs, t3, pmc_load_exc); |
1154 | - stbma(d_space, t3, pcd, pmc_store_exc); |
1155 | - len--; |
1156 | - t2--; |
1157 | - } |
1158 | - } |
1159 | - |
1160 | - pws = (unsigned int *)pcs; |
1161 | - pwd = (unsigned int *)pcd; |
1162 | - goto word_copy; |
1163 | - } |
1164 | - |
1165 | - /* Align the destination. */ |
1166 | - if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) { |
1167 | - t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1)); |
1168 | - while (t2) { |
1169 | - /* *pcd++ = *pcs++; */ |
1170 | - ldbma(s_space, pcs, t3, pmc_load_exc); |
1171 | - stbma(d_space, t3, pcd, pmc_store_exc); |
1172 | - len--; |
1173 | - t2--; |
1174 | - } |
1175 | - dst = (unsigned long)pcd; |
1176 | - src = (unsigned long)pcs; |
1177 | - } |
1178 | - |
1179 | - ret = copy_dstaligned(dst, src, len / sizeof(unsigned int)); |
1180 | - if (ret) |
1181 | - return ret; |
1182 | - |
1183 | - pcs += (len & -sizeof(unsigned int)); |
1184 | - pcd += (len & -sizeof(unsigned int)); |
1185 | - len %= sizeof(unsigned int); |
1186 | - |
1187 | - preserve_branch(handle_load_error); |
1188 | - preserve_branch(handle_store_error); |
1189 | - |
1190 | - goto byte_copy; |
1191 | - |
1192 | -handle_load_error: |
1193 | - __asm__ __volatile__ ("pmc_load_exc:\n"); |
1194 | - return PA_MEMCPY_LOAD_ERROR; |
1195 | - |
1196 | -handle_store_error: |
1197 | - __asm__ __volatile__ ("pmc_store_exc:\n"); |
1198 | - return PA_MEMCPY_STORE_ERROR; |
1199 | -} |
1200 | - |
1201 | - |
1202 | /* Returns 0 for success, otherwise, returns number of bytes not transferred. */ |
1203 | -static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) |
1204 | -{ |
1205 | - unsigned long ret, fault_addr, reference; |
1206 | - struct exception_data *d; |
1207 | - |
1208 | - ret = pa_memcpy_internal(dstp, srcp, len); |
1209 | - if (likely(ret == PA_MEMCPY_OK)) |
1210 | - return 0; |
1211 | - |
1212 | - /* if a load or store fault occured we can get the faulty addr */ |
1213 | - d = this_cpu_ptr(&exception_data); |
1214 | - fault_addr = d->fault_addr; |
1215 | - |
1216 | - /* error in load or store? */ |
1217 | - if (ret == PA_MEMCPY_LOAD_ERROR) |
1218 | - reference = (unsigned long) srcp; |
1219 | - else |
1220 | - reference = (unsigned long) dstp; |
1221 | +extern unsigned long pa_memcpy(void *dst, const void *src, |
1222 | + unsigned long len); |
1223 | |
1224 | - DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n", |
1225 | - ret, len, fault_addr, reference); |
1226 | - |
1227 | - if (fault_addr >= reference) |
1228 | - return len - (fault_addr - reference); |
1229 | - else |
1230 | - return len; |
1231 | -} |
1232 | - |
1233 | -#ifdef __KERNEL__ |
1234 | unsigned long __copy_to_user(void __user *dst, const void *src, |
1235 | unsigned long len) |
1236 | { |
1237 | @@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size) |
1238 | |
1239 | return __probe_kernel_read(dst, src, size); |
1240 | } |
1241 | - |
1242 | -#endif |
1243 | diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c |
1244 | index 1a0b4f63f0e9..040c48fc5391 100644 |
1245 | --- a/arch/parisc/mm/fault.c |
1246 | +++ b/arch/parisc/mm/fault.c |
1247 | @@ -149,6 +149,23 @@ int fixup_exception(struct pt_regs *regs) |
1248 | d->fault_space = regs->isr; |
1249 | d->fault_addr = regs->ior; |
1250 | |
1251 | + /* |
1252 | + * Fix up get_user() and put_user(). |
1253 | + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant |
1254 | + * bit in the relative address of the fixup routine to indicate |
1255 | + * that %r8 should be loaded with -EFAULT to report a userspace |
1256 | + * access error. |
1257 | + */ |
1258 | + if (fix->fixup & 1) { |
1259 | + regs->gr[8] = -EFAULT; |
1260 | + |
1261 | + /* zero target register for get_user() */ |
1262 | + if (parisc_acctyp(0, regs->iir) == VM_READ) { |
1263 | + int treg = regs->iir & 0x1f; |
1264 | + regs->gr[treg] = 0; |
1265 | + } |
1266 | + } |
1267 | + |
1268 | regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup; |
1269 | regs->iaoq[0] &= ~3; |
1270 | /* |
1271 | diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S |
1272 | index 779782f58324..9a53a06e5a3e 100644 |
1273 | --- a/arch/x86/lib/memcpy_64.S |
1274 | +++ b/arch/x86/lib/memcpy_64.S |
1275 | @@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) |
1276 | _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) |
1277 | _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) |
1278 | _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) |
1279 | - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) |
1280 | + _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) |
1281 | _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) |
1282 | _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) |
1283 | _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) |
1284 | diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c |
1285 | index 887e57182716..aed206475aa7 100644 |
1286 | --- a/arch/x86/mm/kaslr.c |
1287 | +++ b/arch/x86/mm/kaslr.c |
1288 | @@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; |
1289 | #if defined(CONFIG_X86_ESPFIX64) |
1290 | static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; |
1291 | #elif defined(CONFIG_EFI) |
1292 | -static const unsigned long vaddr_end = EFI_VA_START; |
1293 | +static const unsigned long vaddr_end = EFI_VA_END; |
1294 | #else |
1295 | static const unsigned long vaddr_end = __START_KERNEL_map; |
1296 | #endif |
1297 | @@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void) |
1298 | */ |
1299 | BUILD_BUG_ON(vaddr_start >= vaddr_end); |
1300 | BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && |
1301 | - vaddr_end >= EFI_VA_START); |
1302 | + vaddr_end >= EFI_VA_END); |
1303 | BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || |
1304 | IS_ENABLED(CONFIG_EFI)) && |
1305 | vaddr_end >= __START_KERNEL_map); |
1306 | diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c |
1307 | index f8960fca0827..9f21b0c5945d 100644 |
1308 | --- a/arch/x86/xen/setup.c |
1309 | +++ b/arch/x86/xen/setup.c |
1310 | @@ -713,10 +713,9 @@ static void __init xen_reserve_xen_mfnlist(void) |
1311 | size = PFN_PHYS(xen_start_info->nr_p2m_frames); |
1312 | } |
1313 | |
1314 | - if (!xen_is_e820_reserved(start, size)) { |
1315 | - memblock_reserve(start, size); |
1316 | + memblock_reserve(start, size); |
1317 | + if (!xen_is_e820_reserved(start, size)) |
1318 | return; |
1319 | - } |
1320 | |
1321 | #ifdef CONFIG_X86_32 |
1322 | /* |
1323 | @@ -727,6 +726,7 @@ static void __init xen_reserve_xen_mfnlist(void) |
1324 | BUG(); |
1325 | #else |
1326 | xen_relocate_p2m(); |
1327 | + memblock_free(start, size); |
1328 | #endif |
1329 | } |
1330 | |
1331 | diff --git a/block/bio.c b/block/bio.c |
1332 | index db85c5753a76..655c9016052a 100644 |
1333 | --- a/block/bio.c |
1334 | +++ b/block/bio.c |
1335 | @@ -372,10 +372,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs) |
1336 | bio_list_init(&punt); |
1337 | bio_list_init(&nopunt); |
1338 | |
1339 | - while ((bio = bio_list_pop(current->bio_list))) |
1340 | + while ((bio = bio_list_pop(¤t->bio_list[0]))) |
1341 | bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); |
1342 | + current->bio_list[0] = nopunt; |
1343 | |
1344 | - *current->bio_list = nopunt; |
1345 | + bio_list_init(&nopunt); |
1346 | + while ((bio = bio_list_pop(¤t->bio_list[1]))) |
1347 | + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); |
1348 | + current->bio_list[1] = nopunt; |
1349 | |
1350 | spin_lock(&bs->rescue_lock); |
1351 | bio_list_merge(&bs->rescue_list, &punt); |
1352 | @@ -462,7 +466,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
1353 | * we retry with the original gfp_flags. |
1354 | */ |
1355 | |
1356 | - if (current->bio_list && !bio_list_empty(current->bio_list)) |
1357 | + if (current->bio_list && |
1358 | + (!bio_list_empty(¤t->bio_list[0]) || |
1359 | + !bio_list_empty(¤t->bio_list[1]))) |
1360 | gfp_mask &= ~__GFP_DIRECT_RECLAIM; |
1361 | |
1362 | p = mempool_alloc(bs->bio_pool, gfp_mask); |
1363 | diff --git a/block/blk-core.c b/block/blk-core.c |
1364 | index 14d7c0740dc0..d1f2801ce836 100644 |
1365 | --- a/block/blk-core.c |
1366 | +++ b/block/blk-core.c |
1367 | @@ -1994,7 +1994,14 @@ generic_make_request_checks(struct bio *bio) |
1368 | */ |
1369 | blk_qc_t generic_make_request(struct bio *bio) |
1370 | { |
1371 | - struct bio_list bio_list_on_stack; |
1372 | + /* |
1373 | + * bio_list_on_stack[0] contains bios submitted by the current |
1374 | + * make_request_fn. |
1375 | + * bio_list_on_stack[1] contains bios that were submitted before |
1376 | + * the current make_request_fn, but that haven't been processed |
1377 | + * yet. |
1378 | + */ |
1379 | + struct bio_list bio_list_on_stack[2]; |
1380 | blk_qc_t ret = BLK_QC_T_NONE; |
1381 | |
1382 | if (!generic_make_request_checks(bio)) |
1383 | @@ -2011,7 +2018,7 @@ blk_qc_t generic_make_request(struct bio *bio) |
1384 | * should be added at the tail |
1385 | */ |
1386 | if (current->bio_list) { |
1387 | - bio_list_add(current->bio_list, bio); |
1388 | + bio_list_add(¤t->bio_list[0], bio); |
1389 | goto out; |
1390 | } |
1391 | |
1392 | @@ -2030,23 +2037,39 @@ blk_qc_t generic_make_request(struct bio *bio) |
1393 | * bio_list, and call into ->make_request() again. |
1394 | */ |
1395 | BUG_ON(bio->bi_next); |
1396 | - bio_list_init(&bio_list_on_stack); |
1397 | - current->bio_list = &bio_list_on_stack; |
1398 | + bio_list_init(&bio_list_on_stack[0]); |
1399 | + current->bio_list = bio_list_on_stack; |
1400 | do { |
1401 | struct request_queue *q = bdev_get_queue(bio->bi_bdev); |
1402 | |
1403 | if (likely(blk_queue_enter(q, false) == 0)) { |
1404 | + struct bio_list lower, same; |
1405 | + |
1406 | + /* Create a fresh bio_list for all subordinate requests */ |
1407 | + bio_list_on_stack[1] = bio_list_on_stack[0]; |
1408 | + bio_list_init(&bio_list_on_stack[0]); |
1409 | ret = q->make_request_fn(q, bio); |
1410 | |
1411 | blk_queue_exit(q); |
1412 | |
1413 | - bio = bio_list_pop(current->bio_list); |
1414 | + /* sort new bios into those for a lower level |
1415 | + * and those for the same level |
1416 | + */ |
1417 | + bio_list_init(&lower); |
1418 | + bio_list_init(&same); |
1419 | + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) |
1420 | + if (q == bdev_get_queue(bio->bi_bdev)) |
1421 | + bio_list_add(&same, bio); |
1422 | + else |
1423 | + bio_list_add(&lower, bio); |
1424 | + /* now assemble so we handle the lowest level first */ |
1425 | + bio_list_merge(&bio_list_on_stack[0], &lower); |
1426 | + bio_list_merge(&bio_list_on_stack[0], &same); |
1427 | + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); |
1428 | } else { |
1429 | - struct bio *bio_next = bio_list_pop(current->bio_list); |
1430 | - |
1431 | bio_io_error(bio); |
1432 | - bio = bio_next; |
1433 | } |
1434 | + bio = bio_list_pop(&bio_list_on_stack[0]); |
1435 | } while (bio); |
1436 | current->bio_list = NULL; /* deactivate */ |
1437 | |
1438 | diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile |
1439 | index 9ed087853dee..4c5678cfa9c4 100644 |
1440 | --- a/drivers/acpi/Makefile |
1441 | +++ b/drivers/acpi/Makefile |
1442 | @@ -2,7 +2,6 @@ |
1443 | # Makefile for the Linux ACPI interpreter |
1444 | # |
1445 | |
1446 | -ccflags-y := -Os |
1447 | ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT |
1448 | |
1449 | # |
1450 | diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c |
1451 | index b4c1a6a51da4..03250e1f1103 100644 |
1452 | --- a/drivers/acpi/acpi_platform.c |
1453 | +++ b/drivers/acpi/acpi_platform.c |
1454 | @@ -25,9 +25,11 @@ |
1455 | ACPI_MODULE_NAME("platform"); |
1456 | |
1457 | static const struct acpi_device_id forbidden_id_list[] = { |
1458 | - {"PNP0000", 0}, /* PIC */ |
1459 | - {"PNP0100", 0}, /* Timer */ |
1460 | - {"PNP0200", 0}, /* AT DMA Controller */ |
1461 | + {"PNP0000", 0}, /* PIC */ |
1462 | + {"PNP0100", 0}, /* Timer */ |
1463 | + {"PNP0200", 0}, /* AT DMA Controller */ |
1464 | + {"ACPI0009", 0}, /* IOxAPIC */ |
1465 | + {"ACPI000A", 0}, /* IOAPIC */ |
1466 | {"", 0}, |
1467 | }; |
1468 | |
1469 | diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c |
1470 | index b1254f885fed..b87d27859141 100644 |
1471 | --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c |
1472 | +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c |
1473 | @@ -1299,6 +1299,8 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, |
1474 | goto out_pm_put; |
1475 | } |
1476 | |
1477 | + mutex_lock(&gpu->lock); |
1478 | + |
1479 | fence = etnaviv_gpu_fence_alloc(gpu); |
1480 | if (!fence) { |
1481 | event_free(gpu, event); |
1482 | @@ -1306,8 +1308,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, |
1483 | goto out_pm_put; |
1484 | } |
1485 | |
1486 | - mutex_lock(&gpu->lock); |
1487 | - |
1488 | gpu->event[event].fence = fence; |
1489 | submit->fence = fence->seqno; |
1490 | gpu->active_fence = submit->fence; |
1491 | diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c |
1492 | index 3de5e6e21662..4ce04e06d9ac 100644 |
1493 | --- a/drivers/gpu/drm/radeon/radeon_ttm.c |
1494 | +++ b/drivers/gpu/drm/radeon/radeon_ttm.c |
1495 | @@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, |
1496 | rbo->placement.num_busy_placement = 0; |
1497 | for (i = 0; i < rbo->placement.num_placement; i++) { |
1498 | if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) { |
1499 | - if (rbo->placements[0].fpfn < fpfn) |
1500 | - rbo->placements[0].fpfn = fpfn; |
1501 | + if (rbo->placements[i].fpfn < fpfn) |
1502 | + rbo->placements[i].fpfn = fpfn; |
1503 | } else { |
1504 | rbo->placement.busy_placement = |
1505 | &rbo->placements[i]; |
1506 | diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c |
1507 | index 7aadce1f7e7a..c7e6c9839c9a 100644 |
1508 | --- a/drivers/gpu/drm/vc4/vc4_crtc.c |
1509 | +++ b/drivers/gpu/drm/vc4/vc4_crtc.c |
1510 | @@ -842,6 +842,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc, |
1511 | drm_atomic_helper_crtc_destroy_state(crtc, state); |
1512 | } |
1513 | |
1514 | +static void |
1515 | +vc4_crtc_reset(struct drm_crtc *crtc) |
1516 | +{ |
1517 | + if (crtc->state) |
1518 | + __drm_atomic_helper_crtc_destroy_state(crtc->state); |
1519 | + |
1520 | + crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); |
1521 | + if (crtc->state) |
1522 | + crtc->state->crtc = crtc; |
1523 | +} |
1524 | + |
1525 | static const struct drm_crtc_funcs vc4_crtc_funcs = { |
1526 | .set_config = drm_atomic_helper_set_config, |
1527 | .destroy = vc4_crtc_destroy, |
1528 | @@ -849,7 +860,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { |
1529 | .set_property = NULL, |
1530 | .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ |
1531 | .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ |
1532 | - .reset = drm_atomic_helper_crtc_reset, |
1533 | + .reset = vc4_crtc_reset, |
1534 | .atomic_duplicate_state = vc4_crtc_duplicate_state, |
1535 | .atomic_destroy_state = vc4_crtc_destroy_state, |
1536 | .gamma_set = vc4_crtc_gamma_set, |
1537 | diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c |
1538 | index 5e7a5648e708..0c535d0f3b95 100644 |
1539 | --- a/drivers/hid/wacom_sys.c |
1540 | +++ b/drivers/hid/wacom_sys.c |
1541 | @@ -2017,6 +2017,14 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) |
1542 | |
1543 | wacom_update_name(wacom, wireless ? " (WL)" : ""); |
1544 | |
1545 | + /* pen only Bamboo neither support touch nor pad */ |
1546 | + if ((features->type == BAMBOO_PEN) && |
1547 | + ((features->device_type & WACOM_DEVICETYPE_TOUCH) || |
1548 | + (features->device_type & WACOM_DEVICETYPE_PAD))) { |
1549 | + error = -ENODEV; |
1550 | + goto fail; |
1551 | + } |
1552 | + |
1553 | error = wacom_add_shared_data(hdev); |
1554 | if (error) |
1555 | goto fail; |
1556 | @@ -2064,14 +2072,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) |
1557 | goto fail_quirks; |
1558 | } |
1559 | |
1560 | - /* pen only Bamboo neither support touch nor pad */ |
1561 | - if ((features->type == BAMBOO_PEN) && |
1562 | - ((features->device_type & WACOM_DEVICETYPE_TOUCH) || |
1563 | - (features->device_type & WACOM_DEVICETYPE_PAD))) { |
1564 | - error = -ENODEV; |
1565 | - goto fail_quirks; |
1566 | - } |
1567 | - |
1568 | if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR) |
1569 | error = hid_hw_open(hdev); |
1570 | |
1571 | diff --git a/drivers/md/dm.c b/drivers/md/dm.c |
1572 | index 628ba001bb3c..e66f4040d84b 100644 |
1573 | --- a/drivers/md/dm.c |
1574 | +++ b/drivers/md/dm.c |
1575 | @@ -986,26 +986,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) |
1576 | struct dm_offload *o = container_of(cb, struct dm_offload, cb); |
1577 | struct bio_list list; |
1578 | struct bio *bio; |
1579 | + int i; |
1580 | |
1581 | INIT_LIST_HEAD(&o->cb.list); |
1582 | |
1583 | if (unlikely(!current->bio_list)) |
1584 | return; |
1585 | |
1586 | - list = *current->bio_list; |
1587 | - bio_list_init(current->bio_list); |
1588 | - |
1589 | - while ((bio = bio_list_pop(&list))) { |
1590 | - struct bio_set *bs = bio->bi_pool; |
1591 | - if (unlikely(!bs) || bs == fs_bio_set) { |
1592 | - bio_list_add(current->bio_list, bio); |
1593 | - continue; |
1594 | + for (i = 0; i < 2; i++) { |
1595 | + list = current->bio_list[i]; |
1596 | +		list = current->bio_list[i];
1597 | +		bio_list_init(&current->bio_list[i]);
1598 | + while ((bio = bio_list_pop(&list))) { |
1599 | + struct bio_set *bs = bio->bi_pool; |
1600 | + if (unlikely(!bs) || bs == fs_bio_set) { |
1601 | +				bio_list_add(&current->bio_list[i], bio);
1602 | + continue; |
1603 | + } |
1604 | + |
1605 | + spin_lock(&bs->rescue_lock); |
1606 | + bio_list_add(&bs->rescue_list, bio); |
1607 | + queue_work(bs->rescue_workqueue, &bs->rescue_work); |
1608 | + spin_unlock(&bs->rescue_lock); |
1609 | } |
1610 | - |
1611 | - spin_lock(&bs->rescue_lock); |
1612 | - bio_list_add(&bs->rescue_list, bio); |
1613 | - queue_work(bs->rescue_workqueue, &bs->rescue_work); |
1614 | - spin_unlock(&bs->rescue_lock); |
1615 | } |
1616 | } |
1617 | |
1618 | diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c |
1619 | index 55b5e0e77b17..4c4aab02e311 100644 |
1620 | --- a/drivers/md/raid10.c |
1621 | +++ b/drivers/md/raid10.c |
1622 | @@ -941,7 +941,8 @@ static void wait_barrier(struct r10conf *conf) |
1623 | !conf->barrier || |
1624 | (atomic_read(&conf->nr_pending) && |
1625 | current->bio_list && |
1626 | - !bio_list_empty(current->bio_list)), |
1627 | +			     (!bio_list_empty(&current->bio_list[0]) ||
1628 | +			      !bio_list_empty(&current->bio_list[1]))),
1629 | conf->resync_lock); |
1630 | conf->nr_waiting--; |
1631 | if (!conf->nr_waiting) |
1632 | diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c |
1633 | index 387ae1cbf698..a8b430ff117b 100644 |
1634 | --- a/drivers/mmc/host/sdhci-of-at91.c |
1635 | +++ b/drivers/mmc/host/sdhci-of-at91.c |
1636 | @@ -29,6 +29,8 @@ |
1637 | |
1638 | #include "sdhci-pltfm.h" |
1639 | |
1640 | +#define SDMMC_MC1R 0x204 |
1641 | +#define SDMMC_MC1R_DDR BIT(3) |
1642 | #define SDMMC_CACR 0x230 |
1643 | #define SDMMC_CACR_CAPWREN BIT(0) |
1644 | #define SDMMC_CACR_KEY (0x46 << 8) |
1645 | @@ -103,11 +105,18 @@ static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode, |
1646 | sdhci_set_power_noreg(host, mode, vdd); |
1647 | } |
1648 | |
1649 | +void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing) |
1650 | +{ |
1651 | + if (timing == MMC_TIMING_MMC_DDR52) |
1652 | + sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R); |
1653 | + sdhci_set_uhs_signaling(host, timing); |
1654 | +} |
1655 | + |
1656 | static const struct sdhci_ops sdhci_at91_sama5d2_ops = { |
1657 | .set_clock = sdhci_at91_set_clock, |
1658 | .set_bus_width = sdhci_set_bus_width, |
1659 | .reset = sdhci_reset, |
1660 | - .set_uhs_signaling = sdhci_set_uhs_signaling, |
1661 | + .set_uhs_signaling = sdhci_at91_set_uhs_signaling, |
1662 | .set_power = sdhci_at91_set_power, |
1663 | }; |
1664 | |
1665 | diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c |
1666 | index a983ba0349fb..7d275e72903a 100644 |
1667 | --- a/drivers/mmc/host/sdhci.c |
1668 | +++ b/drivers/mmc/host/sdhci.c |
1669 | @@ -1823,6 +1823,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) |
1670 | struct sdhci_host *host = mmc_priv(mmc); |
1671 | unsigned long flags; |
1672 | |
1673 | + if (enable) |
1674 | + pm_runtime_get_noresume(host->mmc->parent); |
1675 | + |
1676 | spin_lock_irqsave(&host->lock, flags); |
1677 | if (enable) |
1678 | host->flags |= SDHCI_SDIO_IRQ_ENABLED; |
1679 | @@ -1831,6 +1834,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) |
1680 | |
1681 | sdhci_enable_sdio_irq_nolock(host, enable); |
1682 | spin_unlock_irqrestore(&host->lock, flags); |
1683 | + |
1684 | + if (!enable) |
1685 | + pm_runtime_put_noidle(host->mmc->parent); |
1686 | } |
1687 | |
1688 | static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc, |
1689 | diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c |
1690 | index da10b484bd25..bde769b11e3b 100644 |
1691 | --- a/drivers/nvme/host/core.c |
1692 | +++ b/drivers/nvme/host/core.c |
1693 | @@ -2057,9 +2057,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) |
1694 | * Revalidating a dead namespace sets capacity to 0. This will |
1695 | * end buffered writers dirtying pages that can't be synced. |
1696 | */ |
1697 | - if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags)) |
1698 | - revalidate_disk(ns->disk); |
1699 | - |
1700 | + if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) |
1701 | + continue; |
1702 | + revalidate_disk(ns->disk); |
1703 | blk_set_queue_dying(ns->queue); |
1704 | blk_mq_abort_requeue_list(ns->queue); |
1705 | blk_mq_start_stopped_hw_queues(ns->queue, true); |
1706 | diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c |
1707 | index 5e52034ab010..8a9c186898c7 100644 |
1708 | --- a/drivers/nvme/host/pci.c |
1709 | +++ b/drivers/nvme/host/pci.c |
1710 | @@ -1983,8 +1983,10 @@ static void nvme_remove(struct pci_dev *pdev) |
1711 | |
1712 | pci_set_drvdata(pdev, NULL); |
1713 | |
1714 | - if (!pci_device_is_present(pdev)) |
1715 | + if (!pci_device_is_present(pdev)) { |
1716 | nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); |
1717 | + nvme_dev_disable(dev, false); |
1718 | + } |
1719 | |
1720 | flush_work(&dev->reset_work); |
1721 | nvme_uninit_ctrl(&dev->ctrl); |
1722 | diff --git a/drivers/pci/host/pcie-iproc-bcma.c b/drivers/pci/host/pcie-iproc-bcma.c |
1723 | index 8ce089043a27..46ca8ed031fe 100644 |
1724 | --- a/drivers/pci/host/pcie-iproc-bcma.c |
1725 | +++ b/drivers/pci/host/pcie-iproc-bcma.c |
1726 | @@ -44,8 +44,7 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev) |
1727 | { |
1728 | struct device *dev = &bdev->dev; |
1729 | struct iproc_pcie *pcie; |
1730 | - LIST_HEAD(res); |
1731 | - struct resource res_mem; |
1732 | + LIST_HEAD(resources); |
1733 | int ret; |
1734 | |
1735 | pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); |
1736 | @@ -62,22 +61,23 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev) |
1737 | |
1738 | pcie->base_addr = bdev->addr; |
1739 | |
1740 | - res_mem.start = bdev->addr_s[0]; |
1741 | - res_mem.end = bdev->addr_s[0] + SZ_128M - 1; |
1742 | - res_mem.name = "PCIe MEM space"; |
1743 | - res_mem.flags = IORESOURCE_MEM; |
1744 | - pci_add_resource(&res, &res_mem); |
1745 | + pcie->mem.start = bdev->addr_s[0]; |
1746 | + pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1; |
1747 | + pcie->mem.name = "PCIe MEM space"; |
1748 | + pcie->mem.flags = IORESOURCE_MEM; |
1749 | + pci_add_resource(&resources, &pcie->mem); |
1750 | |
1751 | pcie->map_irq = iproc_pcie_bcma_map_irq; |
1752 | |
1753 | - ret = iproc_pcie_setup(pcie, &res); |
1754 | - if (ret) |
1755 | + ret = iproc_pcie_setup(pcie, &resources); |
1756 | + if (ret) { |
1757 | dev_err(dev, "PCIe controller setup failed\n"); |
1758 | - |
1759 | - pci_free_resource_list(&res); |
1760 | + pci_free_resource_list(&resources); |
1761 | + return ret; |
1762 | + } |
1763 | |
1764 | bcma_set_drvdata(bdev, pcie); |
1765 | - return ret; |
1766 | + return 0; |
1767 | } |
1768 | |
1769 | static void iproc_pcie_bcma_remove(struct bcma_device *bdev) |
1770 | diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c |
1771 | index a3de087976b3..7dcaddcd2f16 100644 |
1772 | --- a/drivers/pci/host/pcie-iproc-platform.c |
1773 | +++ b/drivers/pci/host/pcie-iproc-platform.c |
1774 | @@ -46,7 +46,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) |
1775 | struct device_node *np = dev->of_node; |
1776 | struct resource reg; |
1777 | resource_size_t iobase = 0; |
1778 | - LIST_HEAD(res); |
1779 | + LIST_HEAD(resources); |
1780 | int ret; |
1781 | |
1782 | of_id = of_match_device(iproc_pcie_of_match_table, dev); |
1783 | @@ -108,23 +108,24 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) |
1784 | pcie->phy = NULL; |
1785 | } |
1786 | |
1787 | - ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase); |
1788 | + ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &resources, |
1789 | + &iobase); |
1790 | if (ret) { |
1791 | - dev_err(dev, |
1792 | - "unable to get PCI host bridge resources\n"); |
1793 | + dev_err(dev, "unable to get PCI host bridge resources\n"); |
1794 | return ret; |
1795 | } |
1796 | |
1797 | pcie->map_irq = of_irq_parse_and_map_pci; |
1798 | |
1799 | - ret = iproc_pcie_setup(pcie, &res); |
1800 | - if (ret) |
1801 | + ret = iproc_pcie_setup(pcie, &resources); |
1802 | + if (ret) { |
1803 | dev_err(dev, "PCIe controller setup failed\n"); |
1804 | - |
1805 | - pci_free_resource_list(&res); |
1806 | + pci_free_resource_list(&resources); |
1807 | + return ret; |
1808 | + } |
1809 | |
1810 | platform_set_drvdata(pdev, pcie); |
1811 | - return ret; |
1812 | + return 0; |
1813 | } |
1814 | |
1815 | static int iproc_pcie_pltfm_remove(struct platform_device *pdev) |
1816 | diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h |
1817 | index e84d93c53c7b..fa4226742bcd 100644 |
1818 | --- a/drivers/pci/host/pcie-iproc.h |
1819 | +++ b/drivers/pci/host/pcie-iproc.h |
1820 | @@ -68,6 +68,7 @@ struct iproc_pcie { |
1821 | #ifdef CONFIG_ARM |
1822 | struct pci_sys_data sysdata; |
1823 | #endif |
1824 | + struct resource mem; |
1825 | struct pci_bus *root_bus; |
1826 | struct phy *phy; |
1827 | int (*map_irq)(const struct pci_dev *, u8, u8); |
1828 | diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c |
1829 | index 7bb20684e9fa..d3145799b92f 100644 |
1830 | --- a/drivers/scsi/device_handler/scsi_dh_alua.c |
1831 | +++ b/drivers/scsi/device_handler/scsi_dh_alua.c |
1832 | @@ -113,7 +113,7 @@ struct alua_queue_data { |
1833 | #define ALUA_POLICY_SWITCH_ALL 1 |
1834 | |
1835 | static void alua_rtpg_work(struct work_struct *work); |
1836 | -static void alua_rtpg_queue(struct alua_port_group *pg, |
1837 | +static bool alua_rtpg_queue(struct alua_port_group *pg, |
1838 | struct scsi_device *sdev, |
1839 | struct alua_queue_data *qdata, bool force); |
1840 | static void alua_check(struct scsi_device *sdev, bool force); |
1841 | @@ -862,7 +862,13 @@ static void alua_rtpg_work(struct work_struct *work) |
1842 | kref_put(&pg->kref, release_port_group); |
1843 | } |
1844 | |
1845 | -static void alua_rtpg_queue(struct alua_port_group *pg, |
1846 | +/** |
1847 | + * alua_rtpg_queue() - cause RTPG to be submitted asynchronously |
1848 | + * |
1849 | + * Returns true if and only if alua_rtpg_work() will be called asynchronously. |
1850 | + * That function is responsible for calling @qdata->fn(). |
1851 | + */ |
1852 | +static bool alua_rtpg_queue(struct alua_port_group *pg, |
1853 | struct scsi_device *sdev, |
1854 | struct alua_queue_data *qdata, bool force) |
1855 | { |
1856 | @@ -870,8 +876,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg, |
1857 | unsigned long flags; |
1858 | struct workqueue_struct *alua_wq = kaluad_wq; |
1859 | |
1860 | - if (!pg) |
1861 | - return; |
1862 | + if (!pg || scsi_device_get(sdev)) |
1863 | + return false; |
1864 | |
1865 | spin_lock_irqsave(&pg->lock, flags); |
1866 | if (qdata) { |
1867 | @@ -884,14 +890,12 @@ static void alua_rtpg_queue(struct alua_port_group *pg, |
1868 | pg->flags |= ALUA_PG_RUN_RTPG; |
1869 | kref_get(&pg->kref); |
1870 | pg->rtpg_sdev = sdev; |
1871 | - scsi_device_get(sdev); |
1872 | start_queue = 1; |
1873 | } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { |
1874 | pg->flags |= ALUA_PG_RUN_RTPG; |
1875 | /* Do not queue if the worker is already running */ |
1876 | if (!(pg->flags & ALUA_PG_RUNNING)) { |
1877 | kref_get(&pg->kref); |
1878 | - sdev = NULL; |
1879 | start_queue = 1; |
1880 | } |
1881 | } |
1882 | @@ -900,13 +904,17 @@ static void alua_rtpg_queue(struct alua_port_group *pg, |
1883 | alua_wq = kaluad_sync_wq; |
1884 | spin_unlock_irqrestore(&pg->lock, flags); |
1885 | |
1886 | - if (start_queue && |
1887 | - !queue_delayed_work(alua_wq, &pg->rtpg_work, |
1888 | - msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) { |
1889 | - if (sdev) |
1890 | - scsi_device_put(sdev); |
1891 | - kref_put(&pg->kref, release_port_group); |
1892 | + if (start_queue) { |
1893 | + if (queue_delayed_work(alua_wq, &pg->rtpg_work, |
1894 | + msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) |
1895 | + sdev = NULL; |
1896 | + else |
1897 | + kref_put(&pg->kref, release_port_group); |
1898 | } |
1899 | + if (sdev) |
1900 | + scsi_device_put(sdev); |
1901 | + |
1902 | + return true; |
1903 | } |
1904 | |
1905 | /* |
1906 | @@ -1007,11 +1015,13 @@ static int alua_activate(struct scsi_device *sdev, |
1907 | mutex_unlock(&h->init_mutex); |
1908 | goto out; |
1909 | } |
1910 | - fn = NULL; |
1911 | rcu_read_unlock(); |
1912 | mutex_unlock(&h->init_mutex); |
1913 | |
1914 | - alua_rtpg_queue(pg, sdev, qdata, true); |
1915 | + if (alua_rtpg_queue(pg, sdev, qdata, true)) |
1916 | + fn = NULL; |
1917 | + else |
1918 | + err = SCSI_DH_DEV_OFFLINED; |
1919 | kref_put(&pg->kref, release_port_group); |
1920 | out: |
1921 | if (fn) |
1922 | diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c |
1923 | index 763f012fdeca..87f5e694dbed 100644 |
1924 | --- a/drivers/scsi/libsas/sas_ata.c |
1925 | +++ b/drivers/scsi/libsas/sas_ata.c |
1926 | @@ -221,7 +221,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) |
1927 | task->num_scatter = qc->n_elem; |
1928 | } else { |
1929 | for_each_sg(qc->sg, sg, qc->n_elem, si) |
1930 | - xfer += sg->length; |
1931 | + xfer += sg_dma_len(sg); |
1932 | |
1933 | task->total_xfer_len = xfer; |
1934 | task->num_scatter = si; |
1935 | diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c |
1936 | index fe7469c901f7..ad33238cef17 100644 |
1937 | --- a/drivers/scsi/qla2xxx/qla_attr.c |
1938 | +++ b/drivers/scsi/qla2xxx/qla_attr.c |
1939 | @@ -2153,8 +2153,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) |
1940 | "Timer for the VP[%d] has stopped\n", vha->vp_idx); |
1941 | } |
1942 | |
1943 | - BUG_ON(atomic_read(&vha->vref_count)); |
1944 | - |
1945 | qla2x00_free_fcports(vha); |
1946 | |
1947 | mutex_lock(&ha->vport_lock); |
1948 | diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h |
1949 | index 73b12e41d992..8e63a7b90277 100644 |
1950 | --- a/drivers/scsi/qla2xxx/qla_def.h |
1951 | +++ b/drivers/scsi/qla2xxx/qla_def.h |
1952 | @@ -3742,6 +3742,7 @@ typedef struct scsi_qla_host { |
1953 | struct qla8044_reset_template reset_tmplt; |
1954 | struct qla_tgt_counters tgt_counters; |
1955 | uint16_t bbcr; |
1956 | + wait_queue_head_t vref_waitq; |
1957 | } scsi_qla_host_t; |
1958 | |
1959 | struct qla27xx_image_status { |
1960 | @@ -3780,6 +3781,7 @@ struct qla_tgt_vp_map { |
1961 | mb(); \ |
1962 | if (__vha->flags.delete_progress) { \ |
1963 | atomic_dec(&__vha->vref_count); \ |
1964 | + wake_up(&__vha->vref_waitq); \ |
1965 | __bail = 1; \ |
1966 | } else { \ |
1967 | __bail = 0; \ |
1968 | @@ -3788,6 +3790,7 @@ struct qla_tgt_vp_map { |
1969 | |
1970 | #define QLA_VHA_MARK_NOT_BUSY(__vha) do { \ |
1971 | atomic_dec(&__vha->vref_count); \ |
1972 | + wake_up(&__vha->vref_waitq); \ |
1973 | } while (0) |
1974 | |
1975 | /* |
1976 | diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c |
1977 | index 5b09296b46a3..8f12f6baa6b8 100644 |
1978 | --- a/drivers/scsi/qla2xxx/qla_init.c |
1979 | +++ b/drivers/scsi/qla2xxx/qla_init.c |
1980 | @@ -4356,6 +4356,7 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha) |
1981 | } |
1982 | } |
1983 | atomic_dec(&vha->vref_count); |
1984 | + wake_up(&vha->vref_waitq); |
1985 | } |
1986 | spin_unlock_irqrestore(&ha->vport_slock, flags); |
1987 | } |
1988 | diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c |
1989 | index cf7ba52bae66..3dfb54abc874 100644 |
1990 | --- a/drivers/scsi/qla2xxx/qla_mid.c |
1991 | +++ b/drivers/scsi/qla2xxx/qla_mid.c |
1992 | @@ -74,13 +74,14 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) |
1993 | * ensures no active vp_list traversal while the vport is removed |
1994 | * from the queue) |
1995 | */ |
1996 | - spin_lock_irqsave(&ha->vport_slock, flags); |
1997 | - while (atomic_read(&vha->vref_count)) { |
1998 | - spin_unlock_irqrestore(&ha->vport_slock, flags); |
1999 | - |
2000 | - msleep(500); |
2001 | + wait_event_timeout(vha->vref_waitq, atomic_read(&vha->vref_count), |
2002 | + 10*HZ); |
2003 | |
2004 | - spin_lock_irqsave(&ha->vport_slock, flags); |
2005 | + spin_lock_irqsave(&ha->vport_slock, flags); |
2006 | + if (atomic_read(&vha->vref_count)) { |
2007 | + ql_dbg(ql_dbg_vport, vha, 0xfffa, |
2008 | + "vha->vref_count=%u timeout\n", vha->vref_count.counter); |
2009 | + vha->vref_count = (atomic_t)ATOMIC_INIT(0); |
2010 | } |
2011 | list_del(&vha->list); |
2012 | qlt_update_vp_map(vha, RESET_VP_IDX); |
2013 | @@ -269,6 +270,7 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) |
2014 | |
2015 | spin_lock_irqsave(&ha->vport_slock, flags); |
2016 | atomic_dec(&vha->vref_count); |
2017 | + wake_up(&vha->vref_waitq); |
2018 | } |
2019 | i++; |
2020 | } |
2021 | diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c |
2022 | index bea819e5336d..4f361d8d84be 100644 |
2023 | --- a/drivers/scsi/qla2xxx/qla_os.c |
2024 | +++ b/drivers/scsi/qla2xxx/qla_os.c |
2025 | @@ -4045,6 +4045,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, |
2026 | |
2027 | spin_lock_init(&vha->work_lock); |
2028 | spin_lock_init(&vha->cmd_list_lock); |
2029 | + init_waitqueue_head(&vha->vref_waitq); |
2030 | |
2031 | sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no); |
2032 | ql_dbg(ql_dbg_init, vha, 0x0041, |
2033 | diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c |
2034 | index 121de0aaa6ad..f753df25ba34 100644 |
2035 | --- a/drivers/scsi/sg.c |
2036 | +++ b/drivers/scsi/sg.c |
2037 | @@ -998,6 +998,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) |
2038 | result = get_user(val, ip); |
2039 | if (result) |
2040 | return result; |
2041 | + if (val > SG_MAX_CDB_SIZE) |
2042 | + return -ENOMEM; |
2043 | sfp->next_cmd_len = (val > 0) ? val : 0; |
2044 | return 0; |
2045 | case SG_GET_VERSION_NUM: |
2046 | diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c |
2047 | index fabbe76203bb..4d079cdaa7a3 100644 |
2048 | --- a/drivers/tty/serial/atmel_serial.c |
2049 | +++ b/drivers/tty/serial/atmel_serial.c |
2050 | @@ -1938,6 +1938,11 @@ static void atmel_flush_buffer(struct uart_port *port) |
2051 | atmel_uart_writel(port, ATMEL_PDC_TCR, 0); |
2052 | atmel_port->pdc_tx.ofs = 0; |
2053 | } |
2054 | + /* |
2055 | + * in uart_flush_buffer(), the xmit circular buffer has just |
2056 | + * been cleared, so we have to reset tx_len accordingly. |
2057 | + */ |
2058 | + atmel_port->tx_len = 0; |
2059 | } |
2060 | |
2061 | /* |
2062 | @@ -2471,6 +2476,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count) |
2063 | pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN; |
2064 | atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS); |
2065 | |
2066 | + /* Make sure that tx path is actually able to send characters */ |
2067 | + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN); |
2068 | + |
2069 | uart_console_write(port, s, count, atmel_console_putchar); |
2070 | |
2071 | /* |
2072 | diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c |
2073 | index 770454e0dfa3..07390f8c3681 100644 |
2074 | --- a/drivers/tty/serial/mxs-auart.c |
2075 | +++ b/drivers/tty/serial/mxs-auart.c |
2076 | @@ -1085,7 +1085,7 @@ static void mxs_auart_settermios(struct uart_port *u, |
2077 | AUART_LINECTRL_BAUD_DIV_MAX); |
2078 | baud_max = u->uartclk * 32 / AUART_LINECTRL_BAUD_DIV_MIN; |
2079 | baud = uart_get_baud_rate(u, termios, old, baud_min, baud_max); |
2080 | - div = u->uartclk * 32 / baud; |
2081 | + div = DIV_ROUND_CLOSEST(u->uartclk * 32, baud); |
2082 | } |
2083 | |
2084 | ctrl |= AUART_LINECTRL_BAUD_DIVFRAC(div & 0x3F); |
2085 | diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c |
2086 | index 479e223f9cff..f029aad67183 100644 |
2087 | --- a/drivers/usb/core/hcd.c |
2088 | +++ b/drivers/usb/core/hcd.c |
2089 | @@ -520,8 +520,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) |
2090 | */ |
2091 | tbuf_size = max_t(u16, sizeof(struct usb_hub_descriptor), wLength); |
2092 | tbuf = kzalloc(tbuf_size, GFP_KERNEL); |
2093 | - if (!tbuf) |
2094 | - return -ENOMEM; |
2095 | + if (!tbuf) { |
2096 | + status = -ENOMEM; |
2097 | + goto err_alloc; |
2098 | + } |
2099 | |
2100 | bufp = tbuf; |
2101 | |
2102 | @@ -734,6 +736,7 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) |
2103 | } |
2104 | |
2105 | kfree(tbuf); |
2106 | + err_alloc: |
2107 | |
2108 | /* any errors get returned through the urb completion */ |
2109 | spin_lock_irq(&hcd_root_hub_lock); |
2110 | diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c |
2111 | index 1536aeb0abab..4e894d301c88 100644 |
2112 | --- a/fs/nfs/nfs4proc.c |
2113 | +++ b/fs/nfs/nfs4proc.c |
2114 | @@ -2532,17 +2532,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) |
2115 | } |
2116 | |
2117 | nfs4_stateid_copy(&stateid, &delegation->stateid); |
2118 | - if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { |
2119 | + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) || |
2120 | + !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, |
2121 | + &delegation->flags)) { |
2122 | rcu_read_unlock(); |
2123 | nfs_finish_clear_delegation_stateid(state, &stateid); |
2124 | return; |
2125 | } |
2126 | |
2127 | - if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) { |
2128 | - rcu_read_unlock(); |
2129 | - return; |
2130 | - } |
2131 | - |
2132 | cred = get_rpccred(delegation->cred); |
2133 | rcu_read_unlock(); |
2134 | status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); |
2135 | diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c |
2136 | index 010aff5c5a79..536009e50387 100644 |
2137 | --- a/fs/nfsd/nfsproc.c |
2138 | +++ b/fs/nfsd/nfsproc.c |
2139 | @@ -790,6 +790,7 @@ nfserrno (int errno) |
2140 | { nfserr_serverfault, -ESERVERFAULT }, |
2141 | { nfserr_serverfault, -ENFILE }, |
2142 | { nfserr_io, -EUCLEAN }, |
2143 | + { nfserr_perm, -ENOKEY }, |
2144 | }; |
2145 | int i; |
2146 | |
2147 | diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c |
2148 | index d346d42c54d1..33db69be4832 100644 |
2149 | --- a/fs/xfs/libxfs/xfs_ag_resv.c |
2150 | +++ b/fs/xfs/libxfs/xfs_ag_resv.c |
2151 | @@ -39,6 +39,7 @@ |
2152 | #include "xfs_rmap_btree.h" |
2153 | #include "xfs_btree.h" |
2154 | #include "xfs_refcount_btree.h" |
2155 | +#include "xfs_ialloc_btree.h" |
2156 | |
2157 | /* |
2158 | * Per-AG Block Reservations |
2159 | @@ -200,22 +201,30 @@ __xfs_ag_resv_init( |
2160 | struct xfs_mount *mp = pag->pag_mount; |
2161 | struct xfs_ag_resv *resv; |
2162 | int error; |
2163 | + xfs_extlen_t reserved; |
2164 | |
2165 | - resv = xfs_perag_resv(pag, type); |
2166 | if (used > ask) |
2167 | ask = used; |
2168 | - resv->ar_asked = ask; |
2169 | - resv->ar_reserved = resv->ar_orig_reserved = ask - used; |
2170 | - mp->m_ag_max_usable -= ask; |
2171 | + reserved = ask - used; |
2172 | |
2173 | - trace_xfs_ag_resv_init(pag, type, ask); |
2174 | - |
2175 | - error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true); |
2176 | - if (error) |
2177 | + error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); |
2178 | + if (error) { |
2179 | trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, |
2180 | error, _RET_IP_); |
2181 | + xfs_warn(mp, |
2182 | +"Per-AG reservation for AG %u failed. Filesystem may run out of space.", |
2183 | + pag->pag_agno); |
2184 | + return error; |
2185 | + } |
2186 | |
2187 | - return error; |
2188 | + mp->m_ag_max_usable -= ask; |
2189 | + |
2190 | + resv = xfs_perag_resv(pag, type); |
2191 | + resv->ar_asked = ask; |
2192 | + resv->ar_reserved = resv->ar_orig_reserved = reserved; |
2193 | + |
2194 | + trace_xfs_ag_resv_init(pag, type, ask); |
2195 | + return 0; |
2196 | } |
2197 | |
2198 | /* Create a per-AG block reservation. */ |
2199 | @@ -223,6 +232,8 @@ int |
2200 | xfs_ag_resv_init( |
2201 | struct xfs_perag *pag) |
2202 | { |
2203 | + struct xfs_mount *mp = pag->pag_mount; |
2204 | + xfs_agnumber_t agno = pag->pag_agno; |
2205 | xfs_extlen_t ask; |
2206 | xfs_extlen_t used; |
2207 | int error = 0; |
2208 | @@ -231,23 +242,45 @@ xfs_ag_resv_init( |
2209 | if (pag->pag_meta_resv.ar_asked == 0) { |
2210 | ask = used = 0; |
2211 | |
2212 | - error = xfs_refcountbt_calc_reserves(pag->pag_mount, |
2213 | - pag->pag_agno, &ask, &used); |
2214 | + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); |
2215 | if (error) |
2216 | goto out; |
2217 | |
2218 | - error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, |
2219 | - ask, used); |
2220 | + error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); |
2221 | if (error) |
2222 | goto out; |
2223 | + |
2224 | + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, |
2225 | + ask, used); |
2226 | + if (error) { |
2227 | + /* |
2228 | + * Because we didn't have per-AG reservations when the |
2229 | + * finobt feature was added we might not be able to |
2230 | + * reserve all needed blocks. Warn and fall back to the |
2231 | + * old and potentially buggy code in that case, but |
2232 | + * ensure we do have the reservation for the refcountbt. |
2233 | + */ |
2234 | + ask = used = 0; |
2235 | + |
2236 | + mp->m_inotbt_nores = true; |
2237 | + |
2238 | + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, |
2239 | + &used); |
2240 | + if (error) |
2241 | + goto out; |
2242 | + |
2243 | + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, |
2244 | + ask, used); |
2245 | + if (error) |
2246 | + goto out; |
2247 | + } |
2248 | } |
2249 | |
2250 | /* Create the AGFL metadata reservation */ |
2251 | if (pag->pag_agfl_resv.ar_asked == 0) { |
2252 | ask = used = 0; |
2253 | |
2254 | - error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, |
2255 | - &ask, &used); |
2256 | + error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); |
2257 | if (error) |
2258 | goto out; |
2259 | |
2260 | @@ -256,9 +289,16 @@ xfs_ag_resv_init( |
2261 | goto out; |
2262 | } |
2263 | |
2264 | +#ifdef DEBUG |
2265 | + /* need to read in the AGF for the ASSERT below to work */ |
2266 | + error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); |
2267 | + if (error) |
2268 | + return error; |
2269 | + |
2270 | ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + |
2271 | xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= |
2272 | pag->pagf_freeblks + pag->pagf_flcount); |
2273 | +#endif |
2274 | out: |
2275 | return error; |
2276 | } |
2277 | diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c |
2278 | index f52fd63fce19..5a508b011e27 100644 |
2279 | --- a/fs/xfs/libxfs/xfs_bmap.c |
2280 | +++ b/fs/xfs/libxfs/xfs_bmap.c |
2281 | @@ -769,8 +769,8 @@ xfs_bmap_extents_to_btree( |
2282 | args.type = XFS_ALLOCTYPE_START_BNO; |
2283 | args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); |
2284 | } else if (dfops->dop_low) { |
2285 | -try_another_ag: |
2286 | args.type = XFS_ALLOCTYPE_START_BNO; |
2287 | +try_another_ag: |
2288 | args.fsbno = *firstblock; |
2289 | } else { |
2290 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
2291 | @@ -796,17 +796,19 @@ xfs_bmap_extents_to_btree( |
2292 | if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && |
2293 | args.fsbno == NULLFSBLOCK && |
2294 | args.type == XFS_ALLOCTYPE_NEAR_BNO) { |
2295 | - dfops->dop_low = true; |
2296 | + args.type = XFS_ALLOCTYPE_FIRST_AG; |
2297 | goto try_another_ag; |
2298 | } |
2299 | + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { |
2300 | + xfs_iroot_realloc(ip, -1, whichfork); |
2301 | + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
2302 | + return -ENOSPC; |
2303 | + } |
2304 | /* |
2305 | * Allocation can't fail, the space was reserved. |
2306 | */ |
2307 | - ASSERT(args.fsbno != NULLFSBLOCK); |
2308 | ASSERT(*firstblock == NULLFSBLOCK || |
2309 | - args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || |
2310 | - (dfops->dop_low && |
2311 | - args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); |
2312 | + args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); |
2313 | *firstblock = cur->bc_private.b.firstblock = args.fsbno; |
2314 | cur->bc_private.b.allocated++; |
2315 | ip->i_d.di_nblocks++; |
2316 | @@ -1278,7 +1280,6 @@ xfs_bmap_read_extents( |
2317 | /* REFERENCED */ |
2318 | xfs_extnum_t room; /* number of entries there's room for */ |
2319 | |
2320 | - bno = NULLFSBLOCK; |
2321 | mp = ip->i_mount; |
2322 | ifp = XFS_IFORK_PTR(ip, whichfork); |
2323 | exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : |
2324 | @@ -1291,9 +1292,7 @@ xfs_bmap_read_extents( |
2325 | ASSERT(level > 0); |
2326 | pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); |
2327 | bno = be64_to_cpu(*pp); |
2328 | - ASSERT(bno != NULLFSBLOCK); |
2329 | - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); |
2330 | - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); |
2331 | + |
2332 | /* |
2333 | * Go down the tree until leaf level is reached, following the first |
2334 | * pointer (leftmost) at each level. |
2335 | @@ -1955,6 +1954,7 @@ xfs_bmap_add_extent_delay_real( |
2336 | */ |
2337 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
2338 | xfs_bmbt_set_startblock(ep, new->br_startblock); |
2339 | + xfs_bmbt_set_state(ep, new->br_state); |
2340 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
2341 | |
2342 | (*nextents)++; |
2343 | @@ -2293,6 +2293,7 @@ STATIC int /* error */ |
2344 | xfs_bmap_add_extent_unwritten_real( |
2345 | struct xfs_trans *tp, |
2346 | xfs_inode_t *ip, /* incore inode pointer */ |
2347 | + int whichfork, |
2348 | xfs_extnum_t *idx, /* extent number to update/insert */ |
2349 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
2350 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
2351 | @@ -2312,12 +2313,14 @@ xfs_bmap_add_extent_unwritten_real( |
2352 | /* left is 0, right is 1, prev is 2 */ |
2353 | int rval=0; /* return value (logging flags) */ |
2354 | int state = 0;/* state bits, accessed thru macros */ |
2355 | - struct xfs_mount *mp = tp->t_mountp; |
2356 | + struct xfs_mount *mp = ip->i_mount; |
2357 | |
2358 | *logflagsp = 0; |
2359 | |
2360 | cur = *curp; |
2361 | - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); |
2362 | + ifp = XFS_IFORK_PTR(ip, whichfork); |
2363 | + if (whichfork == XFS_COW_FORK) |
2364 | + state |= BMAP_COWFORK; |
2365 | |
2366 | ASSERT(*idx >= 0); |
2367 | ASSERT(*idx <= xfs_iext_count(ifp)); |
2368 | @@ -2376,7 +2379,7 @@ xfs_bmap_add_extent_unwritten_real( |
2369 | * Don't set contiguous if the combined extent would be too large. |
2370 | * Also check for all-three-contiguous being too large. |
2371 | */ |
2372 | - if (*idx < xfs_iext_count(&ip->i_df) - 1) { |
2373 | + if (*idx < xfs_iext_count(ifp) - 1) { |
2374 | state |= BMAP_RIGHT_VALID; |
2375 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); |
2376 | if (isnullstartblock(RIGHT.br_startblock)) |
2377 | @@ -2416,7 +2419,8 @@ xfs_bmap_add_extent_unwritten_real( |
2378 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2379 | |
2380 | xfs_iext_remove(ip, *idx + 1, 2, state); |
2381 | - ip->i_d.di_nextents -= 2; |
2382 | + XFS_IFORK_NEXT_SET(ip, whichfork, |
2383 | + XFS_IFORK_NEXTENTS(ip, whichfork) - 2); |
2384 | if (cur == NULL) |
2385 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
2386 | else { |
2387 | @@ -2459,7 +2463,8 @@ xfs_bmap_add_extent_unwritten_real( |
2388 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2389 | |
2390 | xfs_iext_remove(ip, *idx + 1, 1, state); |
2391 | - ip->i_d.di_nextents--; |
2392 | + XFS_IFORK_NEXT_SET(ip, whichfork, |
2393 | + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); |
2394 | if (cur == NULL) |
2395 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
2396 | else { |
2397 | @@ -2494,7 +2499,8 @@ xfs_bmap_add_extent_unwritten_real( |
2398 | xfs_bmbt_set_state(ep, newext); |
2399 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2400 | xfs_iext_remove(ip, *idx + 1, 1, state); |
2401 | - ip->i_d.di_nextents--; |
2402 | + XFS_IFORK_NEXT_SET(ip, whichfork, |
2403 | + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); |
2404 | if (cur == NULL) |
2405 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
2406 | else { |
2407 | @@ -2606,7 +2612,8 @@ xfs_bmap_add_extent_unwritten_real( |
2408 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2409 | |
2410 | xfs_iext_insert(ip, *idx, 1, new, state); |
2411 | - ip->i_d.di_nextents++; |
2412 | + XFS_IFORK_NEXT_SET(ip, whichfork, |
2413 | + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); |
2414 | if (cur == NULL) |
2415 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
2416 | else { |
2417 | @@ -2684,7 +2691,8 @@ xfs_bmap_add_extent_unwritten_real( |
2418 | ++*idx; |
2419 | xfs_iext_insert(ip, *idx, 1, new, state); |
2420 | |
2421 | - ip->i_d.di_nextents++; |
2422 | + XFS_IFORK_NEXT_SET(ip, whichfork, |
2423 | + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); |
2424 | if (cur == NULL) |
2425 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
2426 | else { |
2427 | @@ -2732,7 +2740,8 @@ xfs_bmap_add_extent_unwritten_real( |
2428 | ++*idx; |
2429 | xfs_iext_insert(ip, *idx, 2, &r[0], state); |
2430 | |
2431 | - ip->i_d.di_nextents += 2; |
2432 | + XFS_IFORK_NEXT_SET(ip, whichfork, |
2433 | + XFS_IFORK_NEXTENTS(ip, whichfork) + 2); |
2434 | if (cur == NULL) |
2435 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
2436 | else { |
2437 | @@ -2786,17 +2795,17 @@ xfs_bmap_add_extent_unwritten_real( |
2438 | } |
2439 | |
2440 | /* update reverse mappings */ |
2441 | - error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new); |
2442 | + error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new); |
2443 | if (error) |
2444 | goto done; |
2445 | |
2446 | /* convert to a btree if necessary */ |
2447 | - if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { |
2448 | + if (xfs_bmap_needs_btree(ip, whichfork)) { |
2449 | int tmp_logflags; /* partial log flag return val */ |
2450 | |
2451 | ASSERT(cur == NULL); |
2452 | error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, |
2453 | - 0, &tmp_logflags, XFS_DATA_FORK); |
2454 | + 0, &tmp_logflags, whichfork); |
2455 | *logflagsp |= tmp_logflags; |
2456 | if (error) |
2457 | goto done; |
2458 | @@ -2808,7 +2817,7 @@ xfs_bmap_add_extent_unwritten_real( |
2459 | *curp = cur; |
2460 | } |
2461 | |
2462 | - xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); |
2463 | + xfs_bmap_check_leaf_extents(*curp, ip, whichfork); |
2464 | done: |
2465 | *logflagsp |= rval; |
2466 | return error; |
2467 | @@ -2900,7 +2909,8 @@ xfs_bmap_add_extent_hole_delay( |
2468 | oldlen = startblockval(left.br_startblock) + |
2469 | startblockval(new->br_startblock) + |
2470 | startblockval(right.br_startblock); |
2471 | - newlen = xfs_bmap_worst_indlen(ip, temp); |
2472 | + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
2473 | + oldlen); |
2474 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), |
2475 | nullstartblock((int)newlen)); |
2476 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2477 | @@ -2921,7 +2931,8 @@ xfs_bmap_add_extent_hole_delay( |
2478 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); |
2479 | oldlen = startblockval(left.br_startblock) + |
2480 | startblockval(new->br_startblock); |
2481 | - newlen = xfs_bmap_worst_indlen(ip, temp); |
2482 | + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
2483 | + oldlen); |
2484 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), |
2485 | nullstartblock((int)newlen)); |
2486 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2487 | @@ -2937,7 +2948,8 @@ xfs_bmap_add_extent_hole_delay( |
2488 | temp = new->br_blockcount + right.br_blockcount; |
2489 | oldlen = startblockval(new->br_startblock) + |
2490 | startblockval(right.br_startblock); |
2491 | - newlen = xfs_bmap_worst_indlen(ip, temp); |
2492 | + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
2493 | + oldlen); |
2494 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), |
2495 | new->br_startoff, |
2496 | nullstartblock((int)newlen), temp, right.br_state); |
2497 | @@ -3913,17 +3925,13 @@ xfs_bmap_btalloc( |
2498 | * the first block that was allocated. |
2499 | */ |
2500 | ASSERT(*ap->firstblock == NULLFSBLOCK || |
2501 | - XFS_FSB_TO_AGNO(mp, *ap->firstblock) == |
2502 | - XFS_FSB_TO_AGNO(mp, args.fsbno) || |
2503 | - (ap->dfops->dop_low && |
2504 | - XFS_FSB_TO_AGNO(mp, *ap->firstblock) < |
2505 | - XFS_FSB_TO_AGNO(mp, args.fsbno))); |
2506 | + XFS_FSB_TO_AGNO(mp, *ap->firstblock) <= |
2507 | + XFS_FSB_TO_AGNO(mp, args.fsbno)); |
2508 | |
2509 | ap->blkno = args.fsbno; |
2510 | if (*ap->firstblock == NULLFSBLOCK) |
2511 | *ap->firstblock = args.fsbno; |
2512 | - ASSERT(nullfb || fb_agno == args.agno || |
2513 | - (ap->dfops->dop_low && fb_agno < args.agno)); |
2514 | + ASSERT(nullfb || fb_agno <= args.agno); |
2515 | ap->length = args.len; |
2516 | if (!(ap->flags & XFS_BMAPI_COWFORK)) |
2517 | ap->ip->i_d.di_nblocks += args.len; |
2518 | @@ -4249,6 +4257,19 @@ xfs_bmapi_read( |
2519 | return 0; |
2520 | } |
2521 | |
2522 | +/* |
2523 | + * Add a delayed allocation extent to an inode. Blocks are reserved from the |
2524 | + * global pool and the extent inserted into the inode in-core extent tree. |
2525 | + * |
2526 | + * On entry, got refers to the first extent beyond the offset of the extent to |
2527 | + * allocate or eof is specified if no such extent exists. On return, got refers |
2528 | + * to the extent record that was inserted to the inode fork. |
2529 | + * |
2530 | + * Note that the allocated extent may have been merged with contiguous extents |
2531 | + * during insertion into the inode fork. Thus, got does not reflect the current |
2532 | + * state of the inode fork on return. If necessary, the caller can use lastx to |
2533 | + * look up the updated record in the inode fork. |
2534 | + */ |
2535 | int |
2536 | xfs_bmapi_reserve_delalloc( |
2537 | struct xfs_inode *ip, |
2538 | @@ -4335,13 +4356,8 @@ xfs_bmapi_reserve_delalloc( |
2539 | got->br_startblock = nullstartblock(indlen); |
2540 | got->br_blockcount = alen; |
2541 | got->br_state = XFS_EXT_NORM; |
2542 | - xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); |
2543 | |
2544 | - /* |
2545 | - * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay |
2546 | - * might have merged it into one of the neighbouring ones. |
2547 | - */ |
2548 | - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); |
2549 | + xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); |
2550 | |
2551 | /* |
2552 | * Tag the inode if blocks were preallocated. Note that COW fork |
2553 | @@ -4353,10 +4369,6 @@ xfs_bmapi_reserve_delalloc( |
2554 | if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) |
2555 | xfs_inode_set_cowblocks_tag(ip); |
2556 | |
2557 | - ASSERT(got->br_startoff <= aoff); |
2558 | - ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); |
2559 | - ASSERT(isnullstartblock(got->br_startblock)); |
2560 | - ASSERT(got->br_state == XFS_EXT_NORM); |
2561 | return 0; |
2562 | |
2563 | out_unreserve_blocks: |
2564 | @@ -4461,10 +4473,16 @@ xfs_bmapi_allocate( |
2565 | bma->got.br_state = XFS_EXT_NORM; |
2566 | |
2567 | /* |
2568 | - * A wasdelay extent has been initialized, so shouldn't be flagged |
2569 | - * as unwritten. |
2570 | + * In the data fork, a wasdelay extent has been initialized, so |
2571 | + * shouldn't be flagged as unwritten. |
2572 | + * |
2573 | + * For the cow fork, however, we convert delalloc reservations |
2574 | + * (extents allocated for speculative preallocation) to |
2575 | + * allocated unwritten extents, and only convert the unwritten |
2576 | + * extents to real extents when we're about to write the data. |
2577 | */ |
2578 | - if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && |
2579 | + if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) && |
2580 | + (bma->flags & XFS_BMAPI_PREALLOC) && |
2581 | xfs_sb_version_hasextflgbit(&mp->m_sb)) |
2582 | bma->got.br_state = XFS_EXT_UNWRITTEN; |
2583 | |
2584 | @@ -4515,8 +4533,6 @@ xfs_bmapi_convert_unwritten( |
2585 | (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) |
2586 | return 0; |
2587 | |
2588 | - ASSERT(whichfork != XFS_COW_FORK); |
2589 | - |
2590 | /* |
2591 | * Modify (by adding) the state flag, if writing. |
2592 | */ |
2593 | @@ -4541,8 +4557,8 @@ xfs_bmapi_convert_unwritten( |
2594 | return error; |
2595 | } |
2596 | |
2597 | - error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, |
2598 | - &bma->cur, mval, bma->firstblock, bma->dfops, |
2599 | + error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, |
2600 | + &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops, |
2601 | &tmp_logflags); |
2602 | /* |
2603 | * Log the inode core unconditionally in the unwritten extent conversion |
2604 | @@ -4551,8 +4567,12 @@ xfs_bmapi_convert_unwritten( |
2605 | * in the transaction for the sake of fsync(), even if nothing has |
2606 | * changed, because fsync() will not force the log for this transaction |
2607 | * unless it sees the inode pinned. |
2608 | + * |
2609 | + * Note: If we're only converting cow fork extents, there aren't |
2610 | + * any on-disk updates to make, so we don't need to log anything. |
2611 | */ |
2612 | - bma->logflags |= tmp_logflags | XFS_ILOG_CORE; |
2613 | + if (whichfork != XFS_COW_FORK) |
2614 | + bma->logflags |= tmp_logflags | XFS_ILOG_CORE; |
2615 | if (error) |
2616 | return error; |
2617 | |
2618 | @@ -4626,15 +4646,15 @@ xfs_bmapi_write( |
2619 | ASSERT(*nmap >= 1); |
2620 | ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); |
2621 | ASSERT(!(flags & XFS_BMAPI_IGSTATE)); |
2622 | - ASSERT(tp != NULL); |
2623 | + ASSERT(tp != NULL || |
2624 | + (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) == |
2625 | + (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)); |
2626 | ASSERT(len > 0); |
2627 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); |
2628 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
2629 | ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK); |
2630 | ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP)); |
2631 | ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP)); |
2632 | - ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK); |
2633 | - ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK); |
2634 | |
2635 | /* zeroing is for currently only for data extents, not metadata */ |
2636 | ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != |
2637 | @@ -4840,13 +4860,9 @@ xfs_bmapi_write( |
2638 | if (bma.cur) { |
2639 | if (!error) { |
2640 | ASSERT(*firstblock == NULLFSBLOCK || |
2641 | - XFS_FSB_TO_AGNO(mp, *firstblock) == |
2642 | + XFS_FSB_TO_AGNO(mp, *firstblock) <= |
2643 | XFS_FSB_TO_AGNO(mp, |
2644 | - bma.cur->bc_private.b.firstblock) || |
2645 | - (dfops->dop_low && |
2646 | - XFS_FSB_TO_AGNO(mp, *firstblock) < |
2647 | - XFS_FSB_TO_AGNO(mp, |
2648 | - bma.cur->bc_private.b.firstblock))); |
2649 | + bma.cur->bc_private.b.firstblock)); |
2650 | *firstblock = bma.cur->bc_private.b.firstblock; |
2651 | } |
2652 | xfs_btree_del_cursor(bma.cur, |
2653 | @@ -4881,34 +4897,59 @@ xfs_bmap_split_indlen( |
2654 | xfs_filblks_t len2 = *indlen2; |
2655 | xfs_filblks_t nres = len1 + len2; /* new total res. */ |
2656 | xfs_filblks_t stolen = 0; |
2657 | + xfs_filblks_t resfactor; |
2658 | |
2659 | /* |
2660 | * Steal as many blocks as we can to try and satisfy the worst case |
2661 | * indlen for both new extents. |
2662 | */ |
2663 | - while (nres > ores && avail) { |
2664 | - nres--; |
2665 | - avail--; |
2666 | - stolen++; |
2667 | - } |
2668 | + if (ores < nres && avail) |
2669 | + stolen = XFS_FILBLKS_MIN(nres - ores, avail); |
2670 | + ores += stolen; |
2671 | + |
2672 | + /* nothing else to do if we've satisfied the new reservation */ |
2673 | + if (ores >= nres) |
2674 | + return stolen; |
2675 | + |
2676 | + /* |
2677 | + * We can't meet the total required reservation for the two extents. |
2678 | + * Calculate the percent of the overall shortage between both extents |
2679 | + * and apply this percentage to each of the requested indlen values. |
2680 | + * This distributes the shortage fairly and reduces the chances that one |
2681 | + * of the two extents is left with nothing when extents are repeatedly |
2682 | + * split. |
2683 | + */ |
2684 | + resfactor = (ores * 100); |
2685 | + do_div(resfactor, nres); |
2686 | + len1 *= resfactor; |
2687 | + do_div(len1, 100); |
2688 | + len2 *= resfactor; |
2689 | + do_div(len2, 100); |
2690 | + ASSERT(len1 + len2 <= ores); |
2691 | + ASSERT(len1 < *indlen1 && len2 < *indlen2); |
2692 | |
2693 | /* |
2694 | - * The only blocks available are those reserved for the original |
2695 | - * extent and what we can steal from the extent being removed. |
2696 | - * If this still isn't enough to satisfy the combined |
2697 | - * requirements for the two new extents, skim blocks off of each |
2698 | - * of the new reservations until they match what is available. |
2699 | + * Hand out the remainder to each extent. If one of the two reservations |
2700 | + * is zero, we want to make sure that one gets a block first. The loop |
2701 | + * below starts with len1, so hand len2 a block right off the bat if it |
2702 | + * is zero. |
2703 | */ |
2704 | - while (nres > ores) { |
2705 | - if (len1) { |
2706 | - len1--; |
2707 | - nres--; |
2708 | + ores -= (len1 + len2); |
2709 | + ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores); |
2710 | + if (ores && !len2 && *indlen2) { |
2711 | + len2++; |
2712 | + ores--; |
2713 | + } |
2714 | + while (ores) { |
2715 | + if (len1 < *indlen1) { |
2716 | + len1++; |
2717 | + ores--; |
2718 | } |
2719 | - if (nres == ores) |
2720 | + if (!ores) |
2721 | break; |
2722 | - if (len2) { |
2723 | - len2--; |
2724 | - nres--; |
2725 | + if (len2 < *indlen2) { |
2726 | + len2++; |
2727 | + ores--; |
2728 | } |
2729 | } |
2730 | |
2731 | @@ -5656,8 +5697,8 @@ __xfs_bunmapi( |
2732 | } |
2733 | del.br_state = XFS_EXT_UNWRITTEN; |
2734 | error = xfs_bmap_add_extent_unwritten_real(tp, ip, |
2735 | - &lastx, &cur, &del, firstblock, dfops, |
2736 | - &logflags); |
2737 | + whichfork, &lastx, &cur, &del, |
2738 | + firstblock, dfops, &logflags); |
2739 | if (error) |
2740 | goto error0; |
2741 | goto nodelete; |
2742 | @@ -5714,8 +5755,9 @@ __xfs_bunmapi( |
2743 | prev.br_state = XFS_EXT_UNWRITTEN; |
2744 | lastx--; |
2745 | error = xfs_bmap_add_extent_unwritten_real(tp, |
2746 | - ip, &lastx, &cur, &prev, |
2747 | - firstblock, dfops, &logflags); |
2748 | + ip, whichfork, &lastx, &cur, |
2749 | + &prev, firstblock, dfops, |
2750 | + &logflags); |
2751 | if (error) |
2752 | goto error0; |
2753 | goto nodelete; |
2754 | @@ -5723,8 +5765,9 @@ __xfs_bunmapi( |
2755 | ASSERT(del.br_state == XFS_EXT_NORM); |
2756 | del.br_state = XFS_EXT_UNWRITTEN; |
2757 | error = xfs_bmap_add_extent_unwritten_real(tp, |
2758 | - ip, &lastx, &cur, &del, |
2759 | - firstblock, dfops, &logflags); |
2760 | + ip, whichfork, &lastx, &cur, |
2761 | + &del, firstblock, dfops, |
2762 | + &logflags); |
2763 | if (error) |
2764 | goto error0; |
2765 | goto nodelete; |
2766 | diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c |
2767 | index f76c1693ff01..5c3918678bb6 100644 |
2768 | --- a/fs/xfs/libxfs/xfs_bmap_btree.c |
2769 | +++ b/fs/xfs/libxfs/xfs_bmap_btree.c |
2770 | @@ -453,8 +453,8 @@ xfs_bmbt_alloc_block( |
2771 | |
2772 | if (args.fsbno == NULLFSBLOCK) { |
2773 | args.fsbno = be64_to_cpu(start->l); |
2774 | -try_another_ag: |
2775 | args.type = XFS_ALLOCTYPE_START_BNO; |
2776 | +try_another_ag: |
2777 | /* |
2778 | * Make sure there is sufficient room left in the AG to |
2779 | * complete a full tree split for an extent insert. If |
2780 | @@ -494,8 +494,8 @@ xfs_bmbt_alloc_block( |
2781 | if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && |
2782 | args.fsbno == NULLFSBLOCK && |
2783 | args.type == XFS_ALLOCTYPE_NEAR_BNO) { |
2784 | - cur->bc_private.b.dfops->dop_low = true; |
2785 | args.fsbno = cur->bc_private.b.firstblock; |
2786 | + args.type = XFS_ALLOCTYPE_FIRST_AG; |
2787 | goto try_another_ag; |
2788 | } |
2789 | |
2790 | @@ -512,7 +512,7 @@ xfs_bmbt_alloc_block( |
2791 | goto error0; |
2792 | cur->bc_private.b.dfops->dop_low = true; |
2793 | } |
2794 | - if (args.fsbno == NULLFSBLOCK) { |
2795 | + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { |
2796 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); |
2797 | *stat = 0; |
2798 | return 0; |
2799 | diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c |
2800 | index 21e6a6ab6b9a..2849d3fa3d0b 100644 |
2801 | --- a/fs/xfs/libxfs/xfs_btree.c |
2802 | +++ b/fs/xfs/libxfs/xfs_btree.c |
2803 | @@ -810,7 +810,8 @@ xfs_btree_read_bufl( |
2804 | xfs_daddr_t d; /* real disk block address */ |
2805 | int error; |
2806 | |
2807 | - ASSERT(fsbno != NULLFSBLOCK); |
2808 | + if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) |
2809 | + return -EFSCORRUPTED; |
2810 | d = XFS_FSB_TO_DADDR(mp, fsbno); |
2811 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, |
2812 | mp->m_bsize, lock, &bp, ops); |
2813 | diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h |
2814 | index c2b01d1c79ee..3b0fc1afada5 100644 |
2815 | --- a/fs/xfs/libxfs/xfs_btree.h |
2816 | +++ b/fs/xfs/libxfs/xfs_btree.h |
2817 | @@ -491,7 +491,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) |
2818 | #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) |
2819 | |
2820 | #define XFS_FSB_SANITY_CHECK(mp,fsb) \ |
2821 | - (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ |
2822 | + (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ |
2823 | XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) |
2824 | |
2825 | /* |
2826 | diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c |
2827 | index f2dc1a950c85..1bdf2888295b 100644 |
2828 | --- a/fs/xfs/libxfs/xfs_da_btree.c |
2829 | +++ b/fs/xfs/libxfs/xfs_da_btree.c |
2830 | @@ -2633,7 +2633,7 @@ xfs_da_read_buf( |
2831 | /* |
2832 | * Readahead the dir/attr block. |
2833 | */ |
2834 | -xfs_daddr_t |
2835 | +int |
2836 | xfs_da_reada_buf( |
2837 | struct xfs_inode *dp, |
2838 | xfs_dablk_t bno, |
2839 | @@ -2664,7 +2664,5 @@ xfs_da_reada_buf( |
2840 | if (mapp != &map) |
2841 | kmem_free(mapp); |
2842 | |
2843 | - if (error) |
2844 | - return -1; |
2845 | - return mappedbno; |
2846 | + return error; |
2847 | } |
2848 | diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h |
2849 | index 98c75cbe6ac2..4e29cb6a3627 100644 |
2850 | --- a/fs/xfs/libxfs/xfs_da_btree.h |
2851 | +++ b/fs/xfs/libxfs/xfs_da_btree.h |
2852 | @@ -201,7 +201,7 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, |
2853 | xfs_dablk_t bno, xfs_daddr_t mappedbno, |
2854 | struct xfs_buf **bpp, int whichfork, |
2855 | const struct xfs_buf_ops *ops); |
2856 | -xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, |
2857 | +int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, |
2858 | xfs_daddr_t mapped_bno, int whichfork, |
2859 | const struct xfs_buf_ops *ops); |
2860 | int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, |
2861 | diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c |
2862 | index 75a557432d0f..bbd1238852b3 100644 |
2863 | --- a/fs/xfs/libxfs/xfs_dir2_node.c |
2864 | +++ b/fs/xfs/libxfs/xfs_dir2_node.c |
2865 | @@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { |
2866 | .verify_write = xfs_dir3_free_write_verify, |
2867 | }; |
2868 | |
2869 | +/* Everything ok in the free block header? */ |
2870 | +static bool |
2871 | +xfs_dir3_free_header_check( |
2872 | + struct xfs_inode *dp, |
2873 | + xfs_dablk_t fbno, |
2874 | + struct xfs_buf *bp) |
2875 | +{ |
2876 | + struct xfs_mount *mp = dp->i_mount; |
2877 | + unsigned int firstdb; |
2878 | + int maxbests; |
2879 | + |
2880 | + maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo); |
2881 | + firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) - |
2882 | + xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) * |
2883 | + maxbests; |
2884 | + if (xfs_sb_version_hascrc(&mp->m_sb)) { |
2885 | + struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; |
2886 | + |
2887 | + if (be32_to_cpu(hdr3->firstdb) != firstdb) |
2888 | + return false; |
2889 | + if (be32_to_cpu(hdr3->nvalid) > maxbests) |
2890 | + return false; |
2891 | + if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) |
2892 | + return false; |
2893 | + } else { |
2894 | + struct xfs_dir2_free_hdr *hdr = bp->b_addr; |
2895 | + |
2896 | + if (be32_to_cpu(hdr->firstdb) != firstdb) |
2897 | + return false; |
2898 | + if (be32_to_cpu(hdr->nvalid) > maxbests) |
2899 | + return false; |
2900 | + if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) |
2901 | + return false; |
2902 | + } |
2903 | + return true; |
2904 | +} |
2905 | |
2906 | static int |
2907 | __xfs_dir3_free_read( |
2908 | @@ -168,11 +204,22 @@ __xfs_dir3_free_read( |
2909 | |
2910 | err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, |
2911 | XFS_DATA_FORK, &xfs_dir3_free_buf_ops); |
2912 | + if (err || !*bpp) |
2913 | + return err; |
2914 | + |
2915 | + /* Check things that we can't do in the verifier. */ |
2916 | + if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { |
2917 | + xfs_buf_ioerror(*bpp, -EFSCORRUPTED); |
2918 | + xfs_verifier_error(*bpp); |
2919 | + xfs_trans_brelse(tp, *bpp); |
2920 | + return -EFSCORRUPTED; |
2921 | + } |
2922 | |
2923 | /* try read returns without an error or *bpp if it lands in a hole */ |
2924 | - if (!err && tp && *bpp) |
2925 | + if (tp) |
2926 | xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF); |
2927 | - return err; |
2928 | + |
2929 | + return 0; |
2930 | } |
2931 | |
2932 | int |
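The xfs_dir2_node.c hunk above adds xfs_dir3_free_header_check(), a sanity check the write verifier cannot perform on its own because it needs to know where the free block sits within the directory: firstdb must match the block's position times the number of bests per block, and nused may never exceed nvalid. A minimal standalone sketch of that arithmetic follows, assuming a simplified host-endian header; the struct, field names and numbers are invented for illustration and are not the on-disk XFS format (which stores these fields big-endian).

/* Illustrative sketch only -- simplified, host-endian stand-in for the
 * on-disk XFS dir3 free block header; not the real kernel structures. */
#include <stdbool.h>
#include <stdio.h>

struct free_hdr {
    unsigned int firstdb;   /* first data block covered by this free block */
    unsigned int nvalid;    /* number of valid bests[] entries */
    unsigned int nused;     /* number of bests[] entries in use */
};

/* Mirror of the check added above: firstdb must match the block's
 * position in the free space, and nused can never exceed nvalid. */
static bool free_header_ok(const struct free_hdr *hdr,
                           unsigned int free_block_index,
                           unsigned int maxbests)
{
    unsigned int expected_firstdb = free_block_index * maxbests;

    if (hdr->firstdb != expected_firstdb)
        return false;
    if (hdr->nvalid > maxbests)
        return false;
    if (hdr->nvalid < hdr->nused)
        return false;
    return true;
}

int main(void)
{
    struct free_hdr good = { .firstdb = 120, .nvalid = 50, .nused = 10 };
    struct free_hdr bad  = { .firstdb = 999, .nvalid = 50, .nused = 10 };

    /* Assume this is the 2nd free block and each one covers 60 bests. */
    printf("good: %d\n", free_header_ok(&good, 2, 60));
    printf("bad:  %d\n", free_header_ok(&bad, 2, 60));
    return 0;
}
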
2933 | diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c |
2934 | index d45c03779dae..a2818f6e8598 100644 |
2935 | --- a/fs/xfs/libxfs/xfs_ialloc.c |
2936 | +++ b/fs/xfs/libxfs/xfs_ialloc.c |
2937 | @@ -51,8 +51,7 @@ xfs_ialloc_cluster_alignment( |
2938 | struct xfs_mount *mp) |
2939 | { |
2940 | if (xfs_sb_version_hasalign(&mp->m_sb) && |
2941 | - mp->m_sb.sb_inoalignmt >= |
2942 | - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) |
2943 | + mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) |
2944 | return mp->m_sb.sb_inoalignmt; |
2945 | return 1; |
2946 | } |
2947 | diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c |
2948 | index 6c6b95947e71..b9c351ff0422 100644 |
2949 | --- a/fs/xfs/libxfs/xfs_ialloc_btree.c |
2950 | +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c |
2951 | @@ -82,11 +82,12 @@ xfs_finobt_set_root( |
2952 | } |
2953 | |
2954 | STATIC int |
2955 | -xfs_inobt_alloc_block( |
2956 | +__xfs_inobt_alloc_block( |
2957 | struct xfs_btree_cur *cur, |
2958 | union xfs_btree_ptr *start, |
2959 | union xfs_btree_ptr *new, |
2960 | - int *stat) |
2961 | + int *stat, |
2962 | + enum xfs_ag_resv_type resv) |
2963 | { |
2964 | xfs_alloc_arg_t args; /* block allocation args */ |
2965 | int error; /* error return value */ |
2966 | @@ -103,6 +104,7 @@ xfs_inobt_alloc_block( |
2967 | args.maxlen = 1; |
2968 | args.prod = 1; |
2969 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
2970 | + args.resv = resv; |
2971 | |
2972 | error = xfs_alloc_vextent(&args); |
2973 | if (error) { |
2974 | @@ -123,6 +125,27 @@ xfs_inobt_alloc_block( |
2975 | } |
2976 | |
2977 | STATIC int |
2978 | +xfs_inobt_alloc_block( |
2979 | + struct xfs_btree_cur *cur, |
2980 | + union xfs_btree_ptr *start, |
2981 | + union xfs_btree_ptr *new, |
2982 | + int *stat) |
2983 | +{ |
2984 | + return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); |
2985 | +} |
2986 | + |
2987 | +STATIC int |
2988 | +xfs_finobt_alloc_block( |
2989 | + struct xfs_btree_cur *cur, |
2990 | + union xfs_btree_ptr *start, |
2991 | + union xfs_btree_ptr *new, |
2992 | + int *stat) |
2993 | +{ |
2994 | + return __xfs_inobt_alloc_block(cur, start, new, stat, |
2995 | + XFS_AG_RESV_METADATA); |
2996 | +} |
2997 | + |
2998 | +STATIC int |
2999 | xfs_inobt_free_block( |
3000 | struct xfs_btree_cur *cur, |
3001 | struct xfs_buf *bp) |
3002 | @@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { |
3003 | |
3004 | .dup_cursor = xfs_inobt_dup_cursor, |
3005 | .set_root = xfs_finobt_set_root, |
3006 | - .alloc_block = xfs_inobt_alloc_block, |
3007 | + .alloc_block = xfs_finobt_alloc_block, |
3008 | .free_block = xfs_inobt_free_block, |
3009 | .get_minrecs = xfs_inobt_get_minrecs, |
3010 | .get_maxrecs = xfs_inobt_get_maxrecs, |
3011 | @@ -478,3 +501,64 @@ xfs_inobt_rec_check_count( |
3012 | return 0; |
3013 | } |
3014 | #endif /* DEBUG */ |
3015 | + |
3016 | +static xfs_extlen_t |
3017 | +xfs_inobt_max_size( |
3018 | + struct xfs_mount *mp) |
3019 | +{ |
3020 | + /* Bail out if we're uninitialized, which can happen in mkfs. */ |
3021 | + if (mp->m_inobt_mxr[0] == 0) |
3022 | + return 0; |
3023 | + |
3024 | + return xfs_btree_calc_size(mp, mp->m_inobt_mnr, |
3025 | + (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock / |
3026 | + XFS_INODES_PER_CHUNK); |
3027 | +} |
3028 | + |
3029 | +static int |
3030 | +xfs_inobt_count_blocks( |
3031 | + struct xfs_mount *mp, |
3032 | + xfs_agnumber_t agno, |
3033 | + xfs_btnum_t btnum, |
3034 | + xfs_extlen_t *tree_blocks) |
3035 | +{ |
3036 | + struct xfs_buf *agbp; |
3037 | + struct xfs_btree_cur *cur; |
3038 | + int error; |
3039 | + |
3040 | + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); |
3041 | + if (error) |
3042 | + return error; |
3043 | + |
3044 | + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum); |
3045 | + error = xfs_btree_count_blocks(cur, tree_blocks); |
3046 | + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); |
3047 | + xfs_buf_relse(agbp); |
3048 | + |
3049 | + return error; |
3050 | +} |
3051 | + |
3052 | +/* |
3053 | + * Figure out how many blocks to reserve and how many are used by this btree. |
3054 | + */ |
3055 | +int |
3056 | +xfs_finobt_calc_reserves( |
3057 | + struct xfs_mount *mp, |
3058 | + xfs_agnumber_t agno, |
3059 | + xfs_extlen_t *ask, |
3060 | + xfs_extlen_t *used) |
3061 | +{ |
3062 | + xfs_extlen_t tree_len = 0; |
3063 | + int error; |
3064 | + |
3065 | + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) |
3066 | + return 0; |
3067 | + |
3068 | + error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len); |
3069 | + if (error) |
3070 | + return error; |
3071 | + |
3072 | + *ask += xfs_inobt_max_size(mp); |
3073 | + *used += tree_len; |
3074 | + return 0; |
3075 | +} |
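The new xfs_finobt_calc_reserves() above feeds the per-AG reservation code two numbers: *ask accumulates the worst-case size the free inode btree could grow to, and *used accumulates the blocks it occupies today, so the reservation effectively holds back ask - used blocks of growth room. A minimal sketch of that accumulator pattern, with made-up block counts rather than real XFS geometry:

/* Illustrative sketch of the ask/used accumulation pattern used by the
 * per-AG reservation code; the sizes below are made up, not real XFS math. */
#include <stdio.h>

struct ag {
    unsigned long max_tree_blocks;  /* worst-case btree size for this AG */
    unsigned long cur_tree_blocks;  /* blocks the btree occupies right now */
};

static void calc_reserves(const struct ag *ag,
                          unsigned long *ask, unsigned long *used)
{
    /* Callers pass ask/used accumulators; each btree adds its share. */
    *ask += ag->max_tree_blocks;
    *used += ag->cur_tree_blocks;
}

int main(void)
{
    struct ag ags[] = { { 1000, 40 }, { 1000, 12 }, { 1000, 0 } };
    unsigned long ask = 0, used = 0;

    for (unsigned int i = 0; i < sizeof(ags) / sizeof(ags[0]); i++)
        calc_reserves(&ags[i], &ask, &used);

    /* The reservation holds back the growth room: ask - used blocks. */
    printf("ask=%lu used=%lu reserved=%lu\n", ask, used, ask - used);
    return 0;
}
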
3076 | diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h |
3077 | index bd88453217ce..aa81e2e63f3f 100644 |
3078 | --- a/fs/xfs/libxfs/xfs_ialloc_btree.h |
3079 | +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h |
3080 | @@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *, |
3081 | #define xfs_inobt_rec_check_count(mp, rec) 0 |
3082 | #endif /* DEBUG */ |
3083 | |
3084 | +int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, |
3085 | + xfs_extlen_t *ask, xfs_extlen_t *used); |
3086 | + |
3087 | #endif /* __XFS_IALLOC_BTREE_H__ */ |
3088 | diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c |
3089 | index 222e103356c6..25c1e078aef6 100644 |
3090 | --- a/fs/xfs/libxfs/xfs_inode_fork.c |
3091 | +++ b/fs/xfs/libxfs/xfs_inode_fork.c |
3092 | @@ -26,6 +26,7 @@ |
3093 | #include "xfs_inode.h" |
3094 | #include "xfs_trans.h" |
3095 | #include "xfs_inode_item.h" |
3096 | +#include "xfs_btree.h" |
3097 | #include "xfs_bmap_btree.h" |
3098 | #include "xfs_bmap.h" |
3099 | #include "xfs_error.h" |
3100 | @@ -429,11 +430,13 @@ xfs_iformat_btree( |
3101 | /* REFERENCED */ |
3102 | int nrecs; |
3103 | int size; |
3104 | + int level; |
3105 | |
3106 | ifp = XFS_IFORK_PTR(ip, whichfork); |
3107 | dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); |
3108 | size = XFS_BMAP_BROOT_SPACE(mp, dfp); |
3109 | nrecs = be16_to_cpu(dfp->bb_numrecs); |
3110 | + level = be16_to_cpu(dfp->bb_level); |
3111 | |
3112 | /* |
3113 | * blow out if -- fork has less extents than can fit in |
3114 | @@ -446,7 +449,8 @@ xfs_iformat_btree( |
3115 | XFS_IFORK_MAXEXT(ip, whichfork) || |
3116 | XFS_BMDR_SPACE_CALC(nrecs) > |
3117 | XFS_DFORK_SIZE(dip, mp, whichfork) || |
3118 | - XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { |
3119 | + XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || |
3120 | + level == 0 || level > XFS_BTREE_MAXLEVELS) { |
3121 | xfs_warn(mp, "corrupt inode %Lu (btree).", |
3122 | (unsigned long long) ip->i_ino); |
3123 | XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, |
3124 | @@ -497,15 +501,14 @@ xfs_iread_extents( |
3125 | * We know that the size is valid (it's checked in iformat_btree) |
3126 | */ |
3127 | ifp->if_bytes = ifp->if_real_bytes = 0; |
3128 | - ifp->if_flags |= XFS_IFEXTENTS; |
3129 | xfs_iext_add(ifp, 0, nextents); |
3130 | error = xfs_bmap_read_extents(tp, ip, whichfork); |
3131 | if (error) { |
3132 | xfs_iext_destroy(ifp); |
3133 | - ifp->if_flags &= ~XFS_IFEXTENTS; |
3134 | return error; |
3135 | } |
3136 | xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); |
3137 | + ifp->if_flags |= XFS_IFEXTENTS; |
3138 | return 0; |
3139 | } |
3140 | /* |
3141 | diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c |
3142 | index 06763f5cc701..0457abe4118a 100644 |
3143 | --- a/fs/xfs/xfs_aops.c |
3144 | +++ b/fs/xfs/xfs_aops.c |
3145 | @@ -279,54 +279,49 @@ xfs_end_io( |
3146 | struct xfs_ioend *ioend = |
3147 | container_of(work, struct xfs_ioend, io_work); |
3148 | struct xfs_inode *ip = XFS_I(ioend->io_inode); |
3149 | + xfs_off_t offset = ioend->io_offset; |
3150 | + size_t size = ioend->io_size; |
3151 | int error = ioend->io_bio->bi_error; |
3152 | |
3153 | /* |
3154 | - * Set an error if the mount has shut down and proceed with end I/O |
3155 | - * processing so it can perform whatever cleanups are necessary. |
3156 | + * Just clean up the in-memory structures if the fs has been shut down. |
3157 | */ |
3158 | - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
3159 | + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
3160 | error = -EIO; |
3161 | + goto done; |
3162 | + } |
3163 | |
3164 | /* |
3165 | - * For a CoW extent, we need to move the mapping from the CoW fork |
3166 | - * to the data fork. If instead an error happened, just dump the |
3167 | - * new blocks. |
3168 | + * Clean up any COW blocks on an I/O error. |
3169 | */ |
3170 | - if (ioend->io_type == XFS_IO_COW) { |
3171 | - if (error) |
3172 | - goto done; |
3173 | - if (ioend->io_bio->bi_error) { |
3174 | - error = xfs_reflink_cancel_cow_range(ip, |
3175 | - ioend->io_offset, ioend->io_size); |
3176 | - goto done; |
3177 | + if (unlikely(error)) { |
3178 | + switch (ioend->io_type) { |
3179 | + case XFS_IO_COW: |
3180 | + xfs_reflink_cancel_cow_range(ip, offset, size, true); |
3181 | + break; |
3182 | } |
3183 | - error = xfs_reflink_end_cow(ip, ioend->io_offset, |
3184 | - ioend->io_size); |
3185 | - if (error) |
3186 | - goto done; |
3187 | + |
3188 | + goto done; |
3189 | } |
3190 | |
3191 | /* |
3192 | - * For unwritten extents we need to issue transactions to convert a |
3193 | - * range to normal written extens after the data I/O has finished. |
3194 | - * Detecting and handling completion IO errors is done individually |
3195 | - * for each case as different cleanup operations need to be performed |
3196 | - * on error. |
3197 | + * Success: commit the COW or unwritten blocks if needed. |
3198 | */ |
3199 | - if (ioend->io_type == XFS_IO_UNWRITTEN) { |
3200 | - if (error) |
3201 | - goto done; |
3202 | - error = xfs_iomap_write_unwritten(ip, ioend->io_offset, |
3203 | - ioend->io_size); |
3204 | - } else if (ioend->io_append_trans) { |
3205 | - error = xfs_setfilesize_ioend(ioend, error); |
3206 | - } else { |
3207 | - ASSERT(!xfs_ioend_is_append(ioend) || |
3208 | - ioend->io_type == XFS_IO_COW); |
3209 | + switch (ioend->io_type) { |
3210 | + case XFS_IO_COW: |
3211 | + error = xfs_reflink_end_cow(ip, offset, size); |
3212 | + break; |
3213 | + case XFS_IO_UNWRITTEN: |
3214 | + error = xfs_iomap_write_unwritten(ip, offset, size); |
3215 | + break; |
3216 | + default: |
3217 | + ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); |
3218 | + break; |
3219 | } |
3220 | |
3221 | done: |
3222 | + if (ioend->io_append_trans) |
3223 | + error = xfs_setfilesize_ioend(ioend, error); |
3224 | xfs_destroy_ioend(ioend, error); |
3225 | } |
3226 | |
3227 | @@ -486,6 +481,12 @@ xfs_submit_ioend( |
3228 | struct xfs_ioend *ioend, |
3229 | int status) |
3230 | { |
3231 | + /* Convert CoW extents to regular */ |
3232 | + if (!status && ioend->io_type == XFS_IO_COW) { |
3233 | + status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), |
3234 | + ioend->io_offset, ioend->io_size); |
3235 | + } |
3236 | + |
3237 | /* Reserve log space if we might write beyond the on-disk inode size. */ |
3238 | if (!status && |
3239 | ioend->io_type != XFS_IO_UNWRITTEN && |
3240 | @@ -1257,44 +1258,6 @@ xfs_map_trim_size( |
3241 | bh_result->b_size = mapping_size; |
3242 | } |
3243 | |
3244 | -/* Bounce unaligned directio writes to the page cache. */ |
3245 | -static int |
3246 | -xfs_bounce_unaligned_dio_write( |
3247 | - struct xfs_inode *ip, |
3248 | - xfs_fileoff_t offset_fsb, |
3249 | - struct xfs_bmbt_irec *imap) |
3250 | -{ |
3251 | - struct xfs_bmbt_irec irec; |
3252 | - xfs_fileoff_t delta; |
3253 | - bool shared; |
3254 | - bool x; |
3255 | - int error; |
3256 | - |
3257 | - irec = *imap; |
3258 | - if (offset_fsb > irec.br_startoff) { |
3259 | - delta = offset_fsb - irec.br_startoff; |
3260 | - irec.br_blockcount -= delta; |
3261 | - irec.br_startblock += delta; |
3262 | - irec.br_startoff = offset_fsb; |
3263 | - } |
3264 | - error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x); |
3265 | - if (error) |
3266 | - return error; |
3267 | - |
3268 | - /* |
3269 | - * We're here because we're trying to do a directio write to a |
3270 | - * region that isn't aligned to a filesystem block. If any part |
3271 | - * of the extent is shared, fall back to buffered mode to handle |
3272 | - * the RMW. This is done by returning -EREMCHG ("remote addr |
3273 | - * changed"), which is caught further up the call stack. |
3274 | - */ |
3275 | - if (shared) { |
3276 | - trace_xfs_reflink_bounce_dio_write(ip, imap); |
3277 | - return -EREMCHG; |
3278 | - } |
3279 | - return 0; |
3280 | -} |
3281 | - |
3282 | STATIC int |
3283 | __xfs_get_blocks( |
3284 | struct inode *inode, |
3285 | @@ -1432,13 +1395,6 @@ __xfs_get_blocks( |
3286 | if (imap.br_startblock != HOLESTARTBLOCK && |
3287 | imap.br_startblock != DELAYSTARTBLOCK && |
3288 | (create || !ISUNWRITTEN(&imap))) { |
3289 | - if (create && direct && !is_cow) { |
3290 | - error = xfs_bounce_unaligned_dio_write(ip, offset_fsb, |
3291 | - &imap); |
3292 | - if (error) |
3293 | - return error; |
3294 | - } |
3295 | - |
3296 | xfs_map_buffer(inode, bh_result, &imap, offset); |
3297 | if (ISUNWRITTEN(&imap)) |
3298 | set_buffer_unwritten(bh_result); |
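The rewritten xfs_end_io() above separates completion into an early error path (cancel CoW reservations, convert nothing) and a success path that dispatches on the I/O type, with the on-disk size update performed last on both paths. The sketch below reproduces only that control-flow shape; the io types and helper functions are invented for the example and are not the XFS routines.

/* Illustrative sketch of the completion dispatch shape; io types and
 * helpers are invented for this example, not the XFS functions. */
#include <stdio.h>

enum io_type { IO_OVERWRITE, IO_UNWRITTEN, IO_COW };

static int cancel_cow(void)        { puts("cancel cow"); return 0; }
static int commit_cow(void)        { puts("commit cow"); return 0; }
static int convert_unwritten(void) { puts("convert unwritten"); return 0; }
static int update_size(int err)    { puts("update on-disk size"); return err; }

static int end_io(enum io_type type, int error, int need_size_update)
{
    /* On error, only clean up; never convert or remap anything. */
    if (error) {
        if (type == IO_COW)
            cancel_cow();
        goto done;
    }

    /* Success: commit whatever this I/O type needs committed. */
    switch (type) {
    case IO_COW:
        error = commit_cow();
        break;
    case IO_UNWRITTEN:
        error = convert_unwritten();
        break;
    default:
        break;
    }

done:
    /* The size update runs last regardless of the path taken above. */
    if (need_size_update)
        error = update_size(error);
    return error;
}

int main(void)
{
    end_io(IO_COW, 0, 1);
    end_io(IO_UNWRITTEN, -5, 0);
    return 0;
}
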
3299 | diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c |
3300 | index efb8ccd6bbf2..5c395e485170 100644 |
3301 | --- a/fs/xfs/xfs_bmap_util.c |
3302 | +++ b/fs/xfs/xfs_bmap_util.c |
3303 | @@ -917,17 +917,18 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) |
3304 | */ |
3305 | int |
3306 | xfs_free_eofblocks( |
3307 | - xfs_mount_t *mp, |
3308 | - xfs_inode_t *ip, |
3309 | - bool need_iolock) |
3310 | + struct xfs_inode *ip) |
3311 | { |
3312 | - xfs_trans_t *tp; |
3313 | - int error; |
3314 | - xfs_fileoff_t end_fsb; |
3315 | - xfs_fileoff_t last_fsb; |
3316 | - xfs_filblks_t map_len; |
3317 | - int nimaps; |
3318 | - xfs_bmbt_irec_t imap; |
3319 | + struct xfs_trans *tp; |
3320 | + int error; |
3321 | + xfs_fileoff_t end_fsb; |
3322 | + xfs_fileoff_t last_fsb; |
3323 | + xfs_filblks_t map_len; |
3324 | + int nimaps; |
3325 | + struct xfs_bmbt_irec imap; |
3326 | + struct xfs_mount *mp = ip->i_mount; |
3327 | + |
3328 | + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); |
3329 | |
3330 | /* |
3331 | * Figure out if there are any blocks beyond the end |
3332 | @@ -944,6 +945,10 @@ xfs_free_eofblocks( |
3333 | error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); |
3334 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
3335 | |
3336 | + /* |
3337 | + * If there are blocks after the end of file, truncate the file to its |
3338 | + * current size to free them up. |
3339 | + */ |
3340 | if (!error && (nimaps != 0) && |
3341 | (imap.br_startblock != HOLESTARTBLOCK || |
3342 | ip->i_delayed_blks)) { |
3343 | @@ -954,22 +959,13 @@ xfs_free_eofblocks( |
3344 | if (error) |
3345 | return error; |
3346 | |
3347 | - /* |
3348 | - * There are blocks after the end of file. |
3349 | - * Free them up now by truncating the file to |
3350 | - * its current size. |
3351 | - */ |
3352 | - if (need_iolock) { |
3353 | - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) |
3354 | - return -EAGAIN; |
3355 | - } |
3356 | + /* wait on dio to ensure i_size has settled */ |
3357 | + inode_dio_wait(VFS_I(ip)); |
3358 | |
3359 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, |
3360 | &tp); |
3361 | if (error) { |
3362 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); |
3363 | - if (need_iolock) |
3364 | - xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
3365 | return error; |
3366 | } |
3367 | |
3368 | @@ -997,8 +993,6 @@ xfs_free_eofblocks( |
3369 | } |
3370 | |
3371 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
3372 | - if (need_iolock) |
3373 | - xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
3374 | } |
3375 | return error; |
3376 | } |
3377 | @@ -1393,10 +1387,16 @@ xfs_shift_file_space( |
3378 | xfs_fileoff_t stop_fsb; |
3379 | xfs_fileoff_t next_fsb; |
3380 | xfs_fileoff_t shift_fsb; |
3381 | + uint resblks; |
3382 | |
3383 | ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); |
3384 | |
3385 | if (direction == SHIFT_LEFT) { |
3386 | + /* |
3387 | + * Reserve blocks to cover potential extent merges after left |
3388 | + * shift operations. |
3389 | + */ |
3390 | + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); |
3391 | next_fsb = XFS_B_TO_FSB(mp, offset + len); |
3392 | stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); |
3393 | } else { |
3394 | @@ -1404,6 +1404,7 @@ xfs_shift_file_space( |
3395 | * If right shift, delegate the work of initialization of |
3396 | * next_fsb to xfs_bmap_shift_extent as it has ilock held. |
3397 | */ |
3398 | + resblks = 0; |
3399 | next_fsb = NULLFSBLOCK; |
3400 | stop_fsb = XFS_B_TO_FSB(mp, offset); |
3401 | } |
3402 | @@ -1415,7 +1416,7 @@ xfs_shift_file_space( |
3403 | * into the accessible region of the file. |
3404 | */ |
3405 | if (xfs_can_free_eofblocks(ip, true)) { |
3406 | - error = xfs_free_eofblocks(mp, ip, false); |
3407 | + error = xfs_free_eofblocks(ip); |
3408 | if (error) |
3409 | return error; |
3410 | } |
3411 | @@ -1445,21 +1446,14 @@ xfs_shift_file_space( |
3412 | } |
3413 | |
3414 | while (!error && !done) { |
3415 | - /* |
3416 | - * We would need to reserve permanent block for transaction. |
3417 | - * This will come into picture when after shifting extent into |
3418 | - * hole we found that adjacent extents can be merged which |
3419 | - * may lead to freeing of a block during record update. |
3420 | - */ |
3421 | - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, |
3422 | - XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); |
3423 | + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, |
3424 | + &tp); |
3425 | if (error) |
3426 | break; |
3427 | |
3428 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
3429 | error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, |
3430 | - ip->i_gdquot, ip->i_pdquot, |
3431 | - XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, |
3432 | + ip->i_gdquot, ip->i_pdquot, resblks, 0, |
3433 | XFS_QMOPT_RES_REGBLKS); |
3434 | if (error) |
3435 | goto out_trans_cancel; |
3436 | diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h |
3437 | index 68a621a8e0c0..f1005393785c 100644 |
3438 | --- a/fs/xfs/xfs_bmap_util.h |
3439 | +++ b/fs/xfs/xfs_bmap_util.h |
3440 | @@ -63,8 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, |
3441 | |
3442 | /* EOF block manipulation functions */ |
3443 | bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); |
3444 | -int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, |
3445 | - bool need_iolock); |
3446 | +int xfs_free_eofblocks(struct xfs_inode *ip); |
3447 | |
3448 | int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, |
3449 | struct xfs_swapext *sx); |
3450 | diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c |
3451 | index 2975cb2319f4..0306168af332 100644 |
3452 | --- a/fs/xfs/xfs_buf_item.c |
3453 | +++ b/fs/xfs/xfs_buf_item.c |
3454 | @@ -1162,6 +1162,7 @@ xfs_buf_iodone_callbacks( |
3455 | */ |
3456 | bp->b_last_error = 0; |
3457 | bp->b_retries = 0; |
3458 | + bp->b_first_retry_time = 0; |
3459 | |
3460 | xfs_buf_do_callbacks(bp); |
3461 | bp->b_fspriv = NULL; |
3462 | diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c |
3463 | index 162dc186cf04..29c2f997aedf 100644 |
3464 | --- a/fs/xfs/xfs_extent_busy.c |
3465 | +++ b/fs/xfs/xfs_extent_busy.c |
3466 | @@ -45,18 +45,7 @@ xfs_extent_busy_insert( |
3467 | struct rb_node **rbp; |
3468 | struct rb_node *parent = NULL; |
3469 | |
3470 | - new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL); |
3471 | - if (!new) { |
3472 | - /* |
3473 | - * No Memory! Since it is now not possible to track the free |
3474 | - * block, make this a synchronous transaction to insure that |
3475 | - * the block is not reused before this transaction commits. |
3476 | - */ |
3477 | - trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len); |
3478 | - xfs_trans_set_sync(tp); |
3479 | - return; |
3480 | - } |
3481 | - |
3482 | + new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP); |
3483 | new->agno = agno; |
3484 | new->bno = bno; |
3485 | new->length = len; |
3486 | diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c |
3487 | index 9a5d64b5f35a..1209ad29e902 100644 |
3488 | --- a/fs/xfs/xfs_file.c |
3489 | +++ b/fs/xfs/xfs_file.c |
3490 | @@ -554,6 +554,15 @@ xfs_file_dio_aio_write( |
3491 | if ((iocb->ki_pos & mp->m_blockmask) || |
3492 | ((iocb->ki_pos + count) & mp->m_blockmask)) { |
3493 | unaligned_io = 1; |
3494 | + |
3495 | + /* |
3496 | + * We can't properly handle unaligned direct I/O to reflink |
3497 | + * files yet, as we can't unshare a partial block. |
3498 | + */ |
3499 | + if (xfs_is_reflink_inode(ip)) { |
3500 | + trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count); |
3501 | + return -EREMCHG; |
3502 | + } |
3503 | iolock = XFS_IOLOCK_EXCL; |
3504 | } else { |
3505 | iolock = XFS_IOLOCK_SHARED; |
3506 | @@ -675,8 +684,10 @@ xfs_file_buffered_aio_write( |
3507 | struct xfs_inode *ip = XFS_I(inode); |
3508 | ssize_t ret; |
3509 | int enospc = 0; |
3510 | - int iolock = XFS_IOLOCK_EXCL; |
3511 | + int iolock; |
3512 | |
3513 | +write_retry: |
3514 | + iolock = XFS_IOLOCK_EXCL; |
3515 | xfs_rw_ilock(ip, iolock); |
3516 | |
3517 | ret = xfs_file_aio_write_checks(iocb, from, &iolock); |
3518 | @@ -686,7 +697,6 @@ xfs_file_buffered_aio_write( |
3519 | /* We can write back this queue in page reclaim */ |
3520 | current->backing_dev_info = inode_to_bdi(inode); |
3521 | |
3522 | -write_retry: |
3523 | trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); |
3524 | ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); |
3525 | if (likely(ret >= 0)) |
3526 | @@ -702,18 +712,21 @@ xfs_file_buffered_aio_write( |
3527 | * running at the same time. |
3528 | */ |
3529 | if (ret == -EDQUOT && !enospc) { |
3530 | + xfs_rw_iunlock(ip, iolock); |
3531 | enospc = xfs_inode_free_quota_eofblocks(ip); |
3532 | if (enospc) |
3533 | goto write_retry; |
3534 | enospc = xfs_inode_free_quota_cowblocks(ip); |
3535 | if (enospc) |
3536 | goto write_retry; |
3537 | + iolock = 0; |
3538 | } else if (ret == -ENOSPC && !enospc) { |
3539 | struct xfs_eofblocks eofb = {0}; |
3540 | |
3541 | enospc = 1; |
3542 | xfs_flush_inodes(ip->i_mount); |
3543 | - eofb.eof_scan_owner = ip->i_ino; /* for locking */ |
3544 | + |
3545 | + xfs_rw_iunlock(ip, iolock); |
3546 | eofb.eof_flags = XFS_EOF_FLAGS_SYNC; |
3547 | xfs_icache_free_eofblocks(ip->i_mount, &eofb); |
3548 | goto write_retry; |
3549 | @@ -721,7 +734,8 @@ xfs_file_buffered_aio_write( |
3550 | |
3551 | current->backing_dev_info = NULL; |
3552 | out: |
3553 | - xfs_rw_iunlock(ip, iolock); |
3554 | + if (iolock) |
3555 | + xfs_rw_iunlock(ip, iolock); |
3556 | return ret; |
3557 | } |
3558 | |
3559 | @@ -987,9 +1001,9 @@ xfs_dir_open( |
3560 | */ |
3561 | mode = xfs_ilock_data_map_shared(ip); |
3562 | if (ip->i_d.di_nextents > 0) |
3563 | - xfs_dir3_data_readahead(ip, 0, -1); |
3564 | + error = xfs_dir3_data_readahead(ip, 0, -1); |
3565 | xfs_iunlock(ip, mode); |
3566 | - return 0; |
3567 | + return error; |
3568 | } |
3569 | |
3570 | STATIC int |
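In xfs_file_buffered_aio_write() the write_retry label now sits above the lock acquisition, and the ENOSPC/EDQUOT paths drop the iolock before running the eofblocks and cowblocks scans, because those scans take inode iolocks themselves. A minimal sketch of that drop-scan-retake shape, using a pthread mutex as a stand-in for the iolock and a fake scan:

/* Illustrative sketch of the drop-lock / scan / retry shape; the mutex
 * stands in for the inode iolock and the "scan" for the eofblocks scan. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t iolock = PTHREAD_MUTEX_INITIALIZER;

static int scan_for_free_space(void)
{
    /* The real scan walks other inodes and takes their iolocks too,
     * which is why the caller must not hold its own iolock here. */
    pthread_mutex_lock(&iolock);
    puts("scanning for reclaimable space");
    pthread_mutex_unlock(&iolock);
    return 1;   /* pretend we freed something */
}

static int buffered_write(int attempts_left)
{
write_retry:
    pthread_mutex_lock(&iolock);
    puts("trying write");

    if (attempts_left-- > 0) {          /* simulate ENOSPC on early tries */
        pthread_mutex_unlock(&iolock);  /* drop before the scan */
        if (scan_for_free_space())
            goto write_retry;           /* retake the lock from the top */
        return -1;
    }

    puts("write succeeded");
    pthread_mutex_unlock(&iolock);
    return 0;
}

int main(void)
{
    return buffered_write(2);
}
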
3571 | diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c |
3572 | index 29cc9886a3cb..3fb1f3fb8efe 100644 |
3573 | --- a/fs/xfs/xfs_icache.c |
3574 | +++ b/fs/xfs/xfs_icache.c |
3575 | @@ -1324,13 +1324,10 @@ xfs_inode_free_eofblocks( |
3576 | int flags, |
3577 | void *args) |
3578 | { |
3579 | - int ret; |
3580 | + int ret = 0; |
3581 | struct xfs_eofblocks *eofb = args; |
3582 | - bool need_iolock = true; |
3583 | int match; |
3584 | |
3585 | - ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); |
3586 | - |
3587 | if (!xfs_can_free_eofblocks(ip, false)) { |
3588 | /* inode could be preallocated or append-only */ |
3589 | trace_xfs_inode_free_eofblocks_invalid(ip); |
3590 | @@ -1358,21 +1355,19 @@ xfs_inode_free_eofblocks( |
3591 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && |
3592 | XFS_ISIZE(ip) < eofb->eof_min_file_size) |
3593 | return 0; |
3594 | - |
3595 | - /* |
3596 | - * A scan owner implies we already hold the iolock. Skip it in |
3597 | - * xfs_free_eofblocks() to avoid deadlock. This also eliminates |
3598 | - * the possibility of EAGAIN being returned. |
3599 | - */ |
3600 | - if (eofb->eof_scan_owner == ip->i_ino) |
3601 | - need_iolock = false; |
3602 | } |
3603 | |
3604 | - ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock); |
3605 | - |
3606 | - /* don't revisit the inode if we're not waiting */ |
3607 | - if (ret == -EAGAIN && !(flags & SYNC_WAIT)) |
3608 | - ret = 0; |
3609 | + /* |
3610 | + * If the caller is waiting, return -EAGAIN to keep the background |
3611 | + * scanner moving and revisit the inode in a subsequent pass. |
3612 | + */ |
3613 | + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { |
3614 | + if (flags & SYNC_WAIT) |
3615 | + ret = -EAGAIN; |
3616 | + return ret; |
3617 | + } |
3618 | + ret = xfs_free_eofblocks(ip); |
3619 | + xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
3620 | |
3621 | return ret; |
3622 | } |
3623 | @@ -1419,15 +1414,10 @@ __xfs_inode_free_quota_eofblocks( |
3624 | struct xfs_eofblocks eofb = {0}; |
3625 | struct xfs_dquot *dq; |
3626 | |
3627 | - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); |
3628 | - |
3629 | /* |
3630 | - * Set the scan owner to avoid a potential livelock. Otherwise, the scan |
3631 | - * can repeatedly trylock on the inode we're currently processing. We |
3632 | - * run a sync scan to increase effectiveness and use the union filter to |
3633 | + * Run a sync scan to increase effectiveness and use the union filter to |
3634 | * cover all applicable quotas in a single scan. |
3635 | */ |
3636 | - eofb.eof_scan_owner = ip->i_ino; |
3637 | eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; |
3638 | |
3639 | if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { |
3640 | @@ -1579,12 +1569,9 @@ xfs_inode_free_cowblocks( |
3641 | { |
3642 | int ret; |
3643 | struct xfs_eofblocks *eofb = args; |
3644 | - bool need_iolock = true; |
3645 | int match; |
3646 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); |
3647 | |
3648 | - ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); |
3649 | - |
3650 | /* |
3651 | * Just clear the tag if we have an empty cow fork or none at all. It's |
3652 | * possible the inode was fully unshared since it was originally tagged. |
3653 | @@ -1617,28 +1604,16 @@ xfs_inode_free_cowblocks( |
3654 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && |
3655 | XFS_ISIZE(ip) < eofb->eof_min_file_size) |
3656 | return 0; |
3657 | - |
3658 | - /* |
3659 | - * A scan owner implies we already hold the iolock. Skip it in |
3660 | - * xfs_free_eofblocks() to avoid deadlock. This also eliminates |
3661 | - * the possibility of EAGAIN being returned. |
3662 | - */ |
3663 | - if (eofb->eof_scan_owner == ip->i_ino) |
3664 | - need_iolock = false; |
3665 | } |
3666 | |
3667 | /* Free the CoW blocks */ |
3668 | - if (need_iolock) { |
3669 | - xfs_ilock(ip, XFS_IOLOCK_EXCL); |
3670 | - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); |
3671 | - } |
3672 | + xfs_ilock(ip, XFS_IOLOCK_EXCL); |
3673 | + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); |
3674 | |
3675 | - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); |
3676 | + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); |
3677 | |
3678 | - if (need_iolock) { |
3679 | - xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); |
3680 | - xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
3681 | - } |
3682 | + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); |
3683 | + xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
3684 | |
3685 | return ret; |
3686 | } |
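With the eof_scan_owner hack removed, xfs_inode_free_eofblocks() simply trylocks the iolock; if the lock is contended during a SYNC_WAIT scan it returns -EAGAIN so the walk revisits the inode later, and otherwise it just skips it. A minimal sketch of that trylock-or-requeue idea, with a pthread mutex standing in for the iolock:

/* Illustrative sketch of the trylock-or-requeue idea; EAGAIN and the
 * mutex are stand-ins for the XFS iolock and scan return codes. */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t iolock = PTHREAD_MUTEX_INITIALIZER;

static int free_eofblocks(int sync_wait)
{
    if (pthread_mutex_trylock(&iolock) != 0) {
        /* Someone else holds the lock: a waiting (SYNC_WAIT) scan asks
         * to be called again later, a casual scan just skips the inode. */
        return sync_wait ? -EAGAIN : 0;
    }

    puts("trimming blocks past EOF");
    pthread_mutex_unlock(&iolock);
    return 0;
}

int main(void)
{
    printf("uncontended: %d\n", free_eofblocks(1));

    pthread_mutex_lock(&iolock);            /* simulate contention */
    printf("contended, sync: %d\n", free_eofblocks(1));
    printf("contended, async: %d\n", free_eofblocks(0));
    pthread_mutex_unlock(&iolock);
    return 0;
}
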
3687 | diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h |
3688 | index a1e02f4708ab..8a7c849b4dea 100644 |
3689 | --- a/fs/xfs/xfs_icache.h |
3690 | +++ b/fs/xfs/xfs_icache.h |
3691 | @@ -27,7 +27,6 @@ struct xfs_eofblocks { |
3692 | kgid_t eof_gid; |
3693 | prid_t eof_prid; |
3694 | __u64 eof_min_file_size; |
3695 | - xfs_ino_t eof_scan_owner; |
3696 | }; |
3697 | |
3698 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ |
3699 | @@ -102,7 +101,6 @@ xfs_fs_eofblocks_from_user( |
3700 | dst->eof_flags = src->eof_flags; |
3701 | dst->eof_prid = src->eof_prid; |
3702 | dst->eof_min_file_size = src->eof_min_file_size; |
3703 | - dst->eof_scan_owner = NULLFSINO; |
3704 | |
3705 | dst->eof_uid = INVALID_UID; |
3706 | if (src->eof_flags & XFS_EOF_FLAGS_UID) { |
3707 | diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c |
3708 | index 512ff13ed66a..e50636c9a89c 100644 |
3709 | --- a/fs/xfs/xfs_inode.c |
3710 | +++ b/fs/xfs/xfs_inode.c |
3711 | @@ -1624,7 +1624,7 @@ xfs_itruncate_extents( |
3712 | |
3713 | /* Remove all pending CoW reservations. */ |
3714 | error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, |
3715 | - last_block); |
3716 | + last_block, true); |
3717 | if (error) |
3718 | goto out; |
3719 | |
3720 | @@ -1701,32 +1701,34 @@ xfs_release( |
3721 | if (xfs_can_free_eofblocks(ip, false)) { |
3722 | |
3723 | /* |
3724 | + * Check if the inode is being opened, written and closed |
3725 | + * frequently and we have delayed allocation blocks outstanding |
3726 | + * (e.g. streaming writes from the NFS server), truncating the |
3727 | + * blocks past EOF will cause fragmentation to occur. |
3728 | + * |
3729 | + * In this case don't do the truncation, but we have to be |
3730 | + * careful how we detect this case. Blocks beyond EOF show up as |
3731 | + * i_delayed_blks even when the inode is clean, so we need to |
3732 | + * truncate them away first before checking for a dirty release. |
3733 | + * Hence on the first dirty close we will still remove the |
3734 | + * speculative allocation, but after that we will leave it in |
3735 | + * place. |
3736 | + */ |
3737 | + if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) |
3738 | + return 0; |
3739 | + /* |
3740 | * If we can't get the iolock just skip truncating the blocks |
3741 | * past EOF because we could deadlock with the mmap_sem |
3742 | - * otherwise. We'll get another chance to drop them once the |
3743 | + * otherwise. We'll get another chance to drop them once the |
3744 | * last reference to the inode is dropped, so we'll never leak |
3745 | * blocks permanently. |
3746 | - * |
3747 | - * Further, check if the inode is being opened, written and |
3748 | - * closed frequently and we have delayed allocation blocks |
3749 | - * outstanding (e.g. streaming writes from the NFS server), |
3750 | - * truncating the blocks past EOF will cause fragmentation to |
3751 | - * occur. |
3752 | - * |
3753 | - * In this case don't do the truncation, either, but we have to |
3754 | - * be careful how we detect this case. Blocks beyond EOF show |
3755 | - * up as i_delayed_blks even when the inode is clean, so we |
3756 | - * need to truncate them away first before checking for a dirty |
3757 | - * release. Hence on the first dirty close we will still remove |
3758 | - * the speculative allocation, but after that we will leave it |
3759 | - * in place. |
3760 | */ |
3761 | - if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) |
3762 | - return 0; |
3763 | - |
3764 | - error = xfs_free_eofblocks(mp, ip, true); |
3765 | - if (error && error != -EAGAIN) |
3766 | - return error; |
3767 | + if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { |
3768 | + error = xfs_free_eofblocks(ip); |
3769 | + xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
3770 | + if (error) |
3771 | + return error; |
3772 | + } |
3773 | |
3774 | /* delalloc blocks after truncation means it really is dirty */ |
3775 | if (ip->i_delayed_blks) |
3776 | @@ -1801,22 +1803,23 @@ xfs_inactive_ifree( |
3777 | int error; |
3778 | |
3779 | /* |
3780 | - * The ifree transaction might need to allocate blocks for record |
3781 | - * insertion to the finobt. We don't want to fail here at ENOSPC, so |
3782 | - * allow ifree to dip into the reserved block pool if necessary. |
3783 | - * |
3784 | - * Freeing large sets of inodes generally means freeing inode chunks, |
3785 | - * directory and file data blocks, so this should be relatively safe. |
3786 | - * Only under severe circumstances should it be possible to free enough |
3787 | - * inodes to exhaust the reserve block pool via finobt expansion while |
3788 | - * at the same time not creating free space in the filesystem. |
3789 | + * We try to use a per-AG reservation for any block needed by the finobt |
3790 | + * tree, but as the finobt feature predates the per-AG reservation |
3791 | + * support a degraded file system might not have enough space for the |
3792 | + * reservation at mount time. In that case try to dip into the reserved |
3793 | + * pool and pray. |
3794 | * |
3795 | * Send a warning if the reservation does happen to fail, as the inode |
3796 | * now remains allocated and sits on the unlinked list until the fs is |
3797 | * repaired. |
3798 | */ |
3799 | - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, |
3800 | - XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); |
3801 | + if (unlikely(mp->m_inotbt_nores)) { |
3802 | + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, |
3803 | + XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, |
3804 | + &tp); |
3805 | + } else { |
3806 | + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp); |
3807 | + } |
3808 | if (error) { |
3809 | if (error == -ENOSPC) { |
3810 | xfs_warn_ratelimited(mp, |
3811 | @@ -1912,8 +1915,11 @@ xfs_inactive( |
3812 | * cache. Post-eof blocks must be freed, lest we end up with |
3813 | * broken free space accounting. |
3814 | */ |
3815 | - if (xfs_can_free_eofblocks(ip, true)) |
3816 | - xfs_free_eofblocks(mp, ip, false); |
3817 | + if (xfs_can_free_eofblocks(ip, true)) { |
3818 | + xfs_ilock(ip, XFS_IOLOCK_EXCL); |
3819 | + xfs_free_eofblocks(ip); |
3820 | + xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
3821 | + } |
3822 | |
3823 | return; |
3824 | } |
3825 | diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c |
3826 | index e8889614cec3..360562484e7b 100644 |
3827 | --- a/fs/xfs/xfs_iomap.c |
3828 | +++ b/fs/xfs/xfs_iomap.c |
3829 | @@ -637,6 +637,11 @@ xfs_file_iomap_begin_delay( |
3830 | goto out_unlock; |
3831 | } |
3832 | |
3833 | + /* |
3834 | + * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch |
3835 | + * them out if the write happens to fail. |
3836 | + */ |
3837 | + iomap->flags = IOMAP_F_NEW; |
3838 | trace_xfs_iomap_alloc(ip, offset, count, 0, &got); |
3839 | done: |
3840 | if (isnullstartblock(got.br_startblock)) |
3841 | @@ -685,7 +690,7 @@ xfs_iomap_write_allocate( |
3842 | int nres; |
3843 | |
3844 | if (whichfork == XFS_COW_FORK) |
3845 | - flags |= XFS_BMAPI_COWFORK; |
3846 | + flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC; |
3847 | |
3848 | /* |
3849 | * Make sure that the dquots are there. |
3850 | @@ -1061,7 +1066,8 @@ xfs_file_iomap_end_delalloc( |
3851 | struct xfs_inode *ip, |
3852 | loff_t offset, |
3853 | loff_t length, |
3854 | - ssize_t written) |
3855 | + ssize_t written, |
3856 | + struct iomap *iomap) |
3857 | { |
3858 | struct xfs_mount *mp = ip->i_mount; |
3859 | xfs_fileoff_t start_fsb; |
3860 | @@ -1080,14 +1086,14 @@ xfs_file_iomap_end_delalloc( |
3861 | end_fsb = XFS_B_TO_FSB(mp, offset + length); |
3862 | |
3863 | /* |
3864 | - * Trim back delalloc blocks if we didn't manage to write the whole |
3865 | - * range reserved. |
3866 | + * Trim delalloc blocks if they were allocated by this write and we |
3867 | + * didn't manage to write the whole range. |
3868 | * |
3869 | * We don't need to care about racing delalloc as we hold i_mutex |
3870 | * across the reserve/allocate/unreserve calls. If there are delalloc |
3871 | * blocks in the range, they are ours. |
3872 | */ |
3873 | - if (start_fsb < end_fsb) { |
3874 | + if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { |
3875 | truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), |
3876 | XFS_FSB_TO_B(mp, end_fsb) - 1); |
3877 | |
3878 | @@ -1117,7 +1123,7 @@ xfs_file_iomap_end( |
3879 | { |
3880 | if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) |
3881 | return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, |
3882 | - length, written); |
3883 | + length, written, iomap); |
3884 | return 0; |
3885 | } |
3886 | |
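The IOMAP_F_NEW change above makes the end-of-write cleanup punch out delalloc blocks only when this particular write created the reservation, so a short write can no longer discard reservations that belonged to someone else. A minimal sketch of that "only undo what you created" flag, with invented names rather than the iomap API:

/* Illustrative sketch of the "only undo what you created" flag; the
 * names are invented, not the iomap API. */
#include <stdbool.h>
#include <stdio.h>

struct mapping {
    bool newly_allocated;   /* set only when this write made the reservation */
    long reserved;          /* blocks reserved for the write */
};

static void begin_write(struct mapping *m, bool had_reservation)
{
    m->newly_allocated = !had_reservation;
    m->reserved = had_reservation ? 0 : 8;
    if (m->newly_allocated)
        printf("reserved %ld new blocks\n", m->reserved);
}

static void end_write(struct mapping *m, long written, long requested)
{
    /* Trim leftovers only if the reservation belongs to this write. */
    if (m->newly_allocated && written < requested)
        printf("punching out %ld unused blocks\n", requested - written);
    else
        printf("leaving existing reservation alone\n");
}

int main(void)
{
    struct mapping fresh, reused;

    begin_write(&fresh, false);
    end_write(&fresh, 3, 8);      /* short write over a new reservation */

    begin_write(&reused, true);
    end_write(&reused, 3, 8);     /* short write over someone else's blocks */
    return 0;
}
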
3887 | diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c |
3888 | index b341f10cf481..13796f212f98 100644 |
3889 | --- a/fs/xfs/xfs_mount.c |
3890 | +++ b/fs/xfs/xfs_mount.c |
3891 | @@ -502,8 +502,7 @@ STATIC void |
3892 | xfs_set_inoalignment(xfs_mount_t *mp) |
3893 | { |
3894 | if (xfs_sb_version_hasalign(&mp->m_sb) && |
3895 | - mp->m_sb.sb_inoalignmt >= |
3896 | - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) |
3897 | + mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) |
3898 | mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; |
3899 | else |
3900 | mp->m_inoalign_mask = 0; |
3901 | diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h |
3902 | index 819b80b15bfb..1bf878b0492c 100644 |
3903 | --- a/fs/xfs/xfs_mount.h |
3904 | +++ b/fs/xfs/xfs_mount.h |
3905 | @@ -140,6 +140,7 @@ typedef struct xfs_mount { |
3906 | int m_fixedfsid[2]; /* unchanged for life of FS */ |
3907 | uint m_dmevmask; /* DMI events for this FS */ |
3908 | __uint64_t m_flags; /* global mount flags */ |
3909 | + bool m_inotbt_nores; /* no per-AG finobt resv. */ |
3910 | int m_ialloc_inos; /* inodes in inode allocation */ |
3911 | int m_ialloc_blks; /* blocks in inode allocation */ |
3912 | int m_ialloc_min_blks;/* min blocks in sparse inode |
3913 | diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c |
3914 | index 4d3f74e3c5e1..2252f163c38f 100644 |
3915 | --- a/fs/xfs/xfs_reflink.c |
3916 | +++ b/fs/xfs/xfs_reflink.c |
3917 | @@ -82,11 +82,22 @@ |
3918 | * mappings are a reservation against the free space in the filesystem; |
3919 | * adjacent mappings can also be combined into fewer larger mappings. |
3920 | * |
3921 | + * As an optimization, the CoW extent size hint (cowextsz) creates |
3922 | + * outsized aligned delalloc reservations in the hope of landing out of |
3923 | + * order nearby CoW writes in a single extent on disk, thereby reducing |
3924 | + * fragmentation and improving future performance. |
3925 | + * |
3926 | + * D: --RRRRRRSSSRRRRRRRR--- (data fork) |
3927 | + * C: ------DDDDDDD--------- (CoW fork) |
3928 | + * |
3929 | * When dirty pages are being written out (typically in writepage), the |
3930 | - * delalloc reservations are converted into real mappings by allocating |
3931 | - * blocks and replacing the delalloc mapping with real ones. A delalloc |
3932 | - * mapping can be replaced by several real ones if the free space is |
3933 | - * fragmented. |
3934 | + * delalloc reservations are converted into unwritten mappings by |
3935 | + * allocating blocks and replacing the delalloc mapping with real ones. |
3936 | + * A delalloc mapping can be replaced by several unwritten ones if the |
3937 | + * free space is fragmented. |
3938 | + * |
3939 | + * D: --RRRRRRSSSRRRRRRRR--- |
3940 | + * C: ------UUUUUUU--------- |
3941 | * |
3942 | * We want to adapt the delalloc mechanism for copy-on-write, since the |
3943 | * write paths are similar. The first two steps (creating the reservation |
3944 | @@ -101,13 +112,29 @@ |
3945 | * Block-aligned directio writes will use the same mechanism as buffered |
3946 | * writes. |
3947 | * |
3948 | + * Just prior to submitting the actual disk write requests, we convert |
3949 | + * the extents representing the range of the file actually being written |
3950 | + * (as opposed to extra pieces created for the cowextsize hint) to real |
3951 | + * extents. This will become important in the next step: |
3952 | + * |
3953 | + * D: --RRRRRRSSSRRRRRRRR--- |
3954 | + * C: ------UUrrUUU--------- |
3955 | + * |
3956 | * CoW remapping must be done after the data block write completes, |
3957 | * because we don't want to destroy the old data fork map until we're sure |
3958 | * the new block has been written. Since the new mappings are kept in a |
3959 | * separate fork, we can simply iterate these mappings to find the ones |
3960 | * that cover the file blocks that we just CoW'd. For each extent, simply |
3961 | * unmap the corresponding range in the data fork, map the new range into |
3962 | - * the data fork, and remove the extent from the CoW fork. |
3963 | + * the data fork, and remove the extent from the CoW fork. Because of |
3964 | + * the presence of the cowextsize hint, however, we must be careful |
3965 | + * only to remap the blocks that we've actually written out -- we must |
3966 | + * never remap delalloc reservations nor CoW staging blocks that have |
3967 | + * yet to be written. This corresponds exactly to the real extents in |
3968 | + * the CoW fork: |
3969 | + * |
3970 | + * D: --RRRRRRrrSRRRRRRRR--- |
3971 | + * C: ------UU--UUU--------- |
3972 | * |
3973 | * Since the remapping operation can be applied to an arbitrary file |
3974 | * range, we record the need for the remap step as a flag in the ioend |
3975 | @@ -296,6 +323,65 @@ xfs_reflink_reserve_cow( |
3976 | return 0; |
3977 | } |
3978 | |
3979 | +/* Convert part of an unwritten CoW extent to a real one. */ |
3980 | +STATIC int |
3981 | +xfs_reflink_convert_cow_extent( |
3982 | + struct xfs_inode *ip, |
3983 | + struct xfs_bmbt_irec *imap, |
3984 | + xfs_fileoff_t offset_fsb, |
3985 | + xfs_filblks_t count_fsb, |
3986 | + struct xfs_defer_ops *dfops) |
3987 | +{ |
3988 | + struct xfs_bmbt_irec irec = *imap; |
3989 | + xfs_fsblock_t first_block; |
3990 | + int nimaps = 1; |
3991 | + |
3992 | + if (imap->br_state == XFS_EXT_NORM) |
3993 | + return 0; |
3994 | + |
3995 | + xfs_trim_extent(&irec, offset_fsb, count_fsb); |
3996 | + trace_xfs_reflink_convert_cow(ip, &irec); |
3997 | + if (irec.br_blockcount == 0) |
3998 | + return 0; |
3999 | + return xfs_bmapi_write(NULL, ip, irec.br_startoff, irec.br_blockcount, |
4000 | + XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block, |
4001 | + 0, &irec, &nimaps, dfops); |
4002 | +} |
4003 | + |
4004 | +/* Convert all of the unwritten CoW extents in a file's range to real ones. */ |
4005 | +int |
4006 | +xfs_reflink_convert_cow( |
4007 | + struct xfs_inode *ip, |
4008 | + xfs_off_t offset, |
4009 | + xfs_off_t count) |
4010 | +{ |
4011 | + struct xfs_bmbt_irec got; |
4012 | + struct xfs_defer_ops dfops; |
4013 | + struct xfs_mount *mp = ip->i_mount; |
4014 | + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); |
4015 | + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); |
4016 | + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); |
4017 | + xfs_extnum_t idx; |
4018 | + bool found; |
4019 | + int error = 0; |
4020 | + |
4021 | + xfs_ilock(ip, XFS_ILOCK_EXCL); |
4022 | + |
4023 | + /* Convert all the extents to real from unwritten. */ |
4024 | + for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); |
4025 | + found && got.br_startoff < end_fsb; |
4026 | + found = xfs_iext_get_extent(ifp, ++idx, &got)) { |
4027 | + error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb, |
4028 | + end_fsb - offset_fsb, &dfops); |
4029 | + if (error) |
4030 | + break; |
4031 | + } |
4032 | + |
4033 | + /* Finish up. */ |
4034 | + xfs_iunlock(ip, XFS_ILOCK_EXCL); |
4035 | + return error; |
4036 | +} |
4037 | + |
4038 | /* Allocate all CoW reservations covering a range of blocks in a file. */ |
4039 | static int |
4040 | __xfs_reflink_allocate_cow( |
4041 | @@ -328,6 +414,7 @@ __xfs_reflink_allocate_cow( |
4042 | goto out_unlock; |
4043 | ASSERT(nimaps == 1); |
4044 | |
4045 | + /* Make sure there's a CoW reservation for it. */ |
4046 | error = xfs_reflink_reserve_cow(ip, &imap, &shared); |
4047 | if (error) |
4048 | goto out_trans_cancel; |
4049 | @@ -337,14 +424,16 @@ __xfs_reflink_allocate_cow( |
4050 | goto out_trans_cancel; |
4051 | } |
4052 | |
4053 | + /* Allocate the entire reservation as unwritten blocks. */ |
4054 | xfs_trans_ijoin(tp, ip, 0); |
4055 | error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, |
4056 | - XFS_BMAPI_COWFORK, &first_block, |
4057 | + XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block, |
4058 | XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), |
4059 | &imap, &nimaps, &dfops); |
4060 | if (error) |
4061 | goto out_trans_cancel; |
4062 | |
4063 | + /* Finish up. */ |
4064 | error = xfs_defer_finish(&tp, &dfops, NULL); |
4065 | if (error) |
4066 | goto out_trans_cancel; |
4067 | @@ -389,11 +478,12 @@ xfs_reflink_allocate_cow_range( |
4068 | if (error) { |
4069 | trace_xfs_reflink_allocate_cow_range_error(ip, error, |
4070 | _RET_IP_); |
4071 | - break; |
4072 | + return error; |
4073 | } |
4074 | } |
4075 | |
4076 | - return error; |
4077 | + /* Convert the CoW extents to regular. */ |
4078 | + return xfs_reflink_convert_cow(ip, offset, count); |
4079 | } |
4080 | |
4081 | /* |
4082 | @@ -481,14 +571,18 @@ xfs_reflink_trim_irec_to_next_cow( |
4083 | } |
4084 | |
4085 | /* |
4086 | - * Cancel all pending CoW reservations for some block range of an inode. |
4087 | + * Cancel CoW reservations for some block range of an inode. |
4088 | + * |
4089 | + * If cancel_real is true this function cancels all COW fork extents for the |
4090 | + * inode; if cancel_real is false, real extents are not cleared. |
4091 | */ |
4092 | int |
4093 | xfs_reflink_cancel_cow_blocks( |
4094 | struct xfs_inode *ip, |
4095 | struct xfs_trans **tpp, |
4096 | xfs_fileoff_t offset_fsb, |
4097 | - xfs_fileoff_t end_fsb) |
4098 | + xfs_fileoff_t end_fsb, |
4099 | + bool cancel_real) |
4100 | { |
4101 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); |
4102 | struct xfs_bmbt_irec got, prev, del; |
4103 | @@ -515,7 +609,7 @@ xfs_reflink_cancel_cow_blocks( |
4104 | &idx, &got, &del); |
4105 | if (error) |
4106 | break; |
4107 | - } else { |
4108 | + } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { |
4109 | xfs_trans_ijoin(*tpp, ip, 0); |
4110 | xfs_defer_init(&dfops, &firstfsb); |
4111 | |
4112 | @@ -558,13 +652,17 @@ xfs_reflink_cancel_cow_blocks( |
4113 | } |
4114 | |
4115 | /* |
4116 | - * Cancel all pending CoW reservations for some byte range of an inode. |
4117 | + * Cancel CoW reservations for some byte range of an inode. |
4118 | + * |
4119 | + * If cancel_real is true this function cancels all COW fork extents for the |
4120 | + * inode; if cancel_real is false, real extents are not cleared. |
4121 | */ |
4122 | int |
4123 | xfs_reflink_cancel_cow_range( |
4124 | struct xfs_inode *ip, |
4125 | xfs_off_t offset, |
4126 | - xfs_off_t count) |
4127 | + xfs_off_t count, |
4128 | + bool cancel_real) |
4129 | { |
4130 | struct xfs_trans *tp; |
4131 | xfs_fileoff_t offset_fsb; |
4132 | @@ -590,7 +688,8 @@ xfs_reflink_cancel_cow_range( |
4133 | xfs_trans_ijoin(tp, ip, 0); |
4134 | |
4135 | /* Scrape out the old CoW reservations */ |
4136 | - error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); |
4137 | + error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, |
4138 | + cancel_real); |
4139 | if (error) |
4140 | goto out_cancel; |
4141 | |
4142 | @@ -669,6 +768,16 @@ xfs_reflink_end_cow( |
4143 | |
4144 | ASSERT(!isnullstartblock(got.br_startblock)); |
4145 | |
4146 | + /* |
4147 | + * Don't remap unwritten extents; these are |
4148 | + * speculatively preallocated CoW extents that have been |
4149 | + * allocated but have not yet been involved in a write. |
4150 | + */ |
4151 | + if (got.br_state == XFS_EXT_UNWRITTEN) { |
4152 | + idx--; |
4153 | + goto next_extent; |
4154 | + } |
4155 | + |
4156 | /* Unmap the old blocks in the data fork. */ |
4157 | xfs_defer_init(&dfops, &firstfsb); |
4158 | rlen = del.br_blockcount; |
4159 | @@ -885,13 +994,14 @@ STATIC int |
4160 | xfs_reflink_update_dest( |
4161 | struct xfs_inode *dest, |
4162 | xfs_off_t newlen, |
4163 | - xfs_extlen_t cowextsize) |
4164 | + xfs_extlen_t cowextsize, |
4165 | + bool is_dedupe) |
4166 | { |
4167 | struct xfs_mount *mp = dest->i_mount; |
4168 | struct xfs_trans *tp; |
4169 | int error; |
4170 | |
4171 | - if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) |
4172 | + if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) |
4173 | return 0; |
4174 | |
4175 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); |
4176 | @@ -912,6 +1022,10 @@ xfs_reflink_update_dest( |
4177 | dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; |
4178 | } |
4179 | |
4180 | + if (!is_dedupe) { |
4181 | + xfs_trans_ichgtime(tp, dest, |
4182 | + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
4183 | + } |
4184 | xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); |
4185 | |
4186 | error = xfs_trans_commit(tp); |
4187 | @@ -1428,7 +1542,8 @@ xfs_reflink_remap_range( |
4188 | !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) |
4189 | cowextsize = src->i_d.di_cowextsize; |
4190 | |
4191 | - ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); |
4192 | + ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, |
4193 | + is_dedupe); |
4194 | |
4195 | out_unlock: |
4196 | xfs_iunlock(src, XFS_MMAPLOCK_EXCL); |
4197 | @@ -1580,7 +1695,7 @@ xfs_reflink_clear_inode_flag( |
4198 | * We didn't find any shared blocks so turn off the reflink flag. |
4199 | * First, get rid of any leftover CoW mappings. |
4200 | */ |
4201 | - error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); |
4202 | + error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true); |
4203 | if (error) |
4204 | return error; |
4205 | |
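Taken together, the expanded comment block and the new xfs_reflink_convert_cow() above describe a three-stage life cycle for CoW staging extents: a delalloc reservation, an unwritten preallocation, and a real (written) extent, and only real extents may be remapped into the data fork at I/O completion. A minimal state-machine sketch of that progression; the enum and transitions merely paraphrase the comment and are not XFS code:

/* Illustrative state machine for a CoW staging extent; the states and
 * transitions paraphrase the comment above and are not XFS code. */
#include <stdio.h>

enum cow_state {
    COW_DELALLOC,    /* space reserved, nothing allocated yet */
    COW_UNWRITTEN,   /* blocks allocated but not written (preallocation) */
    COW_REAL,        /* blocks written; safe to remap into the data fork */
    COW_REMAPPED,    /* moved into the data fork at I/O completion */
};

static const char *name(enum cow_state s)
{
    static const char *names[] = {
        "delalloc", "unwritten", "real", "remapped"
    };
    return names[s];
}

int main(void)
{
    enum cow_state s = COW_DELALLOC;

    printf("reserve: %s\n", name(s));

    s = COW_UNWRITTEN;       /* writeback allocates the blocks */
    printf("allocate: %s\n", name(s));

    s = COW_REAL;            /* convert just before submitting the write */
    printf("convert: %s\n", name(s));

    s = COW_REMAPPED;        /* end_cow remaps only real extents */
    printf("end_io: %s\n", name(s));
    return 0;
}
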
4206 | diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h |
4207 | index 97ea9b487884..a57966fc7ddd 100644 |
4208 | --- a/fs/xfs/xfs_reflink.h |
4209 | +++ b/fs/xfs/xfs_reflink.h |
4210 | @@ -30,6 +30,8 @@ extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, |
4211 | struct xfs_bmbt_irec *imap, bool *shared); |
4212 | extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, |
4213 | xfs_off_t offset, xfs_off_t count); |
4214 | +extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset, |
4215 | + xfs_off_t count); |
4216 | extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, |
4217 | struct xfs_bmbt_irec *imap, bool *need_alloc); |
4218 | extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, |
4219 | @@ -37,9 +39,9 @@ extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, |
4220 | |
4221 | extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, |
4222 | struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, |
4223 | - xfs_fileoff_t end_fsb); |
4224 | + xfs_fileoff_t end_fsb, bool cancel_real); |
4225 | extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, |
4226 | - xfs_off_t count); |
4227 | + xfs_off_t count, bool cancel_real); |
4228 | extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, |
4229 | xfs_off_t count); |
4230 | extern int xfs_reflink_recover_cow(struct xfs_mount *mp); |
4231 | diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c |
4232 | index ade4691e3f74..dbbd3f1fd2b7 100644 |
4233 | --- a/fs/xfs/xfs_super.c |
4234 | +++ b/fs/xfs/xfs_super.c |
4235 | @@ -948,7 +948,7 @@ xfs_fs_destroy_inode( |
4236 | XFS_STATS_INC(ip->i_mount, vn_remove); |
4237 | |
4238 | if (xfs_is_reflink_inode(ip)) { |
4239 | - error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); |
4240 | + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); |
4241 | if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) |
4242 | xfs_warn(ip->i_mount, |
4243 | "Error %d while evicting CoW blocks for inode %llu.", |
4244 | diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h |
4245 | index 0907752be62d..828f383df121 100644 |
4246 | --- a/fs/xfs/xfs_trace.h |
4247 | +++ b/fs/xfs/xfs_trace.h |
4248 | @@ -3183,6 +3183,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, |
4249 | __field(xfs_fileoff_t, lblk) |
4250 | __field(xfs_extlen_t, len) |
4251 | __field(xfs_fsblock_t, pblk) |
4252 | + __field(int, state) |
4253 | ), |
4254 | TP_fast_assign( |
4255 | __entry->dev = VFS_I(ip)->i_sb->s_dev; |
4256 | @@ -3190,13 +3191,15 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, |
4257 | __entry->lblk = irec->br_startoff; |
4258 | __entry->len = irec->br_blockcount; |
4259 | __entry->pblk = irec->br_startblock; |
4260 | + __entry->state = irec->br_state; |
4261 | ), |
4262 | - TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu", |
4263 | + TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d", |
4264 | MAJOR(__entry->dev), MINOR(__entry->dev), |
4265 | __entry->ino, |
4266 | __entry->lblk, |
4267 | __entry->len, |
4268 | - __entry->pblk) |
4269 | + __entry->pblk, |
4270 | + __entry->state) |
4271 | ); |
4272 | #define DEFINE_INODE_IREC_EVENT(name) \ |
4273 | DEFINE_EVENT(xfs_inode_irec_class, name, \ |
4274 | @@ -3345,11 +3348,12 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); |
4275 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); |
4276 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); |
4277 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); |
4278 | +DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); |
4279 | |
4280 | DEFINE_RW_EVENT(xfs_reflink_reserve_cow); |
4281 | DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); |
4282 | |
4283 | -DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); |
4284 | +DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write); |
4285 | DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping); |
4286 | DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); |
4287 | |
4288 | diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h |
4289 | index 01c0b9cc3915..8c58db2c09c6 100644 |
4290 | --- a/include/linux/kvm_host.h |
4291 | +++ b/include/linux/kvm_host.h |
4292 | @@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, |
4293 | int len, void *val); |
4294 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
4295 | int len, struct kvm_io_device *dev); |
4296 | -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
4297 | - struct kvm_io_device *dev); |
4298 | +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
4299 | + struct kvm_io_device *dev); |
4300 | struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
4301 | gpa_t addr); |
4302 | |
4303 | diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h |
4304 | index 254698856b8f..8b35bdbdc214 100644 |
4305 | --- a/include/linux/memcontrol.h |
4306 | +++ b/include/linux/memcontrol.h |
4307 | @@ -739,6 +739,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) |
4308 | return false; |
4309 | } |
4310 | |
4311 | +static inline void mem_cgroup_update_page_stat(struct page *page, |
4312 | + enum mem_cgroup_stat_index idx, |
4313 | + int nr) |
4314 | +{ |
4315 | +} |
4316 | + |
4317 | static inline void mem_cgroup_inc_page_stat(struct page *page, |
4318 | enum mem_cgroup_stat_index idx) |
4319 | { |
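The memcontrol.h hunk adds an empty static-inline stub for mem_cgroup_update_page_stat() on !CONFIG_MEMCG builds, which is what lets the mm/rmap.c hunks further down call it unconditionally. A minimal sketch of that empty-stub pattern, using an invented feature switch rather than the kernel's config symbols:

/* Illustrative sketch of the empty-stub pattern for optional features;
 * MYFEATURE and the counter are invented, not kernel symbols. */
#include <stdio.h>

/* #define MYFEATURE 1 */

#ifdef MYFEATURE
static long counter;

static inline void update_stat(int nr)
{
    counter += nr;
    printf("counter now %ld\n", counter);
}
#else
/* Empty stub: callers need no #ifdefs and the call compiles away. */
static inline void update_stat(int nr) { (void)nr; }
#endif

int main(void)
{
    update_stat(4);     /* mapped 4 pages */
    update_stat(-4);    /* unmapped them again */
    return 0;
}
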
4320 | diff --git a/kernel/padata.c b/kernel/padata.c |
4321 | index 7848f0566403..b4a3c0ae649b 100644 |
4322 | --- a/kernel/padata.c |
4323 | +++ b/kernel/padata.c |
4324 | @@ -190,19 +190,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) |
4325 | |
4326 | reorder = &next_queue->reorder; |
4327 | |
4328 | + spin_lock(&reorder->lock); |
4329 | if (!list_empty(&reorder->list)) { |
4330 | padata = list_entry(reorder->list.next, |
4331 | struct padata_priv, list); |
4332 | |
4333 | - spin_lock(&reorder->lock); |
4334 | list_del_init(&padata->list); |
4335 | atomic_dec(&pd->reorder_objects); |
4336 | - spin_unlock(&reorder->lock); |
4337 | |
4338 | pd->processed++; |
4339 | |
4340 | + spin_unlock(&reorder->lock); |
4341 | goto out; |
4342 | } |
4343 | + spin_unlock(&reorder->lock); |
4344 | |
4345 | if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) { |
4346 | padata = ERR_PTR(-ENODATA); |
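The padata hunk widens reorder->lock so it covers the list_empty() check as well as the removal, closing the window in which another CPU could dequeue the same entry between the check and the list_del_init(). The pthreads sketch below shows why the emptiness check has to sit inside the same critical section; it uses a plain C list, not the padata structures:

/* Illustrative sketch: the emptiness check and the removal must happen
 * under one lock acquisition.  Plain C list, not the padata structures. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { int val; struct node *next; };

static struct node *head;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void push(int val)
{
    struct node *n = malloc(sizeof(*n));
    n->val = val;
    pthread_mutex_lock(&lock);
    n->next = head;
    head = n;
    pthread_mutex_unlock(&lock);
}

/* Check-and-remove as one critical section, as in the fixed code. */
static int pop(int *val)
{
    struct node *n;

    pthread_mutex_lock(&lock);
    if (!head) {                /* the check is inside the lock ... */
        pthread_mutex_unlock(&lock);
        return 0;
    }
    n = head;                   /* ... so nobody can steal the node here */
    head = n->next;
    pthread_mutex_unlock(&lock);

    *val = n->val;
    free(n);
    return 1;
}

int main(void)
{
    int v;

    push(1);
    push(2);
    while (pop(&v))
        printf("popped %d\n", v);
    return 0;
}
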
4347 | diff --git a/lib/syscall.c b/lib/syscall.c |
4348 | index 63239e097b13..a72cd0996230 100644 |
4349 | --- a/lib/syscall.c |
4350 | +++ b/lib/syscall.c |
4351 | @@ -11,6 +11,7 @@ static int collect_syscall(struct task_struct *target, long *callno, |
4352 | |
4353 | if (!try_get_task_stack(target)) { |
4354 | /* Task has no stack, so the task isn't in a syscall. */ |
4355 | + *sp = *pc = 0; |
4356 | *callno = -1; |
4357 | return 0; |
4358 | } |
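The lib/syscall.c change zeroes the sp and pc output parameters on the "task has no stack" early return, so callers never read uninitialized values. A minimal sketch of the rule that every exit path must define the out-parameters, with an invented lookup function rather than collect_syscall():

/* Illustrative sketch: initialize out-parameters on every return path.
 * The lookup function is invented for this example. */
#include <stdio.h>

static int lookup(int have_data, unsigned long *sp, unsigned long *pc,
                  long *callno)
{
    if (!have_data) {
        /* Early exit still defines every output the caller may read. */
        *sp = *pc = 0;
        *callno = -1;
        return 0;
    }

    *sp = 0x7fff0000;
    *pc = 0x400123;
    *callno = 42;
    return 0;
}

int main(void)
{
    unsigned long sp, pc;
    long callno;

    lookup(0, &sp, &pc, &callno);
    printf("no data:  sp=%#lx pc=%#lx callno=%ld\n", sp, pc, callno);

    lookup(1, &sp, &pc, &callno);
    printf("has data: sp=%#lx pc=%#lx callno=%ld\n", sp, pc, callno);
    return 0;
}
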
4359 | diff --git a/mm/hugetlb.c b/mm/hugetlb.c |
4360 | index b6adedbafaf5..65c36acf8a6b 100644 |
4361 | --- a/mm/hugetlb.c |
4362 | +++ b/mm/hugetlb.c |
4363 | @@ -4471,6 +4471,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
4364 | { |
4365 | struct page *page = NULL; |
4366 | spinlock_t *ptl; |
4367 | + pte_t pte; |
4368 | retry: |
4369 | ptl = pmd_lockptr(mm, pmd); |
4370 | spin_lock(ptl); |
4371 | @@ -4480,12 +4481,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
4372 | */ |
4373 | if (!pmd_huge(*pmd)) |
4374 | goto out; |
4375 | - if (pmd_present(*pmd)) { |
4376 | + pte = huge_ptep_get((pte_t *)pmd); |
4377 | + if (pte_present(pte)) { |
4378 | page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); |
4379 | if (flags & FOLL_GET) |
4380 | get_page(page); |
4381 | } else { |
4382 | - if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) { |
4383 | + if (is_hugetlb_entry_migration(pte)) { |
4384 | spin_unlock(ptl); |
4385 | __migration_entry_wait(mm, (pte_t *)pmd, ptl); |
4386 | goto retry; |
4387 | diff --git a/mm/rmap.c b/mm/rmap.c |
4388 | index 1ef36404e7b2..cd37c1c7e21b 100644 |
4389 | --- a/mm/rmap.c |
4390 | +++ b/mm/rmap.c |
4391 | @@ -1295,7 +1295,7 @@ void page_add_file_rmap(struct page *page, bool compound) |
4392 | goto out; |
4393 | } |
4394 | __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr); |
4395 | - mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); |
4396 | + mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr); |
4397 | out: |
4398 | unlock_page_memcg(page); |
4399 | } |
4400 | @@ -1335,7 +1335,7 @@ static void page_remove_file_rmap(struct page *page, bool compound) |
4401 | * pte lock(a spinlock) is held, which implies preemption disabled. |
4402 | */ |
4403 | __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr); |
4404 | - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); |
4405 | + mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr); |
4406 | |
4407 | if (unlikely(PageMlocked(page))) |
4408 | clear_page_mlock(page); |
4409 | diff --git a/mm/workingset.c b/mm/workingset.c |
4410 | index 33f6f4db32fd..4c4f05655e6e 100644 |
4411 | --- a/mm/workingset.c |
4412 | +++ b/mm/workingset.c |
4413 | @@ -492,7 +492,7 @@ static int __init workingset_init(void) |
4414 | pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", |
4415 | timestamp_bits, max_order, bucket_order); |
4416 | |
4417 | - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key); |
4418 | + ret = __list_lru_init(&workingset_shadow_nodes, true, &shadow_nodes_key); |
4419 | if (ret) |
4420 | goto err; |
4421 | ret = register_shrinker(&workingset_shadow_shrinker); |
4422 | diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c |
4423 | index 2efb335deada..25a30be862e9 100644 |
4424 | --- a/net/ceph/messenger.c |
4425 | +++ b/net/ceph/messenger.c |
4426 | @@ -7,6 +7,7 @@ |
4427 | #include <linux/kthread.h> |
4428 | #include <linux/net.h> |
4429 | #include <linux/nsproxy.h> |
4430 | +#include <linux/sched.h> |
4431 | #include <linux/slab.h> |
4432 | #include <linux/socket.h> |
4433 | #include <linux/string.h> |
4434 | @@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con) |
4435 | { |
4436 | struct sockaddr_storage *paddr = &con->peer_addr.in_addr; |
4437 | struct socket *sock; |
4438 | + unsigned int noio_flag; |
4439 | int ret; |
4440 | |
4441 | BUG_ON(con->sock); |
4442 | + |
4443 | + /* sock_create_kern() allocates with GFP_KERNEL */ |
4444 | + noio_flag = memalloc_noio_save(); |
4445 | ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, |
4446 | SOCK_STREAM, IPPROTO_TCP, &sock); |
4447 | + memalloc_noio_restore(noio_flag); |
4448 | if (ret) |
4449 | return ret; |
4450 | sock->sk->sk_allocation = GFP_NOFS; |
4451 | diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c |
4452 | index 3f4efcb85df5..3490d21ab9e7 100644 |
4453 | --- a/sound/core/seq/seq_fifo.c |
4454 | +++ b/sound/core/seq/seq_fifo.c |
4455 | @@ -265,6 +265,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize) |
4456 | /* NOTE: overflow flag is not cleared */ |
4457 | spin_unlock_irqrestore(&f->lock, flags); |
4458 | |
4459 | + /* close the old pool and wait until all users are gone */ |
4460 | + snd_seq_pool_mark_closing(oldpool); |
4461 | + snd_use_lock_sync(&f->use_lock); |
4462 | + |
4463 | /* release cells in old pool */ |
4464 | for (cell = oldhead; cell; cell = next) { |
4465 | next = cell->next; |
4466 | diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c |
4467 | index 112caa2d3c14..bb1aad39d987 100644 |
4468 | --- a/sound/pci/hda/patch_realtek.c |
4469 | +++ b/sound/pci/hda/patch_realtek.c |
4470 | @@ -4846,6 +4846,7 @@ enum { |
4471 | ALC292_FIXUP_DISABLE_AAMIX, |
4472 | ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK, |
4473 | ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, |
4474 | + ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, |
4475 | ALC275_FIXUP_DELL_XPS, |
4476 | ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, |
4477 | ALC293_FIXUP_LENOVO_SPK_NOISE, |
4478 | @@ -5446,6 +5447,15 @@ static const struct hda_fixup alc269_fixups[] = { |
4479 | .chained = true, |
4480 | .chain_id = ALC269_FIXUP_HEADSET_MODE |
4481 | }, |
4482 | + [ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = { |
4483 | + .type = HDA_FIXUP_PINS, |
4484 | + .v.pins = (const struct hda_pintbl[]) { |
4485 | + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ |
4486 | + { } |
4487 | + }, |
4488 | + .chained = true, |
4489 | + .chain_id = ALC269_FIXUP_HEADSET_MODE |
4490 | + }, |
4491 | [ALC275_FIXUP_DELL_XPS] = { |
4492 | .type = HDA_FIXUP_VERBS, |
4493 | .v.verbs = (const struct hda_verb[]) { |
4494 | @@ -5518,7 +5528,7 @@ static const struct hda_fixup alc269_fixups[] = { |
4495 | .type = HDA_FIXUP_FUNC, |
4496 | .v.func = alc298_fixup_speaker_volume, |
4497 | .chained = true, |
4498 | - .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, |
4499 | + .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, |
4500 | }, |
4501 | [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = { |
4502 | .type = HDA_FIXUP_PINS, |
4503 | diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c |
4504 | index 89ac5f5a93eb..7ae46c2647d4 100644 |
4505 | --- a/sound/soc/atmel/atmel-classd.c |
4506 | +++ b/sound/soc/atmel/atmel-classd.c |
4507 | @@ -349,7 +349,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai, |
4508 | } |
4509 | |
4510 | #define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8) |
4511 | -#define CLASSD_ACLK_RATE_12M288_MPY_8 (12228 * 1000 * 8) |
4512 | +#define CLASSD_ACLK_RATE_12M288_MPY_8 (12288 * 1000 * 8) |
4513 | |
4514 | static struct { |
4515 | int rate; |
4516 | diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c |
4517 | index b5b1934d8550..bef8a4546c12 100644 |
4518 | --- a/sound/soc/intel/skylake/skl-topology.c |
4519 | +++ b/sound/soc/intel/skylake/skl-topology.c |
4520 | @@ -448,7 +448,7 @@ static int skl_tplg_set_module_init_data(struct snd_soc_dapm_widget *w) |
4521 | if (bc->set_params != SKL_PARAM_INIT) |
4522 | continue; |
4523 | |
4524 | - mconfig->formats_config.caps = (u32 *)&bc->params; |
4525 | + mconfig->formats_config.caps = (u32 *)bc->params; |
4526 | mconfig->formats_config.caps_size = bc->size; |
4527 | |
4528 | break; |
4529 | diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c |
4530 | index a29786dd9522..4d28a9ddbee0 100644 |
4531 | --- a/virt/kvm/eventfd.c |
4532 | +++ b/virt/kvm/eventfd.c |
4533 | @@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, |
4534 | continue; |
4535 | |
4536 | kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); |
4537 | - kvm->buses[bus_idx]->ioeventfd_count--; |
4538 | + if (kvm->buses[bus_idx]) |
4539 | + kvm->buses[bus_idx]->ioeventfd_count--; |
4540 | ioeventfd_release(p); |
4541 | ret = 0; |
4542 | break; |
4543 | diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c |
4544 | index 7f9ee2929cfe..f4c6d4f6d2e8 100644 |
4545 | --- a/virt/kvm/kvm_main.c |
4546 | +++ b/virt/kvm/kvm_main.c |
4547 | @@ -720,8 +720,11 @@ static void kvm_destroy_vm(struct kvm *kvm) |
4548 | list_del(&kvm->vm_list); |
4549 | spin_unlock(&kvm_lock); |
4550 | kvm_free_irq_routing(kvm); |
4551 | - for (i = 0; i < KVM_NR_BUSES; i++) |
4552 | - kvm_io_bus_destroy(kvm->buses[i]); |
4553 | + for (i = 0; i < KVM_NR_BUSES; i++) { |
4554 | + if (kvm->buses[i]) |
4555 | + kvm_io_bus_destroy(kvm->buses[i]); |
4556 | + kvm->buses[i] = NULL; |
4557 | + } |
4558 | kvm_coalesced_mmio_free(kvm); |
4559 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
4560 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); |
4561 | @@ -3463,6 +3466,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, |
4562 | }; |
4563 | |
4564 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); |
4565 | + if (!bus) |
4566 | + return -ENOMEM; |
4567 | r = __kvm_io_bus_write(vcpu, bus, &range, val); |
4568 | return r < 0 ? r : 0; |
4569 | } |
4570 | @@ -3480,6 +3485,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, |
4571 | }; |
4572 | |
4573 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); |
4574 | + if (!bus) |
4575 | + return -ENOMEM; |
4576 | |
4577 | /* First try the device referenced by cookie. */ |
4578 | if ((cookie >= 0) && (cookie < bus->dev_count) && |
4579 | @@ -3530,6 +3537,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, |
4580 | }; |
4581 | |
4582 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); |
4583 | + if (!bus) |
4584 | + return -ENOMEM; |
4585 | r = __kvm_io_bus_read(vcpu, bus, &range, val); |
4586 | return r < 0 ? r : 0; |
4587 | } |
4588 | @@ -3542,6 +3551,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
4589 | struct kvm_io_bus *new_bus, *bus; |
4590 | |
4591 | bus = kvm->buses[bus_idx]; |
4592 | + if (!bus) |
4593 | + return -ENOMEM; |
4594 | + |
4595 | /* exclude ioeventfd which is limited by maximum fd */ |
4596 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) |
4597 | return -ENOSPC; |
4598 | @@ -3561,37 +3573,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
4599 | } |
4600 | |
4601 | /* Caller must hold slots_lock. */ |
4602 | -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
4603 | - struct kvm_io_device *dev) |
4604 | +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
4605 | + struct kvm_io_device *dev) |
4606 | { |
4607 | - int i, r; |
4608 | + int i; |
4609 | struct kvm_io_bus *new_bus, *bus; |
4610 | |
4611 | bus = kvm->buses[bus_idx]; |
4612 | - r = -ENOENT; |
4613 | + if (!bus) |
4614 | + return; |
4615 | + |
4616 | for (i = 0; i < bus->dev_count; i++) |
4617 | if (bus->range[i].dev == dev) { |
4618 | - r = 0; |
4619 | break; |
4620 | } |
4621 | |
4622 | - if (r) |
4623 | - return r; |
4624 | + if (i == bus->dev_count) |
4625 | + return; |
4626 | |
4627 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * |
4628 | sizeof(struct kvm_io_range)), GFP_KERNEL); |
4629 | - if (!new_bus) |
4630 | - return -ENOMEM; |
4631 | + if (!new_bus) { |
4632 | + pr_err("kvm: failed to shrink bus, removing it completely\n"); |
4633 | + goto broken; |
4634 | + } |
4635 | |
4636 | memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); |
4637 | new_bus->dev_count--; |
4638 | memcpy(new_bus->range + i, bus->range + i + 1, |
4639 | (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); |
4640 | |
4641 | +broken: |
4642 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); |
4643 | synchronize_srcu_expedited(&kvm->srcu); |
4644 | kfree(bus); |
4645 | - return r; |
4646 | + return; |
4647 | } |
4648 | |
4649 | struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
4650 | @@ -3604,6 +3620,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
4651 | srcu_idx = srcu_read_lock(&kvm->srcu); |
4652 | |
4653 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
4654 | + if (!bus) |
4655 | + goto out_unlock; |
4656 | |
4657 | dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); |
4658 | if (dev_idx < 0) |