Magellan Linux

Contents of /trunk/kernel-alx/patches-4.19/0163-4.19.64-all-fixes.patch

Revision 3442
Mon Aug 5 07:52:44 2019 UTC by niro
File size: 78299 bytes
-linux-4.19.64
1 diff --git a/Makefile b/Makefile
2 index 8ad77a93de30..203d9e80a315 100644
3 --- a/Makefile
4 +++ b/Makefile
5 @@ -1,7 +1,7 @@
6 # SPDX-License-Identifier: GPL-2.0
7 VERSION = 4
8 PATCHLEVEL = 19
9 -SUBLEVEL = 63
10 +SUBLEVEL = 64
11 EXTRAVERSION =
12 NAME = "People's Front"
13
14 diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
15 index 1a037b94eba1..cee28a05ee98 100644
16 --- a/arch/arm64/include/asm/compat.h
17 +++ b/arch/arm64/include/asm/compat.h
18 @@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
19 }
20
21 #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
22 +#define COMPAT_MINSIGSTKSZ 2048
23
24 static inline void __user *arch_compat_alloc_user_space(long len)
25 {
26 diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
27 index 6394b4f0a69b..f42feab25dcf 100644
28 --- a/arch/sh/boards/Kconfig
29 +++ b/arch/sh/boards/Kconfig
30 @@ -8,27 +8,19 @@ config SH_ALPHA_BOARD
31 bool
32
33 config SH_DEVICE_TREE
34 - bool "Board Described by Device Tree"
35 + bool
36 select OF
37 select OF_EARLY_FLATTREE
38 select TIMER_OF
39 select COMMON_CLK
40 select GENERIC_CALIBRATE_DELAY
41 - help
42 - Select Board Described by Device Tree to build a kernel that
43 - does not hard-code any board-specific knowledge but instead uses
44 - a device tree blob provided by the boot-loader. You must enable
45 - drivers for any hardware you want to use separately. At this
46 - time, only boards based on the open-hardware J-Core processors
47 - have sufficient driver coverage to use this option; do not
48 - select it if you are using original SuperH hardware.
49
50 config SH_JCORE_SOC
51 bool "J-Core SoC"
52 - depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2)
53 + select SH_DEVICE_TREE
54 select CLKSRC_JCORE_PIT
55 select JCORE_AIC
56 - default y if CPU_J2
57 + depends on CPU_J2
58 help
59 Select this option to include drivers core components of the
60 J-Core SoC, including interrupt controllers and timers.
61 diff --git a/block/blk-core.c b/block/blk-core.c
62 index 9ca703bcfe3b..4a3e1f417880 100644
63 --- a/block/blk-core.c
64 +++ b/block/blk-core.c
65 @@ -421,24 +421,25 @@ void blk_sync_queue(struct request_queue *q)
66 EXPORT_SYMBOL(blk_sync_queue);
67
68 /**
69 - * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY
70 + * blk_set_pm_only - increment pm_only counter
71 * @q: request queue pointer
72 - *
73 - * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not
74 - * set and 1 if the flag was already set.
75 */
76 -int blk_set_preempt_only(struct request_queue *q)
77 +void blk_set_pm_only(struct request_queue *q)
78 {
79 - return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
80 + atomic_inc(&q->pm_only);
81 }
82 -EXPORT_SYMBOL_GPL(blk_set_preempt_only);
83 +EXPORT_SYMBOL_GPL(blk_set_pm_only);
84
85 -void blk_clear_preempt_only(struct request_queue *q)
86 +void blk_clear_pm_only(struct request_queue *q)
87 {
88 - blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
89 - wake_up_all(&q->mq_freeze_wq);
90 + int pm_only;
91 +
92 + pm_only = atomic_dec_return(&q->pm_only);
93 + WARN_ON_ONCE(pm_only < 0);
94 + if (pm_only == 0)
95 + wake_up_all(&q->mq_freeze_wq);
96 }
97 -EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
98 +EXPORT_SYMBOL_GPL(blk_clear_pm_only);
99
100 /**
101 * __blk_run_queue_uncond - run a queue whether or not it has been stopped
102 @@ -916,7 +917,7 @@ EXPORT_SYMBOL(blk_alloc_queue);
103 */
104 int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
105 {
106 - const bool preempt = flags & BLK_MQ_REQ_PREEMPT;
107 + const bool pm = flags & BLK_MQ_REQ_PREEMPT;
108
109 while (true) {
110 bool success = false;
111 @@ -924,11 +925,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
112 rcu_read_lock();
113 if (percpu_ref_tryget_live(&q->q_usage_counter)) {
114 /*
115 - * The code that sets the PREEMPT_ONLY flag is
116 - * responsible for ensuring that that flag is globally
117 - * visible before the queue is unfrozen.
118 + * The code that increments the pm_only counter is
119 + * responsible for ensuring that that counter is
120 + * globally visible before the queue is unfrozen.
121 */
122 - if (preempt || !blk_queue_preempt_only(q)) {
123 + if (pm || !blk_queue_pm_only(q)) {
124 success = true;
125 } else {
126 percpu_ref_put(&q->q_usage_counter);
127 @@ -953,7 +954,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
128
129 wait_event(q->mq_freeze_wq,
130 (atomic_read(&q->mq_freeze_depth) == 0 &&
131 - (preempt || !blk_queue_preempt_only(q))) ||
132 + (pm || !blk_queue_pm_only(q))) ||
133 blk_queue_dying(q));
134 if (blk_queue_dying(q))
135 return -ENODEV;
136 diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
137 index cb1e6cf7ac48..a5ea86835fcb 100644
138 --- a/block/blk-mq-debugfs.c
139 +++ b/block/blk-mq-debugfs.c
140 @@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags,
141 return 0;
142 }
143
144 +static int queue_pm_only_show(void *data, struct seq_file *m)
145 +{
146 + struct request_queue *q = data;
147 +
148 + seq_printf(m, "%d\n", atomic_read(&q->pm_only));
149 + return 0;
150 +}
151 +
152 #define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name
153 static const char *const blk_queue_flag_name[] = {
154 QUEUE_FLAG_NAME(QUEUED),
155 @@ -132,7 +140,6 @@ static const char *const blk_queue_flag_name[] = {
156 QUEUE_FLAG_NAME(REGISTERED),
157 QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
158 QUEUE_FLAG_NAME(QUIESCED),
159 - QUEUE_FLAG_NAME(PREEMPT_ONLY),
160 };
161 #undef QUEUE_FLAG_NAME
162
163 @@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf,
164 static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
165 { "poll_stat", 0400, queue_poll_stat_show },
166 { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
167 + { "pm_only", 0600, queue_pm_only_show, NULL },
168 { "state", 0600, queue_state_show, queue_state_write },
169 { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
170 { "zone_wlock", 0400, queue_zone_wlock_show, NULL },
171 diff --git a/drivers/android/binder.c b/drivers/android/binder.c
172 index 1e0e438f079f..6e04e7a707a1 100644
173 --- a/drivers/android/binder.c
174 +++ b/drivers/android/binder.c
175 @@ -1960,8 +1960,18 @@ static struct binder_thread *binder_get_txn_from_and_acq_inner(
176
177 static void binder_free_transaction(struct binder_transaction *t)
178 {
179 - if (t->buffer)
180 - t->buffer->transaction = NULL;
181 + struct binder_proc *target_proc = t->to_proc;
182 +
183 + if (target_proc) {
184 + binder_inner_proc_lock(target_proc);
185 + if (t->buffer)
186 + t->buffer->transaction = NULL;
187 + binder_inner_proc_unlock(target_proc);
188 + }
189 + /*
190 + * If the transaction has no target_proc, then
191 + * t->buffer->transaction has already been cleared.
192 + */
193 kfree(t);
194 binder_stats_deleted(BINDER_STAT_TRANSACTION);
195 }
196 @@ -3484,10 +3494,12 @@ static int binder_thread_write(struct binder_proc *proc,
197 buffer->debug_id,
198 buffer->transaction ? "active" : "finished");
199
200 + binder_inner_proc_lock(proc);
201 if (buffer->transaction) {
202 buffer->transaction->buffer = NULL;
203 buffer->transaction = NULL;
204 }
205 + binder_inner_proc_unlock(proc);
206 if (buffer->async_transaction && buffer->target_node) {
207 struct binder_node *buf_node;
208 struct binder_work *w;
209 diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c
210 index d568fbd94d6c..20235925344d 100644
211 --- a/drivers/bluetooth/hci_ath.c
212 +++ b/drivers/bluetooth/hci_ath.c
213 @@ -112,6 +112,9 @@ static int ath_open(struct hci_uart *hu)
214
215 BT_DBG("hu %p", hu);
216
217 + if (!hci_uart_has_flow_control(hu))
218 + return -EOPNOTSUPP;
219 +
220 ath = kzalloc(sizeof(*ath), GFP_KERNEL);
221 if (!ath)
222 return -ENOMEM;
223 diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
224 index 800132369134..aa6b7ed9fdf1 100644
225 --- a/drivers/bluetooth/hci_bcm.c
226 +++ b/drivers/bluetooth/hci_bcm.c
227 @@ -369,6 +369,9 @@ static int bcm_open(struct hci_uart *hu)
228
229 bt_dev_dbg(hu->hdev, "hu %p", hu);
230
231 + if (!hci_uart_has_flow_control(hu))
232 + return -EOPNOTSUPP;
233 +
234 bcm = kzalloc(sizeof(*bcm), GFP_KERNEL);
235 if (!bcm)
236 return -ENOMEM;
237 diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
238 index 46ace321bf60..e9228520e4c7 100644
239 --- a/drivers/bluetooth/hci_intel.c
240 +++ b/drivers/bluetooth/hci_intel.c
241 @@ -406,6 +406,9 @@ static int intel_open(struct hci_uart *hu)
242
243 BT_DBG("hu %p", hu);
244
245 + if (!hci_uart_has_flow_control(hu))
246 + return -EOPNOTSUPP;
247 +
248 intel = kzalloc(sizeof(*intel), GFP_KERNEL);
249 if (!intel)
250 return -ENOMEM;
251 diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
252 index c915daf01a89..efeb8137ec67 100644
253 --- a/drivers/bluetooth/hci_ldisc.c
254 +++ b/drivers/bluetooth/hci_ldisc.c
255 @@ -299,6 +299,19 @@ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
256 return 0;
257 }
258
259 +/* Check the underlying device or tty has flow control support */
260 +bool hci_uart_has_flow_control(struct hci_uart *hu)
261 +{
262 + /* serdev nodes check if the needed operations are present */
263 + if (hu->serdev)
264 + return true;
265 +
266 + if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset)
267 + return true;
268 +
269 + return false;
270 +}
271 +
272 /* Flow control or un-flow control the device */
273 void hci_uart_set_flow_control(struct hci_uart *hu, bool enable)
274 {
275 diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c
276 index ffb00669346f..23791df081ba 100644
277 --- a/drivers/bluetooth/hci_mrvl.c
278 +++ b/drivers/bluetooth/hci_mrvl.c
279 @@ -66,6 +66,9 @@ static int mrvl_open(struct hci_uart *hu)
280
281 BT_DBG("hu %p", hu);
282
283 + if (!hci_uart_has_flow_control(hu))
284 + return -EOPNOTSUPP;
285 +
286 mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL);
287 if (!mrvl)
288 return -ENOMEM;
289 diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
290 index 77004c29da08..f96e58de049b 100644
291 --- a/drivers/bluetooth/hci_qca.c
292 +++ b/drivers/bluetooth/hci_qca.c
293 @@ -450,6 +450,9 @@ static int qca_open(struct hci_uart *hu)
294
295 BT_DBG("hu %p qca_open", hu);
296
297 + if (!hci_uart_has_flow_control(hu))
298 + return -EOPNOTSUPP;
299 +
300 qca = kzalloc(sizeof(struct qca_data), GFP_KERNEL);
301 if (!qca)
302 return -ENOMEM;
303 diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h
304 index 00cab2fd7a1b..067a610f1372 100644
305 --- a/drivers/bluetooth/hci_uart.h
306 +++ b/drivers/bluetooth/hci_uart.h
307 @@ -118,6 +118,7 @@ int hci_uart_tx_wakeup(struct hci_uart *hu);
308 int hci_uart_init_ready(struct hci_uart *hu);
309 void hci_uart_init_work(struct work_struct *work);
310 void hci_uart_set_baudrate(struct hci_uart *hu, unsigned int speed);
311 +bool hci_uart_has_flow_control(struct hci_uart *hu);
312 void hci_uart_set_flow_control(struct hci_uart *hu, bool enable);
313 void hci_uart_set_speeds(struct hci_uart *hu, unsigned int init_speed,
314 unsigned int oper_speed);
315 diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
316 index c1439019dd12..b9af2419006f 100644
317 --- a/drivers/iommu/intel-iommu.c
318 +++ b/drivers/iommu/intel-iommu.c
319 @@ -3721,7 +3721,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
320
321 freelist = domain_unmap(domain, start_pfn, last_pfn);
322
323 - if (intel_iommu_strict) {
324 + if (intel_iommu_strict || !has_iova_flush_queue(&domain->iovad)) {
325 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
326 nrpages, !freelist, 0);
327 /* free iova */
328 diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
329 index 83fe2621effe..60348d707b99 100644
330 --- a/drivers/iommu/iova.c
331 +++ b/drivers/iommu/iova.c
332 @@ -65,9 +65,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
333 }
334 EXPORT_SYMBOL_GPL(init_iova_domain);
335
336 +bool has_iova_flush_queue(struct iova_domain *iovad)
337 +{
338 + return !!iovad->fq;
339 +}
340 +
341 static void free_iova_flush_queue(struct iova_domain *iovad)
342 {
343 - if (!iovad->fq)
344 + if (!has_iova_flush_queue(iovad))
345 return;
346
347 if (timer_pending(&iovad->fq_timer))
348 @@ -85,13 +90,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
349 int init_iova_flush_queue(struct iova_domain *iovad,
350 iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
351 {
352 + struct iova_fq __percpu *queue;
353 int cpu;
354
355 atomic64_set(&iovad->fq_flush_start_cnt, 0);
356 atomic64_set(&iovad->fq_flush_finish_cnt, 0);
357
358 - iovad->fq = alloc_percpu(struct iova_fq);
359 - if (!iovad->fq)
360 + queue = alloc_percpu(struct iova_fq);
361 + if (!queue)
362 return -ENOMEM;
363
364 iovad->flush_cb = flush_cb;
365 @@ -100,13 +106,17 @@ int init_iova_flush_queue(struct iova_domain *iovad,
366 for_each_possible_cpu(cpu) {
367 struct iova_fq *fq;
368
369 - fq = per_cpu_ptr(iovad->fq, cpu);
370 + fq = per_cpu_ptr(queue, cpu);
371 fq->head = 0;
372 fq->tail = 0;
373
374 spin_lock_init(&fq->lock);
375 }
376
377 + smp_wmb();
378 +
379 + iovad->fq = queue;
380 +
381 timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
382 atomic_set(&iovad->fq_timer_on, 0);
383
384 diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
385 index 6d05946b445e..060dc7fd66c1 100644
386 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c
387 +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
388 @@ -1967,6 +1967,9 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id)
389
390 /* get endpoint base */
391 idx = ((ep_addr & 0x7f) - 1) * 2;
392 + if (idx > 15)
393 + return -EIO;
394 +
395 if (ep_addr & 0x80)
396 idx++;
397 attr = ep->desc.bmAttributes;
398 diff --git a/drivers/media/radio/radio-raremono.c b/drivers/media/radio/radio-raremono.c
399 index 9a5079d64c4a..729600c4a056 100644
400 --- a/drivers/media/radio/radio-raremono.c
401 +++ b/drivers/media/radio/radio-raremono.c
402 @@ -271,6 +271,14 @@ static int vidioc_g_frequency(struct file *file, void *priv,
403 return 0;
404 }
405
406 +static void raremono_device_release(struct v4l2_device *v4l2_dev)
407 +{
408 + struct raremono_device *radio = to_raremono_dev(v4l2_dev);
409 +
410 + kfree(radio->buffer);
411 + kfree(radio);
412 +}
413 +
414 /* File system interface */
415 static const struct v4l2_file_operations usb_raremono_fops = {
416 .owner = THIS_MODULE,
417 @@ -295,12 +303,14 @@ static int usb_raremono_probe(struct usb_interface *intf,
418 struct raremono_device *radio;
419 int retval = 0;
420
421 - radio = devm_kzalloc(&intf->dev, sizeof(struct raremono_device), GFP_KERNEL);
422 - if (radio)
423 - radio->buffer = devm_kmalloc(&intf->dev, BUFFER_LENGTH, GFP_KERNEL);
424 -
425 - if (!radio || !radio->buffer)
426 + radio = kzalloc(sizeof(*radio), GFP_KERNEL);
427 + if (!radio)
428 + return -ENOMEM;
429 + radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL);
430 + if (!radio->buffer) {
431 + kfree(radio);
432 return -ENOMEM;
433 + }
434
435 radio->usbdev = interface_to_usbdev(intf);
436 radio->intf = intf;
437 @@ -324,7 +334,8 @@ static int usb_raremono_probe(struct usb_interface *intf,
438 if (retval != 3 ||
439 (get_unaligned_be16(&radio->buffer[1]) & 0xfff) == 0x0242) {
440 dev_info(&intf->dev, "this is not Thanko's Raremono.\n");
441 - return -ENODEV;
442 + retval = -ENODEV;
443 + goto free_mem;
444 }
445
446 dev_info(&intf->dev, "Thanko's Raremono connected: (%04X:%04X)\n",
447 @@ -333,7 +344,7 @@ static int usb_raremono_probe(struct usb_interface *intf,
448 retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev);
449 if (retval < 0) {
450 dev_err(&intf->dev, "couldn't register v4l2_device\n");
451 - return retval;
452 + goto free_mem;
453 }
454
455 mutex_init(&radio->lock);
456 @@ -345,6 +356,7 @@ static int usb_raremono_probe(struct usb_interface *intf,
457 radio->vdev.ioctl_ops = &usb_raremono_ioctl_ops;
458 radio->vdev.lock = &radio->lock;
459 radio->vdev.release = video_device_release_empty;
460 + radio->v4l2_dev.release = raremono_device_release;
461
462 usb_set_intfdata(intf, &radio->v4l2_dev);
463
464 @@ -360,6 +372,10 @@ static int usb_raremono_probe(struct usb_interface *intf,
465 }
466 dev_err(&intf->dev, "could not register video device\n");
467 v4l2_device_unregister(&radio->v4l2_dev);
468 +
469 +free_mem:
470 + kfree(radio->buffer);
471 + kfree(radio);
472 return retval;
473 }
474
475 diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c
476 index 257ae0d8cfe2..e3f63299f85c 100644
477 --- a/drivers/media/usb/au0828/au0828-core.c
478 +++ b/drivers/media/usb/au0828/au0828-core.c
479 @@ -623,6 +623,12 @@ static int au0828_usb_probe(struct usb_interface *interface,
480 /* Setup */
481 au0828_card_setup(dev);
482
483 + /*
484 + * Store the pointer to the au0828_dev so it can be accessed in
485 + * au0828_usb_disconnect
486 + */
487 + usb_set_intfdata(interface, dev);
488 +
489 /* Analog TV */
490 retval = au0828_analog_register(dev, interface);
491 if (retval) {
492 @@ -641,12 +647,6 @@ static int au0828_usb_probe(struct usb_interface *interface,
493 /* Remote controller */
494 au0828_rc_register(dev);
495
496 - /*
497 - * Store the pointer to the au0828_dev so it can be accessed in
498 - * au0828_usb_disconnect
499 - */
500 - usb_set_intfdata(interface, dev);
501 -
502 pr_info("Registered device AU0828 [%s]\n",
503 dev->board.name == NULL ? "Unset" : dev->board.name);
504
505 diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c
506 index a771e0a52610..f5b04594e209 100644
507 --- a/drivers/media/usb/cpia2/cpia2_usb.c
508 +++ b/drivers/media/usb/cpia2/cpia2_usb.c
509 @@ -902,7 +902,6 @@ static void cpia2_usb_disconnect(struct usb_interface *intf)
510 cpia2_unregister_camera(cam);
511 v4l2_device_disconnect(&cam->v4l2_dev);
512 mutex_unlock(&cam->v4l2_lock);
513 - v4l2_device_put(&cam->v4l2_dev);
514
515 if(cam->buffers) {
516 DBG("Wakeup waiting processes\n");
517 @@ -911,6 +910,8 @@ static void cpia2_usb_disconnect(struct usb_interface *intf)
518 wake_up_interruptible(&cam->wq_stream);
519 }
520
521 + v4l2_device_put(&cam->v4l2_dev);
522 +
523 LOG("CPiA2 camera disconnected.\n");
524 }
525
526 diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
527 index 673fdca8d2da..fcb201a40920 100644
528 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
529 +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
530 @@ -1680,7 +1680,7 @@ static int pvr2_decoder_enable(struct pvr2_hdw *hdw,int enablefl)
531 }
532 if (!hdw->flag_decoder_missed) {
533 pvr2_trace(PVR2_TRACE_ERROR_LEGS,
534 - "WARNING: No decoder present");
535 + "***WARNING*** No decoder present");
536 hdw->flag_decoder_missed = !0;
537 trace_stbit("flag_decoder_missed",
538 hdw->flag_decoder_missed);
539 @@ -2366,7 +2366,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf,
540 if (hdw_desc->flag_is_experimental) {
541 pvr2_trace(PVR2_TRACE_INFO, "**********");
542 pvr2_trace(PVR2_TRACE_INFO,
543 - "WARNING: Support for this device (%s) is experimental.",
544 + "***WARNING*** Support for this device (%s) is experimental.",
545 hdw_desc->description);
546 pvr2_trace(PVR2_TRACE_INFO,
547 "Important functionality might not be entirely working.");
548 diff --git a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c
549 index f3003ca05f4b..922c06279663 100644
550 --- a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c
551 +++ b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c
552 @@ -343,11 +343,11 @@ static int i2c_hack_cx25840(struct pvr2_hdw *hdw,
553
554 if ((ret != 0) || (*rdata == 0x04) || (*rdata == 0x0a)) {
555 pvr2_trace(PVR2_TRACE_ERROR_LEGS,
556 - "WARNING: Detected a wedged cx25840 chip; the device will not work.");
557 + "***WARNING*** Detected a wedged cx25840 chip; the device will not work.");
558 pvr2_trace(PVR2_TRACE_ERROR_LEGS,
559 - "WARNING: Try power cycling the pvrusb2 device.");
560 + "***WARNING*** Try power cycling the pvrusb2 device.");
561 pvr2_trace(PVR2_TRACE_ERROR_LEGS,
562 - "WARNING: Disabling further access to the device to prevent other foul-ups.");
563 + "***WARNING*** Disabling further access to the device to prevent other foul-ups.");
564 // This blocks all further communication with the part.
565 hdw->i2c_func[0x44] = NULL;
566 pvr2_hdw_render_useless(hdw);
567 diff --git a/drivers/media/usb/pvrusb2/pvrusb2-std.c b/drivers/media/usb/pvrusb2/pvrusb2-std.c
568 index 6b651f8b54df..37dc299a1ca2 100644
569 --- a/drivers/media/usb/pvrusb2/pvrusb2-std.c
570 +++ b/drivers/media/usb/pvrusb2/pvrusb2-std.c
571 @@ -353,7 +353,7 @@ struct v4l2_standard *pvr2_std_create_enum(unsigned int *countptr,
572 bcnt = pvr2_std_id_to_str(buf,sizeof(buf),fmsk);
573 pvr2_trace(
574 PVR2_TRACE_ERROR_LEGS,
575 - "WARNING: Failed to classify the following standard(s): %.*s",
576 + "***WARNING*** Failed to classify the following standard(s): %.*s",
577 bcnt,buf);
578 }
579
580 diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c
581 index d4803ff5a78a..f09a4ad2e9de 100644
582 --- a/drivers/net/wireless/ath/ath10k/usb.c
583 +++ b/drivers/net/wireless/ath/ath10k/usb.c
584 @@ -1025,7 +1025,7 @@ static int ath10k_usb_probe(struct usb_interface *interface,
585 }
586
587 /* TODO: remove this once USB support is fully implemented */
588 - ath10k_warn(ar, "WARNING: ath10k USB support is incomplete, don't expect anything to work!\n");
589 + ath10k_warn(ar, "Warning: ath10k USB support is incomplete, don't expect anything to work!\n");
590
591 return 0;
592
593 diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
594 index 8febacb8fc54..0951564b6830 100644
595 --- a/drivers/pps/pps.c
596 +++ b/drivers/pps/pps.c
597 @@ -166,6 +166,14 @@ static long pps_cdev_ioctl(struct file *file,
598 pps->params.mode |= PPS_CANWAIT;
599 pps->params.api_version = PPS_API_VERS;
600
601 + /*
602 + * Clear unused fields of pps_kparams to avoid leaking
603 + * uninitialized data of the PPS_SETPARAMS caller via
604 + * PPS_GETPARAMS
605 + */
606 + pps->params.assert_off_tu.flags = 0;
607 + pps->params.clear_off_tu.flags = 0;
608 +
609 spin_unlock_irq(&pps->lock);
610
611 break;
612 diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
613 index 32652b2c5e7c..75b926e70076 100644
614 --- a/drivers/scsi/scsi_lib.c
615 +++ b/drivers/scsi/scsi_lib.c
616 @@ -3059,11 +3059,14 @@ scsi_device_quiesce(struct scsi_device *sdev)
617 */
618 WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);
619
620 - blk_set_preempt_only(q);
621 + if (sdev->quiesced_by == current)
622 + return 0;
623 +
624 + blk_set_pm_only(q);
625
626 blk_mq_freeze_queue(q);
627 /*
628 - * Ensure that the effect of blk_set_preempt_only() will be visible
629 + * Ensure that the effect of blk_set_pm_only() will be visible
630 * for percpu_ref_tryget() callers that occur after the queue
631 * unfreeze even if the queue was already frozen before this function
632 * was called. See also https://lwn.net/Articles/573497/.
633 @@ -3076,7 +3079,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
634 if (err == 0)
635 sdev->quiesced_by = current;
636 else
637 - blk_clear_preempt_only(q);
638 + blk_clear_pm_only(q);
639 mutex_unlock(&sdev->state_mutex);
640
641 return err;
642 @@ -3099,8 +3102,10 @@ void scsi_device_resume(struct scsi_device *sdev)
643 * device deleted during suspend)
644 */
645 mutex_lock(&sdev->state_mutex);
646 - sdev->quiesced_by = NULL;
647 - blk_clear_preempt_only(sdev->request_queue);
648 + if (sdev->quiesced_by) {
649 + sdev->quiesced_by = NULL;
650 + blk_clear_pm_only(sdev->request_queue);
651 + }
652 if (sdev->sdev_state == SDEV_QUIESCE)
653 scsi_device_set_state(sdev, SDEV_RUNNING);
654 mutex_unlock(&sdev->state_mutex);
655 diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
656 index 03614ef64ca4..3f68edde0f03 100644
657 --- a/drivers/usb/dwc2/gadget.c
658 +++ b/drivers/usb/dwc2/gadget.c
659 @@ -3125,6 +3125,7 @@ void dwc2_hsotg_disconnect(struct dwc2_hsotg *hsotg)
660 hsotg->connected = 0;
661 hsotg->test_mode = 0;
662
663 + /* all endpoints should be shutdown */
664 for (ep = 0; ep < hsotg->num_of_eps; ep++) {
665 if (hsotg->eps_in[ep])
666 kill_all_requests(hsotg, hsotg->eps_in[ep],
667 @@ -3175,6 +3176,7 @@ static void dwc2_hsotg_irq_fifoempty(struct dwc2_hsotg *hsotg, bool periodic)
668 GINTSTS_PTXFEMP | \
669 GINTSTS_RXFLVL)
670
671 +static int dwc2_hsotg_ep_disable(struct usb_ep *ep);
672 /**
673 * dwc2_hsotg_core_init - issue softreset to the core
674 * @hsotg: The device state
675 @@ -3189,13 +3191,23 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
676 u32 val;
677 u32 usbcfg;
678 u32 dcfg = 0;
679 + int ep;
680
681 /* Kill any ep0 requests as controller will be reinitialized */
682 kill_all_requests(hsotg, hsotg->eps_out[0], -ECONNRESET);
683
684 - if (!is_usb_reset)
685 + if (!is_usb_reset) {
686 if (dwc2_core_reset(hsotg, true))
687 return;
688 + } else {
689 + /* all endpoints should be shutdown */
690 + for (ep = 1; ep < hsotg->num_of_eps; ep++) {
691 + if (hsotg->eps_in[ep])
692 + dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep);
693 + if (hsotg->eps_out[ep])
694 + dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep);
695 + }
696 + }
697
698 /*
699 * we must now enable ep0 ready for host detection and then
700 @@ -3993,7 +4005,6 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
701 struct dwc2_hsotg *hsotg = hs_ep->parent;
702 int dir_in = hs_ep->dir_in;
703 int index = hs_ep->index;
704 - unsigned long flags;
705 u32 epctrl_reg;
706 u32 ctrl;
707
708 @@ -4011,8 +4022,6 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
709
710 epctrl_reg = dir_in ? DIEPCTL(index) : DOEPCTL(index);
711
712 - spin_lock_irqsave(&hsotg->lock, flags);
713 -
714 ctrl = dwc2_readl(hsotg, epctrl_reg);
715
716 if (ctrl & DXEPCTL_EPENA)
717 @@ -4035,10 +4044,22 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
718 hs_ep->fifo_index = 0;
719 hs_ep->fifo_size = 0;
720
721 - spin_unlock_irqrestore(&hsotg->lock, flags);
722 return 0;
723 }
724
725 +static int dwc2_hsotg_ep_disable_lock(struct usb_ep *ep)
726 +{
727 + struct dwc2_hsotg_ep *hs_ep = our_ep(ep);
728 + struct dwc2_hsotg *hsotg = hs_ep->parent;
729 + unsigned long flags;
730 + int ret;
731 +
732 + spin_lock_irqsave(&hsotg->lock, flags);
733 + ret = dwc2_hsotg_ep_disable(ep);
734 + spin_unlock_irqrestore(&hsotg->lock, flags);
735 + return ret;
736 +}
737 +
738 /**
739 * on_list - check request is on the given endpoint
740 * @ep: The endpoint to check.
741 @@ -4186,7 +4207,7 @@ static int dwc2_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value)
742
743 static const struct usb_ep_ops dwc2_hsotg_ep_ops = {
744 .enable = dwc2_hsotg_ep_enable,
745 - .disable = dwc2_hsotg_ep_disable,
746 + .disable = dwc2_hsotg_ep_disable_lock,
747 .alloc_request = dwc2_hsotg_ep_alloc_request,
748 .free_request = dwc2_hsotg_ep_free_request,
749 .queue = dwc2_hsotg_ep_queue_lock,
750 @@ -4326,9 +4347,9 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget)
751 /* all endpoints should be shutdown */
752 for (ep = 1; ep < hsotg->num_of_eps; ep++) {
753 if (hsotg->eps_in[ep])
754 - dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep);
755 + dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep);
756 if (hsotg->eps_out[ep])
757 - dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep);
758 + dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep);
759 }
760
761 spin_lock_irqsave(&hsotg->lock, flags);
762 @@ -4776,9 +4797,9 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg)
763
764 for (ep = 0; ep < hsotg->num_of_eps; ep++) {
765 if (hsotg->eps_in[ep])
766 - dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep);
767 + dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep);
768 if (hsotg->eps_out[ep])
769 - dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep);
770 + dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep);
771 }
772 }
773
774 diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
775 index ae704658b528..124356dc39e1 100644
776 --- a/drivers/vhost/net.c
777 +++ b/drivers/vhost/net.c
778 @@ -497,12 +497,6 @@ static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter,
779 return iov_iter_count(iter);
780 }
781
782 -static bool vhost_exceeds_weight(int pkts, int total_len)
783 -{
784 - return total_len >= VHOST_NET_WEIGHT ||
785 - pkts >= VHOST_NET_PKT_WEIGHT;
786 -}
787 -
788 static int get_tx_bufs(struct vhost_net *net,
789 struct vhost_net_virtqueue *nvq,
790 struct msghdr *msg,
791 @@ -557,7 +551,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
792 int err;
793 int sent_pkts = 0;
794
795 - for (;;) {
796 + do {
797 bool busyloop_intr = false;
798
799 head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
800 @@ -598,11 +592,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
801 err, len);
802 if (++nvq->done_idx >= VHOST_NET_BATCH)
803 vhost_net_signal_used(nvq);
804 - if (vhost_exceeds_weight(++sent_pkts, total_len)) {
805 - vhost_poll_queue(&vq->poll);
806 - break;
807 - }
808 - }
809 + } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
810
811 vhost_net_signal_used(nvq);
812 }
813 @@ -626,7 +616,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
814 bool zcopy_used;
815 int sent_pkts = 0;
816
817 - for (;;) {
818 + do {
819 bool busyloop_intr;
820
821 /* Release DMAs done buffers first */
822 @@ -701,11 +691,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
823 else
824 vhost_zerocopy_signal_used(net, vq);
825 vhost_net_tx_packet(net);
826 - if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) {
827 - vhost_poll_queue(&vq->poll);
828 - break;
829 - }
830 - }
831 + } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
832 }
833
834 /* Expects to be always run from workqueue - which acts as
835 @@ -941,8 +927,11 @@ static void handle_rx(struct vhost_net *net)
836 vq->log : NULL;
837 mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
838
839 - while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
840 - &busyloop_intr))) {
841 + do {
842 + sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
843 + &busyloop_intr);
844 + if (!sock_len)
845 + break;
846 sock_len += sock_hlen;
847 vhost_len = sock_len + vhost_hlen;
848 headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
849 @@ -1027,14 +1016,11 @@ static void handle_rx(struct vhost_net *net)
850 vhost_log_write(vq, vq_log, log, vhost_len,
851 vq->iov, in);
852 total_len += vhost_len;
853 - if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
854 - vhost_poll_queue(&vq->poll);
855 - goto out;
856 - }
857 - }
858 + } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len)));
859 +
860 if (unlikely(busyloop_intr))
861 vhost_poll_queue(&vq->poll);
862 - else
863 + else if (!sock_len)
864 vhost_net_enable_vq(net, vq);
865 out:
866 vhost_net_signal_used(nvq);
867 @@ -1115,7 +1101,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
868 vhost_net_buf_init(&n->vqs[i].rxq);
869 }
870 vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
871 - UIO_MAXIOV + VHOST_NET_BATCH);
872 + UIO_MAXIOV + VHOST_NET_BATCH,
873 + VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT);
874
875 vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
876 vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
877 diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
878 index 0cfa925be4ec..5e298d9287f1 100644
879 --- a/drivers/vhost/scsi.c
880 +++ b/drivers/vhost/scsi.c
881 @@ -57,6 +57,12 @@
882 #define VHOST_SCSI_PREALLOC_UPAGES 2048
883 #define VHOST_SCSI_PREALLOC_PROT_SGLS 2048
884
885 +/* Max number of requests before requeueing the job.
886 + * Using this limit prevents one virtqueue from starving others with
887 + * request.
888 + */
889 +#define VHOST_SCSI_WEIGHT 256
890 +
891 struct vhost_scsi_inflight {
892 /* Wait for the flush operation to finish */
893 struct completion comp;
894 @@ -811,7 +817,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
895 u64 tag;
896 u32 exp_data_len, data_direction;
897 unsigned int out = 0, in = 0;
898 - int head, ret, prot_bytes;
899 + int head, ret, prot_bytes, c = 0;
900 size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
901 size_t out_size, in_size;
902 u16 lun;
903 @@ -830,7 +836,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
904
905 vhost_disable_notify(&vs->dev, vq);
906
907 - for (;;) {
908 + do {
909 head = vhost_get_vq_desc(vq, vq->iov,
910 ARRAY_SIZE(vq->iov), &out, &in,
911 NULL, NULL);
912 @@ -1045,7 +1051,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
913 */
914 INIT_WORK(&cmd->work, vhost_scsi_submission_work);
915 queue_work(vhost_scsi_workqueue, &cmd->work);
916 - }
917 + } while (likely(!vhost_exceeds_weight(vq, ++c, 0)));
918 out:
919 mutex_unlock(&vq->mutex);
920 }
921 @@ -1398,7 +1404,8 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
922 vqs[i] = &vs->vqs[i].vq;
923 vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
924 }
925 - vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV);
926 + vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV,
927 + VHOST_SCSI_WEIGHT, 0);
928
929 vhost_scsi_init_inflight(vs, NULL);
930
931 diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
932 index c163bc15976a..0752f8dc47b1 100644
933 --- a/drivers/vhost/vhost.c
934 +++ b/drivers/vhost/vhost.c
935 @@ -413,8 +413,24 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
936 vhost_vq_free_iovecs(dev->vqs[i]);
937 }
938
939 +bool vhost_exceeds_weight(struct vhost_virtqueue *vq,
940 + int pkts, int total_len)
941 +{
942 + struct vhost_dev *dev = vq->dev;
943 +
944 + if ((dev->byte_weight && total_len >= dev->byte_weight) ||
945 + pkts >= dev->weight) {
946 + vhost_poll_queue(&vq->poll);
947 + return true;
948 + }
949 +
950 + return false;
951 +}
952 +EXPORT_SYMBOL_GPL(vhost_exceeds_weight);
953 +
954 void vhost_dev_init(struct vhost_dev *dev,
955 - struct vhost_virtqueue **vqs, int nvqs, int iov_limit)
956 + struct vhost_virtqueue **vqs, int nvqs,
957 + int iov_limit, int weight, int byte_weight)
958 {
959 struct vhost_virtqueue *vq;
960 int i;
961 @@ -428,6 +444,8 @@ void vhost_dev_init(struct vhost_dev *dev,
962 dev->mm = NULL;
963 dev->worker = NULL;
964 dev->iov_limit = iov_limit;
965 + dev->weight = weight;
966 + dev->byte_weight = byte_weight;
967 init_llist_head(&dev->work_list);
968 init_waitqueue_head(&dev->wait);
969 INIT_LIST_HEAD(&dev->read_list);
970 diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
971 index 9490e7ddb340..27a78a9b8cc7 100644
972 --- a/drivers/vhost/vhost.h
973 +++ b/drivers/vhost/vhost.h
974 @@ -171,10 +171,13 @@ struct vhost_dev {
975 struct list_head pending_list;
976 wait_queue_head_t wait;
977 int iov_limit;
978 + int weight;
979 + int byte_weight;
980 };
981
982 +bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
983 void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
984 - int nvqs, int iov_limit);
985 + int nvqs, int iov_limit, int weight, int byte_weight);
986 long vhost_dev_set_owner(struct vhost_dev *dev);
987 bool vhost_dev_has_owner(struct vhost_dev *dev);
988 long vhost_dev_check_owner(struct vhost_dev *);
989 diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
990 index e440f87ae1d6..bab495d73195 100644
991 --- a/drivers/vhost/vsock.c
992 +++ b/drivers/vhost/vsock.c
993 @@ -21,6 +21,14 @@
994 #include "vhost.h"
995
996 #define VHOST_VSOCK_DEFAULT_HOST_CID 2
997 +/* Max number of bytes transferred before requeueing the job.
998 + * Using this limit prevents one virtqueue from starving others. */
999 +#define VHOST_VSOCK_WEIGHT 0x80000
1000 +/* Max number of packets transferred before requeueing the job.
1001 + * Using this limit prevents one virtqueue from starving others with
1002 + * small pkts.
1003 + */
1004 +#define VHOST_VSOCK_PKT_WEIGHT 256
1005
1006 enum {
1007 VHOST_VSOCK_FEATURES = VHOST_FEATURES,
1008 @@ -78,6 +86,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
1009 struct vhost_virtqueue *vq)
1010 {
1011 struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
1012 + int pkts = 0, total_len = 0;
1013 bool added = false;
1014 bool restart_tx = false;
1015
1016 @@ -89,7 +98,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
1017 /* Avoid further vmexits, we're already processing the virtqueue */
1018 vhost_disable_notify(&vsock->dev, vq);
1019
1020 - for (;;) {
1021 + do {
1022 struct virtio_vsock_pkt *pkt;
1023 struct iov_iter iov_iter;
1024 unsigned out, in;
1025 @@ -174,8 +183,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
1026 */
1027 virtio_transport_deliver_tap_pkt(pkt);
1028
1029 + total_len += pkt->len;
1030 virtio_transport_free_pkt(pkt);
1031 - }
1032 + } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
1033 if (added)
1034 vhost_signal(&vsock->dev, vq);
1035
1036 @@ -350,7 +360,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
1037 struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
1038 dev);
1039 struct virtio_vsock_pkt *pkt;
1040 - int head;
1041 + int head, pkts = 0, total_len = 0;
1042 unsigned int out, in;
1043 bool added = false;
1044
1045 @@ -360,7 +370,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
1046 goto out;
1047
1048 vhost_disable_notify(&vsock->dev, vq);
1049 - for (;;) {
1050 + do {
1051 u32 len;
1052
1053 if (!vhost_vsock_more_replies(vsock)) {
1054 @@ -401,9 +411,11 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
1055 else
1056 virtio_transport_free_pkt(pkt);
1057
1058 - vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
1059 + len += sizeof(pkt->hdr);
1060 + vhost_add_used(vq, head, len);
1061 + total_len += len;
1062 added = true;
1063 - }
1064 + } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
1065
1066 no_more_replies:
1067 if (added)
1068 @@ -531,7 +543,9 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
1069 vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
1070 vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
1071
1072 - vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV);
1073 + vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
1074 + UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
1075 + VHOST_VSOCK_WEIGHT);
1076
1077 file->private_data = vsock;
1078 spin_lock_init(&vsock->send_pkt_list_lock);
1079 diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
1080 index c7542e8dd096..a11fa0b6b34d 100644
1081 --- a/fs/ceph/caps.c
1082 +++ b/fs/ceph/caps.c
1083 @@ -1237,20 +1237,23 @@ static int send_cap_msg(struct cap_msg_args *arg)
1084 }
1085
1086 /*
1087 - * Queue cap releases when an inode is dropped from our cache. Since
1088 - * inode is about to be destroyed, there is no need for i_ceph_lock.
1089 + * Queue cap releases when an inode is dropped from our cache.
1090 */
1091 void ceph_queue_caps_release(struct inode *inode)
1092 {
1093 struct ceph_inode_info *ci = ceph_inode(inode);
1094 struct rb_node *p;
1095
1096 + /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU)
1097 + * may call __ceph_caps_issued_mask() on a freeing inode. */
1098 + spin_lock(&ci->i_ceph_lock);
1099 p = rb_first(&ci->i_caps);
1100 while (p) {
1101 struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
1102 p = rb_next(p);
1103 __ceph_remove_cap(cap, true);
1104 }
1105 + spin_unlock(&ci->i_ceph_lock);
1106 }
1107
1108 /*
1109 diff --git a/fs/exec.c b/fs/exec.c
1110 index 433b1257694a..561ea64829ec 100644
1111 --- a/fs/exec.c
1112 +++ b/fs/exec.c
1113 @@ -1826,7 +1826,7 @@ static int __do_execve_file(int fd, struct filename *filename,
1114 membarrier_execve(current);
1115 rseq_execve(current);
1116 acct_update_integrals(current);
1117 - task_numa_free(current);
1118 + task_numa_free(current, false);
1119 free_bprm(bprm);
1120 kfree(pathbuf);
1121 if (filename)
1122 diff --git a/fs/nfs/client.c b/fs/nfs/client.c
1123 index c092661147b3..0a2b59c1ecb3 100644
1124 --- a/fs/nfs/client.c
1125 +++ b/fs/nfs/client.c
1126 @@ -416,10 +416,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
1127 clp = nfs_match_client(cl_init);
1128 if (clp) {
1129 spin_unlock(&nn->nfs_client_lock);
1130 - if (IS_ERR(clp))
1131 - return clp;
1132 if (new)
1133 new->rpc_ops->free_client(new);
1134 + if (IS_ERR(clp))
1135 + return clp;
1136 return nfs_found_client(cl_init, clp);
1137 }
1138 if (new) {
1139 diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
1140 index 8bfaa658b2c1..71b2e390becf 100644
1141 --- a/fs/nfs/dir.c
1142 +++ b/fs/nfs/dir.c
1143 @@ -1072,6 +1072,100 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1144 return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
1145 }
1146
1147 +static int
1148 +nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
1149 + struct inode *inode, int error)
1150 +{
1151 + switch (error) {
1152 + case 1:
1153 + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
1154 + __func__, dentry);
1155 + return 1;
1156 + case 0:
1157 + nfs_mark_for_revalidate(dir);
1158 + if (inode && S_ISDIR(inode->i_mode)) {
1159 + /* Purge readdir caches. */
1160 + nfs_zap_caches(inode);
1161 + /*
1162 + * We can't d_drop the root of a disconnected tree:
1163 + * its d_hash is on the s_anon list and d_drop() would hide
1164 + * it from shrink_dcache_for_unmount(), leading to busy
1165 + * inodes on unmount and further oopses.
1166 + */
1167 + if (IS_ROOT(dentry))
1168 + return 1;
1169 + }
1170 + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
1171 + __func__, dentry);
1172 + return 0;
1173 + }
1174 + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
1175 + __func__, dentry, error);
1176 + return error;
1177 +}
1178 +
1179 +static int
1180 +nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
1181 + unsigned int flags)
1182 +{
1183 + int ret = 1;
1184 + if (nfs_neg_need_reval(dir, dentry, flags)) {
1185 + if (flags & LOOKUP_RCU)
1186 + return -ECHILD;
1187 + ret = 0;
1188 + }
1189 + return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
1190 +}
1191 +
1192 +static int
1193 +nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
1194 + struct inode *inode)
1195 +{
1196 + nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1197 + return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
1198 +}
1199 +
1200 +static int
1201 +nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
1202 + struct inode *inode)
1203 +{
1204 + struct nfs_fh *fhandle;
1205 + struct nfs_fattr *fattr;
1206 + struct nfs4_label *label;
1207 + int ret;
1208 +
1209 + ret = -ENOMEM;
1210 + fhandle = nfs_alloc_fhandle();
1211 + fattr = nfs_alloc_fattr();
1212 + label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
1213 + if (fhandle == NULL || fattr == NULL || IS_ERR(label))
1214 + goto out;
1215 +
1216 + ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
1217 + if (ret < 0) {
1218 + if (ret == -ESTALE || ret == -ENOENT)
1219 + ret = 0;
1220 + goto out;
1221 + }
1222 + ret = 0;
1223 + if (nfs_compare_fh(NFS_FH(inode), fhandle))
1224 + goto out;
1225 + if (nfs_refresh_inode(inode, fattr) < 0)
1226 + goto out;
1227 +
1228 + nfs_setsecurity(inode, fattr, label);
1229 + nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1230 +
1231 + /* set a readdirplus hint that we had a cache miss */
1232 + nfs_force_use_readdirplus(dir);
1233 + ret = 1;
1234 +out:
1235 + nfs_free_fattr(fattr);
1236 + nfs_free_fhandle(fhandle);
1237 + nfs4_label_free(label);
1238 + return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
1239 +}
1240 +
1241 /*
1242 * This is called every time the dcache has a lookup hit,
1243 * and we should check whether we can really trust that
1244 @@ -1083,58 +1177,36 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1245 * If the parent directory is seen to have changed, we throw out the
1246 * cached dentry and do a new lookup.
1247 */
1248 -static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1249 +static int
1250 +nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1251 + unsigned int flags)
1252 {
1253 - struct inode *dir;
1254 struct inode *inode;
1255 - struct dentry *parent;
1256 - struct nfs_fh *fhandle = NULL;
1257 - struct nfs_fattr *fattr = NULL;
1258 - struct nfs4_label *label = NULL;
1259 int error;
1260
1261 - if (flags & LOOKUP_RCU) {
1262 - parent = READ_ONCE(dentry->d_parent);
1263 - dir = d_inode_rcu(parent);
1264 - if (!dir)
1265 - return -ECHILD;
1266 - } else {
1267 - parent = dget_parent(dentry);
1268 - dir = d_inode(parent);
1269 - }
1270 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
1271 inode = d_inode(dentry);
1272
1273 - if (!inode) {
1274 - if (nfs_neg_need_reval(dir, dentry, flags)) {
1275 - if (flags & LOOKUP_RCU)
1276 - return -ECHILD;
1277 - goto out_bad;
1278 - }
1279 - goto out_valid;
1280 - }
1281 + if (!inode)
1282 + return nfs_lookup_revalidate_negative(dir, dentry, flags);
1283
1284 if (is_bad_inode(inode)) {
1285 - if (flags & LOOKUP_RCU)
1286 - return -ECHILD;
1287 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1288 __func__, dentry);
1289 goto out_bad;
1290 }
1291
1292 if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
1293 - goto out_set_verifier;
1294 + return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1295
1296 /* Force a full look up iff the parent directory has changed */
1297 if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
1298 nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1299 error = nfs_lookup_verify_inode(inode, flags);
1300 if (error) {
1301 - if (flags & LOOKUP_RCU)
1302 - return -ECHILD;
1303 if (error == -ESTALE)
1304 - goto out_zap_parent;
1305 - goto out_error;
1306 + nfs_zap_caches(dir);
1307 + goto out_bad;
1308 }
1309 nfs_advise_use_readdirplus(dir);
1310 goto out_valid;
1311 @@ -1146,81 +1218,45 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1312 if (NFS_STALE(inode))
1313 goto out_bad;
1314
1315 - error = -ENOMEM;
1316 - fhandle = nfs_alloc_fhandle();
1317 - fattr = nfs_alloc_fattr();
1318 - if (fhandle == NULL || fattr == NULL)
1319 - goto out_error;
1320 -
1321 - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
1322 - if (IS_ERR(label))
1323 - goto out_error;
1324 -
1325 trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
1326 - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
1327 + error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
1328 trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
1329 - if (error == -ESTALE || error == -ENOENT)
1330 - goto out_bad;
1331 - if (error)
1332 - goto out_error;
1333 - if (nfs_compare_fh(NFS_FH(inode), fhandle))
1334 - goto out_bad;
1335 - if ((error = nfs_refresh_inode(inode, fattr)) != 0)
1336 - goto out_bad;
1337 -
1338 - nfs_setsecurity(inode, fattr, label);
1339 -
1340 - nfs_free_fattr(fattr);
1341 - nfs_free_fhandle(fhandle);
1342 - nfs4_label_free(label);
1343 + return error;
1344 +out_valid:
1345 + return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
1346 +out_bad:
1347 + if (flags & LOOKUP_RCU)
1348 + return -ECHILD;
1349 + return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
1350 +}
1351
1352 - /* set a readdirplus hint that we had a cache miss */
1353 - nfs_force_use_readdirplus(dir);
1354 +static int
1355 +__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
1356 + int (*reval)(struct inode *, struct dentry *, unsigned int))
1357 +{
1358 + struct dentry *parent;
1359 + struct inode *dir;
1360 + int ret;
1361
1362 -out_set_verifier:
1363 - nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1364 - out_valid:
1365 if (flags & LOOKUP_RCU) {
1366 + parent = READ_ONCE(dentry->d_parent);
1367 + dir = d_inode_rcu(parent);
1368 + if (!dir)
1369 + return -ECHILD;
1370 + ret = reval(dir, dentry, flags);
1371 if (parent != READ_ONCE(dentry->d_parent))
1372 return -ECHILD;
1373 - } else
1374 + } else {
1375 + parent = dget_parent(dentry);
1376 + ret = reval(d_inode(parent), dentry, flags);
1377 dput(parent);
1378 - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
1379 - __func__, dentry);
1380 - return 1;
1381 -out_zap_parent:
1382 - nfs_zap_caches(dir);
1383 - out_bad:
1384 - WARN_ON(flags & LOOKUP_RCU);
1385 - nfs_free_fattr(fattr);
1386 - nfs_free_fhandle(fhandle);
1387 - nfs4_label_free(label);
1388 - nfs_mark_for_revalidate(dir);
1389 - if (inode && S_ISDIR(inode->i_mode)) {
1390 - /* Purge readdir caches. */
1391 - nfs_zap_caches(inode);
1392 - /*
1393 - * We can't d_drop the root of a disconnected tree:
1394 - * its d_hash is on the s_anon list and d_drop() would hide
1395 - * it from shrink_dcache_for_unmount(), leading to busy
1396 - * inodes on unmount and further oopses.
1397 - */
1398 - if (IS_ROOT(dentry))
1399 - goto out_valid;
1400 }
1401 - dput(parent);
1402 - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
1403 - __func__, dentry);
1404 - return 0;
1405 -out_error:
1406 - WARN_ON(flags & LOOKUP_RCU);
1407 - nfs_free_fattr(fattr);
1408 - nfs_free_fhandle(fhandle);
1409 - nfs4_label_free(label);
1410 - dput(parent);
1411 - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
1412 - __func__, dentry, error);
1413 - return error;
1414 + return ret;
1415 +}
1416 +
1417 +static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1418 +{
1419 + return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
1420 }
1421
1422 /*
1423 @@ -1579,62 +1615,55 @@ no_open:
1424 }
1425 EXPORT_SYMBOL_GPL(nfs_atomic_open);
1426
1427 -static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1428 +static int
1429 +nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1430 + unsigned int flags)
1431 {
1432 struct inode *inode;
1433 - int ret = 0;
1434
1435 if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
1436 - goto no_open;
1437 + goto full_reval;
1438 if (d_mountpoint(dentry))
1439 - goto no_open;
1440 - if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
1441 - goto no_open;
1442 + goto full_reval;
1443
1444 inode = d_inode(dentry);
1445
1446 /* We can't create new files in nfs_open_revalidate(), so we
1447 * optimize away revalidation of negative dentries.
1448 */
1449 - if (inode == NULL) {
1450 - struct dentry *parent;
1451 - struct inode *dir;
1452 -
1453 - if (flags & LOOKUP_RCU) {
1454 - parent = READ_ONCE(dentry->d_parent);
1455 - dir = d_inode_rcu(parent);
1456 - if (!dir)
1457 - return -ECHILD;
1458 - } else {
1459 - parent = dget_parent(dentry);
1460 - dir = d_inode(parent);
1461 - }
1462 - if (!nfs_neg_need_reval(dir, dentry, flags))
1463 - ret = 1;
1464 - else if (flags & LOOKUP_RCU)
1465 - ret = -ECHILD;
1466 - if (!(flags & LOOKUP_RCU))
1467 - dput(parent);
1468 - else if (parent != READ_ONCE(dentry->d_parent))
1469 - return -ECHILD;
1470 - goto out;
1471 - }
1472 + if (inode == NULL)
1473 + goto full_reval;
1474 +
1475 + if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
1476 + return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1477
1478 /* NFS only supports OPEN on regular files */
1479 if (!S_ISREG(inode->i_mode))
1480 - goto no_open;
1481 + goto full_reval;
1482 +
1483 /* We cannot do exclusive creation on a positive dentry */
1484 - if (flags & LOOKUP_EXCL)
1485 - goto no_open;
1486 + if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
1487 + goto reval_dentry;
1488 +
1489 + /* Check if the directory changed */
1490 + if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
1491 + goto reval_dentry;
1492
1493 /* Let f_op->open() actually open (and revalidate) the file */
1494 - ret = 1;
1495 + return 1;
1496 +reval_dentry:
1497 + if (flags & LOOKUP_RCU)
1498 + return -ECHILD;
1499 + return nfs_lookup_revalidate_dentry(dir, dentry, inode);;
1500
1501 -out:
1502 - return ret;
1503 +full_reval:
1504 + return nfs_do_lookup_revalidate(dir, dentry, flags);
1505 +}
1506
1507 -no_open:
1508 - return nfs_lookup_revalidate(dentry, flags);
1509 +static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1510 +{
1511 + return __nfs_lookup_revalidate(dentry, flags,
1512 + nfs4_do_lookup_revalidate);
1513 }
1514
1515 #endif /* CONFIG_NFSV4 */
1516 diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
1517 index 1de855e0ae61..904e08bbb289 100644
1518 --- a/fs/nfs/nfs4proc.c
1519 +++ b/fs/nfs/nfs4proc.c
1520 @@ -1355,12 +1355,20 @@ static bool nfs4_mode_match_open_stateid(struct nfs4_state *state,
1521 return false;
1522 }
1523
1524 -static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
1525 +static int can_open_cached(struct nfs4_state *state, fmode_t mode,
1526 + int open_mode, enum open_claim_type4 claim)
1527 {
1528 int ret = 0;
1529
1530 if (open_mode & (O_EXCL|O_TRUNC))
1531 goto out;
1532 + switch (claim) {
1533 + case NFS4_OPEN_CLAIM_NULL:
1534 + case NFS4_OPEN_CLAIM_FH:
1535 + goto out;
1536 + default:
1537 + break;
1538 + }
1539 switch (mode & (FMODE_READ|FMODE_WRITE)) {
1540 case FMODE_READ:
1541 ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0
1542 @@ -1753,7 +1761,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1543
1544 for (;;) {
1545 spin_lock(&state->owner->so_lock);
1546 - if (can_open_cached(state, fmode, open_mode)) {
1547 + if (can_open_cached(state, fmode, open_mode, claim)) {
1548 update_open_stateflags(state, fmode);
1549 spin_unlock(&state->owner->so_lock);
1550 goto out_return_state;
1551 @@ -2282,7 +2290,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1552 if (data->state != NULL) {
1553 struct nfs_delegation *delegation;
1554
1555 - if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
1556 + if (can_open_cached(data->state, data->o_arg.fmode,
1557 + data->o_arg.open_flags, claim))
1558 goto out_no_action;
1559 rcu_read_lock();
1560 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
1561 diff --git a/fs/proc/base.c b/fs/proc/base.c
1562 index a7fbda72afeb..3b9b726b1a6c 100644
1563 --- a/fs/proc/base.c
1564 +++ b/fs/proc/base.c
1565 @@ -205,12 +205,53 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
1566 return result;
1567 }
1568
1569 +/*
1570 + * If the user used setproctitle(), we just get the string from
1571 + * user space at arg_start, and limit it to a maximum of one page.
1572 + */
1573 +static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
1574 + size_t count, unsigned long pos,
1575 + unsigned long arg_start)
1576 +{
1577 + char *page;
1578 + int ret, got;
1579 +
1580 + if (pos >= PAGE_SIZE)
1581 + return 0;
1582 +
1583 + page = (char *)__get_free_page(GFP_KERNEL);
1584 + if (!page)
1585 + return -ENOMEM;
1586 +
1587 + ret = 0;
1588 + got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
1589 + if (got > 0) {
1590 + int len = strnlen(page, got);
1591 +
1592 + /* Include the NUL character if it was found */
1593 + if (len < got)
1594 + len++;
1595 +
1596 + if (len > pos) {
1597 + len -= pos;
1598 + if (len > count)
1599 + len = count;
1600 + len -= copy_to_user(buf, page+pos, len);
1601 + if (!len)
1602 + len = -EFAULT;
1603 + ret = len;
1604 + }
1605 + }
1606 + free_page((unsigned long)page);
1607 + return ret;
1608 +}
1609 +
1610 static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
1611 size_t count, loff_t *ppos)
1612 {
1613 unsigned long arg_start, arg_end, env_start, env_end;
1614 unsigned long pos, len;
1615 - char *page;
1616 + char *page, c;
1617
1618 /* Check if process spawned far enough to have cmdline. */
1619 if (!mm->env_end)
1620 @@ -227,28 +268,42 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
1621 return 0;
1622
1623 /*
1624 - * We have traditionally allowed the user to re-write
1625 - * the argument strings and overflow the end result
1626 - * into the environment section. But only do that if
1627 - * the environment area is contiguous to the arguments.
1628 + * We allow setproctitle() to overwrite the argument
1629 + * strings, and overflow past the original end. But
1630 + * only when it overflows into the environment area.
1631 */
1632 - if (env_start != arg_end || env_start >= env_end)
1633 + if (env_start != arg_end || env_end < env_start)
1634 env_start = env_end = arg_end;
1635 -
1636 - /* .. and limit it to a maximum of one page of slop */
1637 - if (env_end >= arg_end + PAGE_SIZE)
1638 - env_end = arg_end + PAGE_SIZE - 1;
1639 + len = env_end - arg_start;
1640
1641 /* We're not going to care if "*ppos" has high bits set */
1642 - pos = arg_start + *ppos;
1643 -
1644 - /* .. but we do check the result is in the proper range */
1645 - if (pos < arg_start || pos >= env_end)
1646 + pos = *ppos;
1647 + if (pos >= len)
1648 return 0;
1649 + if (count > len - pos)
1650 + count = len - pos;
1651 + if (!count)
1652 + return 0;
1653 +
1654 + /*
1655 + * Magical special case: if the argv[] end byte is not
1656 + * zero, the user has overwritten it with setproctitle(3).
1657 + *
1658 + * Possible future enhancement: do this only once when
1659 + * pos is 0, and set a flag in the 'struct file'.
1660 + */
1661 + if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
1662 + return get_mm_proctitle(mm, buf, count, pos, arg_start);
1663
1664 - /* .. and we never go past env_end */
1665 - if (env_end - pos < count)
1666 - count = env_end - pos;
1667 + /*
1668 + * For the non-setproctitle() case we limit things strictly
1669 + * to the [arg_start, arg_end[ range.
1670 + */
1671 + pos += arg_start;
1672 + if (pos < arg_start || pos >= arg_end)
1673 + return 0;
1674 + if (count > arg_end - pos)
1675 + count = arg_end - pos;
1676
1677 page = (char *)__get_free_page(GFP_KERNEL);
1678 if (!page)
1679 @@ -258,48 +313,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
1680 while (count) {
1681 int got;
1682 size_t size = min_t(size_t, PAGE_SIZE, count);
1683 - long offset;
1684
1685 - /*
1686 - * Are we already starting past the official end?
1687 - * We always include the last byte that is *supposed*
1688 - * to be NUL
1689 - */
1690 - offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
1691 -
1692 - got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
1693 - if (got <= offset)
1694 + got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
1695 + if (got <= 0)
1696 break;
1697 - got -= offset;
1698 -
1699 - /* Don't walk past a NUL character once you hit arg_end */
1700 - if (pos + got >= arg_end) {
1701 - int n = 0;
1702 -
1703 - /*
1704 - * If we started before 'arg_end' but ended up
1705 - * at or after it, we start the NUL character
1706 - * check at arg_end-1 (where we expect the normal
1707 - * EOF to be).
1708 - *
1709 - * NOTE! This is smaller than 'got', because
1710 - * pos + got >= arg_end
1711 - */
1712 - if (pos < arg_end)
1713 - n = arg_end - pos - 1;
1714 -
1715 - /* Cut off at first NUL after 'n' */
1716 - got = n + strnlen(page+n, offset+got-n);
1717 - if (got < offset)
1718 - break;
1719 - got -= offset;
1720 -
1721 - /* Include the NUL if it existed */
1722 - if (got < size)
1723 - got++;
1724 - }
1725 -
1726 - got -= copy_to_user(buf, page+offset, got);
1727 + got -= copy_to_user(buf, page, got);
1728 if (unlikely(!got)) {
1729 if (!len)
1730 len = -EFAULT;
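The fs/proc/base.c rewrite above splits the setproctitle() case out into get_mm_proctitle() and otherwise restricts /proc/<pid>/cmdline strictly to the [arg_start, arg_end) range. A small stand-alone demo of the user-visible behaviour, assuming a Linux host with /proc mounted: it overwrites argv in place, the way setproctitle() implementations do, and reads back /proc/self/cmdline (it does not exercise the overflow-into-environment path that get_mm_proctitle() handles):

#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
	size_t avail = strlen(argv[0]);
	char buf[256];
	FILE *f;
	size_t n, i;

	(void)argc;

	/* Clobber argv[0] in place; the kernel reads back these bytes. */
	memset(argv[0], 0, avail);
	snprintf(argv[0], avail + 1, "retitled-demo");

	f = fopen("/proc/self/cmdline", "r");
	if (!f)
		return 1;
	n = fread(buf, 1, sizeof(buf) - 1, f);
	fclose(f);

	/* cmdline is NUL-separated; make it printable. */
	for (i = 0; i < n; i++)
		if (buf[i] == '\0')
			buf[i] = ' ';
	buf[n] = '\0';
	printf("cmdline now: %s\n", buf);
	return 0;
}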
1731 diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
1732 index 6980014357d4..d51e10f50e75 100644
1733 --- a/include/linux/blkdev.h
1734 +++ b/include/linux/blkdev.h
1735 @@ -504,6 +504,12 @@ struct request_queue {
1736 * various queue flags, see QUEUE_* below
1737 */
1738 unsigned long queue_flags;
1739 + /*
1740 + * Number of contexts that have called blk_set_pm_only(). If this
1741 + * counter is above zero then only RQF_PM and RQF_PREEMPT requests are
1742 + * processed.
1743 + */
1744 + atomic_t pm_only;
1745
1746 /*
1747 * ida allocated id for this queue. Used to index queues from
1748 @@ -698,7 +704,6 @@ struct request_queue {
1749 #define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */
1750 #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */
1751 #define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */
1752 -#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */
1753
1754 #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
1755 (1 << QUEUE_FLAG_SAME_COMP) | \
1756 @@ -736,12 +741,11 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
1757 ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
1758 REQ_FAILFAST_DRIVER))
1759 #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
1760 -#define blk_queue_preempt_only(q) \
1761 - test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
1762 +#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only)
1763 #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
1764
1765 -extern int blk_set_preempt_only(struct request_queue *q);
1766 -extern void blk_clear_preempt_only(struct request_queue *q);
1767 +extern void blk_set_pm_only(struct request_queue *q);
1768 +extern void blk_clear_pm_only(struct request_queue *q);
1769
1770 static inline int queue_in_flight(struct request_queue *q)
1771 {
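The block-layer change above replaces the single QUEUE_FLAG_PREEMPT_ONLY bit with an atomic pm_only counter, so several contexts can gate the queue and the waiters are only woken when the last one clears it. A user-space analogue of just the counter semantics, assuming C11 atomics (the request gating itself is not modelled):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int pm_only;

static void set_pm_only(void)
{
	atomic_fetch_add(&pm_only, 1);		/* nestable, unlike a single flag */
}

static bool clear_pm_only(void)
{
	int v = atomic_fetch_sub(&pm_only, 1) - 1;

	assert(v >= 0);				/* mirrors WARN_ON_ONCE(pm_only < 0) */
	return v == 0;				/* last holder: wake the waiters */
}

static bool queue_pm_only(void)
{
	return atomic_load(&pm_only) != 0;
}

int main(void)
{
	set_pm_only();
	set_pm_only();				/* two independent contexts */
	printf("gated: %d\n", queue_pm_only());
	clear_pm_only();
	printf("still gated after one clear: %d\n", queue_pm_only());
	printf("last clear wakes waiters: %d\n", clear_pm_only());
	return 0;
}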
1772 diff --git a/include/linux/iova.h b/include/linux/iova.h
1773 index 928442dda565..84fbe73d2ec0 100644
1774 --- a/include/linux/iova.h
1775 +++ b/include/linux/iova.h
1776 @@ -156,6 +156,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
1777 void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
1778 void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
1779 unsigned long start_pfn);
1780 +bool has_iova_flush_queue(struct iova_domain *iovad);
1781 int init_iova_flush_queue(struct iova_domain *iovad,
1782 iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
1783 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
1784 @@ -236,6 +237,11 @@ static inline void init_iova_domain(struct iova_domain *iovad,
1785 {
1786 }
1787
1788 +static inline bool has_iova_flush_queue(struct iova_domain *iovad)
1789 +{
1790 + return false;
1791 +}
1792 +
1793 static inline int init_iova_flush_queue(struct iova_domain *iovad,
1794 iova_flush_cb flush_cb,
1795 iova_entry_dtor entry_dtor)
1796 diff --git a/include/linux/sched.h b/include/linux/sched.h
1797 index 5dc024e28397..20f5ba262cc0 100644
1798 --- a/include/linux/sched.h
1799 +++ b/include/linux/sched.h
1800 @@ -1023,7 +1023,15 @@ struct task_struct {
1801 u64 last_sum_exec_runtime;
1802 struct callback_head numa_work;
1803
1804 - struct numa_group *numa_group;
1805 + /*
1806 + * This pointer is only modified for current in syscall and
1807 + * pagefault context (and for tasks being destroyed), so it can be read
1808 + * from any of the following contexts:
1809 + * - RCU read-side critical section
1810 + * - current->numa_group from everywhere
1811 + * - task's runqueue locked, task not running
1812 + */
1813 + struct numa_group __rcu *numa_group;
1814
1815 /*
1816 * numa_faults is an array split into four regions:
1817 diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
1818 index e7dd04a84ba8..3988762efe15 100644
1819 --- a/include/linux/sched/numa_balancing.h
1820 +++ b/include/linux/sched/numa_balancing.h
1821 @@ -19,7 +19,7 @@
1822 extern void task_numa_fault(int last_node, int node, int pages, int flags);
1823 extern pid_t task_numa_group_id(struct task_struct *p);
1824 extern void set_numabalancing_state(bool enabled);
1825 -extern void task_numa_free(struct task_struct *p);
1826 +extern void task_numa_free(struct task_struct *p, bool final);
1827 extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
1828 int src_nid, int dst_cpu);
1829 #else
1830 @@ -34,7 +34,7 @@ static inline pid_t task_numa_group_id(struct task_struct *p)
1831 static inline void set_numabalancing_state(bool enabled)
1832 {
1833 }
1834 -static inline void task_numa_free(struct task_struct *p)
1835 +static inline void task_numa_free(struct task_struct *p, bool final)
1836 {
1837 }
1838 static inline bool should_numa_migrate_memory(struct task_struct *p,
1839 diff --git a/kernel/fork.c b/kernel/fork.c
1840 index 69874db3fba8..e76ce81c9c75 100644
1841 --- a/kernel/fork.c
1842 +++ b/kernel/fork.c
1843 @@ -679,7 +679,7 @@ void __put_task_struct(struct task_struct *tsk)
1844 WARN_ON(tsk == current);
1845
1846 cgroup_free(tsk);
1847 - task_numa_free(tsk);
1848 + task_numa_free(tsk, true);
1849 security_task_free(tsk);
1850 exit_creds(tsk);
1851 delayacct_tsk_free(tsk);
1852 diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
1853 index 4a433608ba74..75f322603d44 100644
1854 --- a/kernel/sched/fair.c
1855 +++ b/kernel/sched/fair.c
1856 @@ -1053,6 +1053,21 @@ struct numa_group {
1857 unsigned long faults[0];
1858 };
1859
1860 +/*
1861 + * For functions that can be called in multiple contexts that permit reading
1862 + * ->numa_group (see struct task_struct for locking rules).
1863 + */
1864 +static struct numa_group *deref_task_numa_group(struct task_struct *p)
1865 +{
1866 + return rcu_dereference_check(p->numa_group, p == current ||
1867 + (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
1868 +}
1869 +
1870 +static struct numa_group *deref_curr_numa_group(struct task_struct *p)
1871 +{
1872 + return rcu_dereference_protected(p->numa_group, p == current);
1873 +}
1874 +
1875 static inline unsigned long group_faults_priv(struct numa_group *ng);
1876 static inline unsigned long group_faults_shared(struct numa_group *ng);
1877
1878 @@ -1096,10 +1111,12 @@ static unsigned int task_scan_start(struct task_struct *p)
1879 {
1880 unsigned long smin = task_scan_min(p);
1881 unsigned long period = smin;
1882 + struct numa_group *ng;
1883
1884 /* Scale the maximum scan period with the amount of shared memory. */
1885 - if (p->numa_group) {
1886 - struct numa_group *ng = p->numa_group;
1887 + rcu_read_lock();
1888 + ng = rcu_dereference(p->numa_group);
1889 + if (ng) {
1890 unsigned long shared = group_faults_shared(ng);
1891 unsigned long private = group_faults_priv(ng);
1892
1893 @@ -1107,6 +1124,7 @@ static unsigned int task_scan_start(struct task_struct *p)
1894 period *= shared + 1;
1895 period /= private + shared + 1;
1896 }
1897 + rcu_read_unlock();
1898
1899 return max(smin, period);
1900 }
1901 @@ -1115,13 +1133,14 @@ static unsigned int task_scan_max(struct task_struct *p)
1902 {
1903 unsigned long smin = task_scan_min(p);
1904 unsigned long smax;
1905 + struct numa_group *ng;
1906
1907 /* Watch for min being lower than max due to floor calculations */
1908 smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p);
1909
1910 /* Scale the maximum scan period with the amount of shared memory. */
1911 - if (p->numa_group) {
1912 - struct numa_group *ng = p->numa_group;
1913 + ng = deref_curr_numa_group(p);
1914 + if (ng) {
1915 unsigned long shared = group_faults_shared(ng);
1916 unsigned long private = group_faults_priv(ng);
1917 unsigned long period = smax;
1918 @@ -1153,7 +1172,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
1919 p->numa_scan_period = sysctl_numa_balancing_scan_delay;
1920 p->numa_work.next = &p->numa_work;
1921 p->numa_faults = NULL;
1922 - p->numa_group = NULL;
1923 + RCU_INIT_POINTER(p->numa_group, NULL);
1924 p->last_task_numa_placement = 0;
1925 p->last_sum_exec_runtime = 0;
1926
1927 @@ -1200,7 +1219,16 @@ static void account_numa_dequeue(struct rq *rq, struct task_struct *p)
1928
1929 pid_t task_numa_group_id(struct task_struct *p)
1930 {
1931 - return p->numa_group ? p->numa_group->gid : 0;
1932 + struct numa_group *ng;
1933 + pid_t gid = 0;
1934 +
1935 + rcu_read_lock();
1936 + ng = rcu_dereference(p->numa_group);
1937 + if (ng)
1938 + gid = ng->gid;
1939 + rcu_read_unlock();
1940 +
1941 + return gid;
1942 }
1943
1944 /*
1945 @@ -1225,11 +1253,13 @@ static inline unsigned long task_faults(struct task_struct *p, int nid)
1946
1947 static inline unsigned long group_faults(struct task_struct *p, int nid)
1948 {
1949 - if (!p->numa_group)
1950 + struct numa_group *ng = deref_task_numa_group(p);
1951 +
1952 + if (!ng)
1953 return 0;
1954
1955 - return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
1956 - p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)];
1957 + return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
1958 + ng->faults[task_faults_idx(NUMA_MEM, nid, 1)];
1959 }
1960
1961 static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
1962 @@ -1367,12 +1397,13 @@ static inline unsigned long task_weight(struct task_struct *p, int nid,
1963 static inline unsigned long group_weight(struct task_struct *p, int nid,
1964 int dist)
1965 {
1966 + struct numa_group *ng = deref_task_numa_group(p);
1967 unsigned long faults, total_faults;
1968
1969 - if (!p->numa_group)
1970 + if (!ng)
1971 return 0;
1972
1973 - total_faults = p->numa_group->total_faults;
1974 + total_faults = ng->total_faults;
1975
1976 if (!total_faults)
1977 return 0;
1978 @@ -1386,7 +1417,7 @@ static inline unsigned long group_weight(struct task_struct *p, int nid,
1979 bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
1980 int src_nid, int dst_cpu)
1981 {
1982 - struct numa_group *ng = p->numa_group;
1983 + struct numa_group *ng = deref_curr_numa_group(p);
1984 int dst_nid = cpu_to_node(dst_cpu);
1985 int last_cpupid, this_cpupid;
1986
1987 @@ -1592,13 +1623,14 @@ static bool load_too_imbalanced(long src_load, long dst_load,
1988 static void task_numa_compare(struct task_numa_env *env,
1989 long taskimp, long groupimp, bool maymove)
1990 {
1991 + struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p);
1992 struct rq *dst_rq = cpu_rq(env->dst_cpu);
1993 + long imp = p_ng ? groupimp : taskimp;
1994 struct task_struct *cur;
1995 long src_load, dst_load;
1996 - long load;
1997 - long imp = env->p->numa_group ? groupimp : taskimp;
1998 - long moveimp = imp;
1999 int dist = env->dist;
2000 + long moveimp = imp;
2001 + long load;
2002
2003 if (READ_ONCE(dst_rq->numa_migrate_on))
2004 return;
2005 @@ -1637,21 +1669,22 @@ static void task_numa_compare(struct task_numa_env *env,
2006 * If dst and source tasks are in the same NUMA group, or not
2007 * in any group then look only at task weights.
2008 */
2009 - if (cur->numa_group == env->p->numa_group) {
2010 + cur_ng = rcu_dereference(cur->numa_group);
2011 + if (cur_ng == p_ng) {
2012 imp = taskimp + task_weight(cur, env->src_nid, dist) -
2013 task_weight(cur, env->dst_nid, dist);
2014 /*
2015 * Add some hysteresis to prevent swapping the
2016 * tasks within a group over tiny differences.
2017 */
2018 - if (cur->numa_group)
2019 + if (cur_ng)
2020 imp -= imp / 16;
2021 } else {
2022 /*
2023 * Compare the group weights. If a task is all by itself
2024 * (not part of a group), use the task weight instead.
2025 */
2026 - if (cur->numa_group && env->p->numa_group)
2027 + if (cur_ng && p_ng)
2028 imp += group_weight(cur, env->src_nid, dist) -
2029 group_weight(cur, env->dst_nid, dist);
2030 else
2031 @@ -1749,11 +1782,12 @@ static int task_numa_migrate(struct task_struct *p)
2032 .best_imp = 0,
2033 .best_cpu = -1,
2034 };
2035 + unsigned long taskweight, groupweight;
2036 struct sched_domain *sd;
2037 + long taskimp, groupimp;
2038 + struct numa_group *ng;
2039 struct rq *best_rq;
2040 - unsigned long taskweight, groupweight;
2041 int nid, ret, dist;
2042 - long taskimp, groupimp;
2043
2044 /*
2045 * Pick the lowest SD_NUMA domain, as that would have the smallest
2046 @@ -1799,7 +1833,8 @@ static int task_numa_migrate(struct task_struct *p)
2047 * multiple NUMA nodes; in order to better consolidate the group,
2048 * we need to check other locations.
2049 */
2050 - if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
2051 + ng = deref_curr_numa_group(p);
2052 + if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
2053 for_each_online_node(nid) {
2054 if (nid == env.src_nid || nid == p->numa_preferred_nid)
2055 continue;
2056 @@ -1832,7 +1867,7 @@ static int task_numa_migrate(struct task_struct *p)
2057 * A task that migrated to a second choice node will be better off
2058 * trying for a better one later. Do not set the preferred node here.
2059 */
2060 - if (p->numa_group) {
2061 + if (ng) {
2062 if (env.best_cpu == -1)
2063 nid = env.src_nid;
2064 else
2065 @@ -2127,6 +2162,7 @@ static void task_numa_placement(struct task_struct *p)
2066 unsigned long total_faults;
2067 u64 runtime, period;
2068 spinlock_t *group_lock = NULL;
2069 + struct numa_group *ng;
2070
2071 /*
2072 * The p->mm->numa_scan_seq field gets updated without
2073 @@ -2144,8 +2180,9 @@ static void task_numa_placement(struct task_struct *p)
2074 runtime = numa_get_avg_runtime(p, &period);
2075
2076 /* If the task is part of a group prevent parallel updates to group stats */
2077 - if (p->numa_group) {
2078 - group_lock = &p->numa_group->lock;
2079 + ng = deref_curr_numa_group(p);
2080 + if (ng) {
2081 + group_lock = &ng->lock;
2082 spin_lock_irq(group_lock);
2083 }
2084
2085 @@ -2186,7 +2223,7 @@ static void task_numa_placement(struct task_struct *p)
2086 p->numa_faults[cpu_idx] += f_diff;
2087 faults += p->numa_faults[mem_idx];
2088 p->total_numa_faults += diff;
2089 - if (p->numa_group) {
2090 + if (ng) {
2091 /*
2092 * safe because we can only change our own group
2093 *
2094 @@ -2194,14 +2231,14 @@ static void task_numa_placement(struct task_struct *p)
2095 * nid and priv in a specific region because it
2096 * is at the beginning of the numa_faults array.
2097 */
2098 - p->numa_group->faults[mem_idx] += diff;
2099 - p->numa_group->faults_cpu[mem_idx] += f_diff;
2100 - p->numa_group->total_faults += diff;
2101 - group_faults += p->numa_group->faults[mem_idx];
2102 + ng->faults[mem_idx] += diff;
2103 + ng->faults_cpu[mem_idx] += f_diff;
2104 + ng->total_faults += diff;
2105 + group_faults += ng->faults[mem_idx];
2106 }
2107 }
2108
2109 - if (!p->numa_group) {
2110 + if (!ng) {
2111 if (faults > max_faults) {
2112 max_faults = faults;
2113 max_nid = nid;
2114 @@ -2212,8 +2249,8 @@ static void task_numa_placement(struct task_struct *p)
2115 }
2116 }
2117
2118 - if (p->numa_group) {
2119 - numa_group_count_active_nodes(p->numa_group);
2120 + if (ng) {
2121 + numa_group_count_active_nodes(ng);
2122 spin_unlock_irq(group_lock);
2123 max_nid = preferred_group_nid(p, max_nid);
2124 }
2125 @@ -2247,7 +2284,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
2126 int cpu = cpupid_to_cpu(cpupid);
2127 int i;
2128
2129 - if (unlikely(!p->numa_group)) {
2130 + if (unlikely(!deref_curr_numa_group(p))) {
2131 unsigned int size = sizeof(struct numa_group) +
2132 4*nr_node_ids*sizeof(unsigned long);
2133
2134 @@ -2283,7 +2320,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
2135 if (!grp)
2136 goto no_join;
2137
2138 - my_grp = p->numa_group;
2139 + my_grp = deref_curr_numa_group(p);
2140 if (grp == my_grp)
2141 goto no_join;
2142
2143 @@ -2345,13 +2382,24 @@ no_join:
2144 return;
2145 }
2146
2147 -void task_numa_free(struct task_struct *p)
2148 +/*
2149 + * Get rid of NUMA statistics associated with a task (either current or dead).
2150 + * If @final is set, the task is dead and has reached refcount zero, so we can
2151 + * safely free all relevant data structures. Otherwise, there might be
2152 + * concurrent reads from places like load balancing and procfs, and we should
2153 + * reset the data back to default state without freeing ->numa_faults.
2154 + */
2155 +void task_numa_free(struct task_struct *p, bool final)
2156 {
2157 - struct numa_group *grp = p->numa_group;
2158 - void *numa_faults = p->numa_faults;
2159 + /* safe: p either is current or is being freed by current */
2160 + struct numa_group *grp = rcu_dereference_raw(p->numa_group);
2161 + unsigned long *numa_faults = p->numa_faults;
2162 unsigned long flags;
2163 int i;
2164
2165 + if (!numa_faults)
2166 + return;
2167 +
2168 if (grp) {
2169 spin_lock_irqsave(&grp->lock, flags);
2170 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
2171 @@ -2364,8 +2412,14 @@ void task_numa_free(struct task_struct *p)
2172 put_numa_group(grp);
2173 }
2174
2175 - p->numa_faults = NULL;
2176 - kfree(numa_faults);
2177 + if (final) {
2178 + p->numa_faults = NULL;
2179 + kfree(numa_faults);
2180 + } else {
2181 + p->total_numa_faults = 0;
2182 + for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
2183 + numa_faults[i] = 0;
2184 + }
2185 }
2186
2187 /*
2188 @@ -2418,7 +2472,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
2189 * actively using should be counted as local. This allows the
2190 * scan rate to slow down when a workload has settled down.
2191 */
2192 - ng = p->numa_group;
2193 + ng = deref_curr_numa_group(p);
2194 if (!priv && !local && ng && ng->active_nodes > 1 &&
2195 numa_is_active_node(cpu_node, ng) &&
2196 numa_is_active_node(mem_node, ng))
2197 @@ -10218,18 +10272,22 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
2198 {
2199 int node;
2200 unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
2201 + struct numa_group *ng;
2202
2203 + rcu_read_lock();
2204 + ng = rcu_dereference(p->numa_group);
2205 for_each_online_node(node) {
2206 if (p->numa_faults) {
2207 tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
2208 tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
2209 }
2210 - if (p->numa_group) {
2211 - gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
2212 - gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
2213 + if (ng) {
2214 + gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)],
2215 + gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)];
2216 }
2217 print_numa_stats(m, node, tsf, tpf, gsf, gpf);
2218 }
2219 + rcu_read_unlock();
2220 }
2221 #endif /* CONFIG_NUMA_BALANCING */
2222 #endif /* CONFIG_SCHED_DEBUG */
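The scheduler changes above convert p->numa_group to an RCU-managed pointer: readers outside the task itself take rcu_read_lock() and rcu_dereference(), while deref_curr_numa_group()/deref_task_numa_group() document the contexts that may skip that. The generic shape of the pattern, as a kernel-style sketch (not standalone; struct foo and its helpers are hypothetical, and a real updater would pass its serializing lock to rcu_dereference_protected() instead of the literal 1):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int val;
	struct rcu_head rcu;
};

static struct foo __rcu *global_foo;

static int foo_read_val(void)
{
	struct foo *f;
	int val = 0;

	rcu_read_lock();			/* readers never block the updater */
	f = rcu_dereference(global_foo);
	if (f)
		val = f->val;
	rcu_read_unlock();
	return val;
}

static void foo_replace(struct foo *newf)
{
	struct foo *old = rcu_dereference_protected(global_foo, 1);

	rcu_assign_pointer(global_foo, newf);	/* publish with the needed barrier */
	if (old)
		kfree_rcu(old, rcu);		/* freed only after a grace period */
}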
2223 diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
2224 index c248e0dccbe1..67ef9d853d90 100644
2225 --- a/net/ipv4/ip_tunnel_core.c
2226 +++ b/net/ipv4/ip_tunnel_core.c
2227 @@ -89,9 +89,12 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
2228 __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
2229
2230 err = ip_local_out(net, sk, skb);
2231 - if (unlikely(net_xmit_eval(err)))
2232 - pkt_len = 0;
2233 - iptunnel_xmit_stats(dev, pkt_len);
2234 +
2235 + if (dev) {
2236 + if (unlikely(net_xmit_eval(err)))
2237 + pkt_len = 0;
2238 + iptunnel_xmit_stats(dev, pkt_len);
2239 + }
2240 }
2241 EXPORT_SYMBOL_GPL(iptunnel_xmit);
2242
2243 diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
2244 index ab27a2872935..2e30bf197583 100644
2245 --- a/net/vmw_vsock/af_vsock.c
2246 +++ b/net/vmw_vsock/af_vsock.c
2247 @@ -281,7 +281,8 @@ EXPORT_SYMBOL_GPL(vsock_insert_connected);
2248 void vsock_remove_bound(struct vsock_sock *vsk)
2249 {
2250 spin_lock_bh(&vsock_table_lock);
2251 - __vsock_remove_bound(vsk);
2252 + if (__vsock_in_bound_table(vsk))
2253 + __vsock_remove_bound(vsk);
2254 spin_unlock_bh(&vsock_table_lock);
2255 }
2256 EXPORT_SYMBOL_GPL(vsock_remove_bound);
2257 @@ -289,7 +290,8 @@ EXPORT_SYMBOL_GPL(vsock_remove_bound);
2258 void vsock_remove_connected(struct vsock_sock *vsk)
2259 {
2260 spin_lock_bh(&vsock_table_lock);
2261 - __vsock_remove_connected(vsk);
2262 + if (__vsock_in_connected_table(vsk))
2263 + __vsock_remove_connected(vsk);
2264 spin_unlock_bh(&vsock_table_lock);
2265 }
2266 EXPORT_SYMBOL_GPL(vsock_remove_connected);
2267 @@ -325,35 +327,10 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
2268 }
2269 EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
2270
2271 -static bool vsock_in_bound_table(struct vsock_sock *vsk)
2272 -{
2273 - bool ret;
2274 -
2275 - spin_lock_bh(&vsock_table_lock);
2276 - ret = __vsock_in_bound_table(vsk);
2277 - spin_unlock_bh(&vsock_table_lock);
2278 -
2279 - return ret;
2280 -}
2281 -
2282 -static bool vsock_in_connected_table(struct vsock_sock *vsk)
2283 -{
2284 - bool ret;
2285 -
2286 - spin_lock_bh(&vsock_table_lock);
2287 - ret = __vsock_in_connected_table(vsk);
2288 - spin_unlock_bh(&vsock_table_lock);
2289 -
2290 - return ret;
2291 -}
2292 -
2293 void vsock_remove_sock(struct vsock_sock *vsk)
2294 {
2295 - if (vsock_in_bound_table(vsk))
2296 - vsock_remove_bound(vsk);
2297 -
2298 - if (vsock_in_connected_table(vsk))
2299 - vsock_remove_connected(vsk);
2300 + vsock_remove_bound(vsk);
2301 + vsock_remove_connected(vsk);
2302 }
2303 EXPORT_SYMBOL_GPL(vsock_remove_sock);
2304
2305 @@ -484,8 +461,7 @@ static void vsock_pending_work(struct work_struct *work)
2306 * incoming packets can't find this socket, and to reduce the reference
2307 * count.
2308 */
2309 - if (vsock_in_connected_table(vsk))
2310 - vsock_remove_connected(vsk);
2311 + vsock_remove_connected(vsk);
2312
2313 sk->sk_state = TCP_CLOSE;
2314
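The af_vsock change above folds the bound/connected membership test into vsock_remove_bound()/vsock_remove_connected(), so the check and the list removal happen under the same vsock_table_lock and the old check-then-lock window disappears. A user-space sketch of that locking shape, assuming POSIX threads (the boolean stands in for list membership):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static bool in_table;			/* stands in for the socket's list membership */

static void remove_if_present(void)
{
	pthread_mutex_lock(&table_lock);
	if (in_table)			/* check ... */
		in_table = false;	/* ... and act inside one critical section */
	pthread_mutex_unlock(&table_lock);
}

int main(void)
{
	in_table = true;
	remove_if_present();
	remove_if_present();		/* second call is a harmless no-op */
	return 0;
}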
2315 diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
2316 index b131561a9469..9c7da811d130 100644
2317 --- a/net/vmw_vsock/hyperv_transport.c
2318 +++ b/net/vmw_vsock/hyperv_transport.c
2319 @@ -35,6 +35,9 @@
2320 /* The MTU is 16KB per the host side's design */
2321 #define HVS_MTU_SIZE (1024 * 16)
2322
2323 +/* How long to wait for graceful shutdown of a connection */
2324 +#define HVS_CLOSE_TIMEOUT (8 * HZ)
2325 +
2326 struct vmpipe_proto_header {
2327 u32 pkt_type;
2328 u32 data_size;
2329 @@ -290,19 +293,32 @@ static void hvs_channel_cb(void *ctx)
2330 sk->sk_write_space(sk);
2331 }
2332
2333 -static void hvs_close_connection(struct vmbus_channel *chan)
2334 +static void hvs_do_close_lock_held(struct vsock_sock *vsk,
2335 + bool cancel_timeout)
2336 {
2337 - struct sock *sk = get_per_channel_state(chan);
2338 - struct vsock_sock *vsk = vsock_sk(sk);
2339 -
2340 - lock_sock(sk);
2341 + struct sock *sk = sk_vsock(vsk);
2342
2343 - sk->sk_state = TCP_CLOSE;
2344 sock_set_flag(sk, SOCK_DONE);
2345 - vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
2346 -
2347 + vsk->peer_shutdown = SHUTDOWN_MASK;
2348 + if (vsock_stream_has_data(vsk) <= 0)
2349 + sk->sk_state = TCP_CLOSING;
2350 sk->sk_state_change(sk);
2351 + if (vsk->close_work_scheduled &&
2352 + (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
2353 + vsk->close_work_scheduled = false;
2354 + vsock_remove_sock(vsk);
2355
2356 + /* Release the reference taken while scheduling the timeout */
2357 + sock_put(sk);
2358 + }
2359 +}
2360 +
2361 +static void hvs_close_connection(struct vmbus_channel *chan)
2362 +{
2363 + struct sock *sk = get_per_channel_state(chan);
2364 +
2365 + lock_sock(sk);
2366 + hvs_do_close_lock_held(vsock_sk(sk), true);
2367 release_sock(sk);
2368 }
2369
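The hyperv_transport changes in this file (this hunk and the ones that follow) add a graceful-close path: if the peer has not finished shutting down, the socket takes an extra reference, arms a delayed work item with an 8-second timeout, and only removes itself when the worker fires or the host closes the channel. A condensed kernel-style sketch of that shape (not standalone; my_sock and its helpers are hypothetical stand-ins for the hvsock-specific pieces):

#include <linux/workqueue.h>
#include <net/sock.h>

struct my_sock {
	struct sock sk;				/* first member, as in vsock_sock */
	bool close_work_scheduled;
	struct delayed_work close_work;
};

static bool peer_already_done(struct my_sock *ms);	/* hypothetical */
static void force_close(struct my_sock *ms);		/* hypothetical */

static void my_close_timeout(struct work_struct *work)
{
	struct my_sock *ms = container_of(work, struct my_sock, close_work.work);
	struct sock *sk = &ms->sk;

	lock_sock(sk);
	if (!peer_already_done(ms))
		force_close(ms);		/* peer never answered the shutdown */
	ms->close_work_scheduled = false;
	release_sock(sk);
	sock_put(sk);				/* drop the reference taken when arming */
}

static bool my_close(struct my_sock *ms)
{
	if (peer_already_done(ms))
		return true;			/* safe to remove the socket right away */

	sock_hold(&ms->sk);			/* keep it alive for the timer */
	INIT_DELAYED_WORK(&ms->close_work, my_close_timeout);
	ms->close_work_scheduled = true;
	schedule_delayed_work(&ms->close_work, 8 * HZ);
	return false;				/* removal deferred to the worker */
}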
2370 @@ -445,50 +461,80 @@ static int hvs_connect(struct vsock_sock *vsk)
2371 return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
2372 }
2373
2374 +static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
2375 +{
2376 + struct vmpipe_proto_header hdr;
2377 +
2378 + if (hvs->fin_sent || !hvs->chan)
2379 + return;
2380 +
2381 + /* It can't fail: see hvs_channel_writable_bytes(). */
2382 + (void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0);
2383 + hvs->fin_sent = true;
2384 +}
2385 +
2386 static int hvs_shutdown(struct vsock_sock *vsk, int mode)
2387 {
2388 struct sock *sk = sk_vsock(vsk);
2389 - struct vmpipe_proto_header hdr;
2390 - struct hvs_send_buf *send_buf;
2391 - struct hvsock *hvs;
2392
2393 if (!(mode & SEND_SHUTDOWN))
2394 return 0;
2395
2396 lock_sock(sk);
2397 + hvs_shutdown_lock_held(vsk->trans, mode);
2398 + release_sock(sk);
2399 + return 0;
2400 +}
2401
2402 - hvs = vsk->trans;
2403 - if (hvs->fin_sent)
2404 - goto out;
2405 -
2406 - send_buf = (struct hvs_send_buf *)&hdr;
2407 +static void hvs_close_timeout(struct work_struct *work)
2408 +{
2409 + struct vsock_sock *vsk =
2410 + container_of(work, struct vsock_sock, close_work.work);
2411 + struct sock *sk = sk_vsock(vsk);
2412
2413 - /* It can't fail: see hvs_channel_writable_bytes(). */
2414 - (void)hvs_send_data(hvs->chan, send_buf, 0);
2415 + sock_hold(sk);
2416 + lock_sock(sk);
2417 + if (!sock_flag(sk, SOCK_DONE))
2418 + hvs_do_close_lock_held(vsk, false);
2419
2420 - hvs->fin_sent = true;
2421 -out:
2422 + vsk->close_work_scheduled = false;
2423 release_sock(sk);
2424 - return 0;
2425 + sock_put(sk);
2426 }
2427
2428 -static void hvs_release(struct vsock_sock *vsk)
2429 +/* Returns true if it is safe to remove the socket; false otherwise */
2430 +static bool hvs_close_lock_held(struct vsock_sock *vsk)
2431 {
2432 struct sock *sk = sk_vsock(vsk);
2433 - struct hvsock *hvs = vsk->trans;
2434 - struct vmbus_channel *chan;
2435
2436 - lock_sock(sk);
2437 + if (!(sk->sk_state == TCP_ESTABLISHED ||
2438 + sk->sk_state == TCP_CLOSING))
2439 + return true;
2440
2441 - sk->sk_state = TCP_CLOSING;
2442 - vsock_remove_sock(vsk);
2443 + if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
2444 + hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK);
2445
2446 - release_sock(sk);
2447 + if (sock_flag(sk, SOCK_DONE))
2448 + return true;
2449
2450 - chan = hvs->chan;
2451 - if (chan)
2452 - hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
2453 + /* This reference will be dropped by the delayed close routine */
2454 + sock_hold(sk);
2455 + INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout);
2456 + vsk->close_work_scheduled = true;
2457 + schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT);
2458 + return false;
2459 +}
2460
2461 +static void hvs_release(struct vsock_sock *vsk)
2462 +{
2463 + struct sock *sk = sk_vsock(vsk);
2464 + bool remove_sock;
2465 +
2466 + lock_sock(sk);
2467 + remove_sock = hvs_close_lock_held(vsk);
2468 + release_sock(sk);
2469 + if (remove_sock)
2470 + vsock_remove_sock(vsk);
2471 }
2472
2473 static void hvs_destruct(struct vsock_sock *vsk)