Magellan Linux

Contents of /trunk/kernel-alx/patches-4.19/0163-4.19.64-all-fixes.patch



Revision 3442
Mon Aug 5 07:52:44 2019 UTC by niro
File size: 78299 bytes
-linux-4.19.64
diff --git a/Makefile b/Makefile
index 8ad77a93de30..203d9e80a315 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 19
-SUBLEVEL = 63
+SUBLEVEL = 64
EXTRAVERSION =
NAME = "People's Front"

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 1a037b94eba1..cee28a05ee98 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
}

#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
+#define COMPAT_MINSIGSTKSZ 2048

static inline void __user *arch_compat_alloc_user_space(long len)
{
diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index 6394b4f0a69b..f42feab25dcf 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -8,27 +8,19 @@ config SH_ALPHA_BOARD
bool

config SH_DEVICE_TREE
- bool "Board Described by Device Tree"
+ bool
select OF
select OF_EARLY_FLATTREE
select TIMER_OF
select COMMON_CLK
select GENERIC_CALIBRATE_DELAY
- help
- Select Board Described by Device Tree to build a kernel that
- does not hard-code any board-specific knowledge but instead uses
- a device tree blob provided by the boot-loader. You must enable
- drivers for any hardware you want to use separately. At this
- time, only boards based on the open-hardware J-Core processors
- have sufficient driver coverage to use this option; do not
- select it if you are using original SuperH hardware.

config SH_JCORE_SOC
bool "J-Core SoC"
- depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2)
+ select SH_DEVICE_TREE
select CLKSRC_JCORE_PIT
select JCORE_AIC
- default y if CPU_J2
+ depends on CPU_J2
help
Select this option to include drivers core components of the
J-Core SoC, including interrupt controllers and timers.
diff --git a/block/blk-core.c b/block/blk-core.c
index 9ca703bcfe3b..4a3e1f417880 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -421,24 +421,25 @@ void blk_sync_queue(struct request_queue *q)
EXPORT_SYMBOL(blk_sync_queue);

/**
- * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY
+ * blk_set_pm_only - increment pm_only counter
* @q: request queue pointer
- *
- * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not
- * set and 1 if the flag was already set.
*/
-int blk_set_preempt_only(struct request_queue *q)
+void blk_set_pm_only(struct request_queue *q)
{
- return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
+ atomic_inc(&q->pm_only);
}
-EXPORT_SYMBOL_GPL(blk_set_preempt_only);
+EXPORT_SYMBOL_GPL(blk_set_pm_only);

-void blk_clear_preempt_only(struct request_queue *q)
+void blk_clear_pm_only(struct request_queue *q)
{
- blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
- wake_up_all(&q->mq_freeze_wq);
+ int pm_only;
+
+ pm_only = atomic_dec_return(&q->pm_only);
+ WARN_ON_ONCE(pm_only < 0);
+ if (pm_only == 0)
+ wake_up_all(&q->mq_freeze_wq);
}
-EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
+EXPORT_SYMBOL_GPL(blk_clear_pm_only);

/**
* __blk_run_queue_uncond - run a queue whether or not it has been stopped
@@ -916,7 +917,7 @@ EXPORT_SYMBOL(blk_alloc_queue);
*/
int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
{
- const bool preempt = flags & BLK_MQ_REQ_PREEMPT;
+ const bool pm = flags & BLK_MQ_REQ_PREEMPT;

while (true) {
bool success = false;
@@ -924,11 +925,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
rcu_read_lock();
if (percpu_ref_tryget_live(&q->q_usage_counter)) {
/*
- * The code that sets the PREEMPT_ONLY flag is
- * responsible for ensuring that that flag is globally
- * visible before the queue is unfrozen.
+ * The code that increments the pm_only counter is
+ * responsible for ensuring that that counter is
+ * globally visible before the queue is unfrozen.
*/
- if (preempt || !blk_queue_preempt_only(q)) {
+ if (pm || !blk_queue_pm_only(q)) {
success = true;
} else {
percpu_ref_put(&q->q_usage_counter);
@@ -953,7 +954,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)

wait_event(q->mq_freeze_wq,
(atomic_read(&q->mq_freeze_depth) == 0 &&
- (preempt || !blk_queue_preempt_only(q))) ||
+ (pm || !blk_queue_pm_only(q))) ||
blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index cb1e6cf7ac48..a5ea86835fcb 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags,
return 0;
}

+static int queue_pm_only_show(void *data, struct seq_file *m)
+{
+ struct request_queue *q = data;
+
+ seq_printf(m, "%d\n", atomic_read(&q->pm_only));
+ return 0;
+}
+
#define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name
static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(QUEUED),
@@ -132,7 +140,6 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(REGISTERED),
QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
QUEUE_FLAG_NAME(QUIESCED),
- QUEUE_FLAG_NAME(PREEMPT_ONLY),
};
#undef QUEUE_FLAG_NAME

@@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf,
static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
{ "poll_stat", 0400, queue_poll_stat_show },
{ "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
+ { "pm_only", 0600, queue_pm_only_show, NULL },
{ "state", 0600, queue_state_show, queue_state_write },
{ "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
{ "zone_wlock", 0400, queue_zone_wlock_show, NULL },
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 1e0e438f079f..6e04e7a707a1 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1960,8 +1960,18 @@ static struct binder_thread *binder_get_txn_from_and_acq_inner(

static void binder_free_transaction(struct binder_transaction *t)
{
- if (t->buffer)
- t->buffer->transaction = NULL;
+ struct binder_proc *target_proc = t->to_proc;
+
+ if (target_proc) {
+ binder_inner_proc_lock(target_proc);
+ if (t->buffer)
+ t->buffer->transaction = NULL;
+ binder_inner_proc_unlock(target_proc);
+ }
+ /*
+ * If the transaction has no target_proc, then
+ * t->buffer->transaction has already been cleared.
+ */
kfree(t);
binder_stats_deleted(BINDER_STAT_TRANSACTION);
}
@@ -3484,10 +3494,12 @@ static int binder_thread_write(struct binder_proc *proc,
buffer->debug_id,
buffer->transaction ? "active" : "finished");

+ binder_inner_proc_lock(proc);
if (buffer->transaction) {
buffer->transaction->buffer = NULL;
buffer->transaction = NULL;
}
+ binder_inner_proc_unlock(proc);
if (buffer->async_transaction && buffer->target_node) {
struct binder_node *buf_node;
struct binder_work *w;
diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c
index d568fbd94d6c..20235925344d 100644
--- a/drivers/bluetooth/hci_ath.c
+++ b/drivers/bluetooth/hci_ath.c
@@ -112,6 +112,9 @@ static int ath_open(struct hci_uart *hu)

BT_DBG("hu %p", hu);

+ if (!hci_uart_has_flow_control(hu))
+ return -EOPNOTSUPP;
+
ath = kzalloc(sizeof(*ath), GFP_KERNEL);
if (!ath)
return -ENOMEM;
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 800132369134..aa6b7ed9fdf1 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -369,6 +369,9 @@ static int bcm_open(struct hci_uart *hu)

bt_dev_dbg(hu->hdev, "hu %p", hu);

+ if (!hci_uart_has_flow_control(hu))
+ return -EOPNOTSUPP;
+
bcm = kzalloc(sizeof(*bcm), GFP_KERNEL);
if (!bcm)
return -ENOMEM;
diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
index 46ace321bf60..e9228520e4c7 100644
--- a/drivers/bluetooth/hci_intel.c
+++ b/drivers/bluetooth/hci_intel.c
@@ -406,6 +406,9 @@ static int intel_open(struct hci_uart *hu)

BT_DBG("hu %p", hu);

+ if (!hci_uart_has_flow_control(hu))
+ return -EOPNOTSUPP;
+
intel = kzalloc(sizeof(*intel), GFP_KERNEL);
if (!intel)
return -ENOMEM;
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index c915daf01a89..efeb8137ec67 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -299,6 +299,19 @@ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
return 0;
}

+/* Check the underlying device or tty has flow control support */
+bool hci_uart_has_flow_control(struct hci_uart *hu)
+{
+ /* serdev nodes check if the needed operations are present */
+ if (hu->serdev)
+ return true;
+
+ if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset)
+ return true;
+
+ return false;
+}
+
/* Flow control or un-flow control the device */
void hci_uart_set_flow_control(struct hci_uart *hu, bool enable)
{
diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c
index ffb00669346f..23791df081ba 100644
--- a/drivers/bluetooth/hci_mrvl.c
+++ b/drivers/bluetooth/hci_mrvl.c
@@ -66,6 +66,9 @@ static int mrvl_open(struct hci_uart *hu)

BT_DBG("hu %p", hu);

+ if (!hci_uart_has_flow_control(hu))
+ return -EOPNOTSUPP;
+
mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL);
if (!mrvl)
return -ENOMEM;
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 77004c29da08..f96e58de049b 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -450,6 +450,9 @@ static int qca_open(struct hci_uart *hu)

BT_DBG("hu %p qca_open", hu);

+ if (!hci_uart_has_flow_control(hu))
+ return -EOPNOTSUPP;
+
qca = kzalloc(sizeof(struct qca_data), GFP_KERNEL);
if (!qca)
return -ENOMEM;
diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h
index 00cab2fd7a1b..067a610f1372 100644
--- a/drivers/bluetooth/hci_uart.h
+++ b/drivers/bluetooth/hci_uart.h
@@ -118,6 +118,7 @@ int hci_uart_tx_wakeup(struct hci_uart *hu);
int hci_uart_init_ready(struct hci_uart *hu);
void hci_uart_init_work(struct work_struct *work);
void hci_uart_set_baudrate(struct hci_uart *hu, unsigned int speed);
+bool hci_uart_has_flow_control(struct hci_uart *hu);
void hci_uart_set_flow_control(struct hci_uart *hu, bool enable);
void hci_uart_set_speeds(struct hci_uart *hu, unsigned int init_speed,
unsigned int oper_speed);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index c1439019dd12..b9af2419006f 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3721,7 +3721,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)

freelist = domain_unmap(domain, start_pfn, last_pfn);

- if (intel_iommu_strict) {
+ if (intel_iommu_strict || !has_iova_flush_queue(&domain->iovad)) {
iommu_flush_iotlb_psi(iommu, domain, start_pfn,
nrpages, !freelist, 0);
/* free iova */
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 83fe2621effe..60348d707b99 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -65,9 +65,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
}
EXPORT_SYMBOL_GPL(init_iova_domain);

+bool has_iova_flush_queue(struct iova_domain *iovad)
+{
+ return !!iovad->fq;
+}
+
static void free_iova_flush_queue(struct iova_domain *iovad)
{
- if (!iovad->fq)
+ if (!has_iova_flush_queue(iovad))
return;

if (timer_pending(&iovad->fq_timer))
@@ -85,13 +90,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
{
+ struct iova_fq __percpu *queue;
int cpu;

atomic64_set(&iovad->fq_flush_start_cnt, 0);
atomic64_set(&iovad->fq_flush_finish_cnt, 0);

- iovad->fq = alloc_percpu(struct iova_fq);
- if (!iovad->fq)
+ queue = alloc_percpu(struct iova_fq);
+ if (!queue)
return -ENOMEM;

iovad->flush_cb = flush_cb;
@@ -100,13 +106,17 @@ int init_iova_flush_queue(struct iova_domain *iovad,
for_each_possible_cpu(cpu) {
struct iova_fq *fq;

- fq = per_cpu_ptr(iovad->fq, cpu);
+ fq = per_cpu_ptr(queue, cpu);
fq->head = 0;
fq->tail = 0;

spin_lock_init(&fq->lock);
}

+ smp_wmb();
+
+ iovad->fq = queue;
+
timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
atomic_set(&iovad->fq_timer_on, 0);

diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
index 6d05946b445e..060dc7fd66c1 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
@@ -1967,6 +1967,9 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id)

/* get endpoint base */
idx = ((ep_addr & 0x7f) - 1) * 2;
+ if (idx > 15)
+ return -EIO;
+
if (ep_addr & 0x80)
idx++;
attr = ep->desc.bmAttributes;
diff --git a/drivers/media/radio/radio-raremono.c b/drivers/media/radio/radio-raremono.c
index 9a5079d64c4a..729600c4a056 100644
--- a/drivers/media/radio/radio-raremono.c
+++ b/drivers/media/radio/radio-raremono.c
@@ -271,6 +271,14 @@ static int vidioc_g_frequency(struct file *file, void *priv,
return 0;
}

+static void raremono_device_release(struct v4l2_device *v4l2_dev)
+{
+ struct raremono_device *radio = to_raremono_dev(v4l2_dev);
+
+ kfree(radio->buffer);
+ kfree(radio);
+}
+
/* File system interface */
static const struct v4l2_file_operations usb_raremono_fops = {
.owner = THIS_MODULE,
@@ -295,12 +303,14 @@ static int usb_raremono_probe(struct usb_interface *intf,
struct raremono_device *radio;
int retval = 0;

- radio = devm_kzalloc(&intf->dev, sizeof(struct raremono_device), GFP_KERNEL);
- if (radio)
- radio->buffer = devm_kmalloc(&intf->dev, BUFFER_LENGTH, GFP_KERNEL);
-
- if (!radio || !radio->buffer)
+ radio = kzalloc(sizeof(*radio), GFP_KERNEL);
+ if (!radio)
+ return -ENOMEM;
+ radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL);
+ if (!radio->buffer) {
+ kfree(radio);
return -ENOMEM;
+ }

radio->usbdev = interface_to_usbdev(intf);
radio->intf = intf;
@@ -324,7 +334,8 @@ static int usb_raremono_probe(struct usb_interface *intf,
if (retval != 3 ||
(get_unaligned_be16(&radio->buffer[1]) & 0xfff) == 0x0242) {
dev_info(&intf->dev, "this is not Thanko's Raremono.\n");
- return -ENODEV;
+ retval = -ENODEV;
+ goto free_mem;
}

dev_info(&intf->dev, "Thanko's Raremono connected: (%04X:%04X)\n",
@@ -333,7 +344,7 @@ static int usb_raremono_probe(struct usb_interface *intf,
retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev);
if (retval < 0) {
dev_err(&intf->dev, "couldn't register v4l2_device\n");
- return retval;
+ goto free_mem;
}

mutex_init(&radio->lock);
@@ -345,6 +356,7 @@ static int usb_raremono_probe(struct usb_interface *intf,
radio->vdev.ioctl_ops = &usb_raremono_ioctl_ops;
radio->vdev.lock = &radio->lock;
radio->vdev.release = video_device_release_empty;
+ radio->v4l2_dev.release = raremono_device_release;

usb_set_intfdata(intf, &radio->v4l2_dev);

@@ -360,6 +372,10 @@ static int usb_raremono_probe(struct usb_interface *intf,
}
dev_err(&intf->dev, "could not register video device\n");
v4l2_device_unregister(&radio->v4l2_dev);
+
+free_mem:
+ kfree(radio->buffer);
+ kfree(radio);
return retval;
}

diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c
index 257ae0d8cfe2..e3f63299f85c 100644
--- a/drivers/media/usb/au0828/au0828-core.c
+++ b/drivers/media/usb/au0828/au0828-core.c
@@ -623,6 +623,12 @@ static int au0828_usb_probe(struct usb_interface *interface,
/* Setup */
au0828_card_setup(dev);

+ /*
+ * Store the pointer to the au0828_dev so it can be accessed in
+ * au0828_usb_disconnect
+ */
+ usb_set_intfdata(interface, dev);
+
/* Analog TV */
retval = au0828_analog_register(dev, interface);
if (retval) {
@@ -641,12 +647,6 @@ static int au0828_usb_probe(struct usb_interface *interface,
/* Remote controller */
au0828_rc_register(dev);

- /*
- * Store the pointer to the au0828_dev so it can be accessed in
- * au0828_usb_disconnect
- */
- usb_set_intfdata(interface, dev);
-
pr_info("Registered device AU0828 [%s]\n",
dev->board.name == NULL ? "Unset" : dev->board.name);

diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c
index a771e0a52610..f5b04594e209 100644
--- a/drivers/media/usb/cpia2/cpia2_usb.c
+++ b/drivers/media/usb/cpia2/cpia2_usb.c
@@ -902,7 +902,6 @@ static void cpia2_usb_disconnect(struct usb_interface *intf)
cpia2_unregister_camera(cam);
v4l2_device_disconnect(&cam->v4l2_dev);
mutex_unlock(&cam->v4l2_lock);
- v4l2_device_put(&cam->v4l2_dev);

if(cam->buffers) {
DBG("Wakeup waiting processes\n");
@@ -911,6 +910,8 @@ static void cpia2_usb_disconnect(struct usb_interface *intf)
wake_up_interruptible(&cam->wq_stream);
}

+ v4l2_device_put(&cam->v4l2_dev);
+
LOG("CPiA2 camera disconnected.\n");
}

diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
index 673fdca8d2da..fcb201a40920 100644
--- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
+++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
@@ -1680,7 +1680,7 @@ static int pvr2_decoder_enable(struct pvr2_hdw *hdw,int enablefl)
}
if (!hdw->flag_decoder_missed) {
pvr2_trace(PVR2_TRACE_ERROR_LEGS,
- "WARNING: No decoder present");
+ "***WARNING*** No decoder present");
hdw->flag_decoder_missed = !0;
trace_stbit("flag_decoder_missed",
hdw->flag_decoder_missed);
@@ -2366,7 +2366,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf,
if (hdw_desc->flag_is_experimental) {
pvr2_trace(PVR2_TRACE_INFO, "**********");
pvr2_trace(PVR2_TRACE_INFO,
- "WARNING: Support for this device (%s) is experimental.",
+ "***WARNING*** Support for this device (%s) is experimental.",
hdw_desc->description);
pvr2_trace(PVR2_TRACE_INFO,
"Important functionality might not be entirely working.");
diff --git a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c
index f3003ca05f4b..922c06279663 100644
--- a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c
+++ b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c
@@ -343,11 +343,11 @@ static int i2c_hack_cx25840(struct pvr2_hdw *hdw,

if ((ret != 0) || (*rdata == 0x04) || (*rdata == 0x0a)) {
pvr2_trace(PVR2_TRACE_ERROR_LEGS,
- "WARNING: Detected a wedged cx25840 chip; the device will not work.");
+ "***WARNING*** Detected a wedged cx25840 chip; the device will not work.");
pvr2_trace(PVR2_TRACE_ERROR_LEGS,
- "WARNING: Try power cycling the pvrusb2 device.");
+ "***WARNING*** Try power cycling the pvrusb2 device.");
pvr2_trace(PVR2_TRACE_ERROR_LEGS,
- "WARNING: Disabling further access to the device to prevent other foul-ups.");
+ "***WARNING*** Disabling further access to the device to prevent other foul-ups.");
// This blocks all further communication with the part.
hdw->i2c_func[0x44] = NULL;
pvr2_hdw_render_useless(hdw);
diff --git a/drivers/media/usb/pvrusb2/pvrusb2-std.c b/drivers/media/usb/pvrusb2/pvrusb2-std.c
index 6b651f8b54df..37dc299a1ca2 100644
--- a/drivers/media/usb/pvrusb2/pvrusb2-std.c
+++ b/drivers/media/usb/pvrusb2/pvrusb2-std.c
@@ -353,7 +353,7 @@ struct v4l2_standard *pvr2_std_create_enum(unsigned int *countptr,
bcnt = pvr2_std_id_to_str(buf,sizeof(buf),fmsk);
pvr2_trace(
PVR2_TRACE_ERROR_LEGS,
- "WARNING: Failed to classify the following standard(s): %.*s",
+ "***WARNING*** Failed to classify the following standard(s): %.*s",
bcnt,buf);
}

diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c
index d4803ff5a78a..f09a4ad2e9de 100644
--- a/drivers/net/wireless/ath/ath10k/usb.c
+++ b/drivers/net/wireless/ath/ath10k/usb.c
@@ -1025,7 +1025,7 @@ static int ath10k_usb_probe(struct usb_interface *interface,
}

/* TODO: remove this once USB support is fully implemented */
- ath10k_warn(ar, "WARNING: ath10k USB support is incomplete, don't expect anything to work!\n");
+ ath10k_warn(ar, "Warning: ath10k USB support is incomplete, don't expect anything to work!\n");

return 0;

diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
index 8febacb8fc54..0951564b6830 100644
--- a/drivers/pps/pps.c
+++ b/drivers/pps/pps.c
@@ -166,6 +166,14 @@ static long pps_cdev_ioctl(struct file *file,
pps->params.mode |= PPS_CANWAIT;
pps->params.api_version = PPS_API_VERS;

+ /*
+ * Clear unused fields of pps_kparams to avoid leaking
+ * uninitialized data of the PPS_SETPARAMS caller via
+ * PPS_GETPARAMS
+ */
+ pps->params.assert_off_tu.flags = 0;
+ pps->params.clear_off_tu.flags = 0;
+
spin_unlock_irq(&pps->lock);

break;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 32652b2c5e7c..75b926e70076 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -3059,11 +3059,14 @@ scsi_device_quiesce(struct scsi_device *sdev)
*/
WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);

- blk_set_preempt_only(q);
+ if (sdev->quiesced_by == current)
+ return 0;
+
+ blk_set_pm_only(q);

blk_mq_freeze_queue(q);
/*
- * Ensure that the effect of blk_set_preempt_only() will be visible
+ * Ensure that the effect of blk_set_pm_only() will be visible
* for percpu_ref_tryget() callers that occur after the queue
* unfreeze even if the queue was already frozen before this function
* was called. See also https://lwn.net/Articles/573497/.
@@ -3076,7 +3079,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
if (err == 0)
sdev->quiesced_by = current;
else
- blk_clear_preempt_only(q);
+ blk_clear_pm_only(q);
mutex_unlock(&sdev->state_mutex);

return err;
@@ -3099,8 +3102,10 @@ void scsi_device_resume(struct scsi_device *sdev)
* device deleted during suspend)
*/
mutex_lock(&sdev->state_mutex);
- sdev->quiesced_by = NULL;
- blk_clear_preempt_only(sdev->request_queue);
+ if (sdev->quiesced_by) {
+ sdev->quiesced_by = NULL;
+ blk_clear_pm_only(sdev->request_queue);
+ }
if (sdev->sdev_state == SDEV_QUIESCE)
scsi_device_set_state(sdev, SDEV_RUNNING);
mutex_unlock(&sdev->state_mutex);
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 03614ef64ca4..3f68edde0f03 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -3125,6 +3125,7 @@ void dwc2_hsotg_disconnect(struct dwc2_hsotg *hsotg)
hsotg->connected = 0;
hsotg->test_mode = 0;

+ /* all endpoints should be shutdown */
for (ep = 0; ep < hsotg->num_of_eps; ep++) {
if (hsotg->eps_in[ep])
kill_all_requests(hsotg, hsotg->eps_in[ep],
@@ -3175,6 +3176,7 @@ static void dwc2_hsotg_irq_fifoempty(struct dwc2_hsotg *hsotg, bool periodic)
GINTSTS_PTXFEMP | \
GINTSTS_RXFLVL)

+static int dwc2_hsotg_ep_disable(struct usb_ep *ep);
/**
* dwc2_hsotg_core_init - issue softreset to the core
* @hsotg: The device state
@@ -3189,13 +3191,23 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
u32 val;
u32 usbcfg;
u32 dcfg = 0;
+ int ep;

/* Kill any ep0 requests as controller will be reinitialized */
kill_all_requests(hsotg, hsotg->eps_out[0], -ECONNRESET);

- if (!is_usb_reset)
+ if (!is_usb_reset) {
if (dwc2_core_reset(hsotg, true))
return;
+ } else {
+ /* all endpoints should be shutdown */
+ for (ep = 1; ep < hsotg->num_of_eps; ep++) {
+ if (hsotg->eps_in[ep])
+ dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep);
+ if (hsotg->eps_out[ep])
+ dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep);
+ }
+ }

/*
* we must now enable ep0 ready for host detection and then
@@ -3993,7 +4005,6 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
struct dwc2_hsotg *hsotg = hs_ep->parent;
int dir_in = hs_ep->dir_in;
int index = hs_ep->index;
- unsigned long flags;
u32 epctrl_reg;
u32 ctrl;

@@ -4011,8 +4022,6 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)

epctrl_reg = dir_in ? DIEPCTL(index) : DOEPCTL(index);

- spin_lock_irqsave(&hsotg->lock, flags);
-
ctrl = dwc2_readl(hsotg, epctrl_reg);

if (ctrl & DXEPCTL_EPENA)
@@ -4035,10 +4044,22 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
hs_ep->fifo_index = 0;
hs_ep->fifo_size = 0;

- spin_unlock_irqrestore(&hsotg->lock, flags);
return 0;
}

+static int dwc2_hsotg_ep_disable_lock(struct usb_ep *ep)
+{
+ struct dwc2_hsotg_ep *hs_ep = our_ep(ep);
+ struct dwc2_hsotg *hsotg = hs_ep->parent;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&hsotg->lock, flags);
+ ret = dwc2_hsotg_ep_disable(ep);
+ spin_unlock_irqrestore(&hsotg->lock, flags);
+ return ret;
+}
+
/**
* on_list - check request is on the given endpoint
* @ep: The endpoint to check.
@@ -4186,7 +4207,7 @@ static int dwc2_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value)

static const struct usb_ep_ops dwc2_hsotg_ep_ops = {
.enable = dwc2_hsotg_ep_enable,
- .disable = dwc2_hsotg_ep_disable,
+ .disable = dwc2_hsotg_ep_disable_lock,
.alloc_request = dwc2_hsotg_ep_alloc_request,
.free_request = dwc2_hsotg_ep_free_request,
.queue = dwc2_hsotg_ep_queue_lock,
@@ -4326,9 +4347,9 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget)
/* all endpoints should be shutdown */
for (ep = 1; ep < hsotg->num_of_eps; ep++) {
if (hsotg->eps_in[ep])
- dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep);
+ dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep);
if (hsotg->eps_out[ep])
- dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep);
+ dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep);
}

spin_lock_irqsave(&hsotg->lock, flags);
@@ -4776,9 +4797,9 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg)

for (ep = 0; ep < hsotg->num_of_eps; ep++) {
if (hsotg->eps_in[ep])
- dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep);
+ dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep);
if (hsotg->eps_out[ep])
- dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep);
+ dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep);
}
}

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index ae704658b528..124356dc39e1 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -497,12 +497,6 @@ static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter,
return iov_iter_count(iter);
}

-static bool vhost_exceeds_weight(int pkts, int total_len)
-{
- return total_len >= VHOST_NET_WEIGHT ||
- pkts >= VHOST_NET_PKT_WEIGHT;
-}
-
static int get_tx_bufs(struct vhost_net *net,
struct vhost_net_virtqueue *nvq,
struct msghdr *msg,
@@ -557,7 +551,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
int err;
int sent_pkts = 0;

- for (;;) {
+ do {
bool busyloop_intr = false;

head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
@@ -598,11 +592,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
err, len);
if (++nvq->done_idx >= VHOST_NET_BATCH)
vhost_net_signal_used(nvq);
- if (vhost_exceeds_weight(++sent_pkts, total_len)) {
- vhost_poll_queue(&vq->poll);
- break;
- }
- }
+ } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));

vhost_net_signal_used(nvq);
}
@@ -626,7 +616,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
bool zcopy_used;
int sent_pkts = 0;

- for (;;) {
+ do {
bool busyloop_intr;

/* Release DMAs done buffers first */
@@ -701,11 +691,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
else
vhost_zerocopy_signal_used(net, vq);
vhost_net_tx_packet(net);
- if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) {
- vhost_poll_queue(&vq->poll);
- break;
- }
- }
+ } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
}

/* Expects to be always run from workqueue - which acts as
@@ -941,8 +927,11 @@ static void handle_rx(struct vhost_net *net)
vq->log : NULL;
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);

- while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
- &busyloop_intr))) {
+ do {
+ sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
+ &busyloop_intr);
+ if (!sock_len)
+ break;
sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen;
headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
@@ -1027,14 +1016,11 @@ static void handle_rx(struct vhost_net *net)
vhost_log_write(vq, vq_log, log, vhost_len,
vq->iov, in);
total_len += vhost_len;
- if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
- vhost_poll_queue(&vq->poll);
- goto out;
- }
- }
+ } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len)));
+
if (unlikely(busyloop_intr))
vhost_poll_queue(&vq->poll);
- else
+ else if (!sock_len)
vhost_net_enable_vq(net, vq);
out:
vhost_net_signal_used(nvq);
@@ -1115,7 +1101,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
vhost_net_buf_init(&n->vqs[i].rxq);
}
vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
- UIO_MAXIOV + VHOST_NET_BATCH);
+ UIO_MAXIOV + VHOST_NET_BATCH,
+ VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT);

vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 0cfa925be4ec..5e298d9287f1 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -57,6 +57,12 @@
#define VHOST_SCSI_PREALLOC_UPAGES 2048
#define VHOST_SCSI_PREALLOC_PROT_SGLS 2048

+/* Max number of requests before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others with
+ * request.
+ */
+#define VHOST_SCSI_WEIGHT 256
+
struct vhost_scsi_inflight {
/* Wait for the flush operation to finish */
struct completion comp;
@@ -811,7 +817,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
u64 tag;
u32 exp_data_len, data_direction;
unsigned int out = 0, in = 0;
- int head, ret, prot_bytes;
+ int head, ret, prot_bytes, c = 0;
size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
size_t out_size, in_size;
u16 lun;
@@ -830,7 +836,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)

vhost_disable_notify(&vs->dev, vq);

- for (;;) {
+ do {
head = vhost_get_vq_desc(vq, vq->iov,
ARRAY_SIZE(vq->iov), &out, &in,
NULL, NULL);
@@ -1045,7 +1051,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
*/
INIT_WORK(&cmd->work, vhost_scsi_submission_work);
queue_work(vhost_scsi_workqueue, &cmd->work);
- }
+ } while (likely(!vhost_exceeds_weight(vq, ++c, 0)));
out:
mutex_unlock(&vq->mutex);
}
@@ -1398,7 +1404,8 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
vqs[i] = &vs->vqs[i].vq;
vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
}
- vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV);
+ vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV,
+ VHOST_SCSI_WEIGHT, 0);

vhost_scsi_init_inflight(vs, NULL);

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c163bc15976a..0752f8dc47b1 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -413,8 +413,24 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
vhost_vq_free_iovecs(dev->vqs[i]);
}

+bool vhost_exceeds_weight(struct vhost_virtqueue *vq,
+ int pkts, int total_len)
+{
+ struct vhost_dev *dev = vq->dev;
+
+ if ((dev->byte_weight && total_len >= dev->byte_weight) ||
+ pkts >= dev->weight) {
+ vhost_poll_queue(&vq->poll);
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(vhost_exceeds_weight);
+
void vhost_dev_init(struct vhost_dev *dev,
- struct vhost_virtqueue **vqs, int nvqs, int iov_limit)
+ struct vhost_virtqueue **vqs, int nvqs,
+ int iov_limit, int weight, int byte_weight)
{
struct vhost_virtqueue *vq;
int i;
@@ -428,6 +444,8 @@ void vhost_dev_init(struct vhost_dev *dev,
dev->mm = NULL;
dev->worker = NULL;
dev->iov_limit = iov_limit;
+ dev->weight = weight;
+ dev->byte_weight = byte_weight;
init_llist_head(&dev->work_list);
init_waitqueue_head(&dev->wait);
INIT_LIST_HEAD(&dev->read_list);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 9490e7ddb340..27a78a9b8cc7 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -171,10 +171,13 @@ struct vhost_dev {
struct list_head pending_list;
wait_queue_head_t wait;
int iov_limit;
+ int weight;
+ int byte_weight;
};

+bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
- int nvqs, int iov_limit);
+ int nvqs, int iov_limit, int weight, int byte_weight);
long vhost_dev_set_owner(struct vhost_dev *dev);
bool vhost_dev_has_owner(struct vhost_dev *dev);
long vhost_dev_check_owner(struct vhost_dev *);
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index e440f87ae1d6..bab495d73195 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -21,6 +21,14 @@
#include "vhost.h"

#define VHOST_VSOCK_DEFAULT_HOST_CID 2
+/* Max number of bytes transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others. */
+#define VHOST_VSOCK_WEIGHT 0x80000
+/* Max number of packets transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others with
+ * small pkts.
+ */
+#define VHOST_VSOCK_PKT_WEIGHT 256

enum {
VHOST_VSOCK_FEATURES = VHOST_FEATURES,
@@ -78,6 +86,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
struct vhost_virtqueue *vq)
{
struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
+ int pkts = 0, total_len = 0;
bool added = false;
bool restart_tx = false;

@@ -89,7 +98,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
/* Avoid further vmexits, we're already processing the virtqueue */
vhost_disable_notify(&vsock->dev, vq);

- for (;;) {
+ do {
struct virtio_vsock_pkt *pkt;
struct iov_iter iov_iter;
unsigned out, in;
@@ -174,8 +183,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
*/
virtio_transport_deliver_tap_pkt(pkt);

+ total_len += pkt->len;
virtio_transport_free_pkt(pkt);
- }
+ } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
if (added)
vhost_signal(&vsock->dev, vq);

@@ -350,7 +360,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
dev);
struct virtio_vsock_pkt *pkt;
- int head;
+ int head, pkts = 0, total_len = 0;
unsigned int out, in;
bool added = false;

@@ -360,7 +370,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
goto out;

vhost_disable_notify(&vsock->dev, vq);
- for (;;) {
+ do {
u32 len;

if (!vhost_vsock_more_replies(vsock)) {
@@ -401,9 +411,11 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
else
virtio_transport_free_pkt(pkt);

- vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
+ len += sizeof(pkt->hdr);
+ vhost_add_used(vq, head, len);
+ total_len += len;
added = true;
- }
+ } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));

no_more_replies:
if (added)
@@ -531,7 +543,9 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;

- vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV);
+ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
+ UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
+ VHOST_VSOCK_WEIGHT);

file->private_data = vsock;
spin_lock_init(&vsock->send_pkt_list_lock);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c7542e8dd096..a11fa0b6b34d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1237,20 +1237,23 @@ static int send_cap_msg(struct cap_msg_args *arg)
}

/*
- * Queue cap releases when an inode is dropped from our cache. Since
- * inode is about to be destroyed, there is no need for i_ceph_lock.
+ * Queue cap releases when an inode is dropped from our cache.
*/
void ceph_queue_caps_release(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct rb_node *p;

+ /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU)
+ * may call __ceph_caps_issued_mask() on a freeing inode. */
+ spin_lock(&ci->i_ceph_lock);
p = rb_first(&ci->i_caps);
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
p = rb_next(p);
__ceph_remove_cap(cap, true);
}
+ spin_unlock(&ci->i_ceph_lock);
}

/*
diff --git a/fs/exec.c b/fs/exec.c
index 433b1257694a..561ea64829ec 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1826,7 +1826,7 @@ static int __do_execve_file(int fd, struct filename *filename,
membarrier_execve(current);
rseq_execve(current);
acct_update_integrals(current);
- task_numa_free(current);
+ task_numa_free(current, false);
free_bprm(bprm);
kfree(pathbuf);
if (filename)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index c092661147b3..0a2b59c1ecb3 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -416,10 +416,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
clp = nfs_match_client(cl_init);
if (clp) {
spin_unlock(&nn->nfs_client_lock);
- if (IS_ERR(clp))
- return clp;
if (new)
new->rpc_ops->free_client(new);
+ if (IS_ERR(clp))
+ return clp;
return nfs_found_client(cl_init, clp);
}
if (new) {
1139     diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
1140     index 8bfaa658b2c1..71b2e390becf 100644
1141     --- a/fs/nfs/dir.c
1142     +++ b/fs/nfs/dir.c
1143     @@ -1072,6 +1072,100 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1144     return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
1145     }
1146    
1147     +static int
1148     +nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
1149     + struct inode *inode, int error)
1150     +{
1151     + switch (error) {
1152     + case 1:
1153     + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
1154     + __func__, dentry);
1155     + return 1;
1156     + case 0:
1157     + nfs_mark_for_revalidate(dir);
1158     + if (inode && S_ISDIR(inode->i_mode)) {
1159     + /* Purge readdir caches. */
1160     + nfs_zap_caches(inode);
1161     + /*
1162     + * We can't d_drop the root of a disconnected tree:
1163     + * its d_hash is on the s_anon list and d_drop() would hide
1164     + * it from shrink_dcache_for_unmount(), leading to busy
1165     + * inodes on unmount and further oopses.
1166     + */
1167     + if (IS_ROOT(dentry))
1168     + return 1;
1169     + }
1170     + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
1171     + __func__, dentry);
1172     + return 0;
1173     + }
1174     + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
1175     + __func__, dentry, error);
1176     + return error;
1177     +}
1178     +
1179     +static int
1180     +nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
1181     + unsigned int flags)
1182     +{
1183     + int ret = 1;
1184     + if (nfs_neg_need_reval(dir, dentry, flags)) {
1185     + if (flags & LOOKUP_RCU)
1186     + return -ECHILD;
1187     + ret = 0;
1188     + }
1189     + return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
1190     +}
1191     +
1192     +static int
1193     +nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
1194     + struct inode *inode)
1195     +{
1196     + nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1197     + return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
1198     +}
1199     +
1200     +static int
1201     +nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
1202     + struct inode *inode)
1203     +{
1204     + struct nfs_fh *fhandle;
1205     + struct nfs_fattr *fattr;
1206     + struct nfs4_label *label;
1207     + int ret;
1208     +
1209     + ret = -ENOMEM;
1210     + fhandle = nfs_alloc_fhandle();
1211     + fattr = nfs_alloc_fattr();
1212     + label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
1213     + if (fhandle == NULL || fattr == NULL || IS_ERR(label))
1214     + goto out;
1215     +
1216     + ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
1217     + if (ret < 0) {
1218     + if (ret == -ESTALE || ret == -ENOENT)
1219     + ret = 0;
1220     + goto out;
1221     + }
1222     + ret = 0;
1223     + if (nfs_compare_fh(NFS_FH(inode), fhandle))
1224     + goto out;
1225     + if (nfs_refresh_inode(inode, fattr) < 0)
1226     + goto out;
1227     +
1228     + nfs_setsecurity(inode, fattr, label);
1229     + nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1230     +
1231     + /* set a readdirplus hint that we had a cache miss */
1232     + nfs_force_use_readdirplus(dir);
1233     + ret = 1;
1234     +out:
1235     + nfs_free_fattr(fattr);
1236     + nfs_free_fhandle(fhandle);
1237     + nfs4_label_free(label);
1238     + return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
1239     +}
1240     +
1241     /*
1242     * This is called every time the dcache has a lookup hit,
1243     * and we should check whether we can really trust that
1244     @@ -1083,58 +1177,36 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1245     * If the parent directory is seen to have changed, we throw out the
1246     * cached dentry and do a new lookup.
1247     */
1248     -static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1249     +static int
1250     +nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1251     + unsigned int flags)
1252     {
1253     - struct inode *dir;
1254     struct inode *inode;
1255     - struct dentry *parent;
1256     - struct nfs_fh *fhandle = NULL;
1257     - struct nfs_fattr *fattr = NULL;
1258     - struct nfs4_label *label = NULL;
1259     int error;
1260    
1261     - if (flags & LOOKUP_RCU) {
1262     - parent = READ_ONCE(dentry->d_parent);
1263     - dir = d_inode_rcu(parent);
1264     - if (!dir)
1265     - return -ECHILD;
1266     - } else {
1267     - parent = dget_parent(dentry);
1268     - dir = d_inode(parent);
1269     - }
1270     nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
1271     inode = d_inode(dentry);
1272    
1273     - if (!inode) {
1274     - if (nfs_neg_need_reval(dir, dentry, flags)) {
1275     - if (flags & LOOKUP_RCU)
1276     - return -ECHILD;
1277     - goto out_bad;
1278     - }
1279     - goto out_valid;
1280     - }
1281     + if (!inode)
1282     + return nfs_lookup_revalidate_negative(dir, dentry, flags);
1283    
1284     if (is_bad_inode(inode)) {
1285     - if (flags & LOOKUP_RCU)
1286     - return -ECHILD;
1287     dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1288     __func__, dentry);
1289     goto out_bad;
1290     }
1291    
1292     if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
1293     - goto out_set_verifier;
1294     + return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1295    
1296     /* Force a full look up iff the parent directory has changed */
1297     if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
1298     nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1299     error = nfs_lookup_verify_inode(inode, flags);
1300     if (error) {
1301     - if (flags & LOOKUP_RCU)
1302     - return -ECHILD;
1303     if (error == -ESTALE)
1304     - goto out_zap_parent;
1305     - goto out_error;
1306     + nfs_zap_caches(dir);
1307     + goto out_bad;
1308     }
1309     nfs_advise_use_readdirplus(dir);
1310     goto out_valid;
1311     @@ -1146,81 +1218,45 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1312     if (NFS_STALE(inode))
1313     goto out_bad;
1314    
1315     - error = -ENOMEM;
1316     - fhandle = nfs_alloc_fhandle();
1317     - fattr = nfs_alloc_fattr();
1318     - if (fhandle == NULL || fattr == NULL)
1319     - goto out_error;
1320     -
1321     - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
1322     - if (IS_ERR(label))
1323     - goto out_error;
1324     -
1325     trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
1326     - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
1327     + error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
1328     trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
1329     - if (error == -ESTALE || error == -ENOENT)
1330     - goto out_bad;
1331     - if (error)
1332     - goto out_error;
1333     - if (nfs_compare_fh(NFS_FH(inode), fhandle))
1334     - goto out_bad;
1335     - if ((error = nfs_refresh_inode(inode, fattr)) != 0)
1336     - goto out_bad;
1337     -
1338     - nfs_setsecurity(inode, fattr, label);
1339     -
1340     - nfs_free_fattr(fattr);
1341     - nfs_free_fhandle(fhandle);
1342     - nfs4_label_free(label);
1343     + return error;
1344     +out_valid:
1345     + return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
1346     +out_bad:
1347     + if (flags & LOOKUP_RCU)
1348     + return -ECHILD;
1349     + return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
1350     +}
1351    
1352     - /* set a readdirplus hint that we had a cache miss */
1353     - nfs_force_use_readdirplus(dir);
1354     +static int
1355     +__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
1356     + int (*reval)(struct inode *, struct dentry *, unsigned int))
1357     +{
1358     + struct dentry *parent;
1359     + struct inode *dir;
1360     + int ret;
1361    
1362     -out_set_verifier:
1363     - nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1364     - out_valid:
1365     if (flags & LOOKUP_RCU) {
1366     + parent = READ_ONCE(dentry->d_parent);
1367     + dir = d_inode_rcu(parent);
1368     + if (!dir)
1369     + return -ECHILD;
1370     + ret = reval(dir, dentry, flags);
1371     if (parent != READ_ONCE(dentry->d_parent))
1372     return -ECHILD;
1373     - } else
1374     + } else {
1375     + parent = dget_parent(dentry);
1376     + ret = reval(d_inode(parent), dentry, flags);
1377     dput(parent);
1378     - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
1379     - __func__, dentry);
1380     - return 1;
1381     -out_zap_parent:
1382     - nfs_zap_caches(dir);
1383     - out_bad:
1384     - WARN_ON(flags & LOOKUP_RCU);
1385     - nfs_free_fattr(fattr);
1386     - nfs_free_fhandle(fhandle);
1387     - nfs4_label_free(label);
1388     - nfs_mark_for_revalidate(dir);
1389     - if (inode && S_ISDIR(inode->i_mode)) {
1390     - /* Purge readdir caches. */
1391     - nfs_zap_caches(inode);
1392     - /*
1393     - * We can't d_drop the root of a disconnected tree:
1394     - * its d_hash is on the s_anon list and d_drop() would hide
1395     - * it from shrink_dcache_for_unmount(), leading to busy
1396     - * inodes on unmount and further oopses.
1397     - */
1398     - if (IS_ROOT(dentry))
1399     - goto out_valid;
1400     }
1401     - dput(parent);
1402     - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
1403     - __func__, dentry);
1404     - return 0;
1405     -out_error:
1406     - WARN_ON(flags & LOOKUP_RCU);
1407     - nfs_free_fattr(fattr);
1408     - nfs_free_fhandle(fhandle);
1409     - nfs4_label_free(label);
1410     - dput(parent);
1411     - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
1412     - __func__, dentry, error);
1413     - return error;
1414     + return ret;
1415     +}
1416     +
1417     +static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1418     +{
1419     + return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
1420     }
1421    
1422     /*
1423     @@ -1579,62 +1615,55 @@ no_open:
1424     }
1425     EXPORT_SYMBOL_GPL(nfs_atomic_open);
1426    
1427     -static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1428     +static int
1429     +nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1430     + unsigned int flags)
1431     {
1432     struct inode *inode;
1433     - int ret = 0;
1434    
1435     if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
1436     - goto no_open;
1437     + goto full_reval;
1438     if (d_mountpoint(dentry))
1439     - goto no_open;
1440     - if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
1441     - goto no_open;
1442     + goto full_reval;
1443    
1444     inode = d_inode(dentry);
1445    
1446     /* We can't create new files in nfs_open_revalidate(), so we
1447     * optimize away revalidation of negative dentries.
1448     */
1449     - if (inode == NULL) {
1450     - struct dentry *parent;
1451     - struct inode *dir;
1452     -
1453     - if (flags & LOOKUP_RCU) {
1454     - parent = READ_ONCE(dentry->d_parent);
1455     - dir = d_inode_rcu(parent);
1456     - if (!dir)
1457     - return -ECHILD;
1458     - } else {
1459     - parent = dget_parent(dentry);
1460     - dir = d_inode(parent);
1461     - }
1462     - if (!nfs_neg_need_reval(dir, dentry, flags))
1463     - ret = 1;
1464     - else if (flags & LOOKUP_RCU)
1465     - ret = -ECHILD;
1466     - if (!(flags & LOOKUP_RCU))
1467     - dput(parent);
1468     - else if (parent != READ_ONCE(dentry->d_parent))
1469     - return -ECHILD;
1470     - goto out;
1471     - }
1472     + if (inode == NULL)
1473     + goto full_reval;
1474     +
1475     + if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
1476     + return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1477    
1478     /* NFS only supports OPEN on regular files */
1479     if (!S_ISREG(inode->i_mode))
1480     - goto no_open;
1481     + goto full_reval;
1482     +
1483     /* We cannot do exclusive creation on a positive dentry */
1484     - if (flags & LOOKUP_EXCL)
1485     - goto no_open;
1486     + if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
1487     + goto reval_dentry;
1488     +
1489     + /* Check if the directory changed */
1490     + if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
1491     + goto reval_dentry;
1492    
1493     /* Let f_op->open() actually open (and revalidate) the file */
1494     - ret = 1;
1495     + return 1;
1496     +reval_dentry:
1497     + if (flags & LOOKUP_RCU)
1498     + return -ECHILD;
1499     + return nfs_lookup_revalidate_dentry(dir, dentry, inode);
1500    
1501     -out:
1502     - return ret;
1503     +full_reval:
1504     + return nfs_do_lookup_revalidate(dir, dentry, flags);
1505     +}
1506    
1507     -no_open:
1508     - return nfs_lookup_revalidate(dentry, flags);
1509     +static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1510     +{
1511     + return __nfs_lookup_revalidate(dentry, flags,
1512     + nfs4_do_lookup_revalidate);
1513     }
1514    
1515     #endif /* CONFIG_NFSV4 */
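
The __nfs_lookup_revalidate() wrapper introduced above factors the RCU-walk versus ref-walk parent handling out of both the NFS and NFSv4 revalidate paths. A minimal sketch of the same pattern, with my_revalidate()/my_reval() as hypothetical stand-ins for the real callbacks:

    /* Sketch of the RCU-walk/ref-walk split used by __nfs_lookup_revalidate().
     * In RCU-walk mode no references may be taken and the parent must be
     * re-checked afterwards; in ref-walk mode a plain dget_parent() is safe.
     */
    static int my_revalidate(struct dentry *dentry, unsigned int flags)
    {
            struct dentry *parent;
            struct inode *dir;
            int ret;

            if (flags & LOOKUP_RCU) {
                    parent = READ_ONCE(dentry->d_parent);
                    dir = d_inode_rcu(parent);
                    if (!dir)
                            return -ECHILD;         /* retry in ref-walk mode */
                    ret = my_reval(dir, dentry, flags);
                    if (parent != READ_ONCE(dentry->d_parent))
                            ret = -ECHILD;          /* parent changed under us */
            } else {
                    parent = dget_parent(dentry);
                    ret = my_reval(d_inode(parent), dentry, flags);
                    dput(parent);
            }
            return ret;
    }
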
1516     diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
1517     index 1de855e0ae61..904e08bbb289 100644
1518     --- a/fs/nfs/nfs4proc.c
1519     +++ b/fs/nfs/nfs4proc.c
1520     @@ -1355,12 +1355,20 @@ static bool nfs4_mode_match_open_stateid(struct nfs4_state *state,
1521     return false;
1522     }
1523    
1524     -static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
1525     +static int can_open_cached(struct nfs4_state *state, fmode_t mode,
1526     + int open_mode, enum open_claim_type4 claim)
1527     {
1528     int ret = 0;
1529    
1530     if (open_mode & (O_EXCL|O_TRUNC))
1531     goto out;
1532     + switch (claim) {
1533     + case NFS4_OPEN_CLAIM_NULL:
1534     + case NFS4_OPEN_CLAIM_FH:
1535     + goto out;
1536     + default:
1537     + break;
1538     + }
1539     switch (mode & (FMODE_READ|FMODE_WRITE)) {
1540     case FMODE_READ:
1541     ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0
1542     @@ -1753,7 +1761,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1543    
1544     for (;;) {
1545     spin_lock(&state->owner->so_lock);
1546     - if (can_open_cached(state, fmode, open_mode)) {
1547     + if (can_open_cached(state, fmode, open_mode, claim)) {
1548     update_open_stateflags(state, fmode);
1549     spin_unlock(&state->owner->so_lock);
1550     goto out_return_state;
1551     @@ -2282,7 +2290,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1552     if (data->state != NULL) {
1553     struct nfs_delegation *delegation;
1554    
1555     - if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
1556     + if (can_open_cached(data->state, data->o_arg.fmode,
1557     + data->o_arg.open_flags, claim))
1558     goto out_no_action;
1559     rcu_read_lock();
1560     delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
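
The new claim argument to can_open_cached() above gates cached opens on the claim type. The apparent rationale: an open that identifies the file by name (NFS4_OPEN_CLAIM_NULL) or by raw filehandle (NFS4_OPEN_CLAIM_FH) must reach the server so that the OPEN itself revalidates a possibly stale dentry, while reclaim-style claims merely re-present state the server already knows. An annotated reading of the new switch:

    switch (claim) {
    case NFS4_OPEN_CLAIM_NULL:      /* open-by-name: server must see it */
    case NFS4_OPEN_CLAIM_FH:        /* open-by-filehandle: likewise */
            goto out;               /* never satisfy from cached state */
    default:                        /* e.g. NFS4_OPEN_CLAIM_PREVIOUS */
            break;                  /* a cached open stateid may be reused */
    }
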
1561     diff --git a/fs/proc/base.c b/fs/proc/base.c
1562     index a7fbda72afeb..3b9b726b1a6c 100644
1563     --- a/fs/proc/base.c
1564     +++ b/fs/proc/base.c
1565     @@ -205,12 +205,53 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
1566     return result;
1567     }
1568    
1569     +/*
1570     + * If the user used setproctitle(), we just get the string from
1571     + * user space at arg_start, and limit it to a maximum of one page.
1572     + */
1573     +static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
1574     + size_t count, unsigned long pos,
1575     + unsigned long arg_start)
1576     +{
1577     + char *page;
1578     + int ret, got;
1579     +
1580     + if (pos >= PAGE_SIZE)
1581     + return 0;
1582     +
1583     + page = (char *)__get_free_page(GFP_KERNEL);
1584     + if (!page)
1585     + return -ENOMEM;
1586     +
1587     + ret = 0;
1588     + got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
1589     + if (got > 0) {
1590     + int len = strnlen(page, got);
1591     +
1592     + /* Include the NUL character if it was found */
1593     + if (len < got)
1594     + len++;
1595     +
1596     + if (len > pos) {
1597     + len -= pos;
1598     + if (len > count)
1599     + len = count;
1600     + len -= copy_to_user(buf, page+pos, len);
1601     + if (!len)
1602     + len = -EFAULT;
1603     + ret = len;
1604     + }
1605     + }
1606     + free_page((unsigned long)page);
1607     + return ret;
1608     +}
1609     +
1610     static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
1611     size_t count, loff_t *ppos)
1612     {
1613     unsigned long arg_start, arg_end, env_start, env_end;
1614     unsigned long pos, len;
1615     - char *page;
1616     + char *page, c;
1617    
1618     /* Check if process spawned far enough to have cmdline. */
1619     if (!mm->env_end)
1620     @@ -227,28 +268,42 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
1621     return 0;
1622    
1623     /*
1624     - * We have traditionally allowed the user to re-write
1625     - * the argument strings and overflow the end result
1626     - * into the environment section. But only do that if
1627     - * the environment area is contiguous to the arguments.
1628     + * We allow setproctitle() to overwrite the argument
1629     + * strings, and overflow past the original end. But
1630     + * only when it overflows into the environment area.
1631     */
1632     - if (env_start != arg_end || env_start >= env_end)
1633     + if (env_start != arg_end || env_end < env_start)
1634     env_start = env_end = arg_end;
1635     -
1636     - /* .. and limit it to a maximum of one page of slop */
1637     - if (env_end >= arg_end + PAGE_SIZE)
1638     - env_end = arg_end + PAGE_SIZE - 1;
1639     + len = env_end - arg_start;
1640    
1641     /* We're not going to care if "*ppos" has high bits set */
1642     - pos = arg_start + *ppos;
1643     -
1644     - /* .. but we do check the result is in the proper range */
1645     - if (pos < arg_start || pos >= env_end)
1646     + pos = *ppos;
1647     + if (pos >= len)
1648     return 0;
1649     + if (count > len - pos)
1650     + count = len - pos;
1651     + if (!count)
1652     + return 0;
1653     +
1654     + /*
1655     + * Magical special case: if the argv[] end byte is not
1656     + * zero, the user has overwritten it with setproctitle(3).
1657     + *
1658     + * Possible future enhancement: do this only once when
1659     + * pos is 0, and set a flag in the 'struct file'.
1660     + */
1661     + if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
1662     + return get_mm_proctitle(mm, buf, count, pos, arg_start);
1663    
1664     - /* .. and we never go past env_end */
1665     - if (env_end - pos < count)
1666     - count = env_end - pos;
1667     + /*
1668     + * For the non-setproctitle() case we limit things strictly
1669     + * to the [arg_start, arg_end[ range.
1670     + */
1671     + pos += arg_start;
1672     + if (pos < arg_start || pos >= arg_end)
1673     + return 0;
1674     + if (count > arg_end - pos)
1675     + count = arg_end - pos;
1676    
1677     page = (char *)__get_free_page(GFP_KERNEL);
1678     if (!page)
1679     @@ -258,48 +313,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
1680     while (count) {
1681     int got;
1682     size_t size = min_t(size_t, PAGE_SIZE, count);
1683     - long offset;
1684    
1685     - /*
1686     - * Are we already starting past the official end?
1687     - * We always include the last byte that is *supposed*
1688     - * to be NUL
1689     - */
1690     - offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
1691     -
1692     - got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
1693     - if (got <= offset)
1694     + got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
1695     + if (got <= 0)
1696     break;
1697     - got -= offset;
1698     -
1699     - /* Don't walk past a NUL character once you hit arg_end */
1700     - if (pos + got >= arg_end) {
1701     - int n = 0;
1702     -
1703     - /*
1704     - * If we started before 'arg_end' but ended up
1705     - * at or after it, we start the NUL character
1706     - * check at arg_end-1 (where we expect the normal
1707     - * EOF to be).
1708     - *
1709     - * NOTE! This is smaller than 'got', because
1710     - * pos + got >= arg_end
1711     - */
1712     - if (pos < arg_end)
1713     - n = arg_end - pos - 1;
1714     -
1715     - /* Cut off at first NUL after 'n' */
1716     - got = n + strnlen(page+n, offset+got-n);
1717     - if (got < offset)
1718     - break;
1719     - got -= offset;
1720     -
1721     - /* Include the NUL if it existed */
1722     - if (got < size)
1723     - got++;
1724     - }
1725     -
1726     - got -= copy_to_user(buf, page+offset, got);
1727     + got -= copy_to_user(buf, page, got);
1728     if (unlikely(!got)) {
1729     if (!len)
1730     len = -EFAULT;
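
The get_mm_cmdline() rework above keys off a single observation: if the byte at arg_end - 1 is no longer NUL, the process has rewritten its argument area setproctitle(3)-style, and the title should be read from arg_start instead, capped at one page by get_mm_proctitle(). A small userspace sketch of the in-place rewrite such code performs; glibc has no setproctitle(), so this overwrites argv[0] directly and stays within the original storage (it is a write that runs past arg_end into the environment area that actually trips the special case above):

    /* Minimal demo: rewrite argv[0] in place, then read back
     * /proc/self/cmdline. Assumes Linux's contiguous argv layout.
     */
    #include <stdio.h>
    #include <string.h>

    int main(int argc, char **argv)
    {
            char buf[256];
            size_t avail = strlen(argv[0]); /* writable bytes we own */
            FILE *f;
            size_t n;

            (void)argc;
            snprintf(argv[0], avail + 1, "%s", "demo title");

            f = fopen("/proc/self/cmdline", "r");
            if (!f)
                    return 1;
            n = fread(buf, 1, sizeof(buf) - 1, f);
            buf[n] = '\0';
            printf("cmdline now starts with: %s\n", buf);
            fclose(f);
            return 0;
    }
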
1731     diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
1732     index 6980014357d4..d51e10f50e75 100644
1733     --- a/include/linux/blkdev.h
1734     +++ b/include/linux/blkdev.h
1735     @@ -504,6 +504,12 @@ struct request_queue {
1736     * various queue flags, see QUEUE_* below
1737     */
1738     unsigned long queue_flags;
1739     + /*
1740     + * Number of contexts that have called blk_set_pm_only(). If this
1741     + * counter is above zero then only RQF_PM and RQF_PREEMPT requests are
1742     + * processed.
1743     + */
1744     + atomic_t pm_only;
1745    
1746     /*
1747     * ida allocated id for this queue. Used to index queues from
1748     @@ -698,7 +704,6 @@ struct request_queue {
1749     #define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */
1750     #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */
1751     #define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */
1752     -#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */
1753    
1754     #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
1755     (1 << QUEUE_FLAG_SAME_COMP) | \
1756     @@ -736,12 +741,11 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
1757     ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
1758     REQ_FAILFAST_DRIVER))
1759     #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
1760     -#define blk_queue_preempt_only(q) \
1761     - test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
1762     +#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only)
1763     #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
1764    
1765     -extern int blk_set_preempt_only(struct request_queue *q);
1766     -extern void blk_clear_preempt_only(struct request_queue *q);
1767     +extern void blk_set_pm_only(struct request_queue *q);
1768     +extern void blk_clear_pm_only(struct request_queue *q);
1769    
1770     static inline int queue_in_flight(struct request_queue *q)
1771     {
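
blk_set_pm_only()/blk_clear_pm_only() replace the old PREEMPT_ONLY queue flag with a counter, so set/clear calls from independent contexts nest instead of clobbering one another, and blk_queue_pm_only() is simply a read of that counter. A sketch of how a runtime-PM path might bracket a suspend, with my_driver_suspend()/my_driver_resume() as hypothetical callers:

    static void my_driver_suspend(struct request_queue *q)
    {
            blk_set_pm_only(q);     /* pm_only > 0: only RQF_PM/RQF_PREEMPT
                                     * requests are processed from here on */
            /* ... quiesce the hardware ... */
    }

    static void my_driver_resume(struct request_queue *q)
    {
            /* ... wake the hardware ... */
            blk_clear_pm_only(q);   /* must balance the set; the queue
                                     * reopens when the counter hits zero */
    }
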
1772     diff --git a/include/linux/iova.h b/include/linux/iova.h
1773     index 928442dda565..84fbe73d2ec0 100644
1774     --- a/include/linux/iova.h
1775     +++ b/include/linux/iova.h
1776     @@ -156,6 +156,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
1777     void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
1778     void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
1779     unsigned long start_pfn);
1780     +bool has_iova_flush_queue(struct iova_domain *iovad);
1781     int init_iova_flush_queue(struct iova_domain *iovad,
1782     iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
1783     struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
1784     @@ -236,6 +237,11 @@ static inline void init_iova_domain(struct iova_domain *iovad,
1785     {
1786     }
1787    
1788     +static inline bool has_iova_flush_queue(struct iova_domain *iovad)
1789     +{
1790     + return false;
1791     +}
1792     +
1793     static inline int init_iova_flush_queue(struct iova_domain *iovad,
1794     iova_flush_cb flush_cb,
1795     iova_entry_dtor entry_dtor)
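
has_iova_flush_queue() gives callers a way to test for an already-initialized deferred-flush queue before (re)creating one; the stub for kernels without IOVA support simply reports false. A sketch of the intended guard, with my_domain_enable_fq() hypothetical:

    static int my_domain_enable_fq(struct iova_domain *iovad,
                                   iova_flush_cb flush_cb,
                                   iova_entry_dtor entry_dtor)
    {
            if (has_iova_flush_queue(iovad))
                    return 0;       /* already set up, don't reinitialize */
            return init_iova_flush_queue(iovad, flush_cb, entry_dtor);
    }
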
1796     diff --git a/include/linux/sched.h b/include/linux/sched.h
1797     index 5dc024e28397..20f5ba262cc0 100644
1798     --- a/include/linux/sched.h
1799     +++ b/include/linux/sched.h
1800     @@ -1023,7 +1023,15 @@ struct task_struct {
1801     u64 last_sum_exec_runtime;
1802     struct callback_head numa_work;
1803    
1804     - struct numa_group *numa_group;
1805     + /*
1806     + * This pointer is only modified for current in syscall and
1807     + * pagefault context (and for tasks being destroyed), so it can be read
1808     + * from any of the following contexts:
1809     + * - RCU read-side critical section
1810     + * - current->numa_group from everywhere
1811     + * - task's runqueue locked, task not running
1812     + */
1813     + struct numa_group __rcu *numa_group;
1814    
1815     /*
1816     * numa_faults is an array split into four regions:
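
The locking rules in the comment above reduce to three safe ways to read ->numa_group: under rcu_read_lock(), as current reading its own pointer, or with the task's runqueue locked while the task is not running. A sketch of the plain RCU variant, as it would appear inside kernel/sched/fair.c (my_group_total_faults() is hypothetical):

    static unsigned long my_group_total_faults(struct task_struct *p)
    {
            struct numa_group *ng;
            unsigned long total = 0;

            rcu_read_lock();
            ng = rcu_dereference(p->numa_group);
            if (ng)
                    total = ng->total_faults; /* only valid inside the
                                               * read-side section */
            rcu_read_unlock();
            return total;
    }
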
1817     diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
1818     index e7dd04a84ba8..3988762efe15 100644
1819     --- a/include/linux/sched/numa_balancing.h
1820     +++ b/include/linux/sched/numa_balancing.h
1821     @@ -19,7 +19,7 @@
1822     extern void task_numa_fault(int last_node, int node, int pages, int flags);
1823     extern pid_t task_numa_group_id(struct task_struct *p);
1824     extern void set_numabalancing_state(bool enabled);
1825     -extern void task_numa_free(struct task_struct *p);
1826     +extern void task_numa_free(struct task_struct *p, bool final);
1827     extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
1828     int src_nid, int dst_cpu);
1829     #else
1830     @@ -34,7 +34,7 @@ static inline pid_t task_numa_group_id(struct task_struct *p)
1831     static inline void set_numabalancing_state(bool enabled)
1832     {
1833     }
1834     -static inline void task_numa_free(struct task_struct *p)
1835     +static inline void task_numa_free(struct task_struct *p, bool final)
1836     {
1837     }
1838     static inline bool should_numa_migrate_memory(struct task_struct *p,
1839     diff --git a/kernel/fork.c b/kernel/fork.c
1840     index 69874db3fba8..e76ce81c9c75 100644
1841     --- a/kernel/fork.c
1842     +++ b/kernel/fork.c
1843     @@ -679,7 +679,7 @@ void __put_task_struct(struct task_struct *tsk)
1844     WARN_ON(tsk == current);
1845    
1846     cgroup_free(tsk);
1847     - task_numa_free(tsk);
1848     + task_numa_free(tsk, true);
1849     security_task_free(tsk);
1850     exit_creds(tsk);
1851     delayacct_tsk_free(tsk);
1852     diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
1853     index 4a433608ba74..75f322603d44 100644
1854     --- a/kernel/sched/fair.c
1855     +++ b/kernel/sched/fair.c
1856     @@ -1053,6 +1053,21 @@ struct numa_group {
1857     unsigned long faults[0];
1858     };
1859    
1860     +/*
1861     + * For functions that can be called in multiple contexts that permit reading
1862     + * ->numa_group (see struct task_struct for locking rules).
1863     + */
1864     +static struct numa_group *deref_task_numa_group(struct task_struct *p)
1865     +{
1866     + return rcu_dereference_check(p->numa_group, p == current ||
1867     + (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
1868     +}
1869     +
1870     +static struct numa_group *deref_curr_numa_group(struct task_struct *p)
1871     +{
1872     + return rcu_dereference_protected(p->numa_group, p == current);
1873     +}
1874     +
1875     static inline unsigned long group_faults_priv(struct numa_group *ng);
1876     static inline unsigned long group_faults_shared(struct numa_group *ng);
1877    
1878     @@ -1096,10 +1111,12 @@ static unsigned int task_scan_start(struct task_struct *p)
1879     {
1880     unsigned long smin = task_scan_min(p);
1881     unsigned long period = smin;
1882     + struct numa_group *ng;
1883    
1884     /* Scale the maximum scan period with the amount of shared memory. */
1885     - if (p->numa_group) {
1886     - struct numa_group *ng = p->numa_group;
1887     + rcu_read_lock();
1888     + ng = rcu_dereference(p->numa_group);
1889     + if (ng) {
1890     unsigned long shared = group_faults_shared(ng);
1891     unsigned long private = group_faults_priv(ng);
1892    
1893     @@ -1107,6 +1124,7 @@ static unsigned int task_scan_start(struct task_struct *p)
1894     period *= shared + 1;
1895     period /= private + shared + 1;
1896     }
1897     + rcu_read_unlock();
1898    
1899     return max(smin, period);
1900     }
1901     @@ -1115,13 +1133,14 @@ static unsigned int task_scan_max(struct task_struct *p)
1902     {
1903     unsigned long smin = task_scan_min(p);
1904     unsigned long smax;
1905     + struct numa_group *ng;
1906    
1907     /* Watch for min being lower than max due to floor calculations */
1908     smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p);
1909    
1910     /* Scale the maximum scan period with the amount of shared memory. */
1911     - if (p->numa_group) {
1912     - struct numa_group *ng = p->numa_group;
1913     + ng = deref_curr_numa_group(p);
1914     + if (ng) {
1915     unsigned long shared = group_faults_shared(ng);
1916     unsigned long private = group_faults_priv(ng);
1917     unsigned long period = smax;
1918     @@ -1153,7 +1172,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
1919     p->numa_scan_period = sysctl_numa_balancing_scan_delay;
1920     p->numa_work.next = &p->numa_work;
1921     p->numa_faults = NULL;
1922     - p->numa_group = NULL;
1923     + RCU_INIT_POINTER(p->numa_group, NULL);
1924     p->last_task_numa_placement = 0;
1925     p->last_sum_exec_runtime = 0;
1926    
1927     @@ -1200,7 +1219,16 @@ static void account_numa_dequeue(struct rq *rq, struct task_struct *p)
1928    
1929     pid_t task_numa_group_id(struct task_struct *p)
1930     {
1931     - return p->numa_group ? p->numa_group->gid : 0;
1932     + struct numa_group *ng;
1933     + pid_t gid = 0;
1934     +
1935     + rcu_read_lock();
1936     + ng = rcu_dereference(p->numa_group);
1937     + if (ng)
1938     + gid = ng->gid;
1939     + rcu_read_unlock();
1940     +
1941     + return gid;
1942     }
1943    
1944     /*
1945     @@ -1225,11 +1253,13 @@ static inline unsigned long task_faults(struct task_struct *p, int nid)
1946    
1947     static inline unsigned long group_faults(struct task_struct *p, int nid)
1948     {
1949     - if (!p->numa_group)
1950     + struct numa_group *ng = deref_task_numa_group(p);
1951     +
1952     + if (!ng)
1953     return 0;
1954    
1955     - return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
1956     - p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)];
1957     + return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
1958     + ng->faults[task_faults_idx(NUMA_MEM, nid, 1)];
1959     }
1960    
1961     static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
1962     @@ -1367,12 +1397,13 @@ static inline unsigned long task_weight(struct task_struct *p, int nid,
1963     static inline unsigned long group_weight(struct task_struct *p, int nid,
1964     int dist)
1965     {
1966     + struct numa_group *ng = deref_task_numa_group(p);
1967     unsigned long faults, total_faults;
1968    
1969     - if (!p->numa_group)
1970     + if (!ng)
1971     return 0;
1972    
1973     - total_faults = p->numa_group->total_faults;
1974     + total_faults = ng->total_faults;
1975    
1976     if (!total_faults)
1977     return 0;
1978     @@ -1386,7 +1417,7 @@ static inline unsigned long group_weight(struct task_struct *p, int nid,
1979     bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
1980     int src_nid, int dst_cpu)
1981     {
1982     - struct numa_group *ng = p->numa_group;
1983     + struct numa_group *ng = deref_curr_numa_group(p);
1984     int dst_nid = cpu_to_node(dst_cpu);
1985     int last_cpupid, this_cpupid;
1986    
1987     @@ -1592,13 +1623,14 @@ static bool load_too_imbalanced(long src_load, long dst_load,
1988     static void task_numa_compare(struct task_numa_env *env,
1989     long taskimp, long groupimp, bool maymove)
1990     {
1991     + struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p);
1992     struct rq *dst_rq = cpu_rq(env->dst_cpu);
1993     + long imp = p_ng ? groupimp : taskimp;
1994     struct task_struct *cur;
1995     long src_load, dst_load;
1996     - long load;
1997     - long imp = env->p->numa_group ? groupimp : taskimp;
1998     - long moveimp = imp;
1999     int dist = env->dist;
2000     + long moveimp = imp;
2001     + long load;
2002    
2003     if (READ_ONCE(dst_rq->numa_migrate_on))
2004     return;
2005     @@ -1637,21 +1669,22 @@ static void task_numa_compare(struct task_numa_env *env,
2006     * If dst and source tasks are in the same NUMA group, or not
2007     * in any group then look only at task weights.
2008     */
2009     - if (cur->numa_group == env->p->numa_group) {
2010     + cur_ng = rcu_dereference(cur->numa_group);
2011     + if (cur_ng == p_ng) {
2012     imp = taskimp + task_weight(cur, env->src_nid, dist) -
2013     task_weight(cur, env->dst_nid, dist);
2014     /*
2015     * Add some hysteresis to prevent swapping the
2016     * tasks within a group over tiny differences.
2017     */
2018     - if (cur->numa_group)
2019     + if (cur_ng)
2020     imp -= imp / 16;
2021     } else {
2022     /*
2023     * Compare the group weights. If a task is all by itself
2024     * (not part of a group), use the task weight instead.
2025     */
2026     - if (cur->numa_group && env->p->numa_group)
2027     + if (cur_ng && p_ng)
2028     imp += group_weight(cur, env->src_nid, dist) -
2029     group_weight(cur, env->dst_nid, dist);
2030     else
2031     @@ -1749,11 +1782,12 @@ static int task_numa_migrate(struct task_struct *p)
2032     .best_imp = 0,
2033     .best_cpu = -1,
2034     };
2035     + unsigned long taskweight, groupweight;
2036     struct sched_domain *sd;
2037     + long taskimp, groupimp;
2038     + struct numa_group *ng;
2039     struct rq *best_rq;
2040     - unsigned long taskweight, groupweight;
2041     int nid, ret, dist;
2042     - long taskimp, groupimp;
2043    
2044     /*
2045     * Pick the lowest SD_NUMA domain, as that would have the smallest
2046     @@ -1799,7 +1833,8 @@ static int task_numa_migrate(struct task_struct *p)
2047     * multiple NUMA nodes; in order to better consolidate the group,
2048     * we need to check other locations.
2049     */
2050     - if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
2051     + ng = deref_curr_numa_group(p);
2052     + if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
2053     for_each_online_node(nid) {
2054     if (nid == env.src_nid || nid == p->numa_preferred_nid)
2055     continue;
2056     @@ -1832,7 +1867,7 @@ static int task_numa_migrate(struct task_struct *p)
2057     * A task that migrated to a second choice node will be better off
2058     * trying for a better one later. Do not set the preferred node here.
2059     */
2060     - if (p->numa_group) {
2061     + if (ng) {
2062     if (env.best_cpu == -1)
2063     nid = env.src_nid;
2064     else
2065     @@ -2127,6 +2162,7 @@ static void task_numa_placement(struct task_struct *p)
2066     unsigned long total_faults;
2067     u64 runtime, period;
2068     spinlock_t *group_lock = NULL;
2069     + struct numa_group *ng;
2070    
2071     /*
2072     * The p->mm->numa_scan_seq field gets updated without
2073     @@ -2144,8 +2180,9 @@ static void task_numa_placement(struct task_struct *p)
2074     runtime = numa_get_avg_runtime(p, &period);
2075    
2076     /* If the task is part of a group prevent parallel updates to group stats */
2077     - if (p->numa_group) {
2078     - group_lock = &p->numa_group->lock;
2079     + ng = deref_curr_numa_group(p);
2080     + if (ng) {
2081     + group_lock = &ng->lock;
2082     spin_lock_irq(group_lock);
2083     }
2084    
2085     @@ -2186,7 +2223,7 @@ static void task_numa_placement(struct task_struct *p)
2086     p->numa_faults[cpu_idx] += f_diff;
2087     faults += p->numa_faults[mem_idx];
2088     p->total_numa_faults += diff;
2089     - if (p->numa_group) {
2090     + if (ng) {
2091     /*
2092     * safe because we can only change our own group
2093     *
2094     @@ -2194,14 +2231,14 @@ static void task_numa_placement(struct task_struct *p)
2095     * nid and priv in a specific region because it
2096     * is at the beginning of the numa_faults array.
2097     */
2098     - p->numa_group->faults[mem_idx] += diff;
2099     - p->numa_group->faults_cpu[mem_idx] += f_diff;
2100     - p->numa_group->total_faults += diff;
2101     - group_faults += p->numa_group->faults[mem_idx];
2102     + ng->faults[mem_idx] += diff;
2103     + ng->faults_cpu[mem_idx] += f_diff;
2104     + ng->total_faults += diff;
2105     + group_faults += ng->faults[mem_idx];
2106     }
2107     }
2108    
2109     - if (!p->numa_group) {
2110     + if (!ng) {
2111     if (faults > max_faults) {
2112     max_faults = faults;
2113     max_nid = nid;
2114     @@ -2212,8 +2249,8 @@ static void task_numa_placement(struct task_struct *p)
2115     }
2116     }
2117    
2118     - if (p->numa_group) {
2119     - numa_group_count_active_nodes(p->numa_group);
2120     + if (ng) {
2121     + numa_group_count_active_nodes(ng);
2122     spin_unlock_irq(group_lock);
2123     max_nid = preferred_group_nid(p, max_nid);
2124     }
2125     @@ -2247,7 +2284,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
2126     int cpu = cpupid_to_cpu(cpupid);
2127     int i;
2128    
2129     - if (unlikely(!p->numa_group)) {
2130     + if (unlikely(!deref_curr_numa_group(p))) {
2131     unsigned int size = sizeof(struct numa_group) +
2132     4*nr_node_ids*sizeof(unsigned long);
2133    
2134     @@ -2283,7 +2320,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
2135     if (!grp)
2136     goto no_join;
2137    
2138     - my_grp = p->numa_group;
2139     + my_grp = deref_curr_numa_group(p);
2140     if (grp == my_grp)
2141     goto no_join;
2142    
2143     @@ -2345,13 +2382,24 @@ no_join:
2144     return;
2145     }
2146    
2147     -void task_numa_free(struct task_struct *p)
2148     +/*
2149     + * Get rid of NUMA statistics associated with a task (either current or dead).
2150     + * If @final is set, the task is dead and has reached refcount zero, so we can
2151     + * safely free all relevant data structures. Otherwise, there might be
2152     + * concurrent reads from places like load balancing and procfs, and we should
2153     + * reset the data back to default state without freeing ->numa_faults.
2154     + */
2155     +void task_numa_free(struct task_struct *p, bool final)
2156     {
2157     - struct numa_group *grp = p->numa_group;
2158     - void *numa_faults = p->numa_faults;
2159     + /* safe: p either is current or is being freed by current */
2160     + struct numa_group *grp = rcu_dereference_raw(p->numa_group);
2161     + unsigned long *numa_faults = p->numa_faults;
2162     unsigned long flags;
2163     int i;
2164    
2165     + if (!numa_faults)
2166     + return;
2167     +
2168     if (grp) {
2169     spin_lock_irqsave(&grp->lock, flags);
2170     for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
2171     @@ -2364,8 +2412,14 @@ void task_numa_free(struct task_struct *p)
2172     put_numa_group(grp);
2173     }
2174    
2175     - p->numa_faults = NULL;
2176     - kfree(numa_faults);
2177     + if (final) {
2178     + p->numa_faults = NULL;
2179     + kfree(numa_faults);
2180     + } else {
2181     + p->total_numa_faults = 0;
2182     + for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
2183     + numa_faults[i] = 0;
2184     + }
2185     }
2186    
2187     /*
2188     @@ -2418,7 +2472,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
2189     * actively using should be counted as local. This allows the
2190     * scan rate to slow down when a workload has settled down.
2191     */
2192     - ng = p->numa_group;
2193     + ng = deref_curr_numa_group(p);
2194     if (!priv && !local && ng && ng->active_nodes > 1 &&
2195     numa_is_active_node(cpu_node, ng) &&
2196     numa_is_active_node(mem_node, ng))
2197     @@ -10218,18 +10272,22 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
2198     {
2199     int node;
2200     unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
2201     + struct numa_group *ng;
2202    
2203     + rcu_read_lock();
2204     + ng = rcu_dereference(p->numa_group);
2205     for_each_online_node(node) {
2206     if (p->numa_faults) {
2207     tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
2208     tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
2209     }
2210     - if (p->numa_group) {
2211     - gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
2212     - gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
2213     + if (ng) {
2214     + gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)],
2215     + gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)];
2216     }
2217     print_numa_stats(m, node, tsf, tpf, gsf, gpf);
2218     }
2219     + rcu_read_unlock();
2220     }
2221     #endif /* CONFIG_NUMA_BALANCING */
2222     #endif /* CONFIG_SCHED_DEBUG */
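
The deref_task_numa_group()/deref_curr_numa_group() helpers added at the top of this file encode those rules as lockdep checks: the curr variant demands p == current, while the task variant also accepts a locked runqueue with the task off-CPU. A sketch of a reader picking the cheapest legal access, with my_peek_gid() hypothetical:

    static pid_t my_peek_gid(struct task_struct *p)
    {
            struct numa_group *ng;
            pid_t gid = 0;

            if (p == current) {
                    /* Own pointer: no extra protection required. */
                    ng = deref_curr_numa_group(p);
                    return ng ? ng->gid : 0;
            }

            /* Foreign task: fall back to a plain RCU read. */
            rcu_read_lock();
            ng = rcu_dereference(p->numa_group);
            if (ng)
                    gid = ng->gid;
            rcu_read_unlock();
            return gid;
    }
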
2223     diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
2224     index c248e0dccbe1..67ef9d853d90 100644
2225     --- a/net/ipv4/ip_tunnel_core.c
2226     +++ b/net/ipv4/ip_tunnel_core.c
2227     @@ -89,9 +89,12 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
2228     __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
2229    
2230     err = ip_local_out(net, sk, skb);
2231     - if (unlikely(net_xmit_eval(err)))
2232     - pkt_len = 0;
2233     - iptunnel_xmit_stats(dev, pkt_len);
2234     +
2235     + if (dev) {
2236     + if (unlikely(net_xmit_eval(err)))
2237     + pkt_len = 0;
2238     + iptunnel_xmit_stats(dev, pkt_len);
2239     + }
2240     }
2241     EXPORT_SYMBOL_GPL(iptunnel_xmit);
2242    
2243     diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
2244     index ab27a2872935..2e30bf197583 100644
2245     --- a/net/vmw_vsock/af_vsock.c
2246     +++ b/net/vmw_vsock/af_vsock.c
2247     @@ -281,7 +281,8 @@ EXPORT_SYMBOL_GPL(vsock_insert_connected);
2248     void vsock_remove_bound(struct vsock_sock *vsk)
2249     {
2250     spin_lock_bh(&vsock_table_lock);
2251     - __vsock_remove_bound(vsk);
2252     + if (__vsock_in_bound_table(vsk))
2253     + __vsock_remove_bound(vsk);
2254     spin_unlock_bh(&vsock_table_lock);
2255     }
2256     EXPORT_SYMBOL_GPL(vsock_remove_bound);
2257     @@ -289,7 +290,8 @@ EXPORT_SYMBOL_GPL(vsock_remove_bound);
2258     void vsock_remove_connected(struct vsock_sock *vsk)
2259     {
2260     spin_lock_bh(&vsock_table_lock);
2261     - __vsock_remove_connected(vsk);
2262     + if (__vsock_in_connected_table(vsk))
2263     + __vsock_remove_connected(vsk);
2264     spin_unlock_bh(&vsock_table_lock);
2265     }
2266     EXPORT_SYMBOL_GPL(vsock_remove_connected);
2267     @@ -325,35 +327,10 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
2268     }
2269     EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
2270    
2271     -static bool vsock_in_bound_table(struct vsock_sock *vsk)
2272     -{
2273     - bool ret;
2274     -
2275     - spin_lock_bh(&vsock_table_lock);
2276     - ret = __vsock_in_bound_table(vsk);
2277     - spin_unlock_bh(&vsock_table_lock);
2278     -
2279     - return ret;
2280     -}
2281     -
2282     -static bool vsock_in_connected_table(struct vsock_sock *vsk)
2283     -{
2284     - bool ret;
2285     -
2286     - spin_lock_bh(&vsock_table_lock);
2287     - ret = __vsock_in_connected_table(vsk);
2288     - spin_unlock_bh(&vsock_table_lock);
2289     -
2290     - return ret;
2291     -}
2292     -
2293     void vsock_remove_sock(struct vsock_sock *vsk)
2294     {
2295     - if (vsock_in_bound_table(vsk))
2296     - vsock_remove_bound(vsk);
2297     -
2298     - if (vsock_in_connected_table(vsk))
2299     - vsock_remove_connected(vsk);
2300     + vsock_remove_bound(vsk);
2301     + vsock_remove_connected(vsk);
2302     }
2303     EXPORT_SYMBOL_GPL(vsock_remove_sock);
2304    
2305     @@ -484,8 +461,7 @@ static void vsock_pending_work(struct work_struct *work)
2306     * incoming packets can't find this socket, and to reduce the reference
2307     * count.
2308     */
2309     - if (vsock_in_connected_table(vsk))
2310     - vsock_remove_connected(vsk);
2311     + vsock_remove_connected(vsk);
2312    
2313     sk->sk_state = TCP_CLOSE;
2314    
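
Folding the membership test into vsock_remove_bound()/vsock_remove_connected() closes a classic check-then-act race: previously the table lookup and the unlink ran under separate acquisitions of vsock_table_lock, so two CPUs could both observe the socket as present and both attempt removal. The fixed shape keeps test and removal in one critical section:

    spin_lock_bh(&vsock_table_lock);
    if (__vsock_in_bound_table(vsk))
            __vsock_remove_bound(vsk);      /* at most one remover wins */
    spin_unlock_bh(&vsock_table_lock);
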
2315     diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
2316     index b131561a9469..9c7da811d130 100644
2317     --- a/net/vmw_vsock/hyperv_transport.c
2318     +++ b/net/vmw_vsock/hyperv_transport.c
2319     @@ -35,6 +35,9 @@
2320     /* The MTU is 16KB per the host side's design */
2321     #define HVS_MTU_SIZE (1024 * 16)
2322    
2323     +/* How long to wait for graceful shutdown of a connection */
2324     +#define HVS_CLOSE_TIMEOUT (8 * HZ)
2325     +
2326     struct vmpipe_proto_header {
2327     u32 pkt_type;
2328     u32 data_size;
2329     @@ -290,19 +293,32 @@ static void hvs_channel_cb(void *ctx)
2330     sk->sk_write_space(sk);
2331     }
2332    
2333     -static void hvs_close_connection(struct vmbus_channel *chan)
2334     +static void hvs_do_close_lock_held(struct vsock_sock *vsk,
2335     + bool cancel_timeout)
2336     {
2337     - struct sock *sk = get_per_channel_state(chan);
2338     - struct vsock_sock *vsk = vsock_sk(sk);
2339     -
2340     - lock_sock(sk);
2341     + struct sock *sk = sk_vsock(vsk);
2342    
2343     - sk->sk_state = TCP_CLOSE;
2344     sock_set_flag(sk, SOCK_DONE);
2345     - vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
2346     -
2347     + vsk->peer_shutdown = SHUTDOWN_MASK;
2348     + if (vsock_stream_has_data(vsk) <= 0)
2349     + sk->sk_state = TCP_CLOSING;
2350     sk->sk_state_change(sk);
2351     + if (vsk->close_work_scheduled &&
2352     + (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
2353     + vsk->close_work_scheduled = false;
2354     + vsock_remove_sock(vsk);
2355    
2356     + /* Release the reference taken while scheduling the timeout */
2357     + sock_put(sk);
2358     + }
2359     +}
2360     +
2361     +static void hvs_close_connection(struct vmbus_channel *chan)
2362     +{
2363     + struct sock *sk = get_per_channel_state(chan);
2364     +
2365     + lock_sock(sk);
2366     + hvs_do_close_lock_held(vsock_sk(sk), true);
2367     release_sock(sk);
2368     }
2369    
2370     @@ -445,50 +461,80 @@ static int hvs_connect(struct vsock_sock *vsk)
2371     return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
2372     }
2373    
2374     +static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
2375     +{
2376     + struct vmpipe_proto_header hdr;
2377     +
2378     + if (hvs->fin_sent || !hvs->chan)
2379     + return;
2380     +
2381     + /* It can't fail: see hvs_channel_writable_bytes(). */
2382     + (void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0);
2383     + hvs->fin_sent = true;
2384     +}
2385     +
2386     static int hvs_shutdown(struct vsock_sock *vsk, int mode)
2387     {
2388     struct sock *sk = sk_vsock(vsk);
2389     - struct vmpipe_proto_header hdr;
2390     - struct hvs_send_buf *send_buf;
2391     - struct hvsock *hvs;
2392    
2393     if (!(mode & SEND_SHUTDOWN))
2394     return 0;
2395    
2396     lock_sock(sk);
2397     + hvs_shutdown_lock_held(vsk->trans, mode);
2398     + release_sock(sk);
2399     + return 0;
2400     +}
2401    
2402     - hvs = vsk->trans;
2403     - if (hvs->fin_sent)
2404     - goto out;
2405     -
2406     - send_buf = (struct hvs_send_buf *)&hdr;
2407     +static void hvs_close_timeout(struct work_struct *work)
2408     +{
2409     + struct vsock_sock *vsk =
2410     + container_of(work, struct vsock_sock, close_work.work);
2411     + struct sock *sk = sk_vsock(vsk);
2412    
2413     - /* It can't fail: see hvs_channel_writable_bytes(). */
2414     - (void)hvs_send_data(hvs->chan, send_buf, 0);
2415     + sock_hold(sk);
2416     + lock_sock(sk);
2417     + if (!sock_flag(sk, SOCK_DONE))
2418     + hvs_do_close_lock_held(vsk, false);
2419    
2420     - hvs->fin_sent = true;
2421     -out:
2422     + vsk->close_work_scheduled = false;
2423     release_sock(sk);
2424     - return 0;
2425     + sock_put(sk);
2426     }
2427    
2428     -static void hvs_release(struct vsock_sock *vsk)
2429     +/* Returns true, if it is safe to remove socket; false otherwise */
2430     +static bool hvs_close_lock_held(struct vsock_sock *vsk)
2431     {
2432     struct sock *sk = sk_vsock(vsk);
2433     - struct hvsock *hvs = vsk->trans;
2434     - struct vmbus_channel *chan;
2435    
2436     - lock_sock(sk);
2437     + if (!(sk->sk_state == TCP_ESTABLISHED ||
2438     + sk->sk_state == TCP_CLOSING))
2439     + return true;
2440    
2441     - sk->sk_state = TCP_CLOSING;
2442     - vsock_remove_sock(vsk);
2443     + if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
2444     + hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK);
2445    
2446     - release_sock(sk);
2447     + if (sock_flag(sk, SOCK_DONE))
2448     + return true;
2449    
2450     - chan = hvs->chan;
2451     - if (chan)
2452     - hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
2453     + /* This reference will be dropped by the delayed close routine */
2454     + sock_hold(sk);
2455     + INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout);
2456     + vsk->close_work_scheduled = true;
2457     + schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT);
2458     + return false;
2459     +}
2460    
2461     +static void hvs_release(struct vsock_sock *vsk)
2462     +{
2463     + struct sock *sk = sk_vsock(vsk);
2464     + bool remove_sock;
2465     +
2466     + lock_sock(sk);
2467     + remove_sock = hvs_close_lock_held(vsk);
2468     + release_sock(sk);
2469     + if (remove_sock)
2470     + vsock_remove_sock(vsk);
2471     }
2472    
2473     static void hvs_destruct(struct vsock_sock *vsk)
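
Taken together, the hyperv_transport changes above turn release into a graceful close with a bounded wait. A condensed map of the flow, using only the helpers this patch introduces:

    /*
     * hvs_release()
     *   -> hvs_close_lock_held()
     *        send FIN via hvs_shutdown_lock_held() if not yet sent;
     *        if SOCK_DONE (peer already closed): remove the socket now;
     *        otherwise hold a reference on sk, schedule
     *        hvs_close_timeout() after HVS_CLOSE_TIMEOUT (8 * HZ),
     *        and defer removal.
     *
     * hvs_close_connection()      peer closed within the timeout:
     *   -> hvs_do_close_lock_held(vsk, true)
     *        cancel the pending work, remove the socket, and drop the
     *        reference taken when the timeout was scheduled.
     *
     * hvs_close_timeout()         peer never closed:
     *   -> hvs_do_close_lock_held(vsk, false)
     *        force the close and remove the socket.
     */
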