Contents of /trunk/kernel-alx/patches-4.19/0163-4.19.64-all-fixes.patch
Revision 3442 - Mon Aug 5 07:52:44 2019 UTC (5 years, 1 month ago) by niro
File size: 78299 byte(s)
-linux-4.19.64
1 | diff --git a/Makefile b/Makefile |
2 | index 8ad77a93de30..203d9e80a315 100644 |
3 | --- a/Makefile |
4 | +++ b/Makefile |
5 | @@ -1,7 +1,7 @@ |
6 | # SPDX-License-Identifier: GPL-2.0 |
7 | VERSION = 4 |
8 | PATCHLEVEL = 19 |
9 | -SUBLEVEL = 63 |
10 | +SUBLEVEL = 64 |
11 | EXTRAVERSION = |
12 | NAME = "People's Front" |
13 | |
14 | diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h |
15 | index 1a037b94eba1..cee28a05ee98 100644 |
16 | --- a/arch/arm64/include/asm/compat.h |
17 | +++ b/arch/arm64/include/asm/compat.h |
18 | @@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) |
19 | } |
20 | |
21 | #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) |
22 | +#define COMPAT_MINSIGSTKSZ 2048 |
23 | |
24 | static inline void __user *arch_compat_alloc_user_space(long len) |
25 | { |
26 | diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig |
27 | index 6394b4f0a69b..f42feab25dcf 100644 |
28 | --- a/arch/sh/boards/Kconfig |
29 | +++ b/arch/sh/boards/Kconfig |
30 | @@ -8,27 +8,19 @@ config SH_ALPHA_BOARD |
31 | bool |
32 | |
33 | config SH_DEVICE_TREE |
34 | - bool "Board Described by Device Tree" |
35 | + bool |
36 | select OF |
37 | select OF_EARLY_FLATTREE |
38 | select TIMER_OF |
39 | select COMMON_CLK |
40 | select GENERIC_CALIBRATE_DELAY |
41 | - help |
42 | - Select Board Described by Device Tree to build a kernel that |
43 | - does not hard-code any board-specific knowledge but instead uses |
44 | - a device tree blob provided by the boot-loader. You must enable |
45 | - drivers for any hardware you want to use separately. At this |
46 | - time, only boards based on the open-hardware J-Core processors |
47 | - have sufficient driver coverage to use this option; do not |
48 | - select it if you are using original SuperH hardware. |
49 | |
50 | config SH_JCORE_SOC |
51 | bool "J-Core SoC" |
52 | - depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2) |
53 | + select SH_DEVICE_TREE |
54 | select CLKSRC_JCORE_PIT |
55 | select JCORE_AIC |
56 | - default y if CPU_J2 |
57 | + depends on CPU_J2 |
58 | help |
59 | Select this option to include drivers core components of the |
60 | J-Core SoC, including interrupt controllers and timers. |
61 | diff --git a/block/blk-core.c b/block/blk-core.c |
62 | index 9ca703bcfe3b..4a3e1f417880 100644 |
63 | --- a/block/blk-core.c |
64 | +++ b/block/blk-core.c |
65 | @@ -421,24 +421,25 @@ void blk_sync_queue(struct request_queue *q) |
66 | EXPORT_SYMBOL(blk_sync_queue); |
67 | |
68 | /** |
69 | - * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY |
70 | + * blk_set_pm_only - increment pm_only counter |
71 | * @q: request queue pointer |
72 | - * |
73 | - * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not |
74 | - * set and 1 if the flag was already set. |
75 | */ |
76 | -int blk_set_preempt_only(struct request_queue *q) |
77 | +void blk_set_pm_only(struct request_queue *q) |
78 | { |
79 | - return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q); |
80 | + atomic_inc(&q->pm_only); |
81 | } |
82 | -EXPORT_SYMBOL_GPL(blk_set_preempt_only); |
83 | +EXPORT_SYMBOL_GPL(blk_set_pm_only); |
84 | |
85 | -void blk_clear_preempt_only(struct request_queue *q) |
86 | +void blk_clear_pm_only(struct request_queue *q) |
87 | { |
88 | - blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); |
89 | - wake_up_all(&q->mq_freeze_wq); |
90 | + int pm_only; |
91 | + |
92 | + pm_only = atomic_dec_return(&q->pm_only); |
93 | + WARN_ON_ONCE(pm_only < 0); |
94 | + if (pm_only == 0) |
95 | + wake_up_all(&q->mq_freeze_wq); |
96 | } |
97 | -EXPORT_SYMBOL_GPL(blk_clear_preempt_only); |
98 | +EXPORT_SYMBOL_GPL(blk_clear_pm_only); |
99 | |
100 | /** |
101 | * __blk_run_queue_uncond - run a queue whether or not it has been stopped |
102 | @@ -916,7 +917,7 @@ EXPORT_SYMBOL(blk_alloc_queue); |
103 | */ |
104 | int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) |
105 | { |
106 | - const bool preempt = flags & BLK_MQ_REQ_PREEMPT; |
107 | + const bool pm = flags & BLK_MQ_REQ_PREEMPT; |
108 | |
109 | while (true) { |
110 | bool success = false; |
111 | @@ -924,11 +925,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) |
112 | rcu_read_lock(); |
113 | if (percpu_ref_tryget_live(&q->q_usage_counter)) { |
114 | /* |
115 | - * The code that sets the PREEMPT_ONLY flag is |
116 | - * responsible for ensuring that that flag is globally |
117 | - * visible before the queue is unfrozen. |
118 | + * The code that increments the pm_only counter is |
119 | + * responsible for ensuring that that counter is |
120 | + * globally visible before the queue is unfrozen. |
121 | */ |
122 | - if (preempt || !blk_queue_preempt_only(q)) { |
123 | + if (pm || !blk_queue_pm_only(q)) { |
124 | success = true; |
125 | } else { |
126 | percpu_ref_put(&q->q_usage_counter); |
127 | @@ -953,7 +954,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) |
128 | |
129 | wait_event(q->mq_freeze_wq, |
130 | (atomic_read(&q->mq_freeze_depth) == 0 && |
131 | - (preempt || !blk_queue_preempt_only(q))) || |
132 | + (pm || !blk_queue_pm_only(q))) || |
133 | blk_queue_dying(q)); |
134 | if (blk_queue_dying(q)) |
135 | return -ENODEV; |
136 | diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c |
137 | index cb1e6cf7ac48..a5ea86835fcb 100644 |
138 | --- a/block/blk-mq-debugfs.c |
139 | +++ b/block/blk-mq-debugfs.c |
140 | @@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, |
141 | return 0; |
142 | } |
143 | |
144 | +static int queue_pm_only_show(void *data, struct seq_file *m) |
145 | +{ |
146 | + struct request_queue *q = data; |
147 | + |
148 | + seq_printf(m, "%d\n", atomic_read(&q->pm_only)); |
149 | + return 0; |
150 | +} |
151 | + |
152 | #define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name |
153 | static const char *const blk_queue_flag_name[] = { |
154 | QUEUE_FLAG_NAME(QUEUED), |
155 | @@ -132,7 +140,6 @@ static const char *const blk_queue_flag_name[] = { |
156 | QUEUE_FLAG_NAME(REGISTERED), |
157 | QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), |
158 | QUEUE_FLAG_NAME(QUIESCED), |
159 | - QUEUE_FLAG_NAME(PREEMPT_ONLY), |
160 | }; |
161 | #undef QUEUE_FLAG_NAME |
162 | |
163 | @@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf, |
164 | static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { |
165 | { "poll_stat", 0400, queue_poll_stat_show }, |
166 | { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, |
167 | + { "pm_only", 0600, queue_pm_only_show, NULL }, |
168 | { "state", 0600, queue_state_show, queue_state_write }, |
169 | { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store }, |
170 | { "zone_wlock", 0400, queue_zone_wlock_show, NULL }, |
171 | diff --git a/drivers/android/binder.c b/drivers/android/binder.c |
172 | index 1e0e438f079f..6e04e7a707a1 100644 |
173 | --- a/drivers/android/binder.c |
174 | +++ b/drivers/android/binder.c |
175 | @@ -1960,8 +1960,18 @@ static struct binder_thread *binder_get_txn_from_and_acq_inner( |
176 | |
177 | static void binder_free_transaction(struct binder_transaction *t) |
178 | { |
179 | - if (t->buffer) |
180 | - t->buffer->transaction = NULL; |
181 | + struct binder_proc *target_proc = t->to_proc; |
182 | + |
183 | + if (target_proc) { |
184 | + binder_inner_proc_lock(target_proc); |
185 | + if (t->buffer) |
186 | + t->buffer->transaction = NULL; |
187 | + binder_inner_proc_unlock(target_proc); |
188 | + } |
189 | + /* |
190 | + * If the transaction has no target_proc, then |
191 | + * t->buffer->transaction has already been cleared. |
192 | + */ |
193 | kfree(t); |
194 | binder_stats_deleted(BINDER_STAT_TRANSACTION); |
195 | } |
196 | @@ -3484,10 +3494,12 @@ static int binder_thread_write(struct binder_proc *proc, |
197 | buffer->debug_id, |
198 | buffer->transaction ? "active" : "finished"); |
199 | |
200 | + binder_inner_proc_lock(proc); |
201 | if (buffer->transaction) { |
202 | buffer->transaction->buffer = NULL; |
203 | buffer->transaction = NULL; |
204 | } |
205 | + binder_inner_proc_unlock(proc); |
206 | if (buffer->async_transaction && buffer->target_node) { |
207 | struct binder_node *buf_node; |
208 | struct binder_work *w; |
209 | diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c |
210 | index d568fbd94d6c..20235925344d 100644 |
211 | --- a/drivers/bluetooth/hci_ath.c |
212 | +++ b/drivers/bluetooth/hci_ath.c |
213 | @@ -112,6 +112,9 @@ static int ath_open(struct hci_uart *hu) |
214 | |
215 | BT_DBG("hu %p", hu); |
216 | |
217 | + if (!hci_uart_has_flow_control(hu)) |
218 | + return -EOPNOTSUPP; |
219 | + |
220 | ath = kzalloc(sizeof(*ath), GFP_KERNEL); |
221 | if (!ath) |
222 | return -ENOMEM; |
223 | diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c |
224 | index 800132369134..aa6b7ed9fdf1 100644 |
225 | --- a/drivers/bluetooth/hci_bcm.c |
226 | +++ b/drivers/bluetooth/hci_bcm.c |
227 | @@ -369,6 +369,9 @@ static int bcm_open(struct hci_uart *hu) |
228 | |
229 | bt_dev_dbg(hu->hdev, "hu %p", hu); |
230 | |
231 | + if (!hci_uart_has_flow_control(hu)) |
232 | + return -EOPNOTSUPP; |
233 | + |
234 | bcm = kzalloc(sizeof(*bcm), GFP_KERNEL); |
235 | if (!bcm) |
236 | return -ENOMEM; |
237 | diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c |
238 | index 46ace321bf60..e9228520e4c7 100644 |
239 | --- a/drivers/bluetooth/hci_intel.c |
240 | +++ b/drivers/bluetooth/hci_intel.c |
241 | @@ -406,6 +406,9 @@ static int intel_open(struct hci_uart *hu) |
242 | |
243 | BT_DBG("hu %p", hu); |
244 | |
245 | + if (!hci_uart_has_flow_control(hu)) |
246 | + return -EOPNOTSUPP; |
247 | + |
248 | intel = kzalloc(sizeof(*intel), GFP_KERNEL); |
249 | if (!intel) |
250 | return -ENOMEM; |
251 | diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c |
252 | index c915daf01a89..efeb8137ec67 100644 |
253 | --- a/drivers/bluetooth/hci_ldisc.c |
254 | +++ b/drivers/bluetooth/hci_ldisc.c |
255 | @@ -299,6 +299,19 @@ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb) |
256 | return 0; |
257 | } |
258 | |
259 | +/* Check the underlying device or tty has flow control support */ |
260 | +bool hci_uart_has_flow_control(struct hci_uart *hu) |
261 | +{ |
262 | + /* serdev nodes check if the needed operations are present */ |
263 | + if (hu->serdev) |
264 | + return true; |
265 | + |
266 | + if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset) |
267 | + return true; |
268 | + |
269 | + return false; |
270 | +} |
271 | + |
272 | /* Flow control or un-flow control the device */ |
273 | void hci_uart_set_flow_control(struct hci_uart *hu, bool enable) |
274 | { |
275 | diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c |
276 | index ffb00669346f..23791df081ba 100644 |
277 | --- a/drivers/bluetooth/hci_mrvl.c |
278 | +++ b/drivers/bluetooth/hci_mrvl.c |
279 | @@ -66,6 +66,9 @@ static int mrvl_open(struct hci_uart *hu) |
280 | |
281 | BT_DBG("hu %p", hu); |
282 | |
283 | + if (!hci_uart_has_flow_control(hu)) |
284 | + return -EOPNOTSUPP; |
285 | + |
286 | mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL); |
287 | if (!mrvl) |
288 | return -ENOMEM; |
289 | diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c |
290 | index 77004c29da08..f96e58de049b 100644 |
291 | --- a/drivers/bluetooth/hci_qca.c |
292 | +++ b/drivers/bluetooth/hci_qca.c |
293 | @@ -450,6 +450,9 @@ static int qca_open(struct hci_uart *hu) |
294 | |
295 | BT_DBG("hu %p qca_open", hu); |
296 | |
297 | + if (!hci_uart_has_flow_control(hu)) |
298 | + return -EOPNOTSUPP; |
299 | + |
300 | qca = kzalloc(sizeof(struct qca_data), GFP_KERNEL); |
301 | if (!qca) |
302 | return -ENOMEM; |
303 | diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h |
304 | index 00cab2fd7a1b..067a610f1372 100644 |
305 | --- a/drivers/bluetooth/hci_uart.h |
306 | +++ b/drivers/bluetooth/hci_uart.h |
307 | @@ -118,6 +118,7 @@ int hci_uart_tx_wakeup(struct hci_uart *hu); |
308 | int hci_uart_init_ready(struct hci_uart *hu); |
309 | void hci_uart_init_work(struct work_struct *work); |
310 | void hci_uart_set_baudrate(struct hci_uart *hu, unsigned int speed); |
311 | +bool hci_uart_has_flow_control(struct hci_uart *hu); |
312 | void hci_uart_set_flow_control(struct hci_uart *hu, bool enable); |
313 | void hci_uart_set_speeds(struct hci_uart *hu, unsigned int init_speed, |
314 | unsigned int oper_speed); |
315 | diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c |
316 | index c1439019dd12..b9af2419006f 100644 |
317 | --- a/drivers/iommu/intel-iommu.c |
318 | +++ b/drivers/iommu/intel-iommu.c |
319 | @@ -3721,7 +3721,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) |
320 | |
321 | freelist = domain_unmap(domain, start_pfn, last_pfn); |
322 | |
323 | - if (intel_iommu_strict) { |
324 | + if (intel_iommu_strict || !has_iova_flush_queue(&domain->iovad)) { |
325 | iommu_flush_iotlb_psi(iommu, domain, start_pfn, |
326 | nrpages, !freelist, 0); |
327 | /* free iova */ |
328 | diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c |
329 | index 83fe2621effe..60348d707b99 100644 |
330 | --- a/drivers/iommu/iova.c |
331 | +++ b/drivers/iommu/iova.c |
332 | @@ -65,9 +65,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, |
333 | } |
334 | EXPORT_SYMBOL_GPL(init_iova_domain); |
335 | |
336 | +bool has_iova_flush_queue(struct iova_domain *iovad) |
337 | +{ |
338 | + return !!iovad->fq; |
339 | +} |
340 | + |
341 | static void free_iova_flush_queue(struct iova_domain *iovad) |
342 | { |
343 | - if (!iovad->fq) |
344 | + if (!has_iova_flush_queue(iovad)) |
345 | return; |
346 | |
347 | if (timer_pending(&iovad->fq_timer)) |
348 | @@ -85,13 +90,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad) |
349 | int init_iova_flush_queue(struct iova_domain *iovad, |
350 | iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) |
351 | { |
352 | + struct iova_fq __percpu *queue; |
353 | int cpu; |
354 | |
355 | atomic64_set(&iovad->fq_flush_start_cnt, 0); |
356 | atomic64_set(&iovad->fq_flush_finish_cnt, 0); |
357 | |
358 | - iovad->fq = alloc_percpu(struct iova_fq); |
359 | - if (!iovad->fq) |
360 | + queue = alloc_percpu(struct iova_fq); |
361 | + if (!queue) |
362 | return -ENOMEM; |
363 | |
364 | iovad->flush_cb = flush_cb; |
365 | @@ -100,13 +106,17 @@ int init_iova_flush_queue(struct iova_domain *iovad, |
366 | for_each_possible_cpu(cpu) { |
367 | struct iova_fq *fq; |
368 | |
369 | - fq = per_cpu_ptr(iovad->fq, cpu); |
370 | + fq = per_cpu_ptr(queue, cpu); |
371 | fq->head = 0; |
372 | fq->tail = 0; |
373 | |
374 | spin_lock_init(&fq->lock); |
375 | } |
376 | |
377 | + smp_wmb(); |
378 | + |
379 | + iovad->fq = queue; |
380 | + |
381 | timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); |
382 | atomic_set(&iovad->fq_timer_on, 0); |
383 | |
384 | diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c |
385 | index 6d05946b445e..060dc7fd66c1 100644 |
386 | --- a/drivers/isdn/hardware/mISDN/hfcsusb.c |
387 | +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c |
388 | @@ -1967,6 +1967,9 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) |
389 | |
390 | /* get endpoint base */ |
391 | idx = ((ep_addr & 0x7f) - 1) * 2; |
392 | + if (idx > 15) |
393 | + return -EIO; |
394 | + |
395 | if (ep_addr & 0x80) |
396 | idx++; |
397 | attr = ep->desc.bmAttributes; |
398 | diff --git a/drivers/media/radio/radio-raremono.c b/drivers/media/radio/radio-raremono.c |
399 | index 9a5079d64c4a..729600c4a056 100644 |
400 | --- a/drivers/media/radio/radio-raremono.c |
401 | +++ b/drivers/media/radio/radio-raremono.c |
402 | @@ -271,6 +271,14 @@ static int vidioc_g_frequency(struct file *file, void *priv, |
403 | return 0; |
404 | } |
405 | |
406 | +static void raremono_device_release(struct v4l2_device *v4l2_dev) |
407 | +{ |
408 | + struct raremono_device *radio = to_raremono_dev(v4l2_dev); |
409 | + |
410 | + kfree(radio->buffer); |
411 | + kfree(radio); |
412 | +} |
413 | + |
414 | /* File system interface */ |
415 | static const struct v4l2_file_operations usb_raremono_fops = { |
416 | .owner = THIS_MODULE, |
417 | @@ -295,12 +303,14 @@ static int usb_raremono_probe(struct usb_interface *intf, |
418 | struct raremono_device *radio; |
419 | int retval = 0; |
420 | |
421 | - radio = devm_kzalloc(&intf->dev, sizeof(struct raremono_device), GFP_KERNEL); |
422 | - if (radio) |
423 | - radio->buffer = devm_kmalloc(&intf->dev, BUFFER_LENGTH, GFP_KERNEL); |
424 | - |
425 | - if (!radio || !radio->buffer) |
426 | + radio = kzalloc(sizeof(*radio), GFP_KERNEL); |
427 | + if (!radio) |
428 | + return -ENOMEM; |
429 | + radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL); |
430 | + if (!radio->buffer) { |
431 | + kfree(radio); |
432 | return -ENOMEM; |
433 | + } |
434 | |
435 | radio->usbdev = interface_to_usbdev(intf); |
436 | radio->intf = intf; |
437 | @@ -324,7 +334,8 @@ static int usb_raremono_probe(struct usb_interface *intf, |
438 | if (retval != 3 || |
439 | (get_unaligned_be16(&radio->buffer[1]) & 0xfff) == 0x0242) { |
440 | dev_info(&intf->dev, "this is not Thanko's Raremono.\n"); |
441 | - return -ENODEV; |
442 | + retval = -ENODEV; |
443 | + goto free_mem; |
444 | } |
445 | |
446 | dev_info(&intf->dev, "Thanko's Raremono connected: (%04X:%04X)\n", |
447 | @@ -333,7 +344,7 @@ static int usb_raremono_probe(struct usb_interface *intf, |
448 | retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev); |
449 | if (retval < 0) { |
450 | dev_err(&intf->dev, "couldn't register v4l2_device\n"); |
451 | - return retval; |
452 | + goto free_mem; |
453 | } |
454 | |
455 | mutex_init(&radio->lock); |
456 | @@ -345,6 +356,7 @@ static int usb_raremono_probe(struct usb_interface *intf, |
457 | radio->vdev.ioctl_ops = &usb_raremono_ioctl_ops; |
458 | radio->vdev.lock = &radio->lock; |
459 | radio->vdev.release = video_device_release_empty; |
460 | + radio->v4l2_dev.release = raremono_device_release; |
461 | |
462 | usb_set_intfdata(intf, &radio->v4l2_dev); |
463 | |
464 | @@ -360,6 +372,10 @@ static int usb_raremono_probe(struct usb_interface *intf, |
465 | } |
466 | dev_err(&intf->dev, "could not register video device\n"); |
467 | v4l2_device_unregister(&radio->v4l2_dev); |
468 | + |
469 | +free_mem: |
470 | + kfree(radio->buffer); |
471 | + kfree(radio); |
472 | return retval; |
473 | } |
474 | |
475 | diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c |
476 | index 257ae0d8cfe2..e3f63299f85c 100644 |
477 | --- a/drivers/media/usb/au0828/au0828-core.c |
478 | +++ b/drivers/media/usb/au0828/au0828-core.c |
479 | @@ -623,6 +623,12 @@ static int au0828_usb_probe(struct usb_interface *interface, |
480 | /* Setup */ |
481 | au0828_card_setup(dev); |
482 | |
483 | + /* |
484 | + * Store the pointer to the au0828_dev so it can be accessed in |
485 | + * au0828_usb_disconnect |
486 | + */ |
487 | + usb_set_intfdata(interface, dev); |
488 | + |
489 | /* Analog TV */ |
490 | retval = au0828_analog_register(dev, interface); |
491 | if (retval) { |
492 | @@ -641,12 +647,6 @@ static int au0828_usb_probe(struct usb_interface *interface, |
493 | /* Remote controller */ |
494 | au0828_rc_register(dev); |
495 | |
496 | - /* |
497 | - * Store the pointer to the au0828_dev so it can be accessed in |
498 | - * au0828_usb_disconnect |
499 | - */ |
500 | - usb_set_intfdata(interface, dev); |
501 | - |
502 | pr_info("Registered device AU0828 [%s]\n", |
503 | dev->board.name == NULL ? "Unset" : dev->board.name); |
504 | |
505 | diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c |
506 | index a771e0a52610..f5b04594e209 100644 |
507 | --- a/drivers/media/usb/cpia2/cpia2_usb.c |
508 | +++ b/drivers/media/usb/cpia2/cpia2_usb.c |
509 | @@ -902,7 +902,6 @@ static void cpia2_usb_disconnect(struct usb_interface *intf) |
510 | cpia2_unregister_camera(cam); |
511 | v4l2_device_disconnect(&cam->v4l2_dev); |
512 | mutex_unlock(&cam->v4l2_lock); |
513 | - v4l2_device_put(&cam->v4l2_dev); |
514 | |
515 | if(cam->buffers) { |
516 | DBG("Wakeup waiting processes\n"); |
517 | @@ -911,6 +910,8 @@ static void cpia2_usb_disconnect(struct usb_interface *intf) |
518 | wake_up_interruptible(&cam->wq_stream); |
519 | } |
520 | |
521 | + v4l2_device_put(&cam->v4l2_dev); |
522 | + |
523 | LOG("CPiA2 camera disconnected.\n"); |
524 | } |
525 | |
526 | diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c |
527 | index 673fdca8d2da..fcb201a40920 100644 |
528 | --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c |
529 | +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c |
530 | @@ -1680,7 +1680,7 @@ static int pvr2_decoder_enable(struct pvr2_hdw *hdw,int enablefl) |
531 | } |
532 | if (!hdw->flag_decoder_missed) { |
533 | pvr2_trace(PVR2_TRACE_ERROR_LEGS, |
534 | - "WARNING: No decoder present"); |
535 | + "***WARNING*** No decoder present"); |
536 | hdw->flag_decoder_missed = !0; |
537 | trace_stbit("flag_decoder_missed", |
538 | hdw->flag_decoder_missed); |
539 | @@ -2366,7 +2366,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf, |
540 | if (hdw_desc->flag_is_experimental) { |
541 | pvr2_trace(PVR2_TRACE_INFO, "**********"); |
542 | pvr2_trace(PVR2_TRACE_INFO, |
543 | - "WARNING: Support for this device (%s) is experimental.", |
544 | + "***WARNING*** Support for this device (%s) is experimental.", |
545 | hdw_desc->description); |
546 | pvr2_trace(PVR2_TRACE_INFO, |
547 | "Important functionality might not be entirely working."); |
548 | diff --git a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c |
549 | index f3003ca05f4b..922c06279663 100644 |
550 | --- a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c |
551 | +++ b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c |
552 | @@ -343,11 +343,11 @@ static int i2c_hack_cx25840(struct pvr2_hdw *hdw, |
553 | |
554 | if ((ret != 0) || (*rdata == 0x04) || (*rdata == 0x0a)) { |
555 | pvr2_trace(PVR2_TRACE_ERROR_LEGS, |
556 | - "WARNING: Detected a wedged cx25840 chip; the device will not work."); |
557 | + "***WARNING*** Detected a wedged cx25840 chip; the device will not work."); |
558 | pvr2_trace(PVR2_TRACE_ERROR_LEGS, |
559 | - "WARNING: Try power cycling the pvrusb2 device."); |
560 | + "***WARNING*** Try power cycling the pvrusb2 device."); |
561 | pvr2_trace(PVR2_TRACE_ERROR_LEGS, |
562 | - "WARNING: Disabling further access to the device to prevent other foul-ups."); |
563 | + "***WARNING*** Disabling further access to the device to prevent other foul-ups."); |
564 | // This blocks all further communication with the part. |
565 | hdw->i2c_func[0x44] = NULL; |
566 | pvr2_hdw_render_useless(hdw); |
567 | diff --git a/drivers/media/usb/pvrusb2/pvrusb2-std.c b/drivers/media/usb/pvrusb2/pvrusb2-std.c |
568 | index 6b651f8b54df..37dc299a1ca2 100644 |
569 | --- a/drivers/media/usb/pvrusb2/pvrusb2-std.c |
570 | +++ b/drivers/media/usb/pvrusb2/pvrusb2-std.c |
571 | @@ -353,7 +353,7 @@ struct v4l2_standard *pvr2_std_create_enum(unsigned int *countptr, |
572 | bcnt = pvr2_std_id_to_str(buf,sizeof(buf),fmsk); |
573 | pvr2_trace( |
574 | PVR2_TRACE_ERROR_LEGS, |
575 | - "WARNING: Failed to classify the following standard(s): %.*s", |
576 | + "***WARNING*** Failed to classify the following standard(s): %.*s", |
577 | bcnt,buf); |
578 | } |
579 | |
580 | diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c |
581 | index d4803ff5a78a..f09a4ad2e9de 100644 |
582 | --- a/drivers/net/wireless/ath/ath10k/usb.c |
583 | +++ b/drivers/net/wireless/ath/ath10k/usb.c |
584 | @@ -1025,7 +1025,7 @@ static int ath10k_usb_probe(struct usb_interface *interface, |
585 | } |
586 | |
587 | /* TODO: remove this once USB support is fully implemented */ |
588 | - ath10k_warn(ar, "WARNING: ath10k USB support is incomplete, don't expect anything to work!\n"); |
589 | + ath10k_warn(ar, "Warning: ath10k USB support is incomplete, don't expect anything to work!\n"); |
590 | |
591 | return 0; |
592 | |
593 | diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c |
594 | index 8febacb8fc54..0951564b6830 100644 |
595 | --- a/drivers/pps/pps.c |
596 | +++ b/drivers/pps/pps.c |
597 | @@ -166,6 +166,14 @@ static long pps_cdev_ioctl(struct file *file, |
598 | pps->params.mode |= PPS_CANWAIT; |
599 | pps->params.api_version = PPS_API_VERS; |
600 | |
601 | + /* |
602 | + * Clear unused fields of pps_kparams to avoid leaking |
603 | + * uninitialized data of the PPS_SETPARAMS caller via |
604 | + * PPS_GETPARAMS |
605 | + */ |
606 | + pps->params.assert_off_tu.flags = 0; |
607 | + pps->params.clear_off_tu.flags = 0; |
608 | + |
609 | spin_unlock_irq(&pps->lock); |
610 | |
611 | break; |
612 | diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c |
613 | index 32652b2c5e7c..75b926e70076 100644 |
614 | --- a/drivers/scsi/scsi_lib.c |
615 | +++ b/drivers/scsi/scsi_lib.c |
616 | @@ -3059,11 +3059,14 @@ scsi_device_quiesce(struct scsi_device *sdev) |
617 | */ |
618 | WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current); |
619 | |
620 | - blk_set_preempt_only(q); |
621 | + if (sdev->quiesced_by == current) |
622 | + return 0; |
623 | + |
624 | + blk_set_pm_only(q); |
625 | |
626 | blk_mq_freeze_queue(q); |
627 | /* |
628 | - * Ensure that the effect of blk_set_preempt_only() will be visible |
629 | + * Ensure that the effect of blk_set_pm_only() will be visible |
630 | * for percpu_ref_tryget() callers that occur after the queue |
631 | * unfreeze even if the queue was already frozen before this function |
632 | * was called. See also https://lwn.net/Articles/573497/. |
633 | @@ -3076,7 +3079,7 @@ scsi_device_quiesce(struct scsi_device *sdev) |
634 | if (err == 0) |
635 | sdev->quiesced_by = current; |
636 | else |
637 | - blk_clear_preempt_only(q); |
638 | + blk_clear_pm_only(q); |
639 | mutex_unlock(&sdev->state_mutex); |
640 | |
641 | return err; |
642 | @@ -3099,8 +3102,10 @@ void scsi_device_resume(struct scsi_device *sdev) |
643 | * device deleted during suspend) |
644 | */ |
645 | mutex_lock(&sdev->state_mutex); |
646 | - sdev->quiesced_by = NULL; |
647 | - blk_clear_preempt_only(sdev->request_queue); |
648 | + if (sdev->quiesced_by) { |
649 | + sdev->quiesced_by = NULL; |
650 | + blk_clear_pm_only(sdev->request_queue); |
651 | + } |
652 | if (sdev->sdev_state == SDEV_QUIESCE) |
653 | scsi_device_set_state(sdev, SDEV_RUNNING); |
654 | mutex_unlock(&sdev->state_mutex); |
655 | diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c |
656 | index 03614ef64ca4..3f68edde0f03 100644 |
657 | --- a/drivers/usb/dwc2/gadget.c |
658 | +++ b/drivers/usb/dwc2/gadget.c |
659 | @@ -3125,6 +3125,7 @@ void dwc2_hsotg_disconnect(struct dwc2_hsotg *hsotg) |
660 | hsotg->connected = 0; |
661 | hsotg->test_mode = 0; |
662 | |
663 | + /* all endpoints should be shutdown */ |
664 | for (ep = 0; ep < hsotg->num_of_eps; ep++) { |
665 | if (hsotg->eps_in[ep]) |
666 | kill_all_requests(hsotg, hsotg->eps_in[ep], |
667 | @@ -3175,6 +3176,7 @@ static void dwc2_hsotg_irq_fifoempty(struct dwc2_hsotg *hsotg, bool periodic) |
668 | GINTSTS_PTXFEMP | \ |
669 | GINTSTS_RXFLVL) |
670 | |
671 | +static int dwc2_hsotg_ep_disable(struct usb_ep *ep); |
672 | /** |
673 | * dwc2_hsotg_core_init - issue softreset to the core |
674 | * @hsotg: The device state |
675 | @@ -3189,13 +3191,23 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, |
676 | u32 val; |
677 | u32 usbcfg; |
678 | u32 dcfg = 0; |
679 | + int ep; |
680 | |
681 | /* Kill any ep0 requests as controller will be reinitialized */ |
682 | kill_all_requests(hsotg, hsotg->eps_out[0], -ECONNRESET); |
683 | |
684 | - if (!is_usb_reset) |
685 | + if (!is_usb_reset) { |
686 | if (dwc2_core_reset(hsotg, true)) |
687 | return; |
688 | + } else { |
689 | + /* all endpoints should be shutdown */ |
690 | + for (ep = 1; ep < hsotg->num_of_eps; ep++) { |
691 | + if (hsotg->eps_in[ep]) |
692 | + dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); |
693 | + if (hsotg->eps_out[ep]) |
694 | + dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); |
695 | + } |
696 | + } |
697 | |
698 | /* |
699 | * we must now enable ep0 ready for host detection and then |
700 | @@ -3993,7 +4005,6 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep) |
701 | struct dwc2_hsotg *hsotg = hs_ep->parent; |
702 | int dir_in = hs_ep->dir_in; |
703 | int index = hs_ep->index; |
704 | - unsigned long flags; |
705 | u32 epctrl_reg; |
706 | u32 ctrl; |
707 | |
708 | @@ -4011,8 +4022,6 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep) |
709 | |
710 | epctrl_reg = dir_in ? DIEPCTL(index) : DOEPCTL(index); |
711 | |
712 | - spin_lock_irqsave(&hsotg->lock, flags); |
713 | - |
714 | ctrl = dwc2_readl(hsotg, epctrl_reg); |
715 | |
716 | if (ctrl & DXEPCTL_EPENA) |
717 | @@ -4035,10 +4044,22 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep) |
718 | hs_ep->fifo_index = 0; |
719 | hs_ep->fifo_size = 0; |
720 | |
721 | - spin_unlock_irqrestore(&hsotg->lock, flags); |
722 | return 0; |
723 | } |
724 | |
725 | +static int dwc2_hsotg_ep_disable_lock(struct usb_ep *ep) |
726 | +{ |
727 | + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); |
728 | + struct dwc2_hsotg *hsotg = hs_ep->parent; |
729 | + unsigned long flags; |
730 | + int ret; |
731 | + |
732 | + spin_lock_irqsave(&hsotg->lock, flags); |
733 | + ret = dwc2_hsotg_ep_disable(ep); |
734 | + spin_unlock_irqrestore(&hsotg->lock, flags); |
735 | + return ret; |
736 | +} |
737 | + |
738 | /** |
739 | * on_list - check request is on the given endpoint |
740 | * @ep: The endpoint to check. |
741 | @@ -4186,7 +4207,7 @@ static int dwc2_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value) |
742 | |
743 | static const struct usb_ep_ops dwc2_hsotg_ep_ops = { |
744 | .enable = dwc2_hsotg_ep_enable, |
745 | - .disable = dwc2_hsotg_ep_disable, |
746 | + .disable = dwc2_hsotg_ep_disable_lock, |
747 | .alloc_request = dwc2_hsotg_ep_alloc_request, |
748 | .free_request = dwc2_hsotg_ep_free_request, |
749 | .queue = dwc2_hsotg_ep_queue_lock, |
750 | @@ -4326,9 +4347,9 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget) |
751 | /* all endpoints should be shutdown */ |
752 | for (ep = 1; ep < hsotg->num_of_eps; ep++) { |
753 | if (hsotg->eps_in[ep]) |
754 | - dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); |
755 | + dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); |
756 | if (hsotg->eps_out[ep]) |
757 | - dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); |
758 | + dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep); |
759 | } |
760 | |
761 | spin_lock_irqsave(&hsotg->lock, flags); |
762 | @@ -4776,9 +4797,9 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg) |
763 | |
764 | for (ep = 0; ep < hsotg->num_of_eps; ep++) { |
765 | if (hsotg->eps_in[ep]) |
766 | - dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); |
767 | + dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); |
768 | if (hsotg->eps_out[ep]) |
769 | - dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); |
770 | + dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep); |
771 | } |
772 | } |
773 | |
774 | diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c |
775 | index ae704658b528..124356dc39e1 100644 |
776 | --- a/drivers/vhost/net.c |
777 | +++ b/drivers/vhost/net.c |
778 | @@ -497,12 +497,6 @@ static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter, |
779 | return iov_iter_count(iter); |
780 | } |
781 | |
782 | -static bool vhost_exceeds_weight(int pkts, int total_len) |
783 | -{ |
784 | - return total_len >= VHOST_NET_WEIGHT || |
785 | - pkts >= VHOST_NET_PKT_WEIGHT; |
786 | -} |
787 | - |
788 | static int get_tx_bufs(struct vhost_net *net, |
789 | struct vhost_net_virtqueue *nvq, |
790 | struct msghdr *msg, |
791 | @@ -557,7 +551,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) |
792 | int err; |
793 | int sent_pkts = 0; |
794 | |
795 | - for (;;) { |
796 | + do { |
797 | bool busyloop_intr = false; |
798 | |
799 | head = get_tx_bufs(net, nvq, &msg, &out, &in, &len, |
800 | @@ -598,11 +592,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) |
801 | err, len); |
802 | if (++nvq->done_idx >= VHOST_NET_BATCH) |
803 | vhost_net_signal_used(nvq); |
804 | - if (vhost_exceeds_weight(++sent_pkts, total_len)) { |
805 | - vhost_poll_queue(&vq->poll); |
806 | - break; |
807 | - } |
808 | - } |
809 | + } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); |
810 | |
811 | vhost_net_signal_used(nvq); |
812 | } |
813 | @@ -626,7 +616,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) |
814 | bool zcopy_used; |
815 | int sent_pkts = 0; |
816 | |
817 | - for (;;) { |
818 | + do { |
819 | bool busyloop_intr; |
820 | |
821 | /* Release DMAs done buffers first */ |
822 | @@ -701,11 +691,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) |
823 | else |
824 | vhost_zerocopy_signal_used(net, vq); |
825 | vhost_net_tx_packet(net); |
826 | - if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) { |
827 | - vhost_poll_queue(&vq->poll); |
828 | - break; |
829 | - } |
830 | - } |
831 | + } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); |
832 | } |
833 | |
834 | /* Expects to be always run from workqueue - which acts as |
835 | @@ -941,8 +927,11 @@ static void handle_rx(struct vhost_net *net) |
836 | vq->log : NULL; |
837 | mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); |
838 | |
839 | - while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk, |
840 | - &busyloop_intr))) { |
841 | + do { |
842 | + sock_len = vhost_net_rx_peek_head_len(net, sock->sk, |
843 | + &busyloop_intr); |
844 | + if (!sock_len) |
845 | + break; |
846 | sock_len += sock_hlen; |
847 | vhost_len = sock_len + vhost_hlen; |
848 | headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx, |
849 | @@ -1027,14 +1016,11 @@ static void handle_rx(struct vhost_net *net) |
850 | vhost_log_write(vq, vq_log, log, vhost_len, |
851 | vq->iov, in); |
852 | total_len += vhost_len; |
853 | - if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) { |
854 | - vhost_poll_queue(&vq->poll); |
855 | - goto out; |
856 | - } |
857 | - } |
858 | + } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len))); |
859 | + |
860 | if (unlikely(busyloop_intr)) |
861 | vhost_poll_queue(&vq->poll); |
862 | - else |
863 | + else if (!sock_len) |
864 | vhost_net_enable_vq(net, vq); |
865 | out: |
866 | vhost_net_signal_used(nvq); |
867 | @@ -1115,7 +1101,8 @@ static int vhost_net_open(struct inode *inode, struct file *f) |
868 | vhost_net_buf_init(&n->vqs[i].rxq); |
869 | } |
870 | vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, |
871 | - UIO_MAXIOV + VHOST_NET_BATCH); |
872 | + UIO_MAXIOV + VHOST_NET_BATCH, |
873 | + VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT); |
874 | |
875 | vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); |
876 | vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); |
877 | diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c |
878 | index 0cfa925be4ec..5e298d9287f1 100644 |
879 | --- a/drivers/vhost/scsi.c |
880 | +++ b/drivers/vhost/scsi.c |
881 | @@ -57,6 +57,12 @@ |
882 | #define VHOST_SCSI_PREALLOC_UPAGES 2048 |
883 | #define VHOST_SCSI_PREALLOC_PROT_SGLS 2048 |
884 | |
885 | +/* Max number of requests before requeueing the job. |
886 | + * Using this limit prevents one virtqueue from starving others with |
887 | + * request. |
888 | + */ |
889 | +#define VHOST_SCSI_WEIGHT 256 |
890 | + |
891 | struct vhost_scsi_inflight { |
892 | /* Wait for the flush operation to finish */ |
893 | struct completion comp; |
894 | @@ -811,7 +817,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) |
895 | u64 tag; |
896 | u32 exp_data_len, data_direction; |
897 | unsigned int out = 0, in = 0; |
898 | - int head, ret, prot_bytes; |
899 | + int head, ret, prot_bytes, c = 0; |
900 | size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp); |
901 | size_t out_size, in_size; |
902 | u16 lun; |
903 | @@ -830,7 +836,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) |
904 | |
905 | vhost_disable_notify(&vs->dev, vq); |
906 | |
907 | - for (;;) { |
908 | + do { |
909 | head = vhost_get_vq_desc(vq, vq->iov, |
910 | ARRAY_SIZE(vq->iov), &out, &in, |
911 | NULL, NULL); |
912 | @@ -1045,7 +1051,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) |
913 | */ |
914 | INIT_WORK(&cmd->work, vhost_scsi_submission_work); |
915 | queue_work(vhost_scsi_workqueue, &cmd->work); |
916 | - } |
917 | + } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); |
918 | out: |
919 | mutex_unlock(&vq->mutex); |
920 | } |
921 | @@ -1398,7 +1404,8 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) |
922 | vqs[i] = &vs->vqs[i].vq; |
923 | vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; |
924 | } |
925 | - vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV); |
926 | + vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, |
927 | + VHOST_SCSI_WEIGHT, 0); |
928 | |
929 | vhost_scsi_init_inflight(vs, NULL); |
930 | |
931 | diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c |
932 | index c163bc15976a..0752f8dc47b1 100644 |
933 | --- a/drivers/vhost/vhost.c |
934 | +++ b/drivers/vhost/vhost.c |
935 | @@ -413,8 +413,24 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) |
936 | vhost_vq_free_iovecs(dev->vqs[i]); |
937 | } |
938 | |
939 | +bool vhost_exceeds_weight(struct vhost_virtqueue *vq, |
940 | + int pkts, int total_len) |
941 | +{ |
942 | + struct vhost_dev *dev = vq->dev; |
943 | + |
944 | + if ((dev->byte_weight && total_len >= dev->byte_weight) || |
945 | + pkts >= dev->weight) { |
946 | + vhost_poll_queue(&vq->poll); |
947 | + return true; |
948 | + } |
949 | + |
950 | + return false; |
951 | +} |
952 | +EXPORT_SYMBOL_GPL(vhost_exceeds_weight); |
953 | + |
954 | void vhost_dev_init(struct vhost_dev *dev, |
955 | - struct vhost_virtqueue **vqs, int nvqs, int iov_limit) |
956 | + struct vhost_virtqueue **vqs, int nvqs, |
957 | + int iov_limit, int weight, int byte_weight) |
958 | { |
959 | struct vhost_virtqueue *vq; |
960 | int i; |
961 | @@ -428,6 +444,8 @@ void vhost_dev_init(struct vhost_dev *dev, |
962 | dev->mm = NULL; |
963 | dev->worker = NULL; |
964 | dev->iov_limit = iov_limit; |
965 | + dev->weight = weight; |
966 | + dev->byte_weight = byte_weight; |
967 | init_llist_head(&dev->work_list); |
968 | init_waitqueue_head(&dev->wait); |
969 | INIT_LIST_HEAD(&dev->read_list); |
970 | diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h |
971 | index 9490e7ddb340..27a78a9b8cc7 100644 |
972 | --- a/drivers/vhost/vhost.h |
973 | +++ b/drivers/vhost/vhost.h |
974 | @@ -171,10 +171,13 @@ struct vhost_dev { |
975 | struct list_head pending_list; |
976 | wait_queue_head_t wait; |
977 | int iov_limit; |
978 | + int weight; |
979 | + int byte_weight; |
980 | }; |
981 | |
982 | +bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); |
983 | void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, |
984 | - int nvqs, int iov_limit); |
985 | + int nvqs, int iov_limit, int weight, int byte_weight); |
986 | long vhost_dev_set_owner(struct vhost_dev *dev); |
987 | bool vhost_dev_has_owner(struct vhost_dev *dev); |
988 | long vhost_dev_check_owner(struct vhost_dev *); |
989 | diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c |
990 | index e440f87ae1d6..bab495d73195 100644 |
991 | --- a/drivers/vhost/vsock.c |
992 | +++ b/drivers/vhost/vsock.c |
993 | @@ -21,6 +21,14 @@ |
994 | #include "vhost.h" |
995 | |
996 | #define VHOST_VSOCK_DEFAULT_HOST_CID 2 |
997 | +/* Max number of bytes transferred before requeueing the job. |
998 | + * Using this limit prevents one virtqueue from starving others. */ |
999 | +#define VHOST_VSOCK_WEIGHT 0x80000 |
1000 | +/* Max number of packets transferred before requeueing the job. |
1001 | + * Using this limit prevents one virtqueue from starving others with |
1002 | + * small pkts. |
1003 | + */ |
1004 | +#define VHOST_VSOCK_PKT_WEIGHT 256 |
1005 | |
1006 | enum { |
1007 | VHOST_VSOCK_FEATURES = VHOST_FEATURES, |
1008 | @@ -78,6 +86,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, |
1009 | struct vhost_virtqueue *vq) |
1010 | { |
1011 | struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; |
1012 | + int pkts = 0, total_len = 0; |
1013 | bool added = false; |
1014 | bool restart_tx = false; |
1015 | |
1016 | @@ -89,7 +98,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, |
1017 | /* Avoid further vmexits, we're already processing the virtqueue */ |
1018 | vhost_disable_notify(&vsock->dev, vq); |
1019 | |
1020 | - for (;;) { |
1021 | + do { |
1022 | struct virtio_vsock_pkt *pkt; |
1023 | struct iov_iter iov_iter; |
1024 | unsigned out, in; |
1025 | @@ -174,8 +183,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, |
1026 | */ |
1027 | virtio_transport_deliver_tap_pkt(pkt); |
1028 | |
1029 | + total_len += pkt->len; |
1030 | virtio_transport_free_pkt(pkt); |
1031 | - } |
1032 | + } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); |
1033 | if (added) |
1034 | vhost_signal(&vsock->dev, vq); |
1035 | |
1036 | @@ -350,7 +360,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) |
1037 | struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, |
1038 | dev); |
1039 | struct virtio_vsock_pkt *pkt; |
1040 | - int head; |
1041 | + int head, pkts = 0, total_len = 0; |
1042 | unsigned int out, in; |
1043 | bool added = false; |
1044 | |
1045 | @@ -360,7 +370,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) |
1046 | goto out; |
1047 | |
1048 | vhost_disable_notify(&vsock->dev, vq); |
1049 | - for (;;) { |
1050 | + do { |
1051 | u32 len; |
1052 | |
1053 | if (!vhost_vsock_more_replies(vsock)) { |
1054 | @@ -401,9 +411,11 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) |
1055 | else |
1056 | virtio_transport_free_pkt(pkt); |
1057 | |
1058 | - vhost_add_used(vq, head, sizeof(pkt->hdr) + len); |
1059 | + len += sizeof(pkt->hdr); |
1060 | + vhost_add_used(vq, head, len); |
1061 | + total_len += len; |
1062 | added = true; |
1063 | - } |
1064 | + } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); |
1065 | |
1066 | no_more_replies: |
1067 | if (added) |
1068 | @@ -531,7 +543,9 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) |
1069 | vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; |
1070 | vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; |
1071 | |
1072 | - vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV); |
1073 | + vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), |
1074 | + UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT, |
1075 | + VHOST_VSOCK_WEIGHT); |
1076 | |
1077 | file->private_data = vsock; |
1078 | spin_lock_init(&vsock->send_pkt_list_lock); |
1079 | diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c |
1080 | index c7542e8dd096..a11fa0b6b34d 100644 |
1081 | --- a/fs/ceph/caps.c |
1082 | +++ b/fs/ceph/caps.c |
1083 | @@ -1237,20 +1237,23 @@ static int send_cap_msg(struct cap_msg_args *arg) |
1084 | } |
1085 | |
1086 | /* |
1087 | - * Queue cap releases when an inode is dropped from our cache. Since |
1088 | - * inode is about to be destroyed, there is no need for i_ceph_lock. |
1089 | + * Queue cap releases when an inode is dropped from our cache. |
1090 | */ |
1091 | void ceph_queue_caps_release(struct inode *inode) |
1092 | { |
1093 | struct ceph_inode_info *ci = ceph_inode(inode); |
1094 | struct rb_node *p; |
1095 | |
1096 | + /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) |
1097 | + * may call __ceph_caps_issued_mask() on a freeing inode. */ |
1098 | + spin_lock(&ci->i_ceph_lock); |
1099 | p = rb_first(&ci->i_caps); |
1100 | while (p) { |
1101 | struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); |
1102 | p = rb_next(p); |
1103 | __ceph_remove_cap(cap, true); |
1104 | } |
1105 | + spin_unlock(&ci->i_ceph_lock); |
1106 | } |
1107 | |
1108 | /* |
1109 | diff --git a/fs/exec.c b/fs/exec.c |
1110 | index 433b1257694a..561ea64829ec 100644 |
1111 | --- a/fs/exec.c |
1112 | +++ b/fs/exec.c |
1113 | @@ -1826,7 +1826,7 @@ static int __do_execve_file(int fd, struct filename *filename, |
1114 | membarrier_execve(current); |
1115 | rseq_execve(current); |
1116 | acct_update_integrals(current); |
1117 | - task_numa_free(current); |
1118 | + task_numa_free(current, false); |
1119 | free_bprm(bprm); |
1120 | kfree(pathbuf); |
1121 | if (filename) |
1122 | diff --git a/fs/nfs/client.c b/fs/nfs/client.c |
1123 | index c092661147b3..0a2b59c1ecb3 100644 |
1124 | --- a/fs/nfs/client.c |
1125 | +++ b/fs/nfs/client.c |
1126 | @@ -416,10 +416,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) |
1127 | clp = nfs_match_client(cl_init); |
1128 | if (clp) { |
1129 | spin_unlock(&nn->nfs_client_lock); |
1130 | - if (IS_ERR(clp)) |
1131 | - return clp; |
1132 | if (new) |
1133 | new->rpc_ops->free_client(new); |
1134 | + if (IS_ERR(clp)) |
1135 | + return clp; |
1136 | return nfs_found_client(cl_init, clp); |
1137 | } |
1138 | if (new) { |
1139 | diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c |
1140 | index 8bfaa658b2c1..71b2e390becf 100644 |
1141 | --- a/fs/nfs/dir.c |
1142 | +++ b/fs/nfs/dir.c |
1143 | @@ -1072,6 +1072,100 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, |
1144 | return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU); |
1145 | } |
1146 | |
1147 | +static int |
1148 | +nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry, |
1149 | + struct inode *inode, int error) |
1150 | +{ |
1151 | + switch (error) { |
1152 | + case 1: |
1153 | + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", |
1154 | + __func__, dentry); |
1155 | + return 1; |
1156 | + case 0: |
1157 | + nfs_mark_for_revalidate(dir); |
1158 | + if (inode && S_ISDIR(inode->i_mode)) { |
1159 | + /* Purge readdir caches. */ |
1160 | + nfs_zap_caches(inode); |
1161 | + /* |
1162 | + * We can't d_drop the root of a disconnected tree: |
1163 | + * its d_hash is on the s_anon list and d_drop() would hide |
1164 | + * it from shrink_dcache_for_unmount(), leading to busy |
1165 | + * inodes on unmount and further oopses. |
1166 | + */ |
1167 | + if (IS_ROOT(dentry)) |
1168 | + return 1; |
1169 | + } |
1170 | + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", |
1171 | + __func__, dentry); |
1172 | + return 0; |
1173 | + } |
1174 | + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", |
1175 | + __func__, dentry, error); |
1176 | + return error; |
1177 | +} |
1178 | + |
1179 | +static int |
1180 | +nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry, |
1181 | + unsigned int flags) |
1182 | +{ |
1183 | + int ret = 1; |
1184 | + if (nfs_neg_need_reval(dir, dentry, flags)) { |
1185 | + if (flags & LOOKUP_RCU) |
1186 | + return -ECHILD; |
1187 | + ret = 0; |
1188 | + } |
1189 | + return nfs_lookup_revalidate_done(dir, dentry, NULL, ret); |
1190 | +} |
1191 | + |
1192 | +static int |
1193 | +nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry, |
1194 | + struct inode *inode) |
1195 | +{ |
1196 | + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
1197 | + return nfs_lookup_revalidate_done(dir, dentry, inode, 1); |
1198 | +} |
1199 | + |
1200 | +static int |
1201 | +nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, |
1202 | + struct inode *inode) |
1203 | +{ |
1204 | + struct nfs_fh *fhandle; |
1205 | + struct nfs_fattr *fattr; |
1206 | + struct nfs4_label *label; |
1207 | + int ret; |
1208 | + |
1209 | + ret = -ENOMEM; |
1210 | + fhandle = nfs_alloc_fhandle(); |
1211 | + fattr = nfs_alloc_fattr(); |
1212 | + label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); |
1213 | + if (fhandle == NULL || fattr == NULL || IS_ERR(label)) |
1214 | + goto out; |
1215 | + |
1216 | + ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); |
1217 | + if (ret < 0) { |
1218 | + if (ret == -ESTALE || ret == -ENOENT) |
1219 | + ret = 0; |
1220 | + goto out; |
1221 | + } |
1222 | + ret = 0; |
1223 | + if (nfs_compare_fh(NFS_FH(inode), fhandle)) |
1224 | + goto out; |
1225 | + if (nfs_refresh_inode(inode, fattr) < 0) |
1226 | + goto out; |
1227 | + |
1228 | + nfs_setsecurity(inode, fattr, label); |
1229 | + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
1230 | + |
1231 | + /* set a readdirplus hint that we had a cache miss */ |
1232 | + nfs_force_use_readdirplus(dir); |
1233 | + ret = 1; |
1234 | +out: |
1235 | + nfs_free_fattr(fattr); |
1236 | + nfs_free_fhandle(fhandle); |
1237 | + nfs4_label_free(label); |
1238 | + return nfs_lookup_revalidate_done(dir, dentry, inode, ret); |
1239 | +} |
1240 | + |
1241 | /* |
1242 | * This is called every time the dcache has a lookup hit, |
1243 | * and we should check whether we can really trust that |
1244 | @@ -1083,58 +1177,36 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, |
1245 | * If the parent directory is seen to have changed, we throw out the |
1246 | * cached dentry and do a new lookup. |
1247 | */ |
1248 | -static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) |
1249 | +static int |
1250 | +nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, |
1251 | + unsigned int flags) |
1252 | { |
1253 | - struct inode *dir; |
1254 | struct inode *inode; |
1255 | - struct dentry *parent; |
1256 | - struct nfs_fh *fhandle = NULL; |
1257 | - struct nfs_fattr *fattr = NULL; |
1258 | - struct nfs4_label *label = NULL; |
1259 | int error; |
1260 | |
1261 | - if (flags & LOOKUP_RCU) { |
1262 | - parent = READ_ONCE(dentry->d_parent); |
1263 | - dir = d_inode_rcu(parent); |
1264 | - if (!dir) |
1265 | - return -ECHILD; |
1266 | - } else { |
1267 | - parent = dget_parent(dentry); |
1268 | - dir = d_inode(parent); |
1269 | - } |
1270 | nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); |
1271 | inode = d_inode(dentry); |
1272 | |
1273 | - if (!inode) { |
1274 | - if (nfs_neg_need_reval(dir, dentry, flags)) { |
1275 | - if (flags & LOOKUP_RCU) |
1276 | - return -ECHILD; |
1277 | - goto out_bad; |
1278 | - } |
1279 | - goto out_valid; |
1280 | - } |
1281 | + if (!inode) |
1282 | + return nfs_lookup_revalidate_negative(dir, dentry, flags); |
1283 | |
1284 | if (is_bad_inode(inode)) { |
1285 | - if (flags & LOOKUP_RCU) |
1286 | - return -ECHILD; |
1287 | dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", |
1288 | __func__, dentry); |
1289 | goto out_bad; |
1290 | } |
1291 | |
1292 | if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) |
1293 | - goto out_set_verifier; |
1294 | + return nfs_lookup_revalidate_delegated(dir, dentry, inode); |
1295 | |
1296 | /* Force a full look up iff the parent directory has changed */ |
1297 | if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) && |
1298 | nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { |
1299 | error = nfs_lookup_verify_inode(inode, flags); |
1300 | if (error) { |
1301 | - if (flags & LOOKUP_RCU) |
1302 | - return -ECHILD; |
1303 | if (error == -ESTALE) |
1304 | - goto out_zap_parent; |
1305 | - goto out_error; |
1306 | + nfs_zap_caches(dir); |
1307 | + goto out_bad; |
1308 | } |
1309 | nfs_advise_use_readdirplus(dir); |
1310 | goto out_valid; |
1311 | @@ -1146,81 +1218,45 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) |
1312 | if (NFS_STALE(inode)) |
1313 | goto out_bad; |
1314 | |
1315 | - error = -ENOMEM; |
1316 | - fhandle = nfs_alloc_fhandle(); |
1317 | - fattr = nfs_alloc_fattr(); |
1318 | - if (fhandle == NULL || fattr == NULL) |
1319 | - goto out_error; |
1320 | - |
1321 | - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); |
1322 | - if (IS_ERR(label)) |
1323 | - goto out_error; |
1324 | - |
1325 | trace_nfs_lookup_revalidate_enter(dir, dentry, flags); |
1326 | - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); |
1327 | + error = nfs_lookup_revalidate_dentry(dir, dentry, inode); |
1328 | trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error); |
1329 | - if (error == -ESTALE || error == -ENOENT) |
1330 | - goto out_bad; |
1331 | - if (error) |
1332 | - goto out_error; |
1333 | - if (nfs_compare_fh(NFS_FH(inode), fhandle)) |
1334 | - goto out_bad; |
1335 | - if ((error = nfs_refresh_inode(inode, fattr)) != 0) |
1336 | - goto out_bad; |
1337 | - |
1338 | - nfs_setsecurity(inode, fattr, label); |
1339 | - |
1340 | - nfs_free_fattr(fattr); |
1341 | - nfs_free_fhandle(fhandle); |
1342 | - nfs4_label_free(label); |
1343 | + return error; |
1344 | +out_valid: |
1345 | + return nfs_lookup_revalidate_done(dir, dentry, inode, 1); |
1346 | +out_bad: |
1347 | + if (flags & LOOKUP_RCU) |
1348 | + return -ECHILD; |
1349 | + return nfs_lookup_revalidate_done(dir, dentry, inode, 0); |
1350 | +} |
1351 | |
1352 | - /* set a readdirplus hint that we had a cache miss */ |
1353 | - nfs_force_use_readdirplus(dir); |
1354 | +static int |
1355 | +__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, |
1356 | + int (*reval)(struct inode *, struct dentry *, unsigned int)) |
1357 | +{ |
1358 | + struct dentry *parent; |
1359 | + struct inode *dir; |
1360 | + int ret; |
1361 | |
1362 | -out_set_verifier: |
1363 | - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
1364 | - out_valid: |
1365 | if (flags & LOOKUP_RCU) { |
1366 | + parent = READ_ONCE(dentry->d_parent); |
1367 | + dir = d_inode_rcu(parent); |
1368 | + if (!dir) |
1369 | + return -ECHILD; |
1370 | + ret = reval(dir, dentry, flags); |
1371 | if (parent != READ_ONCE(dentry->d_parent)) |
1372 | return -ECHILD; |
1373 | - } else |
1374 | + } else { |
1375 | + parent = dget_parent(dentry); |
1376 | + ret = reval(d_inode(parent), dentry, flags); |
1377 | dput(parent); |
1378 | - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", |
1379 | - __func__, dentry); |
1380 | - return 1; |
1381 | -out_zap_parent: |
1382 | - nfs_zap_caches(dir); |
1383 | - out_bad: |
1384 | - WARN_ON(flags & LOOKUP_RCU); |
1385 | - nfs_free_fattr(fattr); |
1386 | - nfs_free_fhandle(fhandle); |
1387 | - nfs4_label_free(label); |
1388 | - nfs_mark_for_revalidate(dir); |
1389 | - if (inode && S_ISDIR(inode->i_mode)) { |
1390 | - /* Purge readdir caches. */ |
1391 | - nfs_zap_caches(inode); |
1392 | - /* |
1393 | - * We can't d_drop the root of a disconnected tree: |
1394 | - * its d_hash is on the s_anon list and d_drop() would hide |
1395 | - * it from shrink_dcache_for_unmount(), leading to busy |
1396 | - * inodes on unmount and further oopses. |
1397 | - */ |
1398 | - if (IS_ROOT(dentry)) |
1399 | - goto out_valid; |
1400 | } |
1401 | - dput(parent); |
1402 | - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", |
1403 | - __func__, dentry); |
1404 | - return 0; |
1405 | -out_error: |
1406 | - WARN_ON(flags & LOOKUP_RCU); |
1407 | - nfs_free_fattr(fattr); |
1408 | - nfs_free_fhandle(fhandle); |
1409 | - nfs4_label_free(label); |
1410 | - dput(parent); |
1411 | - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", |
1412 | - __func__, dentry, error); |
1413 | - return error; |
1414 | + return ret; |
1415 | +} |
1416 | + |
1417 | +static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) |
1418 | +{ |
1419 | + return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); |
1420 | } |
1421 | |
1422 | /* |
1423 | @@ -1579,62 +1615,55 @@ no_open: |
1424 | } |
1425 | EXPORT_SYMBOL_GPL(nfs_atomic_open); |
1426 | |
1427 | -static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) |
1428 | +static int |
1429 | +nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, |
1430 | + unsigned int flags) |
1431 | { |
1432 | struct inode *inode; |
1433 | - int ret = 0; |
1434 | |
1435 | if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) |
1436 | - goto no_open; |
1437 | + goto full_reval; |
1438 | if (d_mountpoint(dentry)) |
1439 | - goto no_open; |
1440 | - if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1) |
1441 | - goto no_open; |
1442 | + goto full_reval; |
1443 | |
1444 | inode = d_inode(dentry); |
1445 | |
1446 | /* We can't create new files in nfs_open_revalidate(), so we |
1447 | * optimize away revalidation of negative dentries. |
1448 | */ |
1449 | - if (inode == NULL) { |
1450 | - struct dentry *parent; |
1451 | - struct inode *dir; |
1452 | - |
1453 | - if (flags & LOOKUP_RCU) { |
1454 | - parent = READ_ONCE(dentry->d_parent); |
1455 | - dir = d_inode_rcu(parent); |
1456 | - if (!dir) |
1457 | - return -ECHILD; |
1458 | - } else { |
1459 | - parent = dget_parent(dentry); |
1460 | - dir = d_inode(parent); |
1461 | - } |
1462 | - if (!nfs_neg_need_reval(dir, dentry, flags)) |
1463 | - ret = 1; |
1464 | - else if (flags & LOOKUP_RCU) |
1465 | - ret = -ECHILD; |
1466 | - if (!(flags & LOOKUP_RCU)) |
1467 | - dput(parent); |
1468 | - else if (parent != READ_ONCE(dentry->d_parent)) |
1469 | - return -ECHILD; |
1470 | - goto out; |
1471 | - } |
1472 | + if (inode == NULL) |
1473 | + goto full_reval; |
1474 | + |
1475 | + if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) |
1476 | + return nfs_lookup_revalidate_delegated(dir, dentry, inode); |
1477 | |
1478 | /* NFS only supports OPEN on regular files */ |
1479 | if (!S_ISREG(inode->i_mode)) |
1480 | - goto no_open; |
1481 | + goto full_reval; |
1482 | + |
1483 | /* We cannot do exclusive creation on a positive dentry */ |
1484 | - if (flags & LOOKUP_EXCL) |
1485 | - goto no_open; |
1486 | + if (flags & (LOOKUP_EXCL | LOOKUP_REVAL)) |
1487 | + goto reval_dentry; |
1488 | + |
1489 | + /* Check if the directory changed */ |
1490 | + if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) |
1491 | + goto reval_dentry; |
1492 | |
1493 | /* Let f_op->open() actually open (and revalidate) the file */ |
1494 | - ret = 1; |
1495 | + return 1; |
1496 | +reval_dentry: |
1497 | + if (flags & LOOKUP_RCU) |
1498 | + return -ECHILD; |
1499 | +	return nfs_lookup_revalidate_dentry(dir, dentry, inode); |
1500 | |
1501 | -out: |
1502 | - return ret; |
1503 | +full_reval: |
1504 | + return nfs_do_lookup_revalidate(dir, dentry, flags); |
1505 | +} |
1506 | |
1507 | -no_open: |
1508 | - return nfs_lookup_revalidate(dentry, flags); |
1509 | +static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) |
1510 | +{ |
1511 | + return __nfs_lookup_revalidate(dentry, flags, |
1512 | + nfs4_do_lookup_revalidate); |
1513 | } |
1514 | |
1515 | #endif /* CONFIG_NFSV4 */ |
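
The two thin wrappers above exist because ->d_revalidate may be called in RCU-walk mode, where blocking and reference-taking are forbidden; __nfs_lookup_revalidate() centralizes the parent-pinning rules so both revalidate bodies stay lock-agnostic. A minimal sketch of how such callbacks get wired up; the table names below are illustrative, only the struct dentry_operations field is the real VFS hook:

/* Illustrative only: each protocol variant points ->d_revalidate at
 * its wrapper, and the wrappers share __nfs_lookup_revalidate(). */
static const struct dentry_operations example_nfs_dentry_ops = {
	.d_revalidate	= nfs_lookup_revalidate,	/* NFSv2/v3 */
};

static const struct dentry_operations example_nfs4_dentry_ops = {
	.d_revalidate	= nfs4_lookup_revalidate,	/* NFSv4 */
};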
1516 | diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c |
1517 | index 1de855e0ae61..904e08bbb289 100644 |
1518 | --- a/fs/nfs/nfs4proc.c |
1519 | +++ b/fs/nfs/nfs4proc.c |
1520 | @@ -1355,12 +1355,20 @@ static bool nfs4_mode_match_open_stateid(struct nfs4_state *state, |
1521 | return false; |
1522 | } |
1523 | |
1524 | -static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode) |
1525 | +static int can_open_cached(struct nfs4_state *state, fmode_t mode, |
1526 | + int open_mode, enum open_claim_type4 claim) |
1527 | { |
1528 | int ret = 0; |
1529 | |
1530 | if (open_mode & (O_EXCL|O_TRUNC)) |
1531 | goto out; |
1532 | + switch (claim) { |
1533 | + case NFS4_OPEN_CLAIM_NULL: |
1534 | + case NFS4_OPEN_CLAIM_FH: |
1535 | + goto out; |
1536 | + default: |
1537 | + break; |
1538 | + } |
1539 | switch (mode & (FMODE_READ|FMODE_WRITE)) { |
1540 | case FMODE_READ: |
1541 | ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 |
1542 | @@ -1753,7 +1761,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) |
1543 | |
1544 | for (;;) { |
1545 | spin_lock(&state->owner->so_lock); |
1546 | - if (can_open_cached(state, fmode, open_mode)) { |
1547 | + if (can_open_cached(state, fmode, open_mode, claim)) { |
1548 | update_open_stateflags(state, fmode); |
1549 | spin_unlock(&state->owner->so_lock); |
1550 | goto out_return_state; |
1551 | @@ -2282,7 +2290,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) |
1552 | if (data->state != NULL) { |
1553 | struct nfs_delegation *delegation; |
1554 | |
1555 | - if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags)) |
1556 | + if (can_open_cached(data->state, data->o_arg.fmode, |
1557 | + data->o_arg.open_flags, claim)) |
1558 | goto out_no_action; |
1559 | rcu_read_lock(); |
1560 | delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); |
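
The extra claim argument shuts off the cached-open fast path for NFS4_OPEN_CLAIM_NULL and NFS4_OPEN_CLAIM_FH, presumably because those claims name the file by path or filehandle and the server needs to see the OPEN rather than have the client satisfy it from existing open state. A reduced sketch of the gating, with a hypothetical helper name:

/* Sketch (illustrative): cached state may only short-circuit an OPEN
 * whose claim type does not need a round trip to the server. */
static bool claim_allows_cached_open(enum open_claim_type4 claim)
{
	switch (claim) {
	case NFS4_OPEN_CLAIM_NULL:	/* open by name */
	case NFS4_OPEN_CLAIM_FH:	/* open by filehandle */
		return false;
	default:			/* e.g. reclaim-type claims */
		return true;
	}
}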
1561 | diff --git a/fs/proc/base.c b/fs/proc/base.c |
1562 | index a7fbda72afeb..3b9b726b1a6c 100644 |
1563 | --- a/fs/proc/base.c |
1564 | +++ b/fs/proc/base.c |
1565 | @@ -205,12 +205,53 @@ static int proc_root_link(struct dentry *dentry, struct path *path) |
1566 | return result; |
1567 | } |
1568 | |
1569 | +/* |
1570 | + * If the user used setproctitle(), we just get the string from |
1571 | + * user space at arg_start, and limit it to a maximum of one page. |
1572 | + */ |
1573 | +static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf, |
1574 | + size_t count, unsigned long pos, |
1575 | + unsigned long arg_start) |
1576 | +{ |
1577 | + char *page; |
1578 | + int ret, got; |
1579 | + |
1580 | + if (pos >= PAGE_SIZE) |
1581 | + return 0; |
1582 | + |
1583 | + page = (char *)__get_free_page(GFP_KERNEL); |
1584 | + if (!page) |
1585 | + return -ENOMEM; |
1586 | + |
1587 | + ret = 0; |
1588 | + got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON); |
1589 | + if (got > 0) { |
1590 | + int len = strnlen(page, got); |
1591 | + |
1592 | + /* Include the NUL character if it was found */ |
1593 | + if (len < got) |
1594 | + len++; |
1595 | + |
1596 | + if (len > pos) { |
1597 | + len -= pos; |
1598 | + if (len > count) |
1599 | + len = count; |
1600 | + len -= copy_to_user(buf, page+pos, len); |
1601 | + if (!len) |
1602 | + len = -EFAULT; |
1603 | + ret = len; |
1604 | + } |
1605 | + } |
1606 | + free_page((unsigned long)page); |
1607 | + return ret; |
1608 | +} |
1609 | + |
1610 | static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, |
1611 | size_t count, loff_t *ppos) |
1612 | { |
1613 | unsigned long arg_start, arg_end, env_start, env_end; |
1614 | unsigned long pos, len; |
1615 | - char *page; |
1616 | + char *page, c; |
1617 | |
1618 | /* Check if process spawned far enough to have cmdline. */ |
1619 | if (!mm->env_end) |
1620 | @@ -227,28 +268,42 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, |
1621 | return 0; |
1622 | |
1623 | /* |
1624 | - * We have traditionally allowed the user to re-write |
1625 | - * the argument strings and overflow the end result |
1626 | - * into the environment section. But only do that if |
1627 | - * the environment area is contiguous to the arguments. |
1628 | + * We allow setproctitle() to overwrite the argument |
1629 | + * strings, and overflow past the original end. But |
1630 | + * only when it overflows into the environment area. |
1631 | */ |
1632 | - if (env_start != arg_end || env_start >= env_end) |
1633 | + if (env_start != arg_end || env_end < env_start) |
1634 | env_start = env_end = arg_end; |
1635 | - |
1636 | - /* .. and limit it to a maximum of one page of slop */ |
1637 | - if (env_end >= arg_end + PAGE_SIZE) |
1638 | - env_end = arg_end + PAGE_SIZE - 1; |
1639 | + len = env_end - arg_start; |
1640 | |
1641 | /* We're not going to care if "*ppos" has high bits set */ |
1642 | - pos = arg_start + *ppos; |
1643 | - |
1644 | - /* .. but we do check the result is in the proper range */ |
1645 | - if (pos < arg_start || pos >= env_end) |
1646 | + pos = *ppos; |
1647 | + if (pos >= len) |
1648 | return 0; |
1649 | + if (count > len - pos) |
1650 | + count = len - pos; |
1651 | + if (!count) |
1652 | + return 0; |
1653 | + |
1654 | + /* |
1655 | + * Magical special case: if the argv[] end byte is not |
1656 | + * zero, the user has overwritten it with setproctitle(3). |
1657 | + * |
1658 | + * Possible future enhancement: do this only once when |
1659 | + * pos is 0, and set a flag in the 'struct file'. |
1660 | + */ |
1661 | + if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c) |
1662 | + return get_mm_proctitle(mm, buf, count, pos, arg_start); |
1663 | |
1664 | - /* .. and we never go past env_end */ |
1665 | - if (env_end - pos < count) |
1666 | - count = env_end - pos; |
1667 | + /* |
1668 | + * For the non-setproctitle() case we limit things strictly |
1669 | + * to the [arg_start, arg_end[ range. |
1670 | + */ |
1671 | + pos += arg_start; |
1672 | + if (pos < arg_start || pos >= arg_end) |
1673 | + return 0; |
1674 | + if (count > arg_end - pos) |
1675 | + count = arg_end - pos; |
1676 | |
1677 | page = (char *)__get_free_page(GFP_KERNEL); |
1678 | if (!page) |
1679 | @@ -258,48 +313,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, |
1680 | while (count) { |
1681 | int got; |
1682 | size_t size = min_t(size_t, PAGE_SIZE, count); |
1683 | - long offset; |
1684 | |
1685 | - /* |
1686 | - * Are we already starting past the official end? |
1687 | - * We always include the last byte that is *supposed* |
1688 | - * to be NUL |
1689 | - */ |
1690 | - offset = (pos >= arg_end) ? pos - arg_end + 1 : 0; |
1691 | - |
1692 | - got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON); |
1693 | - if (got <= offset) |
1694 | + got = access_remote_vm(mm, pos, page, size, FOLL_ANON); |
1695 | + if (got <= 0) |
1696 | break; |
1697 | - got -= offset; |
1698 | - |
1699 | - /* Don't walk past a NUL character once you hit arg_end */ |
1700 | - if (pos + got >= arg_end) { |
1701 | - int n = 0; |
1702 | - |
1703 | - /* |
1704 | - * If we started before 'arg_end' but ended up |
1705 | - * at or after it, we start the NUL character |
1706 | - * check at arg_end-1 (where we expect the normal |
1707 | - * EOF to be). |
1708 | - * |
1709 | - * NOTE! This is smaller than 'got', because |
1710 | - * pos + got >= arg_end |
1711 | - */ |
1712 | - if (pos < arg_end) |
1713 | - n = arg_end - pos - 1; |
1714 | - |
1715 | - /* Cut off at first NUL after 'n' */ |
1716 | - got = n + strnlen(page+n, offset+got-n); |
1717 | - if (got < offset) |
1718 | - break; |
1719 | - got -= offset; |
1720 | - |
1721 | - /* Include the NUL if it existed */ |
1722 | - if (got < size) |
1723 | - got++; |
1724 | - } |
1725 | - |
1726 | - got -= copy_to_user(buf, page+offset, got); |
1727 | + got -= copy_to_user(buf, page, got); |
1728 | if (unlikely(!got)) { |
1729 | if (!len) |
1730 | len = -EFAULT; |
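
The rewrite keys off one invariant: at execve() time the byte at arg_end - 1 is the NUL that terminates the last argv[] string. A title written setproctitle()-style can run past that byte into the environment area the process also owns, so a non-zero byte there means the recorded [arg_start, arg_end) window is stale. A minimal user-space sketch of such an overwrite, under the stated assumptions (glibc has no setproctitle(); the helper and its parameters below are hypothetical):

#include <string.h>

/* Hypothetical setproctitle()-style helper: argv_start points at the
 * process's original argv[0]; total_space spans the argv strings and
 * (if the daemon saved copies first) the environment strings after
 * them. None of these names come from the patch. */
static void set_proc_title(char *argv_start, size_t total_space,
			   const char *title)
{
	size_t len = strnlen(title, total_space - 1);

	memcpy(argv_start, title, len);
	argv_start[len] = '\0';
	/* If the new title runs past the original end of argv[], the
	 * byte the kernel recorded as arg_end - 1 is now mid-string and
	 * non-NUL, which is exactly the condition get_mm_cmdline()
	 * tests before switching to get_mm_proctitle(). */
}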
1731 | diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h |
1732 | index 6980014357d4..d51e10f50e75 100644 |
1733 | --- a/include/linux/blkdev.h |
1734 | +++ b/include/linux/blkdev.h |
1735 | @@ -504,6 +504,12 @@ struct request_queue { |
1736 | * various queue flags, see QUEUE_* below |
1737 | */ |
1738 | unsigned long queue_flags; |
1739 | + /* |
1740 | + * Number of contexts that have called blk_set_pm_only(). If this |
1741 | + * counter is above zero then only RQF_PM and RQF_PREEMPT requests are |
1742 | + * processed. |
1743 | + */ |
1744 | + atomic_t pm_only; |
1745 | |
1746 | /* |
1747 | * ida allocated id for this queue. Used to index queues from |
1748 | @@ -698,7 +704,6 @@ struct request_queue { |
1749 | #define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ |
1750 | #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ |
1751 | #define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */ |
1752 | -#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */ |
1753 | |
1754 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ |
1755 | (1 << QUEUE_FLAG_SAME_COMP) | \ |
1756 | @@ -736,12 +741,11 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); |
1757 | ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ |
1758 | REQ_FAILFAST_DRIVER)) |
1759 | #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) |
1760 | -#define blk_queue_preempt_only(q) \ |
1761 | - test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags) |
1762 | +#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) |
1763 | #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) |
1764 | |
1765 | -extern int blk_set_preempt_only(struct request_queue *q); |
1766 | -extern void blk_clear_preempt_only(struct request_queue *q); |
1767 | +extern void blk_set_pm_only(struct request_queue *q); |
1768 | +extern void blk_clear_pm_only(struct request_queue *q); |
1769 | |
1770 | static inline int queue_in_flight(struct request_queue *q) |
1771 | { |
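
Turning QUEUE_FLAG_PREEMPT_ONLY into the pm_only counter makes the restriction nestable and reference-counted: each blk_set_pm_only() must be balanced by a blk_clear_pm_only(), and normal dispatch resumes only when the count returns to zero (the set side is in the blk-core.c hunk of this patch; the real callers sit outside the hunks shown here). A minimal sketch of a balanced caller, with hypothetical function names:

/* Illustrative pairing only, not a real caller. */
static void my_suspend_queue(struct request_queue *q)
{
	blk_set_pm_only(q);	/* atomic_inc(&q->pm_only) */
	/* from here, only RQF_PM/RQF_PREEMPT requests are admitted */
}

static void my_resume_queue(struct request_queue *q)
{
	blk_clear_pm_only(q);	/* decrement; last caller re-opens queue */
}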
1772 | diff --git a/include/linux/iova.h b/include/linux/iova.h |
1773 | index 928442dda565..84fbe73d2ec0 100644 |
1774 | --- a/include/linux/iova.h |
1775 | +++ b/include/linux/iova.h |
1776 | @@ -156,6 +156,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, |
1777 | void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); |
1778 | void init_iova_domain(struct iova_domain *iovad, unsigned long granule, |
1779 | unsigned long start_pfn); |
1780 | +bool has_iova_flush_queue(struct iova_domain *iovad); |
1781 | int init_iova_flush_queue(struct iova_domain *iovad, |
1782 | iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); |
1783 | struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); |
1784 | @@ -236,6 +237,11 @@ static inline void init_iova_domain(struct iova_domain *iovad, |
1785 | { |
1786 | } |
1787 | |
1788 | +static inline bool has_iova_flush_queue(struct iova_domain *iovad) |
1789 | +{ |
1790 | + return false; |
1791 | +} |
1792 | + |
1793 | static inline int init_iova_flush_queue(struct iova_domain *iovad, |
1794 | iova_flush_cb flush_cb, |
1795 | iova_entry_dtor entry_dtor) |
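
has_iova_flush_queue() follows the usual pattern for optional subsystems: a real declaration when the IOVA code is built, plus a static inline stub that lets callers compile (and constant-fold to false) when it is not. The idiom in isolation, with hypothetical names:

#ifdef CONFIG_MY_FEATURE
bool my_feature_active(struct my_ctx *ctx);	/* defined in the .c file */
#else
static inline bool my_feature_active(struct my_ctx *ctx)
{
	return false;	/* compiled out: callers fall through cheaply */
}
#endif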
1796 | diff --git a/include/linux/sched.h b/include/linux/sched.h |
1797 | index 5dc024e28397..20f5ba262cc0 100644 |
1798 | --- a/include/linux/sched.h |
1799 | +++ b/include/linux/sched.h |
1800 | @@ -1023,7 +1023,15 @@ struct task_struct { |
1801 | u64 last_sum_exec_runtime; |
1802 | struct callback_head numa_work; |
1803 | |
1804 | - struct numa_group *numa_group; |
1805 | + /* |
1806 | + * This pointer is only modified for current in syscall and |
1807 | + * pagefault context (and for tasks being destroyed), so it can be read |
1808 | + * from any of the following contexts: |
1809 | + * - RCU read-side critical section |
1810 | + * - current->numa_group from everywhere |
1811 | + * - task's runqueue locked, task not running |
1812 | + */ |
1813 | + struct numa_group __rcu *numa_group; |
1814 | |
1815 | /* |
1816 | * numa_faults is an array split into four regions: |
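
Adding __rcu is not just documentation: sparse now flags any plain dereference of p->numa_group, forcing every access through an rcu_* accessor that states which rule in the comment above it relies on (the kernel/sched/fair.c hunks below add those accessors). A generic writer/reader sketch for a field with this discipline, using a hypothetical structure:

struct bar { int id; struct rcu_head rcu; };
struct foo { struct bar __rcu *grp; };

/* Writer: by contract only one context ever modifies the pointer. */
static void foo_set_group(struct foo *f, struct bar *new_grp)
{
	struct bar *old = rcu_dereference_protected(f->grp, true);

	rcu_assign_pointer(f->grp, new_grp);
	if (old)
		kfree_rcu(old, rcu);	/* free after all readers drop out */
}

/* Reader from an arbitrary context: */
static int foo_group_id(struct foo *f)
{
	struct bar *g;
	int id = 0;

	rcu_read_lock();
	g = rcu_dereference(f->grp);
	if (g)
		id = g->id;
	rcu_read_unlock();
	return id;
}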
1817 | diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h |
1818 | index e7dd04a84ba8..3988762efe15 100644 |
1819 | --- a/include/linux/sched/numa_balancing.h |
1820 | +++ b/include/linux/sched/numa_balancing.h |
1821 | @@ -19,7 +19,7 @@ |
1822 | extern void task_numa_fault(int last_node, int node, int pages, int flags); |
1823 | extern pid_t task_numa_group_id(struct task_struct *p); |
1824 | extern void set_numabalancing_state(bool enabled); |
1825 | -extern void task_numa_free(struct task_struct *p); |
1826 | +extern void task_numa_free(struct task_struct *p, bool final); |
1827 | extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, |
1828 | int src_nid, int dst_cpu); |
1829 | #else |
1830 | @@ -34,7 +34,7 @@ static inline pid_t task_numa_group_id(struct task_struct *p) |
1831 | static inline void set_numabalancing_state(bool enabled) |
1832 | { |
1833 | } |
1834 | -static inline void task_numa_free(struct task_struct *p) |
1835 | +static inline void task_numa_free(struct task_struct *p, bool final) |
1836 | { |
1837 | } |
1838 | static inline bool should_numa_migrate_memory(struct task_struct *p, |
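
The new bool final separates two teardown flavors: the true case runs once the task's refcount has hit zero, so freeing ->numa_faults is safe, while the false case only resets statistics because concurrent readers, e.g. load balancing or procfs, may still be dereferencing the array (the stub above keeps !CONFIG_NUMA_BALANCING builds compiling). Reduced to a hypothetical resource, the shape is:

/* Sketch of reset-versus-free teardown (illustrative names). */
static void obj_stats_free(struct obj *o, bool final)
{
	u64 *stats = o->stats;

	if (!stats)
		return;
	if (final) {
		o->stats = NULL;	/* no readers can remain, by contract */
		kfree(stats);
	} else {
		memset(stats, 0, o->nstats * sizeof(*stats));
	}
}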
1839 | diff --git a/kernel/fork.c b/kernel/fork.c |
1840 | index 69874db3fba8..e76ce81c9c75 100644 |
1841 | --- a/kernel/fork.c |
1842 | +++ b/kernel/fork.c |
1843 | @@ -679,7 +679,7 @@ void __put_task_struct(struct task_struct *tsk) |
1844 | WARN_ON(tsk == current); |
1845 | |
1846 | cgroup_free(tsk); |
1847 | - task_numa_free(tsk); |
1848 | + task_numa_free(tsk, true); |
1849 | security_task_free(tsk); |
1850 | exit_creds(tsk); |
1851 | delayacct_tsk_free(tsk); |
1852 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c |
1853 | index 4a433608ba74..75f322603d44 100644 |
1854 | --- a/kernel/sched/fair.c |
1855 | +++ b/kernel/sched/fair.c |
1856 | @@ -1053,6 +1053,21 @@ struct numa_group { |
1857 | unsigned long faults[0]; |
1858 | }; |
1859 | |
1860 | +/* |
1861 | + * For functions that can be called in multiple contexts that permit reading |
1862 | + * ->numa_group (see struct task_struct for locking rules). |
1863 | + */ |
1864 | +static struct numa_group *deref_task_numa_group(struct task_struct *p) |
1865 | +{ |
1866 | + return rcu_dereference_check(p->numa_group, p == current || |
1867 | + (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu))); |
1868 | +} |
1869 | + |
1870 | +static struct numa_group *deref_curr_numa_group(struct task_struct *p) |
1871 | +{ |
1872 | + return rcu_dereference_protected(p->numa_group, p == current); |
1873 | +} |
1874 | + |
1875 | static inline unsigned long group_faults_priv(struct numa_group *ng); |
1876 | static inline unsigned long group_faults_shared(struct numa_group *ng); |
1877 | |
1878 | @@ -1096,10 +1111,12 @@ static unsigned int task_scan_start(struct task_struct *p) |
1879 | { |
1880 | unsigned long smin = task_scan_min(p); |
1881 | unsigned long period = smin; |
1882 | + struct numa_group *ng; |
1883 | |
1884 | /* Scale the maximum scan period with the amount of shared memory. */ |
1885 | - if (p->numa_group) { |
1886 | - struct numa_group *ng = p->numa_group; |
1887 | + rcu_read_lock(); |
1888 | + ng = rcu_dereference(p->numa_group); |
1889 | + if (ng) { |
1890 | unsigned long shared = group_faults_shared(ng); |
1891 | unsigned long private = group_faults_priv(ng); |
1892 | |
1893 | @@ -1107,6 +1124,7 @@ static unsigned int task_scan_start(struct task_struct *p) |
1894 | period *= shared + 1; |
1895 | period /= private + shared + 1; |
1896 | } |
1897 | + rcu_read_unlock(); |
1898 | |
1899 | return max(smin, period); |
1900 | } |
1901 | @@ -1115,13 +1133,14 @@ static unsigned int task_scan_max(struct task_struct *p) |
1902 | { |
1903 | unsigned long smin = task_scan_min(p); |
1904 | unsigned long smax; |
1905 | + struct numa_group *ng; |
1906 | |
1907 | /* Watch for min being lower than max due to floor calculations */ |
1908 | smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p); |
1909 | |
1910 | /* Scale the maximum scan period with the amount of shared memory. */ |
1911 | - if (p->numa_group) { |
1912 | - struct numa_group *ng = p->numa_group; |
1913 | + ng = deref_curr_numa_group(p); |
1914 | + if (ng) { |
1915 | unsigned long shared = group_faults_shared(ng); |
1916 | unsigned long private = group_faults_priv(ng); |
1917 | unsigned long period = smax; |
1918 | @@ -1153,7 +1172,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p) |
1919 | p->numa_scan_period = sysctl_numa_balancing_scan_delay; |
1920 | p->numa_work.next = &p->numa_work; |
1921 | p->numa_faults = NULL; |
1922 | - p->numa_group = NULL; |
1923 | + RCU_INIT_POINTER(p->numa_group, NULL); |
1924 | p->last_task_numa_placement = 0; |
1925 | p->last_sum_exec_runtime = 0; |
1926 | |
1927 | @@ -1200,7 +1219,16 @@ static void account_numa_dequeue(struct rq *rq, struct task_struct *p) |
1928 | |
1929 | pid_t task_numa_group_id(struct task_struct *p) |
1930 | { |
1931 | - return p->numa_group ? p->numa_group->gid : 0; |
1932 | + struct numa_group *ng; |
1933 | + pid_t gid = 0; |
1934 | + |
1935 | + rcu_read_lock(); |
1936 | + ng = rcu_dereference(p->numa_group); |
1937 | + if (ng) |
1938 | + gid = ng->gid; |
1939 | + rcu_read_unlock(); |
1940 | + |
1941 | + return gid; |
1942 | } |
1943 | |
1944 | /* |
1945 | @@ -1225,11 +1253,13 @@ static inline unsigned long task_faults(struct task_struct *p, int nid) |
1946 | |
1947 | static inline unsigned long group_faults(struct task_struct *p, int nid) |
1948 | { |
1949 | - if (!p->numa_group) |
1950 | + struct numa_group *ng = deref_task_numa_group(p); |
1951 | + |
1952 | + if (!ng) |
1953 | return 0; |
1954 | |
1955 | - return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] + |
1956 | - p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)]; |
1957 | + return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] + |
1958 | + ng->faults[task_faults_idx(NUMA_MEM, nid, 1)]; |
1959 | } |
1960 | |
1961 | static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) |
1962 | @@ -1367,12 +1397,13 @@ static inline unsigned long task_weight(struct task_struct *p, int nid, |
1963 | static inline unsigned long group_weight(struct task_struct *p, int nid, |
1964 | int dist) |
1965 | { |
1966 | + struct numa_group *ng = deref_task_numa_group(p); |
1967 | unsigned long faults, total_faults; |
1968 | |
1969 | - if (!p->numa_group) |
1970 | + if (!ng) |
1971 | return 0; |
1972 | |
1973 | - total_faults = p->numa_group->total_faults; |
1974 | + total_faults = ng->total_faults; |
1975 | |
1976 | if (!total_faults) |
1977 | return 0; |
1978 | @@ -1386,7 +1417,7 @@ static inline unsigned long group_weight(struct task_struct *p, int nid, |
1979 | bool should_numa_migrate_memory(struct task_struct *p, struct page * page, |
1980 | int src_nid, int dst_cpu) |
1981 | { |
1982 | - struct numa_group *ng = p->numa_group; |
1983 | + struct numa_group *ng = deref_curr_numa_group(p); |
1984 | int dst_nid = cpu_to_node(dst_cpu); |
1985 | int last_cpupid, this_cpupid; |
1986 | |
1987 | @@ -1592,13 +1623,14 @@ static bool load_too_imbalanced(long src_load, long dst_load, |
1988 | static void task_numa_compare(struct task_numa_env *env, |
1989 | long taskimp, long groupimp, bool maymove) |
1990 | { |
1991 | + struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p); |
1992 | struct rq *dst_rq = cpu_rq(env->dst_cpu); |
1993 | + long imp = p_ng ? groupimp : taskimp; |
1994 | struct task_struct *cur; |
1995 | long src_load, dst_load; |
1996 | - long load; |
1997 | - long imp = env->p->numa_group ? groupimp : taskimp; |
1998 | - long moveimp = imp; |
1999 | int dist = env->dist; |
2000 | + long moveimp = imp; |
2001 | + long load; |
2002 | |
2003 | if (READ_ONCE(dst_rq->numa_migrate_on)) |
2004 | return; |
2005 | @@ -1637,21 +1669,22 @@ static void task_numa_compare(struct task_numa_env *env, |
2006 | * If dst and source tasks are in the same NUMA group, or not |
2007 | * in any group then look only at task weights. |
2008 | */ |
2009 | - if (cur->numa_group == env->p->numa_group) { |
2010 | + cur_ng = rcu_dereference(cur->numa_group); |
2011 | + if (cur_ng == p_ng) { |
2012 | imp = taskimp + task_weight(cur, env->src_nid, dist) - |
2013 | task_weight(cur, env->dst_nid, dist); |
2014 | /* |
2015 | * Add some hysteresis to prevent swapping the |
2016 | * tasks within a group over tiny differences. |
2017 | */ |
2018 | - if (cur->numa_group) |
2019 | + if (cur_ng) |
2020 | imp -= imp / 16; |
2021 | } else { |
2022 | /* |
2023 | * Compare the group weights. If a task is all by itself |
2024 | * (not part of a group), use the task weight instead. |
2025 | */ |
2026 | - if (cur->numa_group && env->p->numa_group) |
2027 | + if (cur_ng && p_ng) |
2028 | imp += group_weight(cur, env->src_nid, dist) - |
2029 | group_weight(cur, env->dst_nid, dist); |
2030 | else |
2031 | @@ -1749,11 +1782,12 @@ static int task_numa_migrate(struct task_struct *p) |
2032 | .best_imp = 0, |
2033 | .best_cpu = -1, |
2034 | }; |
2035 | + unsigned long taskweight, groupweight; |
2036 | struct sched_domain *sd; |
2037 | + long taskimp, groupimp; |
2038 | + struct numa_group *ng; |
2039 | struct rq *best_rq; |
2040 | - unsigned long taskweight, groupweight; |
2041 | int nid, ret, dist; |
2042 | - long taskimp, groupimp; |
2043 | |
2044 | /* |
2045 | * Pick the lowest SD_NUMA domain, as that would have the smallest |
2046 | @@ -1799,7 +1833,8 @@ static int task_numa_migrate(struct task_struct *p) |
2047 | * multiple NUMA nodes; in order to better consolidate the group, |
2048 | * we need to check other locations. |
2049 | */ |
2050 | - if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) { |
2051 | + ng = deref_curr_numa_group(p); |
2052 | + if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) { |
2053 | for_each_online_node(nid) { |
2054 | if (nid == env.src_nid || nid == p->numa_preferred_nid) |
2055 | continue; |
2056 | @@ -1832,7 +1867,7 @@ static int task_numa_migrate(struct task_struct *p) |
2057 | * A task that migrated to a second choice node will be better off |
2058 | * trying for a better one later. Do not set the preferred node here. |
2059 | */ |
2060 | - if (p->numa_group) { |
2061 | + if (ng) { |
2062 | if (env.best_cpu == -1) |
2063 | nid = env.src_nid; |
2064 | else |
2065 | @@ -2127,6 +2162,7 @@ static void task_numa_placement(struct task_struct *p) |
2066 | unsigned long total_faults; |
2067 | u64 runtime, period; |
2068 | spinlock_t *group_lock = NULL; |
2069 | + struct numa_group *ng; |
2070 | |
2071 | /* |
2072 | * The p->mm->numa_scan_seq field gets updated without |
2073 | @@ -2144,8 +2180,9 @@ static void task_numa_placement(struct task_struct *p) |
2074 | runtime = numa_get_avg_runtime(p, &period); |
2075 | |
2076 | /* If the task is part of a group prevent parallel updates to group stats */ |
2077 | - if (p->numa_group) { |
2078 | - group_lock = &p->numa_group->lock; |
2079 | + ng = deref_curr_numa_group(p); |
2080 | + if (ng) { |
2081 | + group_lock = &ng->lock; |
2082 | spin_lock_irq(group_lock); |
2083 | } |
2084 | |
2085 | @@ -2186,7 +2223,7 @@ static void task_numa_placement(struct task_struct *p) |
2086 | p->numa_faults[cpu_idx] += f_diff; |
2087 | faults += p->numa_faults[mem_idx]; |
2088 | p->total_numa_faults += diff; |
2089 | - if (p->numa_group) { |
2090 | + if (ng) { |
2091 | /* |
2092 | * safe because we can only change our own group |
2093 | * |
2094 | @@ -2194,14 +2231,14 @@ static void task_numa_placement(struct task_struct *p) |
2095 | * nid and priv in a specific region because it |
2096 | * is at the beginning of the numa_faults array. |
2097 | */ |
2098 | - p->numa_group->faults[mem_idx] += diff; |
2099 | - p->numa_group->faults_cpu[mem_idx] += f_diff; |
2100 | - p->numa_group->total_faults += diff; |
2101 | - group_faults += p->numa_group->faults[mem_idx]; |
2102 | + ng->faults[mem_idx] += diff; |
2103 | + ng->faults_cpu[mem_idx] += f_diff; |
2104 | + ng->total_faults += diff; |
2105 | + group_faults += ng->faults[mem_idx]; |
2106 | } |
2107 | } |
2108 | |
2109 | - if (!p->numa_group) { |
2110 | + if (!ng) { |
2111 | if (faults > max_faults) { |
2112 | max_faults = faults; |
2113 | max_nid = nid; |
2114 | @@ -2212,8 +2249,8 @@ static void task_numa_placement(struct task_struct *p) |
2115 | } |
2116 | } |
2117 | |
2118 | - if (p->numa_group) { |
2119 | - numa_group_count_active_nodes(p->numa_group); |
2120 | + if (ng) { |
2121 | + numa_group_count_active_nodes(ng); |
2122 | spin_unlock_irq(group_lock); |
2123 | max_nid = preferred_group_nid(p, max_nid); |
2124 | } |
2125 | @@ -2247,7 +2284,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, |
2126 | int cpu = cpupid_to_cpu(cpupid); |
2127 | int i; |
2128 | |
2129 | - if (unlikely(!p->numa_group)) { |
2130 | + if (unlikely(!deref_curr_numa_group(p))) { |
2131 | unsigned int size = sizeof(struct numa_group) + |
2132 | 4*nr_node_ids*sizeof(unsigned long); |
2133 | |
2134 | @@ -2283,7 +2320,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, |
2135 | if (!grp) |
2136 | goto no_join; |
2137 | |
2138 | - my_grp = p->numa_group; |
2139 | + my_grp = deref_curr_numa_group(p); |
2140 | if (grp == my_grp) |
2141 | goto no_join; |
2142 | |
2143 | @@ -2345,13 +2382,24 @@ no_join: |
2144 | return; |
2145 | } |
2146 | |
2147 | -void task_numa_free(struct task_struct *p) |
2148 | +/* |
2149 | + * Get rid of NUMA statistics associated with a task (either current or dead). |
2150 | + * If @final is set, the task is dead and has reached refcount zero, so we can |
2151 | + * safely free all relevant data structures. Otherwise, there might be |
2152 | + * concurrent reads from places like load balancing and procfs, and we should |
2153 | + * reset the data back to default state without freeing ->numa_faults. |
2154 | + */ |
2155 | +void task_numa_free(struct task_struct *p, bool final) |
2156 | { |
2157 | - struct numa_group *grp = p->numa_group; |
2158 | - void *numa_faults = p->numa_faults; |
2159 | + /* safe: p either is current or is being freed by current */ |
2160 | + struct numa_group *grp = rcu_dereference_raw(p->numa_group); |
2161 | + unsigned long *numa_faults = p->numa_faults; |
2162 | unsigned long flags; |
2163 | int i; |
2164 | |
2165 | + if (!numa_faults) |
2166 | + return; |
2167 | + |
2168 | if (grp) { |
2169 | spin_lock_irqsave(&grp->lock, flags); |
2170 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) |
2171 | @@ -2364,8 +2412,14 @@ void task_numa_free(struct task_struct *p) |
2172 | put_numa_group(grp); |
2173 | } |
2174 | |
2175 | - p->numa_faults = NULL; |
2176 | - kfree(numa_faults); |
2177 | + if (final) { |
2178 | + p->numa_faults = NULL; |
2179 | + kfree(numa_faults); |
2180 | + } else { |
2181 | + p->total_numa_faults = 0; |
2182 | + for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) |
2183 | + numa_faults[i] = 0; |
2184 | + } |
2185 | } |
2186 | |
2187 | /* |
2188 | @@ -2418,7 +2472,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) |
2189 | * actively using should be counted as local. This allows the |
2190 | * scan rate to slow down when a workload has settled down. |
2191 | */ |
2192 | - ng = p->numa_group; |
2193 | + ng = deref_curr_numa_group(p); |
2194 | if (!priv && !local && ng && ng->active_nodes > 1 && |
2195 | numa_is_active_node(cpu_node, ng) && |
2196 | numa_is_active_node(mem_node, ng)) |
2197 | @@ -10218,18 +10272,22 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m) |
2198 | { |
2199 | int node; |
2200 | unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0; |
2201 | + struct numa_group *ng; |
2202 | |
2203 | + rcu_read_lock(); |
2204 | + ng = rcu_dereference(p->numa_group); |
2205 | for_each_online_node(node) { |
2206 | if (p->numa_faults) { |
2207 | tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)]; |
2208 | tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)]; |
2209 | } |
2210 | - if (p->numa_group) { |
2211 | - gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)], |
2212 | - gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)]; |
2213 | + if (ng) { |
2214 | + gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)], |
2215 | + gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)]; |
2216 | } |
2217 | print_numa_stats(m, node, tsf, tpf, gsf, gpf); |
2218 | } |
2219 | + rcu_read_unlock(); |
2220 | } |
2221 | #endif /* CONFIG_NUMA_BALANCING */ |
2222 | #endif /* CONFIG_SCHED_DEBUG */ |
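
deref_task_numa_group() above turns the locking rules from the include/linux/sched.h comment into a machine-checked assertion: with CONFIG_PROVE_RCU, rcu_dereference_check() splats unless an RCU read-side section is active or the stated condition holds (task is current, or its runqueue is locked and it is off-CPU). The generic shape of such a checked accessor, on a hypothetical object:

/* Legal under RCU, or whenever the object's own lock is held. */
static struct bar *obj_deref_grp(struct obj *o)
{
	return rcu_dereference_check(o->grp, lockdep_is_held(&o->lock));
}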
2223 | diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c |
2224 | index c248e0dccbe1..67ef9d853d90 100644 |
2225 | --- a/net/ipv4/ip_tunnel_core.c |
2226 | +++ b/net/ipv4/ip_tunnel_core.c |
2227 | @@ -89,9 +89,12 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, |
2228 | __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1); |
2229 | |
2230 | err = ip_local_out(net, sk, skb); |
2231 | - if (unlikely(net_xmit_eval(err))) |
2232 | - pkt_len = 0; |
2233 | - iptunnel_xmit_stats(dev, pkt_len); |
2234 | + |
2235 | + if (dev) { |
2236 | + if (unlikely(net_xmit_eval(err))) |
2237 | + pkt_len = 0; |
2238 | + iptunnel_xmit_stats(dev, pkt_len); |
2239 | + } |
2240 | } |
2241 | EXPORT_SYMBOL_GPL(iptunnel_xmit); |
2242 | |
2243 | diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c |
2244 | index ab27a2872935..2e30bf197583 100644 |
2245 | --- a/net/vmw_vsock/af_vsock.c |
2246 | +++ b/net/vmw_vsock/af_vsock.c |
2247 | @@ -281,7 +281,8 @@ EXPORT_SYMBOL_GPL(vsock_insert_connected); |
2248 | void vsock_remove_bound(struct vsock_sock *vsk) |
2249 | { |
2250 | spin_lock_bh(&vsock_table_lock); |
2251 | - __vsock_remove_bound(vsk); |
2252 | + if (__vsock_in_bound_table(vsk)) |
2253 | + __vsock_remove_bound(vsk); |
2254 | spin_unlock_bh(&vsock_table_lock); |
2255 | } |
2256 | EXPORT_SYMBOL_GPL(vsock_remove_bound); |
2257 | @@ -289,7 +290,8 @@ EXPORT_SYMBOL_GPL(vsock_remove_bound); |
2258 | void vsock_remove_connected(struct vsock_sock *vsk) |
2259 | { |
2260 | spin_lock_bh(&vsock_table_lock); |
2261 | - __vsock_remove_connected(vsk); |
2262 | + if (__vsock_in_connected_table(vsk)) |
2263 | + __vsock_remove_connected(vsk); |
2264 | spin_unlock_bh(&vsock_table_lock); |
2265 | } |
2266 | EXPORT_SYMBOL_GPL(vsock_remove_connected); |
2267 | @@ -325,35 +327,10 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, |
2268 | } |
2269 | EXPORT_SYMBOL_GPL(vsock_find_connected_socket); |
2270 | |
2271 | -static bool vsock_in_bound_table(struct vsock_sock *vsk) |
2272 | -{ |
2273 | - bool ret; |
2274 | - |
2275 | - spin_lock_bh(&vsock_table_lock); |
2276 | - ret = __vsock_in_bound_table(vsk); |
2277 | - spin_unlock_bh(&vsock_table_lock); |
2278 | - |
2279 | - return ret; |
2280 | -} |
2281 | - |
2282 | -static bool vsock_in_connected_table(struct vsock_sock *vsk) |
2283 | -{ |
2284 | - bool ret; |
2285 | - |
2286 | - spin_lock_bh(&vsock_table_lock); |
2287 | - ret = __vsock_in_connected_table(vsk); |
2288 | - spin_unlock_bh(&vsock_table_lock); |
2289 | - |
2290 | - return ret; |
2291 | -} |
2292 | - |
2293 | void vsock_remove_sock(struct vsock_sock *vsk) |
2294 | { |
2295 | - if (vsock_in_bound_table(vsk)) |
2296 | - vsock_remove_bound(vsk); |
2297 | - |
2298 | - if (vsock_in_connected_table(vsk)) |
2299 | - vsock_remove_connected(vsk); |
2300 | + vsock_remove_bound(vsk); |
2301 | + vsock_remove_connected(vsk); |
2302 | } |
2303 | EXPORT_SYMBOL_GPL(vsock_remove_sock); |
2304 | |
2305 | @@ -484,8 +461,7 @@ static void vsock_pending_work(struct work_struct *work) |
2306 | * incoming packets can't find this socket, and to reduce the reference |
2307 | * count. |
2308 | */ |
2309 | - if (vsock_in_connected_table(vsk)) |
2310 | - vsock_remove_connected(vsk); |
2311 | + vsock_remove_connected(vsk); |
2312 | |
2313 | sk->sk_state = TCP_CLOSE; |
2314 | |
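
The af_vsock.c change closes a classic check-then-act window: vsock_remove_sock() used to test table membership with one lock round-trip and unlink with another, so two contexts could both see the socket as present and both attempt the removal, risking a double list unlink and a double sock_put(). Folding the membership test into the same critical section as the unlink makes removal idempotent. The pattern in miniature, with a hypothetical table:

/* Racy: membership can change between the locked check and the
 * separately locked removal. */
static void obj_remove_racy(struct obj *o)
{
	if (obj_in_table(o))		/* takes and drops table_lock */
		__obj_remove(o);	/* takes table_lock again */
}

/* Fixed: one critical section covers both the test and the unlink. */
static void obj_remove(struct obj *o)
{
	spin_lock_bh(&table_lock);
	if (!list_empty(&o->node))
		list_del_init(&o->node);
	spin_unlock_bh(&table_lock);
}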
2315 | diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c |
2316 | index b131561a9469..9c7da811d130 100644 |
2317 | --- a/net/vmw_vsock/hyperv_transport.c |
2318 | +++ b/net/vmw_vsock/hyperv_transport.c |
2319 | @@ -35,6 +35,9 @@ |
2320 | /* The MTU is 16KB per the host side's design */ |
2321 | #define HVS_MTU_SIZE (1024 * 16) |
2322 | |
2323 | +/* How long to wait for graceful shutdown of a connection */ |
2324 | +#define HVS_CLOSE_TIMEOUT (8 * HZ) |
2325 | + |
2326 | struct vmpipe_proto_header { |
2327 | u32 pkt_type; |
2328 | u32 data_size; |
2329 | @@ -290,19 +293,32 @@ static void hvs_channel_cb(void *ctx) |
2330 | sk->sk_write_space(sk); |
2331 | } |
2332 | |
2333 | -static void hvs_close_connection(struct vmbus_channel *chan) |
2334 | +static void hvs_do_close_lock_held(struct vsock_sock *vsk, |
2335 | + bool cancel_timeout) |
2336 | { |
2337 | - struct sock *sk = get_per_channel_state(chan); |
2338 | - struct vsock_sock *vsk = vsock_sk(sk); |
2339 | - |
2340 | - lock_sock(sk); |
2341 | + struct sock *sk = sk_vsock(vsk); |
2342 | |
2343 | - sk->sk_state = TCP_CLOSE; |
2344 | sock_set_flag(sk, SOCK_DONE); |
2345 | - vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; |
2346 | - |
2347 | + vsk->peer_shutdown = SHUTDOWN_MASK; |
2348 | + if (vsock_stream_has_data(vsk) <= 0) |
2349 | + sk->sk_state = TCP_CLOSING; |
2350 | sk->sk_state_change(sk); |
2351 | + if (vsk->close_work_scheduled && |
2352 | + (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { |
2353 | + vsk->close_work_scheduled = false; |
2354 | + vsock_remove_sock(vsk); |
2355 | |
2356 | + /* Release the reference taken while scheduling the timeout */ |
2357 | + sock_put(sk); |
2358 | + } |
2359 | +} |
2360 | + |
2361 | +static void hvs_close_connection(struct vmbus_channel *chan) |
2362 | +{ |
2363 | + struct sock *sk = get_per_channel_state(chan); |
2364 | + |
2365 | + lock_sock(sk); |
2366 | + hvs_do_close_lock_held(vsock_sk(sk), true); |
2367 | release_sock(sk); |
2368 | } |
2369 | |
2370 | @@ -445,50 +461,80 @@ static int hvs_connect(struct vsock_sock *vsk) |
2371 | return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id); |
2372 | } |
2373 | |
2374 | +static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode) |
2375 | +{ |
2376 | + struct vmpipe_proto_header hdr; |
2377 | + |
2378 | + if (hvs->fin_sent || !hvs->chan) |
2379 | + return; |
2380 | + |
2381 | + /* It can't fail: see hvs_channel_writable_bytes(). */ |
2382 | + (void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0); |
2383 | + hvs->fin_sent = true; |
2384 | +} |
2385 | + |
2386 | static int hvs_shutdown(struct vsock_sock *vsk, int mode) |
2387 | { |
2388 | struct sock *sk = sk_vsock(vsk); |
2389 | - struct vmpipe_proto_header hdr; |
2390 | - struct hvs_send_buf *send_buf; |
2391 | - struct hvsock *hvs; |
2392 | |
2393 | if (!(mode & SEND_SHUTDOWN)) |
2394 | return 0; |
2395 | |
2396 | lock_sock(sk); |
2397 | + hvs_shutdown_lock_held(vsk->trans, mode); |
2398 | + release_sock(sk); |
2399 | + return 0; |
2400 | +} |
2401 | |
2402 | - hvs = vsk->trans; |
2403 | - if (hvs->fin_sent) |
2404 | - goto out; |
2405 | - |
2406 | - send_buf = (struct hvs_send_buf *)&hdr; |
2407 | +static void hvs_close_timeout(struct work_struct *work) |
2408 | +{ |
2409 | + struct vsock_sock *vsk = |
2410 | + container_of(work, struct vsock_sock, close_work.work); |
2411 | + struct sock *sk = sk_vsock(vsk); |
2412 | |
2413 | - /* It can't fail: see hvs_channel_writable_bytes(). */ |
2414 | - (void)hvs_send_data(hvs->chan, send_buf, 0); |
2415 | + sock_hold(sk); |
2416 | + lock_sock(sk); |
2417 | + if (!sock_flag(sk, SOCK_DONE)) |
2418 | + hvs_do_close_lock_held(vsk, false); |
2419 | |
2420 | - hvs->fin_sent = true; |
2421 | -out: |
2422 | + vsk->close_work_scheduled = false; |
2423 | release_sock(sk); |
2424 | - return 0; |
2425 | + sock_put(sk); |
2426 | } |
2427 | |
2428 | -static void hvs_release(struct vsock_sock *vsk) |
2429 | +/* Returns true if it is safe to remove the socket; false otherwise */ |
2430 | +static bool hvs_close_lock_held(struct vsock_sock *vsk) |
2431 | { |
2432 | struct sock *sk = sk_vsock(vsk); |
2433 | - struct hvsock *hvs = vsk->trans; |
2434 | - struct vmbus_channel *chan; |
2435 | |
2436 | - lock_sock(sk); |
2437 | + if (!(sk->sk_state == TCP_ESTABLISHED || |
2438 | + sk->sk_state == TCP_CLOSING)) |
2439 | + return true; |
2440 | |
2441 | - sk->sk_state = TCP_CLOSING; |
2442 | - vsock_remove_sock(vsk); |
2443 | + if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) |
2444 | + hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK); |
2445 | |
2446 | - release_sock(sk); |
2447 | + if (sock_flag(sk, SOCK_DONE)) |
2448 | + return true; |
2449 | |
2450 | - chan = hvs->chan; |
2451 | - if (chan) |
2452 | - hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN); |
2453 | + /* This reference will be dropped by the delayed close routine */ |
2454 | + sock_hold(sk); |
2455 | + INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout); |
2456 | + vsk->close_work_scheduled = true; |
2457 | + schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT); |
2458 | + return false; |
2459 | +} |
2460 | |
2461 | +static void hvs_release(struct vsock_sock *vsk) |
2462 | +{ |
2463 | + struct sock *sk = sk_vsock(vsk); |
2464 | + bool remove_sock; |
2465 | + |
2466 | + lock_sock(sk); |
2467 | + remove_sock = hvs_close_lock_held(vsk); |
2468 | + release_sock(sk); |
2469 | + if (remove_sock) |
2470 | + vsock_remove_sock(vsk); |
2471 | } |
2472 | |
2473 | static void hvs_destruct(struct vsock_sock *vsk) |