Annotation of /trunk/kernel-alx/patches-4.19/0163-4.19.64-all-fixes.patch
Parent Directory | Revision Log
Revision 3442 -
(hide annotations)
(download)
Mon Aug 5 07:52:44 2019 UTC (4 years, 9 months ago) by niro
File size: 78299 byte(s)
-linux-4.19.64
1 | niro | 3442 | diff --git a/Makefile b/Makefile |
2 | index 8ad77a93de30..203d9e80a315 100644 | ||
3 | --- a/Makefile | ||
4 | +++ b/Makefile | ||
5 | @@ -1,7 +1,7 @@ | ||
6 | # SPDX-License-Identifier: GPL-2.0 | ||
7 | VERSION = 4 | ||
8 | PATCHLEVEL = 19 | ||
9 | -SUBLEVEL = 63 | ||
10 | +SUBLEVEL = 64 | ||
11 | EXTRAVERSION = | ||
12 | NAME = "People's Front" | ||
13 | |||
14 | diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h | ||
15 | index 1a037b94eba1..cee28a05ee98 100644 | ||
16 | --- a/arch/arm64/include/asm/compat.h | ||
17 | +++ b/arch/arm64/include/asm/compat.h | ||
18 | @@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) | ||
19 | } | ||
20 | |||
21 | #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) | ||
22 | +#define COMPAT_MINSIGSTKSZ 2048 | ||
23 | |||
24 | static inline void __user *arch_compat_alloc_user_space(long len) | ||
25 | { | ||
26 | diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig | ||
27 | index 6394b4f0a69b..f42feab25dcf 100644 | ||
28 | --- a/arch/sh/boards/Kconfig | ||
29 | +++ b/arch/sh/boards/Kconfig | ||
30 | @@ -8,27 +8,19 @@ config SH_ALPHA_BOARD | ||
31 | bool | ||
32 | |||
33 | config SH_DEVICE_TREE | ||
34 | - bool "Board Described by Device Tree" | ||
35 | + bool | ||
36 | select OF | ||
37 | select OF_EARLY_FLATTREE | ||
38 | select TIMER_OF | ||
39 | select COMMON_CLK | ||
40 | select GENERIC_CALIBRATE_DELAY | ||
41 | - help | ||
42 | - Select Board Described by Device Tree to build a kernel that | ||
43 | - does not hard-code any board-specific knowledge but instead uses | ||
44 | - a device tree blob provided by the boot-loader. You must enable | ||
45 | - drivers for any hardware you want to use separately. At this | ||
46 | - time, only boards based on the open-hardware J-Core processors | ||
47 | - have sufficient driver coverage to use this option; do not | ||
48 | - select it if you are using original SuperH hardware. | ||
49 | |||
50 | config SH_JCORE_SOC | ||
51 | bool "J-Core SoC" | ||
52 | - depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2) | ||
53 | + select SH_DEVICE_TREE | ||
54 | select CLKSRC_JCORE_PIT | ||
55 | select JCORE_AIC | ||
56 | - default y if CPU_J2 | ||
57 | + depends on CPU_J2 | ||
58 | help | ||
59 | Select this option to include drivers core components of the | ||
60 | J-Core SoC, including interrupt controllers and timers. | ||
61 | diff --git a/block/blk-core.c b/block/blk-core.c | ||
62 | index 9ca703bcfe3b..4a3e1f417880 100644 | ||
63 | --- a/block/blk-core.c | ||
64 | +++ b/block/blk-core.c | ||
65 | @@ -421,24 +421,25 @@ void blk_sync_queue(struct request_queue *q) | ||
66 | EXPORT_SYMBOL(blk_sync_queue); | ||
67 | |||
68 | /** | ||
69 | - * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY | ||
70 | + * blk_set_pm_only - increment pm_only counter | ||
71 | * @q: request queue pointer | ||
72 | - * | ||
73 | - * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not | ||
74 | - * set and 1 if the flag was already set. | ||
75 | */ | ||
76 | -int blk_set_preempt_only(struct request_queue *q) | ||
77 | +void blk_set_pm_only(struct request_queue *q) | ||
78 | { | ||
79 | - return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q); | ||
80 | + atomic_inc(&q->pm_only); | ||
81 | } | ||
82 | -EXPORT_SYMBOL_GPL(blk_set_preempt_only); | ||
83 | +EXPORT_SYMBOL_GPL(blk_set_pm_only); | ||
84 | |||
85 | -void blk_clear_preempt_only(struct request_queue *q) | ||
86 | +void blk_clear_pm_only(struct request_queue *q) | ||
87 | { | ||
88 | - blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); | ||
89 | - wake_up_all(&q->mq_freeze_wq); | ||
90 | + int pm_only; | ||
91 | + | ||
92 | + pm_only = atomic_dec_return(&q->pm_only); | ||
93 | + WARN_ON_ONCE(pm_only < 0); | ||
94 | + if (pm_only == 0) | ||
95 | + wake_up_all(&q->mq_freeze_wq); | ||
96 | } | ||
97 | -EXPORT_SYMBOL_GPL(blk_clear_preempt_only); | ||
98 | +EXPORT_SYMBOL_GPL(blk_clear_pm_only); | ||
99 | |||
100 | /** | ||
101 | * __blk_run_queue_uncond - run a queue whether or not it has been stopped | ||
102 | @@ -916,7 +917,7 @@ EXPORT_SYMBOL(blk_alloc_queue); | ||
103 | */ | ||
104 | int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) | ||
105 | { | ||
106 | - const bool preempt = flags & BLK_MQ_REQ_PREEMPT; | ||
107 | + const bool pm = flags & BLK_MQ_REQ_PREEMPT; | ||
108 | |||
109 | while (true) { | ||
110 | bool success = false; | ||
111 | @@ -924,11 +925,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) | ||
112 | rcu_read_lock(); | ||
113 | if (percpu_ref_tryget_live(&q->q_usage_counter)) { | ||
114 | /* | ||
115 | - * The code that sets the PREEMPT_ONLY flag is | ||
116 | - * responsible for ensuring that that flag is globally | ||
117 | - * visible before the queue is unfrozen. | ||
118 | + * The code that increments the pm_only counter is | ||
119 | + * responsible for ensuring that that counter is | ||
120 | + * globally visible before the queue is unfrozen. | ||
121 | */ | ||
122 | - if (preempt || !blk_queue_preempt_only(q)) { | ||
123 | + if (pm || !blk_queue_pm_only(q)) { | ||
124 | success = true; | ||
125 | } else { | ||
126 | percpu_ref_put(&q->q_usage_counter); | ||
127 | @@ -953,7 +954,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) | ||
128 | |||
129 | wait_event(q->mq_freeze_wq, | ||
130 | (atomic_read(&q->mq_freeze_depth) == 0 && | ||
131 | - (preempt || !blk_queue_preempt_only(q))) || | ||
132 | + (pm || !blk_queue_pm_only(q))) || | ||
133 | blk_queue_dying(q)); | ||
134 | if (blk_queue_dying(q)) | ||
135 | return -ENODEV; | ||
136 | diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c | ||
137 | index cb1e6cf7ac48..a5ea86835fcb 100644 | ||
138 | --- a/block/blk-mq-debugfs.c | ||
139 | +++ b/block/blk-mq-debugfs.c | ||
140 | @@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, | ||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | +static int queue_pm_only_show(void *data, struct seq_file *m) | ||
145 | +{ | ||
146 | + struct request_queue *q = data; | ||
147 | + | ||
148 | + seq_printf(m, "%d\n", atomic_read(&q->pm_only)); | ||
149 | + return 0; | ||
150 | +} | ||
151 | + | ||
152 | #define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name | ||
153 | static const char *const blk_queue_flag_name[] = { | ||
154 | QUEUE_FLAG_NAME(QUEUED), | ||
155 | @@ -132,7 +140,6 @@ static const char *const blk_queue_flag_name[] = { | ||
156 | QUEUE_FLAG_NAME(REGISTERED), | ||
157 | QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), | ||
158 | QUEUE_FLAG_NAME(QUIESCED), | ||
159 | - QUEUE_FLAG_NAME(PREEMPT_ONLY), | ||
160 | }; | ||
161 | #undef QUEUE_FLAG_NAME | ||
162 | |||
163 | @@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf, | ||
164 | static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { | ||
165 | { "poll_stat", 0400, queue_poll_stat_show }, | ||
166 | { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, | ||
167 | + { "pm_only", 0600, queue_pm_only_show, NULL }, | ||
168 | { "state", 0600, queue_state_show, queue_state_write }, | ||
169 | { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store }, | ||
170 | { "zone_wlock", 0400, queue_zone_wlock_show, NULL }, | ||
171 | diff --git a/drivers/android/binder.c b/drivers/android/binder.c | ||
172 | index 1e0e438f079f..6e04e7a707a1 100644 | ||
173 | --- a/drivers/android/binder.c | ||
174 | +++ b/drivers/android/binder.c | ||
175 | @@ -1960,8 +1960,18 @@ static struct binder_thread *binder_get_txn_from_and_acq_inner( | ||
176 | |||
177 | static void binder_free_transaction(struct binder_transaction *t) | ||
178 | { | ||
179 | - if (t->buffer) | ||
180 | - t->buffer->transaction = NULL; | ||
181 | + struct binder_proc *target_proc = t->to_proc; | ||
182 | + | ||
183 | + if (target_proc) { | ||
184 | + binder_inner_proc_lock(target_proc); | ||
185 | + if (t->buffer) | ||
186 | + t->buffer->transaction = NULL; | ||
187 | + binder_inner_proc_unlock(target_proc); | ||
188 | + } | ||
189 | + /* | ||
190 | + * If the transaction has no target_proc, then | ||
191 | + * t->buffer->transaction has already been cleared. | ||
192 | + */ | ||
193 | kfree(t); | ||
194 | binder_stats_deleted(BINDER_STAT_TRANSACTION); | ||
195 | } | ||
196 | @@ -3484,10 +3494,12 @@ static int binder_thread_write(struct binder_proc *proc, | ||
197 | buffer->debug_id, | ||
198 | buffer->transaction ? "active" : "finished"); | ||
199 | |||
200 | + binder_inner_proc_lock(proc); | ||
201 | if (buffer->transaction) { | ||
202 | buffer->transaction->buffer = NULL; | ||
203 | buffer->transaction = NULL; | ||
204 | } | ||
205 | + binder_inner_proc_unlock(proc); | ||
206 | if (buffer->async_transaction && buffer->target_node) { | ||
207 | struct binder_node *buf_node; | ||
208 | struct binder_work *w; | ||
209 | diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c | ||
210 | index d568fbd94d6c..20235925344d 100644 | ||
211 | --- a/drivers/bluetooth/hci_ath.c | ||
212 | +++ b/drivers/bluetooth/hci_ath.c | ||
213 | @@ -112,6 +112,9 @@ static int ath_open(struct hci_uart *hu) | ||
214 | |||
215 | BT_DBG("hu %p", hu); | ||
216 | |||
217 | + if (!hci_uart_has_flow_control(hu)) | ||
218 | + return -EOPNOTSUPP; | ||
219 | + | ||
220 | ath = kzalloc(sizeof(*ath), GFP_KERNEL); | ||
221 | if (!ath) | ||
222 | return -ENOMEM; | ||
223 | diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c | ||
224 | index 800132369134..aa6b7ed9fdf1 100644 | ||
225 | --- a/drivers/bluetooth/hci_bcm.c | ||
226 | +++ b/drivers/bluetooth/hci_bcm.c | ||
227 | @@ -369,6 +369,9 @@ static int bcm_open(struct hci_uart *hu) | ||
228 | |||
229 | bt_dev_dbg(hu->hdev, "hu %p", hu); | ||
230 | |||
231 | + if (!hci_uart_has_flow_control(hu)) | ||
232 | + return -EOPNOTSUPP; | ||
233 | + | ||
234 | bcm = kzalloc(sizeof(*bcm), GFP_KERNEL); | ||
235 | if (!bcm) | ||
236 | return -ENOMEM; | ||
237 | diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c | ||
238 | index 46ace321bf60..e9228520e4c7 100644 | ||
239 | --- a/drivers/bluetooth/hci_intel.c | ||
240 | +++ b/drivers/bluetooth/hci_intel.c | ||
241 | @@ -406,6 +406,9 @@ static int intel_open(struct hci_uart *hu) | ||
242 | |||
243 | BT_DBG("hu %p", hu); | ||
244 | |||
245 | + if (!hci_uart_has_flow_control(hu)) | ||
246 | + return -EOPNOTSUPP; | ||
247 | + | ||
248 | intel = kzalloc(sizeof(*intel), GFP_KERNEL); | ||
249 | if (!intel) | ||
250 | return -ENOMEM; | ||
251 | diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c | ||
252 | index c915daf01a89..efeb8137ec67 100644 | ||
253 | --- a/drivers/bluetooth/hci_ldisc.c | ||
254 | +++ b/drivers/bluetooth/hci_ldisc.c | ||
255 | @@ -299,6 +299,19 @@ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb) | ||
256 | return 0; | ||
257 | } | ||
258 | |||
259 | +/* Check the underlying device or tty has flow control support */ | ||
260 | +bool hci_uart_has_flow_control(struct hci_uart *hu) | ||
261 | +{ | ||
262 | + /* serdev nodes check if the needed operations are present */ | ||
263 | + if (hu->serdev) | ||
264 | + return true; | ||
265 | + | ||
266 | + if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset) | ||
267 | + return true; | ||
268 | + | ||
269 | + return false; | ||
270 | +} | ||
271 | + | ||
272 | /* Flow control or un-flow control the device */ | ||
273 | void hci_uart_set_flow_control(struct hci_uart *hu, bool enable) | ||
274 | { | ||
275 | diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c | ||
276 | index ffb00669346f..23791df081ba 100644 | ||
277 | --- a/drivers/bluetooth/hci_mrvl.c | ||
278 | +++ b/drivers/bluetooth/hci_mrvl.c | ||
279 | @@ -66,6 +66,9 @@ static int mrvl_open(struct hci_uart *hu) | ||
280 | |||
281 | BT_DBG("hu %p", hu); | ||
282 | |||
283 | + if (!hci_uart_has_flow_control(hu)) | ||
284 | + return -EOPNOTSUPP; | ||
285 | + | ||
286 | mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL); | ||
287 | if (!mrvl) | ||
288 | return -ENOMEM; | ||
289 | diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c | ||
290 | index 77004c29da08..f96e58de049b 100644 | ||
291 | --- a/drivers/bluetooth/hci_qca.c | ||
292 | +++ b/drivers/bluetooth/hci_qca.c | ||
293 | @@ -450,6 +450,9 @@ static int qca_open(struct hci_uart *hu) | ||
294 | |||
295 | BT_DBG("hu %p qca_open", hu); | ||
296 | |||
297 | + if (!hci_uart_has_flow_control(hu)) | ||
298 | + return -EOPNOTSUPP; | ||
299 | + | ||
300 | qca = kzalloc(sizeof(struct qca_data), GFP_KERNEL); | ||
301 | if (!qca) | ||
302 | return -ENOMEM; | ||
303 | diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h | ||
304 | index 00cab2fd7a1b..067a610f1372 100644 | ||
305 | --- a/drivers/bluetooth/hci_uart.h | ||
306 | +++ b/drivers/bluetooth/hci_uart.h | ||
307 | @@ -118,6 +118,7 @@ int hci_uart_tx_wakeup(struct hci_uart *hu); | ||
308 | int hci_uart_init_ready(struct hci_uart *hu); | ||
309 | void hci_uart_init_work(struct work_struct *work); | ||
310 | void hci_uart_set_baudrate(struct hci_uart *hu, unsigned int speed); | ||
311 | +bool hci_uart_has_flow_control(struct hci_uart *hu); | ||
312 | void hci_uart_set_flow_control(struct hci_uart *hu, bool enable); | ||
313 | void hci_uart_set_speeds(struct hci_uart *hu, unsigned int init_speed, | ||
314 | unsigned int oper_speed); | ||
315 | diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c | ||
316 | index c1439019dd12..b9af2419006f 100644 | ||
317 | --- a/drivers/iommu/intel-iommu.c | ||
318 | +++ b/drivers/iommu/intel-iommu.c | ||
319 | @@ -3721,7 +3721,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) | ||
320 | |||
321 | freelist = domain_unmap(domain, start_pfn, last_pfn); | ||
322 | |||
323 | - if (intel_iommu_strict) { | ||
324 | + if (intel_iommu_strict || !has_iova_flush_queue(&domain->iovad)) { | ||
325 | iommu_flush_iotlb_psi(iommu, domain, start_pfn, | ||
326 | nrpages, !freelist, 0); | ||
327 | /* free iova */ | ||
328 | diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c | ||
329 | index 83fe2621effe..60348d707b99 100644 | ||
330 | --- a/drivers/iommu/iova.c | ||
331 | +++ b/drivers/iommu/iova.c | ||
332 | @@ -65,9 +65,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, | ||
333 | } | ||
334 | EXPORT_SYMBOL_GPL(init_iova_domain); | ||
335 | |||
336 | +bool has_iova_flush_queue(struct iova_domain *iovad) | ||
337 | +{ | ||
338 | + return !!iovad->fq; | ||
339 | +} | ||
340 | + | ||
341 | static void free_iova_flush_queue(struct iova_domain *iovad) | ||
342 | { | ||
343 | - if (!iovad->fq) | ||
344 | + if (!has_iova_flush_queue(iovad)) | ||
345 | return; | ||
346 | |||
347 | if (timer_pending(&iovad->fq_timer)) | ||
348 | @@ -85,13 +90,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad) | ||
349 | int init_iova_flush_queue(struct iova_domain *iovad, | ||
350 | iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) | ||
351 | { | ||
352 | + struct iova_fq __percpu *queue; | ||
353 | int cpu; | ||
354 | |||
355 | atomic64_set(&iovad->fq_flush_start_cnt, 0); | ||
356 | atomic64_set(&iovad->fq_flush_finish_cnt, 0); | ||
357 | |||
358 | - iovad->fq = alloc_percpu(struct iova_fq); | ||
359 | - if (!iovad->fq) | ||
360 | + queue = alloc_percpu(struct iova_fq); | ||
361 | + if (!queue) | ||
362 | return -ENOMEM; | ||
363 | |||
364 | iovad->flush_cb = flush_cb; | ||
365 | @@ -100,13 +106,17 @@ int init_iova_flush_queue(struct iova_domain *iovad, | ||
366 | for_each_possible_cpu(cpu) { | ||
367 | struct iova_fq *fq; | ||
368 | |||
369 | - fq = per_cpu_ptr(iovad->fq, cpu); | ||
370 | + fq = per_cpu_ptr(queue, cpu); | ||
371 | fq->head = 0; | ||
372 | fq->tail = 0; | ||
373 | |||
374 | spin_lock_init(&fq->lock); | ||
375 | } | ||
376 | |||
377 | + smp_wmb(); | ||
378 | + | ||
379 | + iovad->fq = queue; | ||
380 | + | ||
381 | timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); | ||
382 | atomic_set(&iovad->fq_timer_on, 0); | ||
383 | |||
384 | diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c | ||
385 | index 6d05946b445e..060dc7fd66c1 100644 | ||
386 | --- a/drivers/isdn/hardware/mISDN/hfcsusb.c | ||
387 | +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c | ||
388 | @@ -1967,6 +1967,9 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) | ||
389 | |||
390 | /* get endpoint base */ | ||
391 | idx = ((ep_addr & 0x7f) - 1) * 2; | ||
392 | + if (idx > 15) | ||
393 | + return -EIO; | ||
394 | + | ||
395 | if (ep_addr & 0x80) | ||
396 | idx++; | ||
397 | attr = ep->desc.bmAttributes; | ||
398 | diff --git a/drivers/media/radio/radio-raremono.c b/drivers/media/radio/radio-raremono.c | ||
399 | index 9a5079d64c4a..729600c4a056 100644 | ||
400 | --- a/drivers/media/radio/radio-raremono.c | ||
401 | +++ b/drivers/media/radio/radio-raremono.c | ||
402 | @@ -271,6 +271,14 @@ static int vidioc_g_frequency(struct file *file, void *priv, | ||
403 | return 0; | ||
404 | } | ||
405 | |||
406 | +static void raremono_device_release(struct v4l2_device *v4l2_dev) | ||
407 | +{ | ||
408 | + struct raremono_device *radio = to_raremono_dev(v4l2_dev); | ||
409 | + | ||
410 | + kfree(radio->buffer); | ||
411 | + kfree(radio); | ||
412 | +} | ||
413 | + | ||
414 | /* File system interface */ | ||
415 | static const struct v4l2_file_operations usb_raremono_fops = { | ||
416 | .owner = THIS_MODULE, | ||
417 | @@ -295,12 +303,14 @@ static int usb_raremono_probe(struct usb_interface *intf, | ||
418 | struct raremono_device *radio; | ||
419 | int retval = 0; | ||
420 | |||
421 | - radio = devm_kzalloc(&intf->dev, sizeof(struct raremono_device), GFP_KERNEL); | ||
422 | - if (radio) | ||
423 | - radio->buffer = devm_kmalloc(&intf->dev, BUFFER_LENGTH, GFP_KERNEL); | ||
424 | - | ||
425 | - if (!radio || !radio->buffer) | ||
426 | + radio = kzalloc(sizeof(*radio), GFP_KERNEL); | ||
427 | + if (!radio) | ||
428 | + return -ENOMEM; | ||
429 | + radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL); | ||
430 | + if (!radio->buffer) { | ||
431 | + kfree(radio); | ||
432 | return -ENOMEM; | ||
433 | + } | ||
434 | |||
435 | radio->usbdev = interface_to_usbdev(intf); | ||
436 | radio->intf = intf; | ||
437 | @@ -324,7 +334,8 @@ static int usb_raremono_probe(struct usb_interface *intf, | ||
438 | if (retval != 3 || | ||
439 | (get_unaligned_be16(&radio->buffer[1]) & 0xfff) == 0x0242) { | ||
440 | dev_info(&intf->dev, "this is not Thanko's Raremono.\n"); | ||
441 | - return -ENODEV; | ||
442 | + retval = -ENODEV; | ||
443 | + goto free_mem; | ||
444 | } | ||
445 | |||
446 | dev_info(&intf->dev, "Thanko's Raremono connected: (%04X:%04X)\n", | ||
447 | @@ -333,7 +344,7 @@ static int usb_raremono_probe(struct usb_interface *intf, | ||
448 | retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev); | ||
449 | if (retval < 0) { | ||
450 | dev_err(&intf->dev, "couldn't register v4l2_device\n"); | ||
451 | - return retval; | ||
452 | + goto free_mem; | ||
453 | } | ||
454 | |||
455 | mutex_init(&radio->lock); | ||
456 | @@ -345,6 +356,7 @@ static int usb_raremono_probe(struct usb_interface *intf, | ||
457 | radio->vdev.ioctl_ops = &usb_raremono_ioctl_ops; | ||
458 | radio->vdev.lock = &radio->lock; | ||
459 | radio->vdev.release = video_device_release_empty; | ||
460 | + radio->v4l2_dev.release = raremono_device_release; | ||
461 | |||
462 | usb_set_intfdata(intf, &radio->v4l2_dev); | ||
463 | |||
464 | @@ -360,6 +372,10 @@ static int usb_raremono_probe(struct usb_interface *intf, | ||
465 | } | ||
466 | dev_err(&intf->dev, "could not register video device\n"); | ||
467 | v4l2_device_unregister(&radio->v4l2_dev); | ||
468 | + | ||
469 | +free_mem: | ||
470 | + kfree(radio->buffer); | ||
471 | + kfree(radio); | ||
472 | return retval; | ||
473 | } | ||
474 | |||
475 | diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c | ||
476 | index 257ae0d8cfe2..e3f63299f85c 100644 | ||
477 | --- a/drivers/media/usb/au0828/au0828-core.c | ||
478 | +++ b/drivers/media/usb/au0828/au0828-core.c | ||
479 | @@ -623,6 +623,12 @@ static int au0828_usb_probe(struct usb_interface *interface, | ||
480 | /* Setup */ | ||
481 | au0828_card_setup(dev); | ||
482 | |||
483 | + /* | ||
484 | + * Store the pointer to the au0828_dev so it can be accessed in | ||
485 | + * au0828_usb_disconnect | ||
486 | + */ | ||
487 | + usb_set_intfdata(interface, dev); | ||
488 | + | ||
489 | /* Analog TV */ | ||
490 | retval = au0828_analog_register(dev, interface); | ||
491 | if (retval) { | ||
492 | @@ -641,12 +647,6 @@ static int au0828_usb_probe(struct usb_interface *interface, | ||
493 | /* Remote controller */ | ||
494 | au0828_rc_register(dev); | ||
495 | |||
496 | - /* | ||
497 | - * Store the pointer to the au0828_dev so it can be accessed in | ||
498 | - * au0828_usb_disconnect | ||
499 | - */ | ||
500 | - usb_set_intfdata(interface, dev); | ||
501 | - | ||
502 | pr_info("Registered device AU0828 [%s]\n", | ||
503 | dev->board.name == NULL ? "Unset" : dev->board.name); | ||
504 | |||
505 | diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c | ||
506 | index a771e0a52610..f5b04594e209 100644 | ||
507 | --- a/drivers/media/usb/cpia2/cpia2_usb.c | ||
508 | +++ b/drivers/media/usb/cpia2/cpia2_usb.c | ||
509 | @@ -902,7 +902,6 @@ static void cpia2_usb_disconnect(struct usb_interface *intf) | ||
510 | cpia2_unregister_camera(cam); | ||
511 | v4l2_device_disconnect(&cam->v4l2_dev); | ||
512 | mutex_unlock(&cam->v4l2_lock); | ||
513 | - v4l2_device_put(&cam->v4l2_dev); | ||
514 | |||
515 | if(cam->buffers) { | ||
516 | DBG("Wakeup waiting processes\n"); | ||
517 | @@ -911,6 +910,8 @@ static void cpia2_usb_disconnect(struct usb_interface *intf) | ||
518 | wake_up_interruptible(&cam->wq_stream); | ||
519 | } | ||
520 | |||
521 | + v4l2_device_put(&cam->v4l2_dev); | ||
522 | + | ||
523 | LOG("CPiA2 camera disconnected.\n"); | ||
524 | } | ||
525 | |||
526 | diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c | ||
527 | index 673fdca8d2da..fcb201a40920 100644 | ||
528 | --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c | ||
529 | +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c | ||
530 | @@ -1680,7 +1680,7 @@ static int pvr2_decoder_enable(struct pvr2_hdw *hdw,int enablefl) | ||
531 | } | ||
532 | if (!hdw->flag_decoder_missed) { | ||
533 | pvr2_trace(PVR2_TRACE_ERROR_LEGS, | ||
534 | - "WARNING: No decoder present"); | ||
535 | + "***WARNING*** No decoder present"); | ||
536 | hdw->flag_decoder_missed = !0; | ||
537 | trace_stbit("flag_decoder_missed", | ||
538 | hdw->flag_decoder_missed); | ||
539 | @@ -2366,7 +2366,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf, | ||
540 | if (hdw_desc->flag_is_experimental) { | ||
541 | pvr2_trace(PVR2_TRACE_INFO, "**********"); | ||
542 | pvr2_trace(PVR2_TRACE_INFO, | ||
543 | - "WARNING: Support for this device (%s) is experimental.", | ||
544 | + "***WARNING*** Support for this device (%s) is experimental.", | ||
545 | hdw_desc->description); | ||
546 | pvr2_trace(PVR2_TRACE_INFO, | ||
547 | "Important functionality might not be entirely working."); | ||
548 | diff --git a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c | ||
549 | index f3003ca05f4b..922c06279663 100644 | ||
550 | --- a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c | ||
551 | +++ b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c | ||
552 | @@ -343,11 +343,11 @@ static int i2c_hack_cx25840(struct pvr2_hdw *hdw, | ||
553 | |||
554 | if ((ret != 0) || (*rdata == 0x04) || (*rdata == 0x0a)) { | ||
555 | pvr2_trace(PVR2_TRACE_ERROR_LEGS, | ||
556 | - "WARNING: Detected a wedged cx25840 chip; the device will not work."); | ||
557 | + "***WARNING*** Detected a wedged cx25840 chip; the device will not work."); | ||
558 | pvr2_trace(PVR2_TRACE_ERROR_LEGS, | ||
559 | - "WARNING: Try power cycling the pvrusb2 device."); | ||
560 | + "***WARNING*** Try power cycling the pvrusb2 device."); | ||
561 | pvr2_trace(PVR2_TRACE_ERROR_LEGS, | ||
562 | - "WARNING: Disabling further access to the device to prevent other foul-ups."); | ||
563 | + "***WARNING*** Disabling further access to the device to prevent other foul-ups."); | ||
564 | // This blocks all further communication with the part. | ||
565 | hdw->i2c_func[0x44] = NULL; | ||
566 | pvr2_hdw_render_useless(hdw); | ||
567 | diff --git a/drivers/media/usb/pvrusb2/pvrusb2-std.c b/drivers/media/usb/pvrusb2/pvrusb2-std.c | ||
568 | index 6b651f8b54df..37dc299a1ca2 100644 | ||
569 | --- a/drivers/media/usb/pvrusb2/pvrusb2-std.c | ||
570 | +++ b/drivers/media/usb/pvrusb2/pvrusb2-std.c | ||
571 | @@ -353,7 +353,7 @@ struct v4l2_standard *pvr2_std_create_enum(unsigned int *countptr, | ||
572 | bcnt = pvr2_std_id_to_str(buf,sizeof(buf),fmsk); | ||
573 | pvr2_trace( | ||
574 | PVR2_TRACE_ERROR_LEGS, | ||
575 | - "WARNING: Failed to classify the following standard(s): %.*s", | ||
576 | + "***WARNING*** Failed to classify the following standard(s): %.*s", | ||
577 | bcnt,buf); | ||
578 | } | ||
579 | |||
580 | diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c | ||
581 | index d4803ff5a78a..f09a4ad2e9de 100644 | ||
582 | --- a/drivers/net/wireless/ath/ath10k/usb.c | ||
583 | +++ b/drivers/net/wireless/ath/ath10k/usb.c | ||
584 | @@ -1025,7 +1025,7 @@ static int ath10k_usb_probe(struct usb_interface *interface, | ||
585 | } | ||
586 | |||
587 | /* TODO: remove this once USB support is fully implemented */ | ||
588 | - ath10k_warn(ar, "WARNING: ath10k USB support is incomplete, don't expect anything to work!\n"); | ||
589 | + ath10k_warn(ar, "Warning: ath10k USB support is incomplete, don't expect anything to work!\n"); | ||
590 | |||
591 | return 0; | ||
592 | |||
593 | diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c | ||
594 | index 8febacb8fc54..0951564b6830 100644 | ||
595 | --- a/drivers/pps/pps.c | ||
596 | +++ b/drivers/pps/pps.c | ||
597 | @@ -166,6 +166,14 @@ static long pps_cdev_ioctl(struct file *file, | ||
598 | pps->params.mode |= PPS_CANWAIT; | ||
599 | pps->params.api_version = PPS_API_VERS; | ||
600 | |||
601 | + /* | ||
602 | + * Clear unused fields of pps_kparams to avoid leaking | ||
603 | + * uninitialized data of the PPS_SETPARAMS caller via | ||
604 | + * PPS_GETPARAMS | ||
605 | + */ | ||
606 | + pps->params.assert_off_tu.flags = 0; | ||
607 | + pps->params.clear_off_tu.flags = 0; | ||
608 | + | ||
609 | spin_unlock_irq(&pps->lock); | ||
610 | |||
611 | break; | ||
612 | diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c | ||
613 | index 32652b2c5e7c..75b926e70076 100644 | ||
614 | --- a/drivers/scsi/scsi_lib.c | ||
615 | +++ b/drivers/scsi/scsi_lib.c | ||
616 | @@ -3059,11 +3059,14 @@ scsi_device_quiesce(struct scsi_device *sdev) | ||
617 | */ | ||
618 | WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current); | ||
619 | |||
620 | - blk_set_preempt_only(q); | ||
621 | + if (sdev->quiesced_by == current) | ||
622 | + return 0; | ||
623 | + | ||
624 | + blk_set_pm_only(q); | ||
625 | |||
626 | blk_mq_freeze_queue(q); | ||
627 | /* | ||
628 | - * Ensure that the effect of blk_set_preempt_only() will be visible | ||
629 | + * Ensure that the effect of blk_set_pm_only() will be visible | ||
630 | * for percpu_ref_tryget() callers that occur after the queue | ||
631 | * unfreeze even if the queue was already frozen before this function | ||
632 | * was called. See also https://lwn.net/Articles/573497/. | ||
633 | @@ -3076,7 +3079,7 @@ scsi_device_quiesce(struct scsi_device *sdev) | ||
634 | if (err == 0) | ||
635 | sdev->quiesced_by = current; | ||
636 | else | ||
637 | - blk_clear_preempt_only(q); | ||
638 | + blk_clear_pm_only(q); | ||
639 | mutex_unlock(&sdev->state_mutex); | ||
640 | |||
641 | return err; | ||
642 | @@ -3099,8 +3102,10 @@ void scsi_device_resume(struct scsi_device *sdev) | ||
643 | * device deleted during suspend) | ||
644 | */ | ||
645 | mutex_lock(&sdev->state_mutex); | ||
646 | - sdev->quiesced_by = NULL; | ||
647 | - blk_clear_preempt_only(sdev->request_queue); | ||
648 | + if (sdev->quiesced_by) { | ||
649 | + sdev->quiesced_by = NULL; | ||
650 | + blk_clear_pm_only(sdev->request_queue); | ||
651 | + } | ||
652 | if (sdev->sdev_state == SDEV_QUIESCE) | ||
653 | scsi_device_set_state(sdev, SDEV_RUNNING); | ||
654 | mutex_unlock(&sdev->state_mutex); | ||
655 | diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c | ||
656 | index 03614ef64ca4..3f68edde0f03 100644 | ||
657 | --- a/drivers/usb/dwc2/gadget.c | ||
658 | +++ b/drivers/usb/dwc2/gadget.c | ||
659 | @@ -3125,6 +3125,7 @@ void dwc2_hsotg_disconnect(struct dwc2_hsotg *hsotg) | ||
660 | hsotg->connected = 0; | ||
661 | hsotg->test_mode = 0; | ||
662 | |||
663 | + /* all endpoints should be shutdown */ | ||
664 | for (ep = 0; ep < hsotg->num_of_eps; ep++) { | ||
665 | if (hsotg->eps_in[ep]) | ||
666 | kill_all_requests(hsotg, hsotg->eps_in[ep], | ||
667 | @@ -3175,6 +3176,7 @@ static void dwc2_hsotg_irq_fifoempty(struct dwc2_hsotg *hsotg, bool periodic) | ||
668 | GINTSTS_PTXFEMP | \ | ||
669 | GINTSTS_RXFLVL) | ||
670 | |||
671 | +static int dwc2_hsotg_ep_disable(struct usb_ep *ep); | ||
672 | /** | ||
673 | * dwc2_hsotg_core_init - issue softreset to the core | ||
674 | * @hsotg: The device state | ||
675 | @@ -3189,13 +3191,23 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, | ||
676 | u32 val; | ||
677 | u32 usbcfg; | ||
678 | u32 dcfg = 0; | ||
679 | + int ep; | ||
680 | |||
681 | /* Kill any ep0 requests as controller will be reinitialized */ | ||
682 | kill_all_requests(hsotg, hsotg->eps_out[0], -ECONNRESET); | ||
683 | |||
684 | - if (!is_usb_reset) | ||
685 | + if (!is_usb_reset) { | ||
686 | if (dwc2_core_reset(hsotg, true)) | ||
687 | return; | ||
688 | + } else { | ||
689 | + /* all endpoints should be shutdown */ | ||
690 | + for (ep = 1; ep < hsotg->num_of_eps; ep++) { | ||
691 | + if (hsotg->eps_in[ep]) | ||
692 | + dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); | ||
693 | + if (hsotg->eps_out[ep]) | ||
694 | + dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); | ||
695 | + } | ||
696 | + } | ||
697 | |||
698 | /* | ||
699 | * we must now enable ep0 ready for host detection and then | ||
700 | @@ -3993,7 +4005,6 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep) | ||
701 | struct dwc2_hsotg *hsotg = hs_ep->parent; | ||
702 | int dir_in = hs_ep->dir_in; | ||
703 | int index = hs_ep->index; | ||
704 | - unsigned long flags; | ||
705 | u32 epctrl_reg; | ||
706 | u32 ctrl; | ||
707 | |||
708 | @@ -4011,8 +4022,6 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep) | ||
709 | |||
710 | epctrl_reg = dir_in ? DIEPCTL(index) : DOEPCTL(index); | ||
711 | |||
712 | - spin_lock_irqsave(&hsotg->lock, flags); | ||
713 | - | ||
714 | ctrl = dwc2_readl(hsotg, epctrl_reg); | ||
715 | |||
716 | if (ctrl & DXEPCTL_EPENA) | ||
717 | @@ -4035,10 +4044,22 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep) | ||
718 | hs_ep->fifo_index = 0; | ||
719 | hs_ep->fifo_size = 0; | ||
720 | |||
721 | - spin_unlock_irqrestore(&hsotg->lock, flags); | ||
722 | return 0; | ||
723 | } | ||
724 | |||
725 | +static int dwc2_hsotg_ep_disable_lock(struct usb_ep *ep) | ||
726 | +{ | ||
727 | + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); | ||
728 | + struct dwc2_hsotg *hsotg = hs_ep->parent; | ||
729 | + unsigned long flags; | ||
730 | + int ret; | ||
731 | + | ||
732 | + spin_lock_irqsave(&hsotg->lock, flags); | ||
733 | + ret = dwc2_hsotg_ep_disable(ep); | ||
734 | + spin_unlock_irqrestore(&hsotg->lock, flags); | ||
735 | + return ret; | ||
736 | +} | ||
737 | + | ||
738 | /** | ||
739 | * on_list - check request is on the given endpoint | ||
740 | * @ep: The endpoint to check. | ||
741 | @@ -4186,7 +4207,7 @@ static int dwc2_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value) | ||
742 | |||
743 | static const struct usb_ep_ops dwc2_hsotg_ep_ops = { | ||
744 | .enable = dwc2_hsotg_ep_enable, | ||
745 | - .disable = dwc2_hsotg_ep_disable, | ||
746 | + .disable = dwc2_hsotg_ep_disable_lock, | ||
747 | .alloc_request = dwc2_hsotg_ep_alloc_request, | ||
748 | .free_request = dwc2_hsotg_ep_free_request, | ||
749 | .queue = dwc2_hsotg_ep_queue_lock, | ||
750 | @@ -4326,9 +4347,9 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget) | ||
751 | /* all endpoints should be shutdown */ | ||
752 | for (ep = 1; ep < hsotg->num_of_eps; ep++) { | ||
753 | if (hsotg->eps_in[ep]) | ||
754 | - dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); | ||
755 | + dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); | ||
756 | if (hsotg->eps_out[ep]) | ||
757 | - dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); | ||
758 | + dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep); | ||
759 | } | ||
760 | |||
761 | spin_lock_irqsave(&hsotg->lock, flags); | ||
762 | @@ -4776,9 +4797,9 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg) | ||
763 | |||
764 | for (ep = 0; ep < hsotg->num_of_eps; ep++) { | ||
765 | if (hsotg->eps_in[ep]) | ||
766 | - dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); | ||
767 | + dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); | ||
768 | if (hsotg->eps_out[ep]) | ||
769 | - dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); | ||
770 | + dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep); | ||
771 | } | ||
772 | } | ||
773 | |||
774 | diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c | ||
775 | index ae704658b528..124356dc39e1 100644 | ||
776 | --- a/drivers/vhost/net.c | ||
777 | +++ b/drivers/vhost/net.c | ||
778 | @@ -497,12 +497,6 @@ static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter, | ||
779 | return iov_iter_count(iter); | ||
780 | } | ||
781 | |||
782 | -static bool vhost_exceeds_weight(int pkts, int total_len) | ||
783 | -{ | ||
784 | - return total_len >= VHOST_NET_WEIGHT || | ||
785 | - pkts >= VHOST_NET_PKT_WEIGHT; | ||
786 | -} | ||
787 | - | ||
788 | static int get_tx_bufs(struct vhost_net *net, | ||
789 | struct vhost_net_virtqueue *nvq, | ||
790 | struct msghdr *msg, | ||
791 | @@ -557,7 +551,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) | ||
792 | int err; | ||
793 | int sent_pkts = 0; | ||
794 | |||
795 | - for (;;) { | ||
796 | + do { | ||
797 | bool busyloop_intr = false; | ||
798 | |||
799 | head = get_tx_bufs(net, nvq, &msg, &out, &in, &len, | ||
800 | @@ -598,11 +592,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) | ||
801 | err, len); | ||
802 | if (++nvq->done_idx >= VHOST_NET_BATCH) | ||
803 | vhost_net_signal_used(nvq); | ||
804 | - if (vhost_exceeds_weight(++sent_pkts, total_len)) { | ||
805 | - vhost_poll_queue(&vq->poll); | ||
806 | - break; | ||
807 | - } | ||
808 | - } | ||
809 | + } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); | ||
810 | |||
811 | vhost_net_signal_used(nvq); | ||
812 | } | ||
813 | @@ -626,7 +616,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) | ||
814 | bool zcopy_used; | ||
815 | int sent_pkts = 0; | ||
816 | |||
817 | - for (;;) { | ||
818 | + do { | ||
819 | bool busyloop_intr; | ||
820 | |||
821 | /* Release DMAs done buffers first */ | ||
822 | @@ -701,11 +691,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) | ||
823 | else | ||
824 | vhost_zerocopy_signal_used(net, vq); | ||
825 | vhost_net_tx_packet(net); | ||
826 | - if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) { | ||
827 | - vhost_poll_queue(&vq->poll); | ||
828 | - break; | ||
829 | - } | ||
830 | - } | ||
831 | + } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); | ||
832 | } | ||
833 | |||
834 | /* Expects to be always run from workqueue - which acts as | ||
835 | @@ -941,8 +927,11 @@ static void handle_rx(struct vhost_net *net) | ||
836 | vq->log : NULL; | ||
837 | mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); | ||
838 | |||
839 | - while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk, | ||
840 | - &busyloop_intr))) { | ||
841 | + do { | ||
842 | + sock_len = vhost_net_rx_peek_head_len(net, sock->sk, | ||
843 | + &busyloop_intr); | ||
844 | + if (!sock_len) | ||
845 | + break; | ||
846 | sock_len += sock_hlen; | ||
847 | vhost_len = sock_len + vhost_hlen; | ||
848 | headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx, | ||
849 | @@ -1027,14 +1016,11 @@ static void handle_rx(struct vhost_net *net) | ||
850 | vhost_log_write(vq, vq_log, log, vhost_len, | ||
851 | vq->iov, in); | ||
852 | total_len += vhost_len; | ||
853 | - if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) { | ||
854 | - vhost_poll_queue(&vq->poll); | ||
855 | - goto out; | ||
856 | - } | ||
857 | - } | ||
858 | + } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len))); | ||
859 | + | ||
860 | if (unlikely(busyloop_intr)) | ||
861 | vhost_poll_queue(&vq->poll); | ||
862 | - else | ||
863 | + else if (!sock_len) | ||
864 | vhost_net_enable_vq(net, vq); | ||
865 | out: | ||
866 | vhost_net_signal_used(nvq); | ||
867 | @@ -1115,7 +1101,8 @@ static int vhost_net_open(struct inode *inode, struct file *f) | ||
868 | vhost_net_buf_init(&n->vqs[i].rxq); | ||
869 | } | ||
870 | vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, | ||
871 | - UIO_MAXIOV + VHOST_NET_BATCH); | ||
872 | + UIO_MAXIOV + VHOST_NET_BATCH, | ||
873 | + VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT); | ||
874 | |||
875 | vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); | ||
876 | vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); | ||
877 | diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c | ||
878 | index 0cfa925be4ec..5e298d9287f1 100644 | ||
879 | --- a/drivers/vhost/scsi.c | ||
880 | +++ b/drivers/vhost/scsi.c | ||
881 | @@ -57,6 +57,12 @@ | ||
882 | #define VHOST_SCSI_PREALLOC_UPAGES 2048 | ||
883 | #define VHOST_SCSI_PREALLOC_PROT_SGLS 2048 | ||
884 | |||
885 | +/* Max number of requests before requeueing the job. | ||
886 | + * Using this limit prevents one virtqueue from starving others with | ||
887 | + * request. | ||
888 | + */ | ||
889 | +#define VHOST_SCSI_WEIGHT 256 | ||
890 | + | ||
891 | struct vhost_scsi_inflight { | ||
892 | /* Wait for the flush operation to finish */ | ||
893 | struct completion comp; | ||
894 | @@ -811,7 +817,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) | ||
895 | u64 tag; | ||
896 | u32 exp_data_len, data_direction; | ||
897 | unsigned int out = 0, in = 0; | ||
898 | - int head, ret, prot_bytes; | ||
899 | + int head, ret, prot_bytes, c = 0; | ||
900 | size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp); | ||
901 | size_t out_size, in_size; | ||
902 | u16 lun; | ||
903 | @@ -830,7 +836,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) | ||
904 | |||
905 | vhost_disable_notify(&vs->dev, vq); | ||
906 | |||
907 | - for (;;) { | ||
908 | + do { | ||
909 | head = vhost_get_vq_desc(vq, vq->iov, | ||
910 | ARRAY_SIZE(vq->iov), &out, &in, | ||
911 | NULL, NULL); | ||
912 | @@ -1045,7 +1051,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) | ||
913 | */ | ||
914 | INIT_WORK(&cmd->work, vhost_scsi_submission_work); | ||
915 | queue_work(vhost_scsi_workqueue, &cmd->work); | ||
916 | - } | ||
917 | + } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); | ||
918 | out: | ||
919 | mutex_unlock(&vq->mutex); | ||
920 | } | ||
921 | @@ -1398,7 +1404,8 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) | ||
922 | vqs[i] = &vs->vqs[i].vq; | ||
923 | vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; | ||
924 | } | ||
925 | - vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV); | ||
926 | + vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, | ||
927 | + VHOST_SCSI_WEIGHT, 0); | ||
928 | |||
929 | vhost_scsi_init_inflight(vs, NULL); | ||
930 | |||
931 | diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c | ||
932 | index c163bc15976a..0752f8dc47b1 100644 | ||
933 | --- a/drivers/vhost/vhost.c | ||
934 | +++ b/drivers/vhost/vhost.c | ||
935 | @@ -413,8 +413,24 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) | ||
936 | vhost_vq_free_iovecs(dev->vqs[i]); | ||
937 | } | ||
938 | |||
939 | +bool vhost_exceeds_weight(struct vhost_virtqueue *vq, | ||
940 | + int pkts, int total_len) | ||
941 | +{ | ||
942 | + struct vhost_dev *dev = vq->dev; | ||
943 | + | ||
944 | + if ((dev->byte_weight && total_len >= dev->byte_weight) || | ||
945 | + pkts >= dev->weight) { | ||
946 | + vhost_poll_queue(&vq->poll); | ||
947 | + return true; | ||
948 | + } | ||
949 | + | ||
950 | + return false; | ||
951 | +} | ||
952 | +EXPORT_SYMBOL_GPL(vhost_exceeds_weight); | ||
953 | + | ||
954 | void vhost_dev_init(struct vhost_dev *dev, | ||
955 | - struct vhost_virtqueue **vqs, int nvqs, int iov_limit) | ||
956 | + struct vhost_virtqueue **vqs, int nvqs, | ||
957 | + int iov_limit, int weight, int byte_weight) | ||
958 | { | ||
959 | struct vhost_virtqueue *vq; | ||
960 | int i; | ||
961 | @@ -428,6 +444,8 @@ void vhost_dev_init(struct vhost_dev *dev, | ||
962 | dev->mm = NULL; | ||
963 | dev->worker = NULL; | ||
964 | dev->iov_limit = iov_limit; | ||
965 | + dev->weight = weight; | ||
966 | + dev->byte_weight = byte_weight; | ||
967 | init_llist_head(&dev->work_list); | ||
968 | init_waitqueue_head(&dev->wait); | ||
969 | INIT_LIST_HEAD(&dev->read_list); | ||
970 | diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h | ||
971 | index 9490e7ddb340..27a78a9b8cc7 100644 | ||
972 | --- a/drivers/vhost/vhost.h | ||
973 | +++ b/drivers/vhost/vhost.h | ||
974 | @@ -171,10 +171,13 @@ struct vhost_dev { | ||
975 | struct list_head pending_list; | ||
976 | wait_queue_head_t wait; | ||
977 | int iov_limit; | ||
978 | + int weight; | ||
979 | + int byte_weight; | ||
980 | }; | ||
981 | |||
982 | +bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); | ||
983 | void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, | ||
984 | - int nvqs, int iov_limit); | ||
985 | + int nvqs, int iov_limit, int weight, int byte_weight); | ||
986 | long vhost_dev_set_owner(struct vhost_dev *dev); | ||
987 | bool vhost_dev_has_owner(struct vhost_dev *dev); | ||
988 | long vhost_dev_check_owner(struct vhost_dev *); | ||
989 | diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c | ||
990 | index e440f87ae1d6..bab495d73195 100644 | ||
991 | --- a/drivers/vhost/vsock.c | ||
992 | +++ b/drivers/vhost/vsock.c | ||
993 | @@ -21,6 +21,14 @@ | ||
994 | #include "vhost.h" | ||
995 | |||
996 | #define VHOST_VSOCK_DEFAULT_HOST_CID 2 | ||
997 | +/* Max number of bytes transferred before requeueing the job. | ||
998 | + * Using this limit prevents one virtqueue from starving others. */ | ||
999 | +#define VHOST_VSOCK_WEIGHT 0x80000 | ||
1000 | +/* Max number of packets transferred before requeueing the job. | ||
1001 | + * Using this limit prevents one virtqueue from starving others with | ||
1002 | + * small pkts. | ||
1003 | + */ | ||
1004 | +#define VHOST_VSOCK_PKT_WEIGHT 256 | ||
1005 | |||
1006 | enum { | ||
1007 | VHOST_VSOCK_FEATURES = VHOST_FEATURES, | ||
1008 | @@ -78,6 +86,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, | ||
1009 | struct vhost_virtqueue *vq) | ||
1010 | { | ||
1011 | struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; | ||
1012 | + int pkts = 0, total_len = 0; | ||
1013 | bool added = false; | ||
1014 | bool restart_tx = false; | ||
1015 | |||
1016 | @@ -89,7 +98,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, | ||
1017 | /* Avoid further vmexits, we're already processing the virtqueue */ | ||
1018 | vhost_disable_notify(&vsock->dev, vq); | ||
1019 | |||
1020 | - for (;;) { | ||
1021 | + do { | ||
1022 | struct virtio_vsock_pkt *pkt; | ||
1023 | struct iov_iter iov_iter; | ||
1024 | unsigned out, in; | ||
1025 | @@ -174,8 +183,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, | ||
1026 | */ | ||
1027 | virtio_transport_deliver_tap_pkt(pkt); | ||
1028 | |||
1029 | + total_len += pkt->len; | ||
1030 | virtio_transport_free_pkt(pkt); | ||
1031 | - } | ||
1032 | + } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); | ||
1033 | if (added) | ||
1034 | vhost_signal(&vsock->dev, vq); | ||
1035 | |||
1036 | @@ -350,7 +360,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) | ||
1037 | struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, | ||
1038 | dev); | ||
1039 | struct virtio_vsock_pkt *pkt; | ||
1040 | - int head; | ||
1041 | + int head, pkts = 0, total_len = 0; | ||
1042 | unsigned int out, in; | ||
1043 | bool added = false; | ||
1044 | |||
1045 | @@ -360,7 +370,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) | ||
1046 | goto out; | ||
1047 | |||
1048 | vhost_disable_notify(&vsock->dev, vq); | ||
1049 | - for (;;) { | ||
1050 | + do { | ||
1051 | u32 len; | ||
1052 | |||
1053 | if (!vhost_vsock_more_replies(vsock)) { | ||
1054 | @@ -401,9 +411,11 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) | ||
1055 | else | ||
1056 | virtio_transport_free_pkt(pkt); | ||
1057 | |||
1058 | - vhost_add_used(vq, head, sizeof(pkt->hdr) + len); | ||
1059 | + len += sizeof(pkt->hdr); | ||
1060 | + vhost_add_used(vq, head, len); | ||
1061 | + total_len += len; | ||
1062 | added = true; | ||
1063 | - } | ||
1064 | + } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); | ||
1065 | |||
1066 | no_more_replies: | ||
1067 | if (added) | ||
1068 | @@ -531,7 +543,9 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) | ||
1069 | vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; | ||
1070 | vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; | ||
1071 | |||
1072 | - vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV); | ||
1073 | + vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), | ||
1074 | + UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT, | ||
1075 | + VHOST_VSOCK_WEIGHT); | ||
1076 | |||
1077 | file->private_data = vsock; | ||
1078 | spin_lock_init(&vsock->send_pkt_list_lock); | ||
1079 | diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c | ||
1080 | index c7542e8dd096..a11fa0b6b34d 100644 | ||
1081 | --- a/fs/ceph/caps.c | ||
1082 | +++ b/fs/ceph/caps.c | ||
1083 | @@ -1237,20 +1237,23 @@ static int send_cap_msg(struct cap_msg_args *arg) | ||
1084 | } | ||
1085 | |||
1086 | /* | ||
1087 | - * Queue cap releases when an inode is dropped from our cache. Since | ||
1088 | - * inode is about to be destroyed, there is no need for i_ceph_lock. | ||
1089 | + * Queue cap releases when an inode is dropped from our cache. | ||
1090 | */ | ||
1091 | void ceph_queue_caps_release(struct inode *inode) | ||
1092 | { | ||
1093 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
1094 | struct rb_node *p; | ||
1095 | |||
1096 | + /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) | ||
1097 | + * may call __ceph_caps_issued_mask() on a freeing inode. */ | ||
1098 | + spin_lock(&ci->i_ceph_lock); | ||
1099 | p = rb_first(&ci->i_caps); | ||
1100 | while (p) { | ||
1101 | struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); | ||
1102 | p = rb_next(p); | ||
1103 | __ceph_remove_cap(cap, true); | ||
1104 | } | ||
1105 | + spin_unlock(&ci->i_ceph_lock); | ||
1106 | } | ||
1107 | |||
1108 | /* | ||
1109 | diff --git a/fs/exec.c b/fs/exec.c | ||
1110 | index 433b1257694a..561ea64829ec 100644 | ||
1111 | --- a/fs/exec.c | ||
1112 | +++ b/fs/exec.c | ||
1113 | @@ -1826,7 +1826,7 @@ static int __do_execve_file(int fd, struct filename *filename, | ||
1114 | membarrier_execve(current); | ||
1115 | rseq_execve(current); | ||
1116 | acct_update_integrals(current); | ||
1117 | - task_numa_free(current); | ||
1118 | + task_numa_free(current, false); | ||
1119 | free_bprm(bprm); | ||
1120 | kfree(pathbuf); | ||
1121 | if (filename) | ||
1122 | diff --git a/fs/nfs/client.c b/fs/nfs/client.c | ||
1123 | index c092661147b3..0a2b59c1ecb3 100644 | ||
1124 | --- a/fs/nfs/client.c | ||
1125 | +++ b/fs/nfs/client.c | ||
1126 | @@ -416,10 +416,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) | ||
1127 | clp = nfs_match_client(cl_init); | ||
1128 | if (clp) { | ||
1129 | spin_unlock(&nn->nfs_client_lock); | ||
1130 | - if (IS_ERR(clp)) | ||
1131 | - return clp; | ||
1132 | if (new) | ||
1133 | new->rpc_ops->free_client(new); | ||
1134 | + if (IS_ERR(clp)) | ||
1135 | + return clp; | ||
1136 | return nfs_found_client(cl_init, clp); | ||
1137 | } | ||
1138 | if (new) { | ||
1139 | diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c | ||
1140 | index 8bfaa658b2c1..71b2e390becf 100644 | ||
1141 | --- a/fs/nfs/dir.c | ||
1142 | +++ b/fs/nfs/dir.c | ||
1143 | @@ -1072,6 +1072,100 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, | ||
1144 | return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU); | ||
1145 | } | ||
1146 | |||
1147 | +static int | ||
1148 | +nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry, | ||
1149 | + struct inode *inode, int error) | ||
1150 | +{ | ||
1151 | + switch (error) { | ||
1152 | + case 1: | ||
1153 | + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", | ||
1154 | + __func__, dentry); | ||
1155 | + return 1; | ||
1156 | + case 0: | ||
1157 | + nfs_mark_for_revalidate(dir); | ||
1158 | + if (inode && S_ISDIR(inode->i_mode)) { | ||
1159 | + /* Purge readdir caches. */ | ||
1160 | + nfs_zap_caches(inode); | ||
1161 | + /* | ||
1162 | + * We can't d_drop the root of a disconnected tree: | ||
1163 | + * its d_hash is on the s_anon list and d_drop() would hide | ||
1164 | + * it from shrink_dcache_for_unmount(), leading to busy | ||
1165 | + * inodes on unmount and further oopses. | ||
1166 | + */ | ||
1167 | + if (IS_ROOT(dentry)) | ||
1168 | + return 1; | ||
1169 | + } | ||
1170 | + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", | ||
1171 | + __func__, dentry); | ||
1172 | + return 0; | ||
1173 | + } | ||
1174 | + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", | ||
1175 | + __func__, dentry, error); | ||
1176 | + return error; | ||
1177 | +} | ||
1178 | + | ||
1179 | +static int | ||
1180 | +nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry, | ||
1181 | + unsigned int flags) | ||
1182 | +{ | ||
1183 | + int ret = 1; | ||
1184 | + if (nfs_neg_need_reval(dir, dentry, flags)) { | ||
1185 | + if (flags & LOOKUP_RCU) | ||
1186 | + return -ECHILD; | ||
1187 | + ret = 0; | ||
1188 | + } | ||
1189 | + return nfs_lookup_revalidate_done(dir, dentry, NULL, ret); | ||
1190 | +} | ||
1191 | + | ||
1192 | +static int | ||
1193 | +nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry, | ||
1194 | + struct inode *inode) | ||
1195 | +{ | ||
1196 | + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1197 | + return nfs_lookup_revalidate_done(dir, dentry, inode, 1); | ||
1198 | +} | ||
1199 | + | ||
1200 | +static int | ||
1201 | +nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, | ||
1202 | + struct inode *inode) | ||
1203 | +{ | ||
1204 | + struct nfs_fh *fhandle; | ||
1205 | + struct nfs_fattr *fattr; | ||
1206 | + struct nfs4_label *label; | ||
1207 | + int ret; | ||
1208 | + | ||
1209 | + ret = -ENOMEM; | ||
1210 | + fhandle = nfs_alloc_fhandle(); | ||
1211 | + fattr = nfs_alloc_fattr(); | ||
1212 | + label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); | ||
1213 | + if (fhandle == NULL || fattr == NULL || IS_ERR(label)) | ||
1214 | + goto out; | ||
1215 | + | ||
1216 | + ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); | ||
1217 | + if (ret < 0) { | ||
1218 | + if (ret == -ESTALE || ret == -ENOENT) | ||
1219 | + ret = 0; | ||
1220 | + goto out; | ||
1221 | + } | ||
1222 | + ret = 0; | ||
1223 | + if (nfs_compare_fh(NFS_FH(inode), fhandle)) | ||
1224 | + goto out; | ||
1225 | + if (nfs_refresh_inode(inode, fattr) < 0) | ||
1226 | + goto out; | ||
1227 | + | ||
1228 | + nfs_setsecurity(inode, fattr, label); | ||
1229 | + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1230 | + | ||
1231 | + /* set a readdirplus hint that we had a cache miss */ | ||
1232 | + nfs_force_use_readdirplus(dir); | ||
1233 | + ret = 1; | ||
1234 | +out: | ||
1235 | + nfs_free_fattr(fattr); | ||
1236 | + nfs_free_fhandle(fhandle); | ||
1237 | + nfs4_label_free(label); | ||
1238 | + return nfs_lookup_revalidate_done(dir, dentry, inode, ret); | ||
1239 | +} | ||
1240 | + | ||
1241 | /* | ||
1242 | * This is called every time the dcache has a lookup hit, | ||
1243 | * and we should check whether we can really trust that | ||
1244 | @@ -1083,58 +1177,36 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, | ||
1245 | * If the parent directory is seen to have changed, we throw out the | ||
1246 | * cached dentry and do a new lookup. | ||
1247 | */ | ||
1248 | -static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) | ||
1249 | +static int | ||
1250 | +nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, | ||
1251 | + unsigned int flags) | ||
1252 | { | ||
1253 | - struct inode *dir; | ||
1254 | struct inode *inode; | ||
1255 | - struct dentry *parent; | ||
1256 | - struct nfs_fh *fhandle = NULL; | ||
1257 | - struct nfs_fattr *fattr = NULL; | ||
1258 | - struct nfs4_label *label = NULL; | ||
1259 | int error; | ||
1260 | |||
1261 | - if (flags & LOOKUP_RCU) { | ||
1262 | - parent = READ_ONCE(dentry->d_parent); | ||
1263 | - dir = d_inode_rcu(parent); | ||
1264 | - if (!dir) | ||
1265 | - return -ECHILD; | ||
1266 | - } else { | ||
1267 | - parent = dget_parent(dentry); | ||
1268 | - dir = d_inode(parent); | ||
1269 | - } | ||
1270 | nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); | ||
1271 | inode = d_inode(dentry); | ||
1272 | |||
1273 | - if (!inode) { | ||
1274 | - if (nfs_neg_need_reval(dir, dentry, flags)) { | ||
1275 | - if (flags & LOOKUP_RCU) | ||
1276 | - return -ECHILD; | ||
1277 | - goto out_bad; | ||
1278 | - } | ||
1279 | - goto out_valid; | ||
1280 | - } | ||
1281 | + if (!inode) | ||
1282 | + return nfs_lookup_revalidate_negative(dir, dentry, flags); | ||
1283 | |||
1284 | if (is_bad_inode(inode)) { | ||
1285 | - if (flags & LOOKUP_RCU) | ||
1286 | - return -ECHILD; | ||
1287 | dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", | ||
1288 | __func__, dentry); | ||
1289 | goto out_bad; | ||
1290 | } | ||
1291 | |||
1292 | if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) | ||
1293 | - goto out_set_verifier; | ||
1294 | + return nfs_lookup_revalidate_delegated(dir, dentry, inode); | ||
1295 | |||
1296 | /* Force a full look up iff the parent directory has changed */ | ||
1297 | if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) && | ||
1298 | nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { | ||
1299 | error = nfs_lookup_verify_inode(inode, flags); | ||
1300 | if (error) { | ||
1301 | - if (flags & LOOKUP_RCU) | ||
1302 | - return -ECHILD; | ||
1303 | if (error == -ESTALE) | ||
1304 | - goto out_zap_parent; | ||
1305 | - goto out_error; | ||
1306 | + nfs_zap_caches(dir); | ||
1307 | + goto out_bad; | ||
1308 | } | ||
1309 | nfs_advise_use_readdirplus(dir); | ||
1310 | goto out_valid; | ||
1311 | @@ -1146,81 +1218,45 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) | ||
1312 | if (NFS_STALE(inode)) | ||
1313 | goto out_bad; | ||
1314 | |||
1315 | - error = -ENOMEM; | ||
1316 | - fhandle = nfs_alloc_fhandle(); | ||
1317 | - fattr = nfs_alloc_fattr(); | ||
1318 | - if (fhandle == NULL || fattr == NULL) | ||
1319 | - goto out_error; | ||
1320 | - | ||
1321 | - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); | ||
1322 | - if (IS_ERR(label)) | ||
1323 | - goto out_error; | ||
1324 | - | ||
1325 | trace_nfs_lookup_revalidate_enter(dir, dentry, flags); | ||
1326 | - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); | ||
1327 | + error = nfs_lookup_revalidate_dentry(dir, dentry, inode); | ||
1328 | trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error); | ||
1329 | - if (error == -ESTALE || error == -ENOENT) | ||
1330 | - goto out_bad; | ||
1331 | - if (error) | ||
1332 | - goto out_error; | ||
1333 | - if (nfs_compare_fh(NFS_FH(inode), fhandle)) | ||
1334 | - goto out_bad; | ||
1335 | - if ((error = nfs_refresh_inode(inode, fattr)) != 0) | ||
1336 | - goto out_bad; | ||
1337 | - | ||
1338 | - nfs_setsecurity(inode, fattr, label); | ||
1339 | - | ||
1340 | - nfs_free_fattr(fattr); | ||
1341 | - nfs_free_fhandle(fhandle); | ||
1342 | - nfs4_label_free(label); | ||
1343 | + return error; | ||
1344 | +out_valid: | ||
1345 | + return nfs_lookup_revalidate_done(dir, dentry, inode, 1); | ||
1346 | +out_bad: | ||
1347 | + if (flags & LOOKUP_RCU) | ||
1348 | + return -ECHILD; | ||
1349 | + return nfs_lookup_revalidate_done(dir, dentry, inode, 0); | ||
1350 | +} | ||
1351 | |||
1352 | - /* set a readdirplus hint that we had a cache miss */ | ||
1353 | - nfs_force_use_readdirplus(dir); | ||
1354 | +static int | ||
1355 | +__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, | ||
1356 | + int (*reval)(struct inode *, struct dentry *, unsigned int)) | ||
1357 | +{ | ||
1358 | + struct dentry *parent; | ||
1359 | + struct inode *dir; | ||
1360 | + int ret; | ||
1361 | |||
1362 | -out_set_verifier: | ||
1363 | - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1364 | - out_valid: | ||
1365 | if (flags & LOOKUP_RCU) { | ||
1366 | + parent = READ_ONCE(dentry->d_parent); | ||
1367 | + dir = d_inode_rcu(parent); | ||
1368 | + if (!dir) | ||
1369 | + return -ECHILD; | ||
1370 | + ret = reval(dir, dentry, flags); | ||
1371 | if (parent != READ_ONCE(dentry->d_parent)) | ||
1372 | return -ECHILD; | ||
1373 | - } else | ||
1374 | + } else { | ||
1375 | + parent = dget_parent(dentry); | ||
1376 | + ret = reval(d_inode(parent), dentry, flags); | ||
1377 | dput(parent); | ||
1378 | - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", | ||
1379 | - __func__, dentry); | ||
1380 | - return 1; | ||
1381 | -out_zap_parent: | ||
1382 | - nfs_zap_caches(dir); | ||
1383 | - out_bad: | ||
1384 | - WARN_ON(flags & LOOKUP_RCU); | ||
1385 | - nfs_free_fattr(fattr); | ||
1386 | - nfs_free_fhandle(fhandle); | ||
1387 | - nfs4_label_free(label); | ||
1388 | - nfs_mark_for_revalidate(dir); | ||
1389 | - if (inode && S_ISDIR(inode->i_mode)) { | ||
1390 | - /* Purge readdir caches. */ | ||
1391 | - nfs_zap_caches(inode); | ||
1392 | - /* | ||
1393 | - * We can't d_drop the root of a disconnected tree: | ||
1394 | - * its d_hash is on the s_anon list and d_drop() would hide | ||
1395 | - * it from shrink_dcache_for_unmount(), leading to busy | ||
1396 | - * inodes on unmount and further oopses. | ||
1397 | - */ | ||
1398 | - if (IS_ROOT(dentry)) | ||
1399 | - goto out_valid; | ||
1400 | } | ||
1401 | - dput(parent); | ||
1402 | - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", | ||
1403 | - __func__, dentry); | ||
1404 | - return 0; | ||
1405 | -out_error: | ||
1406 | - WARN_ON(flags & LOOKUP_RCU); | ||
1407 | - nfs_free_fattr(fattr); | ||
1408 | - nfs_free_fhandle(fhandle); | ||
1409 | - nfs4_label_free(label); | ||
1410 | - dput(parent); | ||
1411 | - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", | ||
1412 | - __func__, dentry, error); | ||
1413 | - return error; | ||
1414 | + return ret; | ||
1415 | +} | ||
1416 | + | ||
1417 | +static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) | ||
1418 | +{ | ||
1419 | + return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); | ||
1420 | } | ||
1421 | |||
1422 | /* | ||
1423 | @@ -1579,62 +1615,55 @@ no_open: | ||
1424 | } | ||
1425 | EXPORT_SYMBOL_GPL(nfs_atomic_open); | ||
1426 | |||
1427 | -static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) | ||
1428 | +static int | ||
1429 | +nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, | ||
1430 | + unsigned int flags) | ||
1431 | { | ||
1432 | struct inode *inode; | ||
1433 | - int ret = 0; | ||
1434 | |||
1435 | if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) | ||
1436 | - goto no_open; | ||
1437 | + goto full_reval; | ||
1438 | if (d_mountpoint(dentry)) | ||
1439 | - goto no_open; | ||
1440 | - if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1) | ||
1441 | - goto no_open; | ||
1442 | + goto full_reval; | ||
1443 | |||
1444 | inode = d_inode(dentry); | ||
1445 | |||
1446 | /* We can't create new files in nfs_open_revalidate(), so we | ||
1447 | * optimize away revalidation of negative dentries. | ||
1448 | */ | ||
1449 | - if (inode == NULL) { | ||
1450 | - struct dentry *parent; | ||
1451 | - struct inode *dir; | ||
1452 | - | ||
1453 | - if (flags & LOOKUP_RCU) { | ||
1454 | - parent = READ_ONCE(dentry->d_parent); | ||
1455 | - dir = d_inode_rcu(parent); | ||
1456 | - if (!dir) | ||
1457 | - return -ECHILD; | ||
1458 | - } else { | ||
1459 | - parent = dget_parent(dentry); | ||
1460 | - dir = d_inode(parent); | ||
1461 | - } | ||
1462 | - if (!nfs_neg_need_reval(dir, dentry, flags)) | ||
1463 | - ret = 1; | ||
1464 | - else if (flags & LOOKUP_RCU) | ||
1465 | - ret = -ECHILD; | ||
1466 | - if (!(flags & LOOKUP_RCU)) | ||
1467 | - dput(parent); | ||
1468 | - else if (parent != READ_ONCE(dentry->d_parent)) | ||
1469 | - return -ECHILD; | ||
1470 | - goto out; | ||
1471 | - } | ||
1472 | + if (inode == NULL) | ||
1473 | + goto full_reval; | ||
1474 | + | ||
1475 | + if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) | ||
1476 | + return nfs_lookup_revalidate_delegated(dir, dentry, inode); | ||
1477 | |||
1478 | /* NFS only supports OPEN on regular files */ | ||
1479 | if (!S_ISREG(inode->i_mode)) | ||
1480 | - goto no_open; | ||
1481 | + goto full_reval; | ||
1482 | + | ||
1483 | /* We cannot do exclusive creation on a positive dentry */ | ||
1484 | - if (flags & LOOKUP_EXCL) | ||
1485 | - goto no_open; | ||
1486 | + if (flags & (LOOKUP_EXCL | LOOKUP_REVAL)) | ||
1487 | + goto reval_dentry; | ||
1488 | + | ||
1489 | + /* Check if the directory changed */ | ||
1490 | + if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) | ||
1491 | + goto reval_dentry; | ||
1492 | |||
1493 | /* Let f_op->open() actually open (and revalidate) the file */ | ||
1494 | - ret = 1; | ||
1495 | + return 1; | ||
1496 | +reval_dentry: | ||
1497 | + if (flags & LOOKUP_RCU) | ||
1498 | + return -ECHILD; | ||
1499 | + return nfs_lookup_revalidate_dentry(dir, dentry, inode);; | ||
1500 | |||
1501 | -out: | ||
1502 | - return ret; | ||
1503 | +full_reval: | ||
1504 | + return nfs_do_lookup_revalidate(dir, dentry, flags); | ||
1505 | +} | ||
1506 | |||
1507 | -no_open: | ||
1508 | - return nfs_lookup_revalidate(dentry, flags); | ||
1509 | +static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) | ||
1510 | +{ | ||
1511 | + return __nfs_lookup_revalidate(dentry, flags, | ||
1512 | + nfs4_do_lookup_revalidate); | ||
1513 | } | ||
1514 | |||
1515 | #endif /* CONFIG_NFSV4 */ | ||
1516 | diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c | ||
1517 | index 1de855e0ae61..904e08bbb289 100644 | ||
1518 | --- a/fs/nfs/nfs4proc.c | ||
1519 | +++ b/fs/nfs/nfs4proc.c | ||
1520 | @@ -1355,12 +1355,20 @@ static bool nfs4_mode_match_open_stateid(struct nfs4_state *state, | ||
1521 | return false; | ||
1522 | } | ||
1523 | |||
1524 | -static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode) | ||
1525 | +static int can_open_cached(struct nfs4_state *state, fmode_t mode, | ||
1526 | + int open_mode, enum open_claim_type4 claim) | ||
1527 | { | ||
1528 | int ret = 0; | ||
1529 | |||
1530 | if (open_mode & (O_EXCL|O_TRUNC)) | ||
1531 | goto out; | ||
1532 | + switch (claim) { | ||
1533 | + case NFS4_OPEN_CLAIM_NULL: | ||
1534 | + case NFS4_OPEN_CLAIM_FH: | ||
1535 | + goto out; | ||
1536 | + default: | ||
1537 | + break; | ||
1538 | + } | ||
1539 | switch (mode & (FMODE_READ|FMODE_WRITE)) { | ||
1540 | case FMODE_READ: | ||
1541 | ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 | ||
1542 | @@ -1753,7 +1761,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) | ||
1543 | |||
1544 | for (;;) { | ||
1545 | spin_lock(&state->owner->so_lock); | ||
1546 | - if (can_open_cached(state, fmode, open_mode)) { | ||
1547 | + if (can_open_cached(state, fmode, open_mode, claim)) { | ||
1548 | update_open_stateflags(state, fmode); | ||
1549 | spin_unlock(&state->owner->so_lock); | ||
1550 | goto out_return_state; | ||
1551 | @@ -2282,7 +2290,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) | ||
1552 | if (data->state != NULL) { | ||
1553 | struct nfs_delegation *delegation; | ||
1554 | |||
1555 | - if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags)) | ||
1556 | + if (can_open_cached(data->state, data->o_arg.fmode, | ||
1557 | + data->o_arg.open_flags, claim)) | ||
1558 | goto out_no_action; | ||
1559 | rcu_read_lock(); | ||
1560 | delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); | ||
1561 | diff --git a/fs/proc/base.c b/fs/proc/base.c | ||
1562 | index a7fbda72afeb..3b9b726b1a6c 100644 | ||
1563 | --- a/fs/proc/base.c | ||
1564 | +++ b/fs/proc/base.c | ||
1565 | @@ -205,12 +205,53 @@ static int proc_root_link(struct dentry *dentry, struct path *path) | ||
1566 | return result; | ||
1567 | } | ||
1568 | |||
1569 | +/* | ||
1570 | + * If the user used setproctitle(), we just get the string from | ||
1571 | + * user space at arg_start, and limit it to a maximum of one page. | ||
1572 | + */ | ||
1573 | +static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf, | ||
1574 | + size_t count, unsigned long pos, | ||
1575 | + unsigned long arg_start) | ||
1576 | +{ | ||
1577 | + char *page; | ||
1578 | + int ret, got; | ||
1579 | + | ||
1580 | + if (pos >= PAGE_SIZE) | ||
1581 | + return 0; | ||
1582 | + | ||
1583 | + page = (char *)__get_free_page(GFP_KERNEL); | ||
1584 | + if (!page) | ||
1585 | + return -ENOMEM; | ||
1586 | + | ||
1587 | + ret = 0; | ||
1588 | + got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON); | ||
1589 | + if (got > 0) { | ||
1590 | + int len = strnlen(page, got); | ||
1591 | + | ||
1592 | + /* Include the NUL character if it was found */ | ||
1593 | + if (len < got) | ||
1594 | + len++; | ||
1595 | + | ||
1596 | + if (len > pos) { | ||
1597 | + len -= pos; | ||
1598 | + if (len > count) | ||
1599 | + len = count; | ||
1600 | + len -= copy_to_user(buf, page+pos, len); | ||
1601 | + if (!len) | ||
1602 | + len = -EFAULT; | ||
1603 | + ret = len; | ||
1604 | + } | ||
1605 | + } | ||
1606 | + free_page((unsigned long)page); | ||
1607 | + return ret; | ||
1608 | +} | ||
1609 | + | ||
1610 | static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, | ||
1611 | size_t count, loff_t *ppos) | ||
1612 | { | ||
1613 | unsigned long arg_start, arg_end, env_start, env_end; | ||
1614 | unsigned long pos, len; | ||
1615 | - char *page; | ||
1616 | + char *page, c; | ||
1617 | |||
1618 | /* Check if process spawned far enough to have cmdline. */ | ||
1619 | if (!mm->env_end) | ||
1620 | @@ -227,28 +268,42 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, | ||
1621 | return 0; | ||
1622 | |||
1623 | /* | ||
1624 | - * We have traditionally allowed the user to re-write | ||
1625 | - * the argument strings and overflow the end result | ||
1626 | - * into the environment section. But only do that if | ||
1627 | - * the environment area is contiguous to the arguments. | ||
1628 | + * We allow setproctitle() to overwrite the argument | ||
1629 | + * strings, and overflow past the original end. But | ||
1630 | + * only when it overflows into the environment area. | ||
1631 | */ | ||
1632 | - if (env_start != arg_end || env_start >= env_end) | ||
1633 | + if (env_start != arg_end || env_end < env_start) | ||
1634 | env_start = env_end = arg_end; | ||
1635 | - | ||
1636 | - /* .. and limit it to a maximum of one page of slop */ | ||
1637 | - if (env_end >= arg_end + PAGE_SIZE) | ||
1638 | - env_end = arg_end + PAGE_SIZE - 1; | ||
1639 | + len = env_end - arg_start; | ||
1640 | |||
1641 | /* We're not going to care if "*ppos" has high bits set */ | ||
1642 | - pos = arg_start + *ppos; | ||
1643 | - | ||
1644 | - /* .. but we do check the result is in the proper range */ | ||
1645 | - if (pos < arg_start || pos >= env_end) | ||
1646 | + pos = *ppos; | ||
1647 | + if (pos >= len) | ||
1648 | return 0; | ||
1649 | + if (count > len - pos) | ||
1650 | + count = len - pos; | ||
1651 | + if (!count) | ||
1652 | + return 0; | ||
1653 | + | ||
1654 | + /* | ||
1655 | + * Magical special case: if the argv[] end byte is not | ||
1656 | + * zero, the user has overwritten it with setproctitle(3). | ||
1657 | + * | ||
1658 | + * Possible future enhancement: do this only once when | ||
1659 | + * pos is 0, and set a flag in the 'struct file'. | ||
1660 | + */ | ||
1661 | + if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c) | ||
1662 | + return get_mm_proctitle(mm, buf, count, pos, arg_start); | ||
1663 | |||
1664 | - /* .. and we never go past env_end */ | ||
1665 | - if (env_end - pos < count) | ||
1666 | - count = env_end - pos; | ||
1667 | + /* | ||
1668 | + * For the non-setproctitle() case we limit things strictly | ||
1669 | + * to the [arg_start, arg_end[ range. | ||
1670 | + */ | ||
1671 | + pos += arg_start; | ||
1672 | + if (pos < arg_start || pos >= arg_end) | ||
1673 | + return 0; | ||
1674 | + if (count > arg_end - pos) | ||
1675 | + count = arg_end - pos; | ||
1676 | |||
1677 | page = (char *)__get_free_page(GFP_KERNEL); | ||
1678 | if (!page) | ||
1679 | @@ -258,48 +313,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, | ||
1680 | while (count) { | ||
1681 | int got; | ||
1682 | size_t size = min_t(size_t, PAGE_SIZE, count); | ||
1683 | - long offset; | ||
1684 | |||
1685 | - /* | ||
1686 | - * Are we already starting past the official end? | ||
1687 | - * We always include the last byte that is *supposed* | ||
1688 | - * to be NUL | ||
1689 | - */ | ||
1690 | - offset = (pos >= arg_end) ? pos - arg_end + 1 : 0; | ||
1691 | - | ||
1692 | - got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON); | ||
1693 | - if (got <= offset) | ||
1694 | + got = access_remote_vm(mm, pos, page, size, FOLL_ANON); | ||
1695 | + if (got <= 0) | ||
1696 | break; | ||
1697 | - got -= offset; | ||
1698 | - | ||
1699 | - /* Don't walk past a NUL character once you hit arg_end */ | ||
1700 | - if (pos + got >= arg_end) { | ||
1701 | - int n = 0; | ||
1702 | - | ||
1703 | - /* | ||
1704 | - * If we started before 'arg_end' but ended up | ||
1705 | - * at or after it, we start the NUL character | ||
1706 | - * check at arg_end-1 (where we expect the normal | ||
1707 | - * EOF to be). | ||
1708 | - * | ||
1709 | - * NOTE! This is smaller than 'got', because | ||
1710 | - * pos + got >= arg_end | ||
1711 | - */ | ||
1712 | - if (pos < arg_end) | ||
1713 | - n = arg_end - pos - 1; | ||
1714 | - | ||
1715 | - /* Cut off at first NUL after 'n' */ | ||
1716 | - got = n + strnlen(page+n, offset+got-n); | ||
1717 | - if (got < offset) | ||
1718 | - break; | ||
1719 | - got -= offset; | ||
1720 | - | ||
1721 | - /* Include the NUL if it existed */ | ||
1722 | - if (got < size) | ||
1723 | - got++; | ||
1724 | - } | ||
1725 | - | ||
1726 | - got -= copy_to_user(buf, page+offset, got); | ||
1727 | + got -= copy_to_user(buf, page, got); | ||
1728 | if (unlikely(!got)) { | ||
1729 | if (!len) | ||
1730 | len = -EFAULT; | ||
1731 | diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h | ||
1732 | index 6980014357d4..d51e10f50e75 100644 | ||
1733 | --- a/include/linux/blkdev.h | ||
1734 | +++ b/include/linux/blkdev.h | ||
1735 | @@ -504,6 +504,12 @@ struct request_queue { | ||
1736 | * various queue flags, see QUEUE_* below | ||
1737 | */ | ||
1738 | unsigned long queue_flags; | ||
1739 | + /* | ||
1740 | + * Number of contexts that have called blk_set_pm_only(). If this | ||
1741 | + * counter is above zero then only RQF_PM and RQF_PREEMPT requests are | ||
1742 | + * processed. | ||
1743 | + */ | ||
1744 | + atomic_t pm_only; | ||
1745 | |||
1746 | /* | ||
1747 | * ida allocated id for this queue. Used to index queues from | ||
1748 | @@ -698,7 +704,6 @@ struct request_queue { | ||
1749 | #define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ | ||
1750 | #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ | ||
1751 | #define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */ | ||
1752 | -#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */ | ||
1753 | |||
1754 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ | ||
1755 | (1 << QUEUE_FLAG_SAME_COMP) | \ | ||
1756 | @@ -736,12 +741,11 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); | ||
1757 | ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ | ||
1758 | REQ_FAILFAST_DRIVER)) | ||
1759 | #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) | ||
1760 | -#define blk_queue_preempt_only(q) \ | ||
1761 | - test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags) | ||
1762 | +#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) | ||
1763 | #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) | ||
1764 | |||
1765 | -extern int blk_set_preempt_only(struct request_queue *q); | ||
1766 | -extern void blk_clear_preempt_only(struct request_queue *q); | ||
1767 | +extern void blk_set_pm_only(struct request_queue *q); | ||
1768 | +extern void blk_clear_pm_only(struct request_queue *q); | ||
1769 | |||
1770 | static inline int queue_in_flight(struct request_queue *q) | ||
1771 | { | ||
1772 | diff --git a/include/linux/iova.h b/include/linux/iova.h | ||
1773 | index 928442dda565..84fbe73d2ec0 100644 | ||
1774 | --- a/include/linux/iova.h | ||
1775 | +++ b/include/linux/iova.h | ||
1776 | @@ -156,6 +156,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, | ||
1777 | void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); | ||
1778 | void init_iova_domain(struct iova_domain *iovad, unsigned long granule, | ||
1779 | unsigned long start_pfn); | ||
1780 | +bool has_iova_flush_queue(struct iova_domain *iovad); | ||
1781 | int init_iova_flush_queue(struct iova_domain *iovad, | ||
1782 | iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); | ||
1783 | struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); | ||
1784 | @@ -236,6 +237,11 @@ static inline void init_iova_domain(struct iova_domain *iovad, | ||
1785 | { | ||
1786 | } | ||
1787 | |||
1788 | +static inline bool has_iova_flush_queue(struct iova_domain *iovad) | ||
1789 | +{ | ||
1790 | + return false; | ||
1791 | +} | ||
1792 | + | ||
1793 | static inline int init_iova_flush_queue(struct iova_domain *iovad, | ||
1794 | iova_flush_cb flush_cb, | ||
1795 | iova_entry_dtor entry_dtor) | ||
1796 | diff --git a/include/linux/sched.h b/include/linux/sched.h | ||
1797 | index 5dc024e28397..20f5ba262cc0 100644 | ||
1798 | --- a/include/linux/sched.h | ||
1799 | +++ b/include/linux/sched.h | ||
1800 | @@ -1023,7 +1023,15 @@ struct task_struct { | ||
1801 | u64 last_sum_exec_runtime; | ||
1802 | struct callback_head numa_work; | ||
1803 | |||
1804 | - struct numa_group *numa_group; | ||
1805 | + /* | ||
1806 | + * This pointer is only modified for current in syscall and | ||
1807 | + * pagefault context (and for tasks being destroyed), so it can be read | ||
1808 | + * from any of the following contexts: | ||
1809 | + * - RCU read-side critical section | ||
1810 | + * - current->numa_group from everywhere | ||
1811 | + * - task's runqueue locked, task not running | ||
1812 | + */ | ||
1813 | + struct numa_group __rcu *numa_group; | ||
1814 | |||
1815 | /* | ||
1816 | * numa_faults is an array split into four regions: | ||
1817 | diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h | ||
1818 | index e7dd04a84ba8..3988762efe15 100644 | ||
1819 | --- a/include/linux/sched/numa_balancing.h | ||
1820 | +++ b/include/linux/sched/numa_balancing.h | ||
1821 | @@ -19,7 +19,7 @@ | ||
1822 | extern void task_numa_fault(int last_node, int node, int pages, int flags); | ||
1823 | extern pid_t task_numa_group_id(struct task_struct *p); | ||
1824 | extern void set_numabalancing_state(bool enabled); | ||
1825 | -extern void task_numa_free(struct task_struct *p); | ||
1826 | +extern void task_numa_free(struct task_struct *p, bool final); | ||
1827 | extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, | ||
1828 | int src_nid, int dst_cpu); | ||
1829 | #else | ||
1830 | @@ -34,7 +34,7 @@ static inline pid_t task_numa_group_id(struct task_struct *p) | ||
1831 | static inline void set_numabalancing_state(bool enabled) | ||
1832 | { | ||
1833 | } | ||
1834 | -static inline void task_numa_free(struct task_struct *p) | ||
1835 | +static inline void task_numa_free(struct task_struct *p, bool final) | ||
1836 | { | ||
1837 | } | ||
1838 | static inline bool should_numa_migrate_memory(struct task_struct *p, | ||
1839 | diff --git a/kernel/fork.c b/kernel/fork.c | ||
1840 | index 69874db3fba8..e76ce81c9c75 100644 | ||
1841 | --- a/kernel/fork.c | ||
1842 | +++ b/kernel/fork.c | ||
1843 | @@ -679,7 +679,7 @@ void __put_task_struct(struct task_struct *tsk) | ||
1844 | WARN_ON(tsk == current); | ||
1845 | |||
1846 | cgroup_free(tsk); | ||
1847 | - task_numa_free(tsk); | ||
1848 | + task_numa_free(tsk, true); | ||
1849 | security_task_free(tsk); | ||
1850 | exit_creds(tsk); | ||
1851 | delayacct_tsk_free(tsk); | ||
1852 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | ||
1853 | index 4a433608ba74..75f322603d44 100644 | ||
1854 | --- a/kernel/sched/fair.c | ||
1855 | +++ b/kernel/sched/fair.c | ||
1856 | @@ -1053,6 +1053,21 @@ struct numa_group { | ||
1857 | unsigned long faults[0]; | ||
1858 | }; | ||
1859 | |||
1860 | +/* | ||
1861 | + * For functions that can be called in multiple contexts that permit reading | ||
1862 | + * ->numa_group (see struct task_struct for locking rules). | ||
1863 | + */ | ||
1864 | +static struct numa_group *deref_task_numa_group(struct task_struct *p) | ||
1865 | +{ | ||
1866 | + return rcu_dereference_check(p->numa_group, p == current || | ||
1867 | + (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu))); | ||
1868 | +} | ||
1869 | + | ||
1870 | +static struct numa_group *deref_curr_numa_group(struct task_struct *p) | ||
1871 | +{ | ||
1872 | + return rcu_dereference_protected(p->numa_group, p == current); | ||
1873 | +} | ||
1874 | + | ||
1875 | static inline unsigned long group_faults_priv(struct numa_group *ng); | ||
1876 | static inline unsigned long group_faults_shared(struct numa_group *ng); | ||
1877 | |||
1878 | @@ -1096,10 +1111,12 @@ static unsigned int task_scan_start(struct task_struct *p) | ||
1879 | { | ||
1880 | unsigned long smin = task_scan_min(p); | ||
1881 | unsigned long period = smin; | ||
1882 | + struct numa_group *ng; | ||
1883 | |||
1884 | /* Scale the maximum scan period with the amount of shared memory. */ | ||
1885 | - if (p->numa_group) { | ||
1886 | - struct numa_group *ng = p->numa_group; | ||
1887 | + rcu_read_lock(); | ||
1888 | + ng = rcu_dereference(p->numa_group); | ||
1889 | + if (ng) { | ||
1890 | unsigned long shared = group_faults_shared(ng); | ||
1891 | unsigned long private = group_faults_priv(ng); | ||
1892 | |||
1893 | @@ -1107,6 +1124,7 @@ static unsigned int task_scan_start(struct task_struct *p) | ||
1894 | period *= shared + 1; | ||
1895 | period /= private + shared + 1; | ||
1896 | } | ||
1897 | + rcu_read_unlock(); | ||
1898 | |||
1899 | return max(smin, period); | ||
1900 | } | ||
1901 | @@ -1115,13 +1133,14 @@ static unsigned int task_scan_max(struct task_struct *p) | ||
1902 | { | ||
1903 | unsigned long smin = task_scan_min(p); | ||
1904 | unsigned long smax; | ||
1905 | + struct numa_group *ng; | ||
1906 | |||
1907 | /* Watch for min being lower than max due to floor calculations */ | ||
1908 | smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p); | ||
1909 | |||
1910 | /* Scale the maximum scan period with the amount of shared memory. */ | ||
1911 | - if (p->numa_group) { | ||
1912 | - struct numa_group *ng = p->numa_group; | ||
1913 | + ng = deref_curr_numa_group(p); | ||
1914 | + if (ng) { | ||
1915 | unsigned long shared = group_faults_shared(ng); | ||
1916 | unsigned long private = group_faults_priv(ng); | ||
1917 | unsigned long period = smax; | ||
1918 | @@ -1153,7 +1172,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p) | ||
1919 | p->numa_scan_period = sysctl_numa_balancing_scan_delay; | ||
1920 | p->numa_work.next = &p->numa_work; | ||
1921 | p->numa_faults = NULL; | ||
1922 | - p->numa_group = NULL; | ||
1923 | + RCU_INIT_POINTER(p->numa_group, NULL); | ||
1924 | p->last_task_numa_placement = 0; | ||
1925 | p->last_sum_exec_runtime = 0; | ||
1926 | |||
1927 | @@ -1200,7 +1219,16 @@ static void account_numa_dequeue(struct rq *rq, struct task_struct *p) | ||
1928 | |||
1929 | pid_t task_numa_group_id(struct task_struct *p) | ||
1930 | { | ||
1931 | - return p->numa_group ? p->numa_group->gid : 0; | ||
1932 | + struct numa_group *ng; | ||
1933 | + pid_t gid = 0; | ||
1934 | + | ||
1935 | + rcu_read_lock(); | ||
1936 | + ng = rcu_dereference(p->numa_group); | ||
1937 | + if (ng) | ||
1938 | + gid = ng->gid; | ||
1939 | + rcu_read_unlock(); | ||
1940 | + | ||
1941 | + return gid; | ||
1942 | } | ||
1943 | |||
1944 | /* | ||
1945 | @@ -1225,11 +1253,13 @@ static inline unsigned long task_faults(struct task_struct *p, int nid) | ||
1946 | |||
1947 | static inline unsigned long group_faults(struct task_struct *p, int nid) | ||
1948 | { | ||
1949 | - if (!p->numa_group) | ||
1950 | + struct numa_group *ng = deref_task_numa_group(p); | ||
1951 | + | ||
1952 | + if (!ng) | ||
1953 | return 0; | ||
1954 | |||
1955 | - return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] + | ||
1956 | - p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)]; | ||
1957 | + return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] + | ||
1958 | + ng->faults[task_faults_idx(NUMA_MEM, nid, 1)]; | ||
1959 | } | ||
1960 | |||
1961 | static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) | ||
1962 | @@ -1367,12 +1397,13 @@ static inline unsigned long task_weight(struct task_struct *p, int nid, | ||
1963 | static inline unsigned long group_weight(struct task_struct *p, int nid, | ||
1964 | int dist) | ||
1965 | { | ||
1966 | + struct numa_group *ng = deref_task_numa_group(p); | ||
1967 | unsigned long faults, total_faults; | ||
1968 | |||
1969 | - if (!p->numa_group) | ||
1970 | + if (!ng) | ||
1971 | return 0; | ||
1972 | |||
1973 | - total_faults = p->numa_group->total_faults; | ||
1974 | + total_faults = ng->total_faults; | ||
1975 | |||
1976 | if (!total_faults) | ||
1977 | return 0; | ||
1978 | @@ -1386,7 +1417,7 @@ static inline unsigned long group_weight(struct task_struct *p, int nid, | ||
1979 | bool should_numa_migrate_memory(struct task_struct *p, struct page * page, | ||
1980 | int src_nid, int dst_cpu) | ||
1981 | { | ||
1982 | - struct numa_group *ng = p->numa_group; | ||
1983 | + struct numa_group *ng = deref_curr_numa_group(p); | ||
1984 | int dst_nid = cpu_to_node(dst_cpu); | ||
1985 | int last_cpupid, this_cpupid; | ||
1986 | |||
1987 | @@ -1592,13 +1623,14 @@ static bool load_too_imbalanced(long src_load, long dst_load, | ||
1988 | static void task_numa_compare(struct task_numa_env *env, | ||
1989 | long taskimp, long groupimp, bool maymove) | ||
1990 | { | ||
1991 | + struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p); | ||
1992 | struct rq *dst_rq = cpu_rq(env->dst_cpu); | ||
1993 | + long imp = p_ng ? groupimp : taskimp; | ||
1994 | struct task_struct *cur; | ||
1995 | long src_load, dst_load; | ||
1996 | - long load; | ||
1997 | - long imp = env->p->numa_group ? groupimp : taskimp; | ||
1998 | - long moveimp = imp; | ||
1999 | int dist = env->dist; | ||
2000 | + long moveimp = imp; | ||
2001 | + long load; | ||
2002 | |||
2003 | if (READ_ONCE(dst_rq->numa_migrate_on)) | ||
2004 | return; | ||
2005 | @@ -1637,21 +1669,22 @@ static void task_numa_compare(struct task_numa_env *env, | ||
2006 | * If dst and source tasks are in the same NUMA group, or not | ||
2007 | * in any group then look only at task weights. | ||
2008 | */ | ||
2009 | - if (cur->numa_group == env->p->numa_group) { | ||
2010 | + cur_ng = rcu_dereference(cur->numa_group); | ||
2011 | + if (cur_ng == p_ng) { | ||
2012 | imp = taskimp + task_weight(cur, env->src_nid, dist) - | ||
2013 | task_weight(cur, env->dst_nid, dist); | ||
2014 | /* | ||
2015 | * Add some hysteresis to prevent swapping the | ||
2016 | * tasks within a group over tiny differences. | ||
2017 | */ | ||
2018 | - if (cur->numa_group) | ||
2019 | + if (cur_ng) | ||
2020 | imp -= imp / 16; | ||
2021 | } else { | ||
2022 | /* | ||
2023 | * Compare the group weights. If a task is all by itself | ||
2024 | * (not part of a group), use the task weight instead. | ||
2025 | */ | ||
2026 | - if (cur->numa_group && env->p->numa_group) | ||
2027 | + if (cur_ng && p_ng) | ||
2028 | imp += group_weight(cur, env->src_nid, dist) - | ||
2029 | group_weight(cur, env->dst_nid, dist); | ||
2030 | else | ||
2031 | @@ -1749,11 +1782,12 @@ static int task_numa_migrate(struct task_struct *p) | ||
2032 | .best_imp = 0, | ||
2033 | .best_cpu = -1, | ||
2034 | }; | ||
2035 | + unsigned long taskweight, groupweight; | ||
2036 | struct sched_domain *sd; | ||
2037 | + long taskimp, groupimp; | ||
2038 | + struct numa_group *ng; | ||
2039 | struct rq *best_rq; | ||
2040 | - unsigned long taskweight, groupweight; | ||
2041 | int nid, ret, dist; | ||
2042 | - long taskimp, groupimp; | ||
2043 | |||
2044 | /* | ||
2045 | * Pick the lowest SD_NUMA domain, as that would have the smallest | ||
2046 | @@ -1799,7 +1833,8 @@ static int task_numa_migrate(struct task_struct *p) | ||
2047 | * multiple NUMA nodes; in order to better consolidate the group, | ||
2048 | * we need to check other locations. | ||
2049 | */ | ||
2050 | - if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) { | ||
2051 | + ng = deref_curr_numa_group(p); | ||
2052 | + if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) { | ||
2053 | for_each_online_node(nid) { | ||
2054 | if (nid == env.src_nid || nid == p->numa_preferred_nid) | ||
2055 | continue; | ||
2056 | @@ -1832,7 +1867,7 @@ static int task_numa_migrate(struct task_struct *p) | ||
2057 | * A task that migrated to a second choice node will be better off | ||
2058 | * trying for a better one later. Do not set the preferred node here. | ||
2059 | */ | ||
2060 | - if (p->numa_group) { | ||
2061 | + if (ng) { | ||
2062 | if (env.best_cpu == -1) | ||
2063 | nid = env.src_nid; | ||
2064 | else | ||
2065 | @@ -2127,6 +2162,7 @@ static void task_numa_placement(struct task_struct *p) | ||
2066 | unsigned long total_faults; | ||
2067 | u64 runtime, period; | ||
2068 | spinlock_t *group_lock = NULL; | ||
2069 | + struct numa_group *ng; | ||
2070 | |||
2071 | /* | ||
2072 | * The p->mm->numa_scan_seq field gets updated without | ||
2073 | @@ -2144,8 +2180,9 @@ static void task_numa_placement(struct task_struct *p) | ||
2074 | runtime = numa_get_avg_runtime(p, &period); | ||
2075 | |||
2076 | /* If the task is part of a group prevent parallel updates to group stats */ | ||
2077 | - if (p->numa_group) { | ||
2078 | - group_lock = &p->numa_group->lock; | ||
2079 | + ng = deref_curr_numa_group(p); | ||
2080 | + if (ng) { | ||
2081 | + group_lock = &ng->lock; | ||
2082 | spin_lock_irq(group_lock); | ||
2083 | } | ||
2084 | |||
2085 | @@ -2186,7 +2223,7 @@ static void task_numa_placement(struct task_struct *p) | ||
2086 | p->numa_faults[cpu_idx] += f_diff; | ||
2087 | faults += p->numa_faults[mem_idx]; | ||
2088 | p->total_numa_faults += diff; | ||
2089 | - if (p->numa_group) { | ||
2090 | + if (ng) { | ||
2091 | /* | ||
2092 | * safe because we can only change our own group | ||
2093 | * | ||
2094 | @@ -2194,14 +2231,14 @@ static void task_numa_placement(struct task_struct *p) | ||
2095 | * nid and priv in a specific region because it | ||
2096 | * is at the beginning of the numa_faults array. | ||
2097 | */ | ||
2098 | - p->numa_group->faults[mem_idx] += diff; | ||
2099 | - p->numa_group->faults_cpu[mem_idx] += f_diff; | ||
2100 | - p->numa_group->total_faults += diff; | ||
2101 | - group_faults += p->numa_group->faults[mem_idx]; | ||
2102 | + ng->faults[mem_idx] += diff; | ||
2103 | + ng->faults_cpu[mem_idx] += f_diff; | ||
2104 | + ng->total_faults += diff; | ||
2105 | + group_faults += ng->faults[mem_idx]; | ||
2106 | } | ||
2107 | } | ||
2108 | |||
2109 | - if (!p->numa_group) { | ||
2110 | + if (!ng) { | ||
2111 | if (faults > max_faults) { | ||
2112 | max_faults = faults; | ||
2113 | max_nid = nid; | ||
2114 | @@ -2212,8 +2249,8 @@ static void task_numa_placement(struct task_struct *p) | ||
2115 | } | ||
2116 | } | ||
2117 | |||
2118 | - if (p->numa_group) { | ||
2119 | - numa_group_count_active_nodes(p->numa_group); | ||
2120 | + if (ng) { | ||
2121 | + numa_group_count_active_nodes(ng); | ||
2122 | spin_unlock_irq(group_lock); | ||
2123 | max_nid = preferred_group_nid(p, max_nid); | ||
2124 | } | ||
2125 | @@ -2247,7 +2284,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | ||
2126 | int cpu = cpupid_to_cpu(cpupid); | ||
2127 | int i; | ||
2128 | |||
2129 | - if (unlikely(!p->numa_group)) { | ||
2130 | + if (unlikely(!deref_curr_numa_group(p))) { | ||
2131 | unsigned int size = sizeof(struct numa_group) + | ||
2132 | 4*nr_node_ids*sizeof(unsigned long); | ||
2133 | |||
2134 | @@ -2283,7 +2320,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | ||
2135 | if (!grp) | ||
2136 | goto no_join; | ||
2137 | |||
2138 | - my_grp = p->numa_group; | ||
2139 | + my_grp = deref_curr_numa_group(p); | ||
2140 | if (grp == my_grp) | ||
2141 | goto no_join; | ||
2142 | |||
2143 | @@ -2345,13 +2382,24 @@ no_join: | ||
2144 | return; | ||
2145 | } | ||
2146 | |||
2147 | -void task_numa_free(struct task_struct *p) | ||
2148 | +/* | ||
2149 | + * Get rid of NUMA staticstics associated with a task (either current or dead). | ||
2150 | + * If @final is set, the task is dead and has reached refcount zero, so we can | ||
2151 | + * safely free all relevant data structures. Otherwise, there might be | ||
2152 | + * concurrent reads from places like load balancing and procfs, and we should | ||
2153 | + * reset the data back to default state without freeing ->numa_faults. | ||
2154 | + */ | ||
2155 | +void task_numa_free(struct task_struct *p, bool final) | ||
2156 | { | ||
2157 | - struct numa_group *grp = p->numa_group; | ||
2158 | - void *numa_faults = p->numa_faults; | ||
2159 | + /* safe: p either is current or is being freed by current */ | ||
2160 | + struct numa_group *grp = rcu_dereference_raw(p->numa_group); | ||
2161 | + unsigned long *numa_faults = p->numa_faults; | ||
2162 | unsigned long flags; | ||
2163 | int i; | ||
2164 | |||
2165 | + if (!numa_faults) | ||
2166 | + return; | ||
2167 | + | ||
2168 | if (grp) { | ||
2169 | spin_lock_irqsave(&grp->lock, flags); | ||
2170 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) | ||
2171 | @@ -2364,8 +2412,14 @@ void task_numa_free(struct task_struct *p) | ||
2172 | put_numa_group(grp); | ||
2173 | } | ||
2174 | |||
2175 | - p->numa_faults = NULL; | ||
2176 | - kfree(numa_faults); | ||
2177 | + if (final) { | ||
2178 | + p->numa_faults = NULL; | ||
2179 | + kfree(numa_faults); | ||
2180 | + } else { | ||
2181 | + p->total_numa_faults = 0; | ||
2182 | + for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) | ||
2183 | + numa_faults[i] = 0; | ||
2184 | + } | ||
2185 | } | ||
2186 | |||
2187 | /* | ||
2188 | @@ -2418,7 +2472,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) | ||
2189 | * actively using should be counted as local. This allows the | ||
2190 | * scan rate to slow down when a workload has settled down. | ||
2191 | */ | ||
2192 | - ng = p->numa_group; | ||
2193 | + ng = deref_curr_numa_group(p); | ||
2194 | if (!priv && !local && ng && ng->active_nodes > 1 && | ||
2195 | numa_is_active_node(cpu_node, ng) && | ||
2196 | numa_is_active_node(mem_node, ng)) | ||
2197 | @@ -10218,18 +10272,22 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m) | ||
2198 | { | ||
2199 | int node; | ||
2200 | unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0; | ||
2201 | + struct numa_group *ng; | ||
2202 | |||
2203 | + rcu_read_lock(); | ||
2204 | + ng = rcu_dereference(p->numa_group); | ||
2205 | for_each_online_node(node) { | ||
2206 | if (p->numa_faults) { | ||
2207 | tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)]; | ||
2208 | tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)]; | ||
2209 | } | ||
2210 | - if (p->numa_group) { | ||
2211 | - gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)], | ||
2212 | - gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)]; | ||
2213 | + if (ng) { | ||
2214 | + gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)], | ||
2215 | + gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)]; | ||
2216 | } | ||
2217 | print_numa_stats(m, node, tsf, tpf, gsf, gpf); | ||
2218 | } | ||
2219 | + rcu_read_unlock(); | ||
2220 | } | ||
2221 | #endif /* CONFIG_NUMA_BALANCING */ | ||
2222 | #endif /* CONFIG_SCHED_DEBUG */ | ||
2223 | diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c | ||
2224 | index c248e0dccbe1..67ef9d853d90 100644 | ||
2225 | --- a/net/ipv4/ip_tunnel_core.c | ||
2226 | +++ b/net/ipv4/ip_tunnel_core.c | ||
2227 | @@ -89,9 +89,12 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, | ||
2228 | __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1); | ||
2229 | |||
2230 | err = ip_local_out(net, sk, skb); | ||
2231 | - if (unlikely(net_xmit_eval(err))) | ||
2232 | - pkt_len = 0; | ||
2233 | - iptunnel_xmit_stats(dev, pkt_len); | ||
2234 | + | ||
2235 | + if (dev) { | ||
2236 | + if (unlikely(net_xmit_eval(err))) | ||
2237 | + pkt_len = 0; | ||
2238 | + iptunnel_xmit_stats(dev, pkt_len); | ||
2239 | + } | ||
2240 | } | ||
2241 | EXPORT_SYMBOL_GPL(iptunnel_xmit); | ||
2242 | |||
2243 | diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c | ||
2244 | index ab27a2872935..2e30bf197583 100644 | ||
2245 | --- a/net/vmw_vsock/af_vsock.c | ||
2246 | +++ b/net/vmw_vsock/af_vsock.c | ||
2247 | @@ -281,7 +281,8 @@ EXPORT_SYMBOL_GPL(vsock_insert_connected); | ||
2248 | void vsock_remove_bound(struct vsock_sock *vsk) | ||
2249 | { | ||
2250 | spin_lock_bh(&vsock_table_lock); | ||
2251 | - __vsock_remove_bound(vsk); | ||
2252 | + if (__vsock_in_bound_table(vsk)) | ||
2253 | + __vsock_remove_bound(vsk); | ||
2254 | spin_unlock_bh(&vsock_table_lock); | ||
2255 | } | ||
2256 | EXPORT_SYMBOL_GPL(vsock_remove_bound); | ||
2257 | @@ -289,7 +290,8 @@ EXPORT_SYMBOL_GPL(vsock_remove_bound); | ||
2258 | void vsock_remove_connected(struct vsock_sock *vsk) | ||
2259 | { | ||
2260 | spin_lock_bh(&vsock_table_lock); | ||
2261 | - __vsock_remove_connected(vsk); | ||
2262 | + if (__vsock_in_connected_table(vsk)) | ||
2263 | + __vsock_remove_connected(vsk); | ||
2264 | spin_unlock_bh(&vsock_table_lock); | ||
2265 | } | ||
2266 | EXPORT_SYMBOL_GPL(vsock_remove_connected); | ||
2267 | @@ -325,35 +327,10 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, | ||
2268 | } | ||
2269 | EXPORT_SYMBOL_GPL(vsock_find_connected_socket); | ||
2270 | |||
2271 | -static bool vsock_in_bound_table(struct vsock_sock *vsk) | ||
2272 | -{ | ||
2273 | - bool ret; | ||
2274 | - | ||
2275 | - spin_lock_bh(&vsock_table_lock); | ||
2276 | - ret = __vsock_in_bound_table(vsk); | ||
2277 | - spin_unlock_bh(&vsock_table_lock); | ||
2278 | - | ||
2279 | - return ret; | ||
2280 | -} | ||
2281 | - | ||
2282 | -static bool vsock_in_connected_table(struct vsock_sock *vsk) | ||
2283 | -{ | ||
2284 | - bool ret; | ||
2285 | - | ||
2286 | - spin_lock_bh(&vsock_table_lock); | ||
2287 | - ret = __vsock_in_connected_table(vsk); | ||
2288 | - spin_unlock_bh(&vsock_table_lock); | ||
2289 | - | ||
2290 | - return ret; | ||
2291 | -} | ||
2292 | - | ||
2293 | void vsock_remove_sock(struct vsock_sock *vsk) | ||
2294 | { | ||
2295 | - if (vsock_in_bound_table(vsk)) | ||
2296 | - vsock_remove_bound(vsk); | ||
2297 | - | ||
2298 | - if (vsock_in_connected_table(vsk)) | ||
2299 | - vsock_remove_connected(vsk); | ||
2300 | + vsock_remove_bound(vsk); | ||
2301 | + vsock_remove_connected(vsk); | ||
2302 | } | ||
2303 | EXPORT_SYMBOL_GPL(vsock_remove_sock); | ||
2304 | |||
2305 | @@ -484,8 +461,7 @@ static void vsock_pending_work(struct work_struct *work) | ||
2306 | * incoming packets can't find this socket, and to reduce the reference | ||
2307 | * count. | ||
2308 | */ | ||
2309 | - if (vsock_in_connected_table(vsk)) | ||
2310 | - vsock_remove_connected(vsk); | ||
2311 | + vsock_remove_connected(vsk); | ||
2312 | |||
2313 | sk->sk_state = TCP_CLOSE; | ||
2314 | |||
2315 | diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c | ||
2316 | index b131561a9469..9c7da811d130 100644 | ||
2317 | --- a/net/vmw_vsock/hyperv_transport.c | ||
2318 | +++ b/net/vmw_vsock/hyperv_transport.c | ||
2319 | @@ -35,6 +35,9 @@ | ||
2320 | /* The MTU is 16KB per the host side's design */ | ||
2321 | #define HVS_MTU_SIZE (1024 * 16) | ||
2322 | |||
2323 | +/* How long to wait for graceful shutdown of a connection */ | ||
2324 | +#define HVS_CLOSE_TIMEOUT (8 * HZ) | ||
2325 | + | ||
2326 | struct vmpipe_proto_header { | ||
2327 | u32 pkt_type; | ||
2328 | u32 data_size; | ||
2329 | @@ -290,19 +293,32 @@ static void hvs_channel_cb(void *ctx) | ||
2330 | sk->sk_write_space(sk); | ||
2331 | } | ||
2332 | |||
2333 | -static void hvs_close_connection(struct vmbus_channel *chan) | ||
2334 | +static void hvs_do_close_lock_held(struct vsock_sock *vsk, | ||
2335 | + bool cancel_timeout) | ||
2336 | { | ||
2337 | - struct sock *sk = get_per_channel_state(chan); | ||
2338 | - struct vsock_sock *vsk = vsock_sk(sk); | ||
2339 | - | ||
2340 | - lock_sock(sk); | ||
2341 | + struct sock *sk = sk_vsock(vsk); | ||
2342 | |||
2343 | - sk->sk_state = TCP_CLOSE; | ||
2344 | sock_set_flag(sk, SOCK_DONE); | ||
2345 | - vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; | ||
2346 | - | ||
2347 | + vsk->peer_shutdown = SHUTDOWN_MASK; | ||
2348 | + if (vsock_stream_has_data(vsk) <= 0) | ||
2349 | + sk->sk_state = TCP_CLOSING; | ||
2350 | sk->sk_state_change(sk); | ||
2351 | + if (vsk->close_work_scheduled && | ||
2352 | + (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { | ||
2353 | + vsk->close_work_scheduled = false; | ||
2354 | + vsock_remove_sock(vsk); | ||
2355 | |||
2356 | + /* Release the reference taken while scheduling the timeout */ | ||
2357 | + sock_put(sk); | ||
2358 | + } | ||
2359 | +} | ||
2360 | + | ||
2361 | +static void hvs_close_connection(struct vmbus_channel *chan) | ||
2362 | +{ | ||
2363 | + struct sock *sk = get_per_channel_state(chan); | ||
2364 | + | ||
2365 | + lock_sock(sk); | ||
2366 | + hvs_do_close_lock_held(vsock_sk(sk), true); | ||
2367 | release_sock(sk); | ||
2368 | } | ||
2369 | |||
2370 | @@ -445,50 +461,80 @@ static int hvs_connect(struct vsock_sock *vsk) | ||
2371 | return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id); | ||
2372 | } | ||
2373 | |||
2374 | +static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode) | ||
2375 | +{ | ||
2376 | + struct vmpipe_proto_header hdr; | ||
2377 | + | ||
2378 | + if (hvs->fin_sent || !hvs->chan) | ||
2379 | + return; | ||
2380 | + | ||
2381 | + /* It can't fail: see hvs_channel_writable_bytes(). */ | ||
2382 | + (void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0); | ||
2383 | + hvs->fin_sent = true; | ||
2384 | +} | ||
2385 | + | ||
2386 | static int hvs_shutdown(struct vsock_sock *vsk, int mode) | ||
2387 | { | ||
2388 | struct sock *sk = sk_vsock(vsk); | ||
2389 | - struct vmpipe_proto_header hdr; | ||
2390 | - struct hvs_send_buf *send_buf; | ||
2391 | - struct hvsock *hvs; | ||
2392 | |||
2393 | if (!(mode & SEND_SHUTDOWN)) | ||
2394 | return 0; | ||
2395 | |||
2396 | lock_sock(sk); | ||
2397 | + hvs_shutdown_lock_held(vsk->trans, mode); | ||
2398 | + release_sock(sk); | ||
2399 | + return 0; | ||
2400 | +} | ||
2401 | |||
2402 | - hvs = vsk->trans; | ||
2403 | - if (hvs->fin_sent) | ||
2404 | - goto out; | ||
2405 | - | ||
2406 | - send_buf = (struct hvs_send_buf *)&hdr; | ||
2407 | +static void hvs_close_timeout(struct work_struct *work) | ||
2408 | +{ | ||
2409 | + struct vsock_sock *vsk = | ||
2410 | + container_of(work, struct vsock_sock, close_work.work); | ||
2411 | + struct sock *sk = sk_vsock(vsk); | ||
2412 | |||
2413 | - /* It can't fail: see hvs_channel_writable_bytes(). */ | ||
2414 | - (void)hvs_send_data(hvs->chan, send_buf, 0); | ||
2415 | + sock_hold(sk); | ||
2416 | + lock_sock(sk); | ||
2417 | + if (!sock_flag(sk, SOCK_DONE)) | ||
2418 | + hvs_do_close_lock_held(vsk, false); | ||
2419 | |||
2420 | - hvs->fin_sent = true; | ||
2421 | -out: | ||
2422 | + vsk->close_work_scheduled = false; | ||
2423 | release_sock(sk); | ||
2424 | - return 0; | ||
2425 | + sock_put(sk); | ||
2426 | } | ||
2427 | |||
2428 | -static void hvs_release(struct vsock_sock *vsk) | ||
2429 | +/* Returns true, if it is safe to remove socket; false otherwise */ | ||
2430 | +static bool hvs_close_lock_held(struct vsock_sock *vsk) | ||
2431 | { | ||
2432 | struct sock *sk = sk_vsock(vsk); | ||
2433 | - struct hvsock *hvs = vsk->trans; | ||
2434 | - struct vmbus_channel *chan; | ||
2435 | |||
2436 | - lock_sock(sk); | ||
2437 | + if (!(sk->sk_state == TCP_ESTABLISHED || | ||
2438 | + sk->sk_state == TCP_CLOSING)) | ||
2439 | + return true; | ||
2440 | |||
2441 | - sk->sk_state = TCP_CLOSING; | ||
2442 | - vsock_remove_sock(vsk); | ||
2443 | + if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) | ||
2444 | + hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK); | ||
2445 | |||
2446 | - release_sock(sk); | ||
2447 | + if (sock_flag(sk, SOCK_DONE)) | ||
2448 | + return true; | ||
2449 | |||
2450 | - chan = hvs->chan; | ||
2451 | - if (chan) | ||
2452 | - hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN); | ||
2453 | + /* This reference will be dropped by the delayed close routine */ | ||
2454 | + sock_hold(sk); | ||
2455 | + INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout); | ||
2456 | + vsk->close_work_scheduled = true; | ||
2457 | + schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT); | ||
2458 | + return false; | ||
2459 | +} | ||
2460 | |||
2461 | +static void hvs_release(struct vsock_sock *vsk) | ||
2462 | +{ | ||
2463 | + struct sock *sk = sk_vsock(vsk); | ||
2464 | + bool remove_sock; | ||
2465 | + | ||
2466 | + lock_sock(sk); | ||
2467 | + remove_sock = hvs_close_lock_held(vsk); | ||
2468 | + release_sock(sk); | ||
2469 | + if (remove_sock) | ||
2470 | + vsock_remove_sock(vsk); | ||
2471 | } | ||
2472 | |||
2473 | static void hvs_destruct(struct vsock_sock *vsk) |