Magellan Linux

Contents of /trunk/kernel-magellan/patches-4.3/0102-4.3.3-all-fixes.patch



Revision 2746 - Mon Dec 28 09:57:11 2015 UTC by niro
File size: 140376 bytes
-linux-4.3.3
diff --git a/Makefile b/Makefile
index 1a4953b3e10f..2070d16bb5a4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 3
-SUBLEVEL = 2
+SUBLEVEL = 3
EXTRAVERSION =
NAME = Blurry Fish Butt

diff --git a/block/blk-merge.c b/block/blk-merge.c
index c4e9c37f3e38..0e5f4fc12449 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -91,7 +91,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,

seg_size += bv.bv_len;
bvprv = bv;
- bvprvp = &bv;
+ bvprvp = &bvprv;
sectors += bv.bv_len >> 9;
continue;
}
@@ -101,7 +101,7 @@ new_segment:

nsegs++;
bvprv = bv;
- bvprvp = &bv;
+ bvprvp = &bvprv;
seg_size = bv.bv_len;
sectors += bv.bv_len >> 9;
}
diff --git a/certs/.gitignore b/certs/.gitignore
new file mode 100644
index 000000000000..f51aea4a71ec
--- /dev/null
+++ b/certs/.gitignore
@@ -0,0 +1,4 @@
+#
+# Generated files
+#
+x509_certificate_list
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 128e7df5b807..8630a77ea462 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3444,6 +3444,7 @@ static void rbd_queue_workfn(struct work_struct *work)
goto err_rq;
}
img_request->rq = rq;
+ snapc = NULL; /* img_request consumes a ref */

if (op_type == OBJ_OP_DISCARD)
result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA,
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index f51d376d10ba..c2f5117fd8cb 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -3675,6 +3675,11 @@ static int pci_probe(struct pci_dev *dev,

reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, ~0);
ohci->it_context_support = reg_read(ohci, OHCI1394_IsoXmitIntMaskSet);
+ /* JMicron JMB38x often shows 0 at first read, just ignore it */
+ if (!ohci->it_context_support) {
+ ohci_notice(ohci, "overriding IsoXmitIntMask\n");
+ ohci->it_context_support = 0xf;
+ }
reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, ~0);
ohci->it_context_mask = ohci->it_context_support;
ohci->n_it = hweight32(ohci->it_context_mask);
diff --git a/drivers/media/pci/cobalt/Kconfig b/drivers/media/pci/cobalt/Kconfig
index 1f88ccc174da..a01f0cc745cc 100644
--- a/drivers/media/pci/cobalt/Kconfig
+++ b/drivers/media/pci/cobalt/Kconfig
@@ -1,6 +1,6 @@
config VIDEO_COBALT
tristate "Cisco Cobalt support"
- depends on VIDEO_V4L2 && I2C && MEDIA_CONTROLLER
+ depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API
depends on PCI_MSI && MTD_COMPLEX_MAPPINGS
depends on GPIOLIB || COMPILE_TEST
depends on SND
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index a9377727c11c..7f709cbdcd87 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1583,8 +1583,14 @@ err_disable_device:
static void nicvf_remove(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
- struct nicvf *nic = netdev_priv(netdev);
- struct net_device *pnetdev = nic->pnicvf->netdev;
+ struct nicvf *nic;
+ struct net_device *pnetdev;
+
+ if (!netdev)
+ return;
+
+ nic = netdev_priv(netdev);
+ pnetdev = nic->pnicvf->netdev;

/* Check if this Qset is assigned to different VF.
* If yes, clean primary and all secondary Qsets.
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 731423ca575d..8bead97373ab 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4934,26 +4934,41 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave)
struct res_counter *counter;
struct res_counter *tmp;
int err;
- int index;
+ int *counters_arr = NULL;
+ int i, j;

err = move_all_busy(dev, slave, RES_COUNTER);
if (err)
mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n",
slave);

- spin_lock_irq(mlx4_tlock(dev));
- list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
- if (counter->com.owner == slave) {
- index = counter->com.res_id;
- rb_erase(&counter->com.node,
- &tracker->res_tree[RES_COUNTER]);
- list_del(&counter->com.list);
- kfree(counter);
- __mlx4_counter_free(dev, index);
+ counters_arr = kmalloc_array(dev->caps.max_counters,
+ sizeof(*counters_arr), GFP_KERNEL);
+ if (!counters_arr)
+ return;
+
+ do {
+ i = 0;
+ j = 0;
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
+ if (counter->com.owner == slave) {
+ counters_arr[i++] = counter->com.res_id;
+ rb_erase(&counter->com.node,
+ &tracker->res_tree[RES_COUNTER]);
+ list_del(&counter->com.list);
+ kfree(counter);
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ while (j < i) {
+ __mlx4_counter_free(dev, counters_arr[j++]);
mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
}
- }
- spin_unlock_irq(mlx4_tlock(dev));
+ } while (i);
+
+ kfree(counters_arr);
}

static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 59874d666cff..443632df2010 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1332,6 +1332,42 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt)
return err;
}

+static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev,
+ u32 tirn)
+{
+ void *in;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+
+ err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < MLX5E_NUM_TT; i++) {
+ err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev,
+ priv->tirn[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int mlx5e_set_dev_port_mtu(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -1367,13 +1403,20 @@ int mlx5e_open_locked(struct net_device *netdev)

err = mlx5e_set_dev_port_mtu(netdev);
if (err)
- return err;
+ goto err_clear_state_opened_flag;

err = mlx5e_open_channels(priv);
if (err) {
netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n",
__func__, err);
- return err;
+ goto err_clear_state_opened_flag;
+ }
+
+ err = mlx5e_refresh_tirs_self_loopback_enable(priv);
+ if (err) {
+ netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n",
+ __func__, err);
+ goto err_close_channels;
}

mlx5e_update_carrier(priv);
@@ -1382,6 +1425,12 @@ int mlx5e_open_locked(struct net_device *netdev)
schedule_delayed_work(&priv->update_stats_work, 0);

return 0;
+
+err_close_channels:
+ mlx5e_close_channels(priv);
+err_clear_state_opened_flag:
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+ return err;
}

static int mlx5e_open(struct net_device *netdev)
@@ -1899,6 +1948,9 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
"Not creating net device, some required device capabilities are missing\n");
return -ENOTSUPP;
}
+ if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
+ mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
+
return 0;
}

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b4f21232019a..79ef799f88ab 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7429,15 +7429,15 @@ process_pkt:

rtl8169_rx_vlan_tag(desc, skb);

+ if (skb->pkt_type == PACKET_MULTICAST)
+ dev->stats.multicast++;
+
napi_gro_receive(&tp->napi, skb);

u64_stats_update_begin(&tp->rx_stats.syncp);
tp->rx_stats.packets++;
tp->rx_stats.bytes += pkt_size;
u64_stats_update_end(&tp->rx_stats.syncp);
-
- if (skb->pkt_type == PACKET_MULTICAST)
- dev->stats.multicast++;
}
release_descriptor:
desc->opts2 = 0;
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 9c71295f2fef..85e640440bd9 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -675,7 +675,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = {
{ PHY_ID_BCM5461, 0xfffffff0 },
{ PHY_ID_BCM54616S, 0xfffffff0 },
{ PHY_ID_BCM5464, 0xfffffff0 },
- { PHY_ID_BCM5482, 0xfffffff0 },
+ { PHY_ID_BCM5481, 0xfffffff0 },
{ PHY_ID_BCM5482, 0xfffffff0 },
{ PHY_ID_BCM50610, 0xfffffff0 },
{ PHY_ID_BCM50610M, 0xfffffff0 },
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 2a7c1be23c4f..66e0853d1680 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -775,6 +775,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */
{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */
{QMI_FIXED_INTF(0x1bc7, 0x1201, 2)}, /* Telit LE920 */
+ {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */
{QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */
{QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */
{QMI_FIXED_INTF(0x0b3c, 0xc002, 4)}, /* Olivetti Olicard 140 */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 488c6f50df73..c9e309cd9d82 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -581,7 +581,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
{
struct net_vrf *vrf = netdev_priv(dev);
struct net_vrf_dev *vrf_ptr;
- int err;

if (!data || !data[IFLA_VRF_TABLE])
return -EINVAL;
@@ -590,26 +589,16 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,

dev->priv_flags |= IFF_VRF_MASTER;

- err = -ENOMEM;
vrf_ptr = kmalloc(sizeof(*dev->vrf_ptr), GFP_KERNEL);
if (!vrf_ptr)
- goto out_fail;
+ return -ENOMEM;

vrf_ptr->ifindex = dev->ifindex;
vrf_ptr->tb_id = vrf->tb_id;

- err = register_netdevice(dev);
- if (err < 0)
- goto out_fail;
-
rcu_assign_pointer(dev->vrf_ptr, vrf_ptr);

- return 0;
-
-out_fail:
- kfree(vrf_ptr);
- free_netdev(dev);
- return err;
+ return register_netdev(dev);
}

static size_t vrf_nl_getsize(const struct net_device *dev)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 938efe33be80..94eea1f43280 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3398,7 +3398,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int no_quota);
+ u64 owner, u64 offset);

int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len,
int delalloc);
@@ -3411,7 +3411,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset, int no_quota);
+ u64 root_objectid, u64 owner, u64 offset);

int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index ac3e81da6d4e..7832031fef68 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -197,6 +197,119 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
trans->delayed_ref_updates--;
}

+static bool merge_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head,
+ struct btrfs_delayed_ref_node *ref,
+ u64 seq)
+{
+ struct btrfs_delayed_ref_node *next;
+ bool done = false;
+
+ next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
+ list);
+ while (!done && &next->list != &head->ref_list) {
+ int mod;
+ struct btrfs_delayed_ref_node *next2;
+
+ next2 = list_next_entry(next, list);
+
+ if (next == ref)
+ goto next;
+
+ if (seq && next->seq >= seq)
+ goto next;
+
+ if (next->type != ref->type)
+ goto next;
+
+ if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ ref->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
+ comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref),
+ btrfs_delayed_node_to_tree_ref(next),
+ ref->type))
+ goto next;
+ if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY ||
+ ref->type == BTRFS_SHARED_DATA_REF_KEY) &&
+ comp_data_refs(btrfs_delayed_node_to_data_ref(ref),
+ btrfs_delayed_node_to_data_ref(next)))
+ goto next;
+
+ if (ref->action == next->action) {
+ mod = next->ref_mod;
+ } else {
+ if (ref->ref_mod < next->ref_mod) {
+ swap(ref, next);
+ done = true;
+ }
+ mod = -next->ref_mod;
+ }
+
+ drop_delayed_ref(trans, delayed_refs, head, next);
+ ref->ref_mod += mod;
+ if (ref->ref_mod == 0) {
+ drop_delayed_ref(trans, delayed_refs, head, ref);
+ done = true;
+ } else {
+ /*
+ * Can't have multiples of the same ref on a tree block.
+ */
+ WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
+ }
+next:
+ next = next2;
+ }
+
+ return done;
+}
+
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head)
+{
+ struct btrfs_delayed_ref_node *ref;
+ u64 seq = 0;
+
+ assert_spin_locked(&head->lock);
+
+ if (list_empty(&head->ref_list))
+ return;
+
+ /* We don't have too many refs to merge for data. */
+ if (head->is_data)
+ return;
+
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ if (!list_empty(&fs_info->tree_mod_seq_list)) {
+ struct seq_list *elem;
+
+ elem = list_first_entry(&fs_info->tree_mod_seq_list,
+ struct seq_list, list);
+ seq = elem->seq;
+ }
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+
+ ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
+ list);
+ while (&ref->list != &head->ref_list) {
+ if (seq && ref->seq >= seq)
+ goto next;
+
+ if (merge_ref(trans, delayed_refs, head, ref, seq)) {
+ if (list_empty(&head->ref_list))
+ break;
+ ref = list_first_entry(&head->ref_list,
+ struct btrfs_delayed_ref_node,
+ list);
+ continue;
+ }
+next:
+ ref = list_next_entry(ref, list);
+ }
+}
+
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
@@ -292,8 +405,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
list);
/* No need to compare bytenr nor is_head */
- if (exist->type != ref->type || exist->no_quota != ref->no_quota ||
- exist->seq != ref->seq)
+ if (exist->type != ref->type || exist->seq != ref->seq)
goto add_tail;

if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
@@ -524,7 +636,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, int level,
- int action, int no_quota)
+ int action)
{
struct btrfs_delayed_tree_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -546,7 +658,6 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
- ref->no_quota = no_quota;
ref->seq = seq;

full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -579,7 +690,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
- u64 offset, int action, int no_quota)
+ u64 offset, int action)
{
struct btrfs_delayed_data_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -602,7 +713,6 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
- ref->no_quota = no_quota;
ref->seq = seq;

full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -633,17 +743,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int no_quota)
+ struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;

- if (!is_fstree(ref_root) || !fs_info->quota_enabled)
- no_quota = 0;
-
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
@@ -672,8 +778,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
bytenr, num_bytes, action, 0);

add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
- num_bytes, parent, ref_root, level, action,
- no_quota);
+ num_bytes, parent, ref_root, level, action);
spin_unlock(&delayed_refs->lock);

return 0;
@@ -694,17 +799,13 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int no_quota)
+ struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;

- if (!is_fstree(ref_root) || !fs_info->quota_enabled)
- no_quota = 0;
-
BUG_ON(extent_op && !extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
@@ -740,7 +841,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,

add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
- action, no_quota);
+ action);
spin_unlock(&delayed_refs->lock);

return 0;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 13fb5e6090fe..930887a4275f 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -68,7 +68,6 @@ struct btrfs_delayed_ref_node {

unsigned int action:8;
unsigned int type:8;
- unsigned int no_quota:1;
/* is this node still in the rbtree? */
unsigned int is_head:1;
unsigned int in_tree:1;
@@ -233,15 +232,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int no_quota);
+ struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int no_quota);
+ struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 601d7d45d164..cadacf643bd0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -95,8 +95,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins,
- int no_quota);
+ int level, struct btrfs_key *ins);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 flags,
int force);
@@ -2009,8 +2008,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset,
- int no_quota)
+ u64 root_objectid, u64 owner, u64 offset)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -2022,12 +2020,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, (int)owner,
- BTRFS_ADD_DELAYED_REF, NULL, no_quota);
+ BTRFS_ADD_DELAYED_REF, NULL);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_REF, NULL, no_quota);
+ BTRFS_ADD_DELAYED_REF, NULL);
}
return ret;
}
@@ -2048,15 +2046,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
u64 num_bytes = node->num_bytes;
u64 refs;
int ret;
- int no_quota = node->no_quota;

path = btrfs_alloc_path();
if (!path)
return -ENOMEM;

- if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
- no_quota = 1;
-
path->reada = 1;
path->leave_spinning = 1;
/* this will setup the path even if it fails to insert the back ref */
@@ -2291,8 +2285,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
parent, ref_root,
extent_op->flags_to_set,
&extent_op->key,
- ref->level, &ins,
- node->no_quota);
+ ref->level, &ins);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, root, node,
parent, ref_root,
@@ -2433,7 +2426,21 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
}
}

+ /*
+ * We need to try and merge add/drops of the same ref since we
+ * can run into issues with relocate dropping the implicit ref
+ * and then it being added back again before the drop can
+ * finish. If we merged anything we need to re-loop so we can
+ * get a good ref.
+ * Or we can get node references of the same type that weren't
+ * merged when created due to bumps in the tree mod seq, and
+ * we need to merge them to prevent adding an inline extent
+ * backref before dropping it (triggering a BUG_ON at
+ * insert_inline_extent_backref()).
+ */
spin_lock(&locked_ref->lock);
+ btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
+ locked_ref);

/*
* locked_ref is the head node, so we have to go one
@@ -3109,7 +3116,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
int level;
int ret = 0;
int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
- u64, u64, u64, u64, u64, u64, int);
+ u64, u64, u64, u64, u64, u64);


if (btrfs_test_is_dummy_root(root))
@@ -3150,15 +3157,14 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(buf, fi);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, key.objectid,
- key.offset, 1);
+ key.offset);
if (ret)
goto fail;
} else {
bytenr = btrfs_node_blockptr(buf, i);
num_bytes = root->nodesize;
ret = process_func(trans, root, bytenr, num_bytes,
- parent, ref_root, level - 1, 0,
- 1);
+ parent, ref_root, level - 1, 0);
if (ret)
goto fail;
}
@@ -6233,7 +6239,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
int extent_slot = 0;
int found_extent = 0;
int num_to_del = 1;
- int no_quota = node->no_quota;
u32 item_size;
u64 refs;
u64 bytenr = node->bytenr;
@@ -6242,9 +6247,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA);

- if (!info->quota_enabled || !is_fstree(root_objectid))
- no_quota = 1;
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -6570,7 +6572,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
buf->start, buf->len,
parent, root->root_key.objectid,
btrfs_header_level(buf),
- BTRFS_DROP_DELAYED_REF, NULL, 0);
+ BTRFS_DROP_DELAYED_REF, NULL);
BUG_ON(ret); /* -ENOMEM */
}

@@ -6618,7 +6620,7 @@ out:
/* Can return -ENOMEM */
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int no_quota)
+ u64 owner, u64 offset)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -6641,13 +6643,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, (int)owner,
- BTRFS_DROP_DELAYED_REF, NULL, no_quota);
+ BTRFS_DROP_DELAYED_REF, NULL);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner,
offset, BTRFS_DROP_DELAYED_REF,
- NULL, no_quota);
+ NULL);
}
return ret;
}
@@ -7429,8 +7431,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins,
- int no_quota)
+ int level, struct btrfs_key *ins)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -7520,7 +7521,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
ins->offset, 0,
root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
+ BTRFS_ADD_DELAYED_EXTENT, NULL);
return ret;
}

@@ -7734,7 +7735,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
ins.objectid, ins.offset,
parent, root_objectid, level,
BTRFS_ADD_DELAYED_EXTENT,
- extent_op, 0);
+ extent_op);
if (ret)
goto out_free_delayed;
}
@@ -8282,7 +8283,7 @@ skip:
}
}
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
- root->root_key.objectid, level - 1, 0, 0);
+ root->root_key.objectid, level - 1, 0);
BUG_ON(ret); /* -ENOMEM */
}
btrfs_tree_unlock(next);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8c6f247ba81d..e27ea7ae7f26 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -756,8 +756,16 @@ next_slot:
}

btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid > ino ||
- key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
+
+ if (key.objectid > ino)
+ break;
+ if (WARN_ON_ONCE(key.objectid < ino) ||
+ key.type < BTRFS_EXTENT_DATA_KEY) {
+ ASSERT(del_nr == 0);
+ path->slots[0]++;
+ goto next_slot;
+ }
+ if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
break;

fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -776,8 +784,8 @@ next_slot:
btrfs_file_extent_inline_len(leaf,
path->slots[0], fi);
} else {
- WARN_ON(1);
- extent_end = search_start;
+ /* can't happen */
+ BUG();
}

/*
@@ -847,7 +855,7 @@ next_slot:
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
new_key.objectid,
- start - extent_offset, 1);
+ start - extent_offset);
BUG_ON(ret); /* -ENOMEM */
}
key.offset = start;
@@ -925,7 +933,7 @@ delete_extent_item:
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
key.objectid, key.offset -
- extent_offset, 0);
+ extent_offset);
BUG_ON(ret); /* -ENOMEM */
inode_sub_bytes(inode,
extent_end - key.offset);
@@ -1204,7 +1212,7 @@ again:

ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
root->root_key.objectid,
- ino, orig_offset, 1);
+ ino, orig_offset);
BUG_ON(ret); /* -ENOMEM */

if (split == start) {
@@ -1231,7 +1239,7 @@ again:
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
- ino, orig_offset, 0);
+ ino, orig_offset);
BUG_ON(ret); /* -ENOMEM */
}
other_start = 0;
@@ -1248,7 +1256,7 @@ again:
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
- ino, orig_offset, 0);
+ ino, orig_offset);
BUG_ON(ret); /* -ENOMEM */
}
if (del_nr == 0) {
@@ -1868,8 +1876,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_log_ctx ctx;
int ret = 0;
bool full_sync = 0;
- const u64 len = end - start + 1;
+ u64 len;

+ /*
+ * The range length can be represented by u64, we have to do the typecasts
+ * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
+ */
+ len = (u64)end - (u64)start + 1;
trace_btrfs_sync_file(file, datasync);

/*
@@ -2057,8 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
}
if (!full_sync) {
- ret = btrfs_wait_ordered_range(inode, start,
- end - start + 1);
+ ret = btrfs_wait_ordered_range(inode, start, len);
if (ret) {
btrfs_end_transaction(trans, root);
goto out;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 611b66d73e80..396e3d5c4e83 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1294,8 +1294,14 @@ next_slot:
num_bytes = 0;
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

- if (found_key.objectid > ino ||
- found_key.type > BTRFS_EXTENT_DATA_KEY ||
+ if (found_key.objectid > ino)
+ break;
+ if (WARN_ON_ONCE(found_key.objectid < ino) ||
+ found_key.type < BTRFS_EXTENT_DATA_KEY) {
+ path->slots[0]++;
+ goto next_slot;
+ }
+ if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
found_key.offset > end)
break;

@@ -2573,7 +2579,7 @@ again:
ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
new->disk_len, 0,
backref->root_id, backref->inum,
- new->file_pos, 0); /* start - extent_offset */
+ new->file_pos); /* start - extent_offset */
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_free_path;
@@ -4217,6 +4223,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,

}

+static int truncate_inline_extent(struct inode *inode,
+ struct btrfs_path *path,
+ struct btrfs_key *found_key,
+ const u64 item_end,
+ const u64 new_size)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ struct btrfs_file_extent_item *fi;
+ u32 size = (u32)(new_size - found_key->offset);
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
+ loff_t offset = new_size;
+ loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
+
+ /*
+ * Zero out the remaining of the last page of our inline extent,
+ * instead of directly truncating our inline extent here - that
+ * would be much more complex (decompressing all the data, then
+ * compressing the truncated data, which might be bigger than
+ * the size of the inline extent, resize the extent, etc).
+ * We release the path because to get the page we might need to
+ * read the extent item from disk (data not in the page cache).
+ */
+ btrfs_release_path(path);
+ return btrfs_truncate_page(inode, offset, page_end - offset, 0);
+ }
+
+ btrfs_set_file_extent_ram_bytes(leaf, fi, size);
+ size = btrfs_file_extent_calc_inline_size(size);
+ btrfs_truncate_item(root, path, size, 1);
+
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ inode_sub_bytes(inode, item_end + 1 - new_size);
+
+ return 0;
+}
+
/*
* this can truncate away extent items, csum items and directory items.
* It starts at a high offset and removes keys until it can't find
@@ -4411,27 +4458,40 @@ search_again:
* special encodings
*/
if (!del_item &&
- btrfs_file_extent_compression(leaf, fi) == 0 &&
btrfs_file_extent_encryption(leaf, fi) == 0 &&
btrfs_file_extent_other_encoding(leaf, fi) == 0) {
- u32 size = new_size - found_key.offset;
-
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
- inode_sub_bytes(inode, item_end + 1 -
- new_size);

/*
- * update the ram bytes to properly reflect
- * the new size of our item
+ * Need to release path in order to truncate a
+ * compressed extent. So delete any accumulated
+ * extent items so far.
*/
- btrfs_set_file_extent_ram_bytes(leaf, fi, size);
- size =
- btrfs_file_extent_calc_inline_size(size);
- btrfs_truncate_item(root, path, size, 1);
+ if (btrfs_file_extent_compression(leaf, fi) !=
+ BTRFS_COMPRESS_NONE && pending_del_nr) {
+ err = btrfs_del_items(trans, root, path,
+ pending_del_slot,
+ pending_del_nr);
+ if (err) {
+ btrfs_abort_transaction(trans,
+ root,
+ err);
+ goto error;
+ }
+ pending_del_nr = 0;
+ }
+
+ err = truncate_inline_extent(inode, path,
+ &found_key,
+ item_end,
+ new_size);
+ if (err) {
+ btrfs_abort_transaction(trans,
+ root, err);
+ goto error;
+ }
} else if (test_bit(BTRFS_ROOT_REF_COWS,
&root->state)) {
- inode_sub_bytes(inode, item_end + 1 -
- found_key.offset);
+ inode_sub_bytes(inode, item_end + 1 - new_size);
}
}
delete:
@@ -4461,7 +4521,7 @@ delete:
ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0,
btrfs_header_owner(leaf),
- ino, extent_offset, 0);
+ ino, extent_offset);
BUG_ON(ret);
if (btrfs_should_throttle_delayed_refs(trans, root))
btrfs_async_run_delayed_refs(root,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8d20f3b1cab0..6548a36823bc 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3203,41 +3203,6 @@ out:
return ret;
}

-/* Helper to check and see if this root currently has a ref on the given disk
- * bytenr. If it does then we need to update the quota for this root. This
- * doesn't do anything if quotas aren't enabled.
- */
-static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- u64 disko)
-{
- struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
- struct ulist *roots;
- struct ulist_iterator uiter;
- struct ulist_node *root_node = NULL;
- int ret;
-
- if (!root->fs_info->quota_enabled)
- return 1;
-
- btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
- ret = btrfs_find_all_roots(trans, root->fs_info, disko,
- tree_mod_seq_elem.seq, &roots);
- if (ret < 0)
- goto out;
- ret = 0;
- ULIST_ITER_INIT(&uiter);
- while ((root_node = ulist_next(roots, &uiter))) {
- if (root_node->val == root->objectid) {
- ret = 1;
- break;
- }
- }
- ulist_free(roots);
-out:
- btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
- return ret;
-}
-
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
struct inode *inode,
u64 endoff,
@@ -3328,6 +3293,150 @@ static void clone_update_extent_map(struct inode *inode,
&BTRFS_I(inode)->runtime_flags);
}

+/*
+ * Make sure we do not end up inserting an inline extent into a file that has
+ * already other (non-inline) extents. If a file has an inline extent it can
+ * not have any other extents and the (single) inline extent must start at the
+ * file offset 0. Failing to respect these rules will lead to file corruption,
+ * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc
+ *
+ * We can have extents that have been already written to disk or we can have
+ * dirty ranges still in delalloc, in which case the extent maps and items are
+ * created only when we run delalloc, and the delalloc ranges might fall outside
+ * the range we are currently locking in the inode's io tree. So we check the
+ * inode's i_size because of that (i_size updates are done while holding the
+ * i_mutex, which we are holding here).
+ * We also check to see if the inode has a size not greater than "datal" but has
+ * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are
+ * protected against such concurrent fallocate calls by the i_mutex).
+ *
+ * If the file has no extents but a size greater than datal, do not allow the
+ * copy because we would need turn the inline extent into a non-inline one (even
+ * with NO_HOLES enabled). If we find our destination inode only has one inline
+ * extent, just overwrite it with the source inline extent if its size is less
+ * than the source extent's size, or we could copy the source inline extent's
+ * data into the destination inode's inline extent if the later is greater then
+ * the former.
+ */
+static int clone_copy_inline_extent(struct inode *src,
+ struct inode *dst,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key,
+ const u64 drop_start,
+ const u64 datal,
+ const u64 skip,
+ const u64 size,
+ char *inline_data)
+{
+ struct btrfs_root *root = BTRFS_I(dst)->root;
+ const u64 aligned_end = ALIGN(new_key->offset + datal,
+ root->sectorsize);
+ int ret;
+ struct btrfs_key key;
+
+ if (new_key->offset > 0)
+ return -EOPNOTSUPP;
+
+ key.objectid = btrfs_ino(dst);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ return ret;
+ } else if (ret > 0) {
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ goto copy_inline_extent;
+ }
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid == btrfs_ino(dst) &&
+ key.type == BTRFS_EXTENT_DATA_KEY) {
+ ASSERT(key.offset > 0);
+ return -EOPNOTSUPP;
+ }
+ } else if (i_size_read(dst) <= datal) {
+ struct btrfs_file_extent_item *ei;
+ u64 ext_len;
+
+ /*
+ * If the file size is <= datal, make sure there are no other
+ * extents following (can happen do to an fallocate call with
+ * the flag FALLOC_FL_KEEP_SIZE).
+ */
+ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+ /*
+ * If it's an inline extent, it can not have other extents
+ * following it.
+ */
+ if (btrfs_file_extent_type(path->nodes[0], ei) ==
+ BTRFS_FILE_EXTENT_INLINE)
+ goto copy_inline_extent;
+
+ ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
+ if (ext_len > aligned_end)
+ return -EOPNOTSUPP;
+
+ ret = btrfs_next_item(root, path);
+ if (ret < 0) {
+ return ret;
+ } else if (ret == 0) {
+ btrfs_item_key_to_cpu(path->nodes[0], &key,
+ path->slots[0]);
+ if (key.objectid == btrfs_ino(dst) &&
+ key.type == BTRFS_EXTENT_DATA_KEY)
+ return -EOPNOTSUPP;
+ }
+ }
+
+copy_inline_extent:
+ /*
+ * We have no extent items, or we have an extent at offset 0 which may
+ * or may not be inlined. All these cases are dealt the same way.
+ */
+ if (i_size_read(dst) > datal) {
+ /*
+ * If the destination inode has an inline extent...
+ * This would require copying the data from the source inline
+ * extent into the beginning of the destination's inline extent.
+ * But this is really complex, both extents can be compressed
+ * or just one of them, which would require decompressing and
+ * re-compressing data (which could increase the new compressed
+ * size, not allowing the compressed data to fit anymore in an
+ * inline extent).
+ * So just don't support this case for now (it should be rare,
+ * we are not really saving space when cloning inline extents).
+ */
+ return -EOPNOTSUPP;
+ }
+
+ btrfs_release_path(path);
+ ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
+ if (ret)
+ return ret;
+ ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
+ if (ret)
+ return ret;
+
+ if (skip) {
+ const u32 start = btrfs_file_extent_calc_inline_size(0);
+
+ memmove(inline_data + start, inline_data + start + skip, datal);
+ }
+
+ write_extent_buffer(path->nodes[0], inline_data,
+ btrfs_item_ptr_offset(path->nodes[0],
+ path->slots[0]),
+ size);
+ inode_add_bytes(dst, datal);
+
+ return 0;
+}
+
/**
* btrfs_clone() - clone a range from inode file to another
*
@@ -3352,9 +3461,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
u32 nritems;
int slot;
int ret;
- int no_quota;
const u64 len = olen_aligned;
- u64 last_disko = 0;
u64 last_dest_end = destoff;

ret = -ENOMEM;
@@ -3400,7 +3507,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,

nritems = btrfs_header_nritems(path->nodes[0]);
process_slot:
- no_quota = 1;
if (path->slots[0] >= nritems) {
ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
if (ret < 0)
@@ -3552,35 +3658,13 @@ process_slot:
btrfs_set_file_extent_num_bytes(leaf, extent,
datal);

- /*
- * We need to look up the roots that point at
- * this bytenr and see if the new root does. If
- * it does not we need to make sure we update
- * quotas appropriately.
- */
- if (disko && root != BTRFS_I(src)->root &&
- disko != last_disko) {
- no_quota = check_ref(trans, root,
- disko);
- if (no_quota < 0) {
- btrfs_abort_transaction(trans,
- root,
- ret);
- btrfs_end_transaction(trans,
- root);
- ret = no_quota;
- goto out;
- }
- }
-
if (disko) {
inode_add_bytes(inode, datal);
ret = btrfs_inc_extent_ref(trans, root,
disko, diskl, 0,
root->root_key.objectid,
btrfs_ino(inode),
- new_key.offset - datao,
- no_quota);
+ new_key.offset - datao);
if (ret) {
btrfs_abort_transaction(trans,
root,
@@ -3594,21 +3678,6 @@ process_slot:
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
u64 skip = 0;
u64 trim = 0;
- u64 aligned_end = 0;
-
- /*
- * Don't copy an inline extent into an offset
- * greater than zero. Having an inline extent
- * at such an offset results in chaos as btrfs
- * isn't prepared for such cases. Just skip
- * this case for the same reasons as commented
- * at btrfs_ioctl_clone().
- */
- if (last_dest_end > 0) {
- ret = -EOPNOTSUPP;
- btrfs_end_transaction(trans, root);
- goto out;
- }

if (off > key.offset) {
skip = off - key.offset;
@@ -3626,42 +3695,22 @@ process_slot:
size -= skip + trim;
datal -= skip + trim;

- aligned_end = ALIGN(new_key.offset + datal,
- root->sectorsize);
- ret = btrfs_drop_extents(trans, root, inode,
- drop_start,
- aligned_end,
- 1);
+ ret = clone_copy_inline_extent(src, inode,
+ trans, path,
+ &new_key,
+ drop_start,
+ datal,
+ skip, size, buf);
if (ret) {
if (ret != -EOPNOTSUPP)
btrfs_abort_transaction(trans,
- root, ret);
- btrfs_end_transaction(trans, root);
- goto out;
- }
-
- ret = btrfs_insert_empty_item(trans, root, path,
- &new_key, size);
- if (ret) {
- btrfs_abort_transaction(trans, root,
- ret);
+ root,
+ ret);
btrfs_end_transaction(trans, root);
goto out;
}
-
- if (skip) {
- u32 start =
- btrfs_file_extent_calc_inline_size(0);
- memmove(buf+start, buf+start+skip,
- datal);
- }
-
leaf = path->nodes[0];
slot = path->slots[0];
- write_extent_buffer(leaf, buf,
- btrfs_item_ptr_offset(leaf, slot),
- size);
- inode_add_bytes(inode, datal);
}

/* If we have an implicit hole (NO_HOLES feature). */
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 303babeef505..ab507e3d536b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1716,7 +1716,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
num_bytes, parent,
btrfs_header_owner(leaf),
- key.objectid, key.offset, 1);
+ key.objectid, key.offset);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
break;
@@ -1724,7 +1724,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,

ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
parent, btrfs_header_owner(leaf),
- key.objectid, key.offset, 1);
+ key.objectid, key.offset);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
break;
@@ -1900,23 +1900,21 @@ again:

ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
path->nodes[level]->start,
- src->root_key.objectid, level - 1, 0,
- 1);
+ src->root_key.objectid, level - 1, 0);
BUG_ON(ret);
ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
- 0, 1);
+ 0);
BUG_ON(ret);

ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
path->nodes[level]->start,
- src->root_key.objectid, level - 1, 0,
- 1);
+ src->root_key.objectid, level - 1, 0);
BUG_ON(ret);

ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
- 0, 1);
+ 0);
BUG_ON(ret);

btrfs_unlock_up_safe(path, 0);
@@ -2745,7 +2743,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
node->eb->start, blocksize,
upper->eb->start,
btrfs_header_owner(upper->eb),
- node->level, 0, 1);
+ node->level, 0);
BUG_ON(ret);

ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index a739b825bdd3..23bb2e4b911b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2353,8 +2353,14 @@ static int send_subvol_begin(struct send_ctx *sctx)
}

TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
- TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
- sctx->send_root->root_item.uuid);
+
+ if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
+ sctx->send_root->root_item.received_uuid);
+ else
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
+ sctx->send_root->root_item.uuid);
+
TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
le64_to_cpu(sctx->send_root->root_item.ctransid));
if (parent_root) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1bbaace73383..6f8af2de5912 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -691,7 +691,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root,
ins.objectid, ins.offset,
0, root->root_key.objectid,
- key->objectid, offset, 0);
+ key->objectid, offset);
if (ret)
goto out;
} else {
1490     diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
1491     index 6f518c90e1c1..1fcd7b6e7564 100644
1492     --- a/fs/btrfs/xattr.c
1493     +++ b/fs/btrfs/xattr.c
1494     @@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
1495     /* check to make sure this item is what we want */
1496     if (found_key.objectid != key.objectid)
1497     break;
1498     - if (found_key.type != BTRFS_XATTR_ITEM_KEY)
1499     + if (found_key.type > BTRFS_XATTR_ITEM_KEY)
1500     break;
1501     + if (found_key.type < BTRFS_XATTR_ITEM_KEY)
1502     + goto next;
1503    
1504     di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1505     if (verify_dir_item(root, leaf, di))
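
Note on the fs/btrfs/xattr.c hunk above: btrfs leaf items are sorted by (objectid, type, offset), so items with a type below BTRFS_XATTR_ITEM_KEY can sit in front of the xattrs for the same inode. The old code broke out of the walk on the first non-xattr type and could miss entries; the fix skips smaller types and stops only once the type is past the xattr range. A minimal userspace sketch of that scan pattern — the key constants and data below are made up for illustration, not btrfs code:

#include <stdio.h>

/* Hypothetical stand-ins for btrfs key types; only the ordering matters. */
enum { INODE_ITEM_KEY = 1, XATTR_ITEM_KEY = 24, DIR_ITEM_KEY = 84 };

struct key { unsigned long objectid; int type; };

int main(void)
{
	/* Items sorted by (objectid, type), as in a btrfs leaf. */
	struct key items[] = {
		{ 5, INODE_ITEM_KEY },   /* smaller type: must be skipped */
		{ 5, XATTR_ITEM_KEY },   /* the items we actually want */
		{ 5, XATTR_ITEM_KEY },
		{ 5, DIR_ITEM_KEY },     /* larger type: safe to stop here */
		{ 6, XATTR_ITEM_KEY },   /* different object: never reached */
	};

	for (size_t i = 0; i < sizeof(items) / sizeof(items[0]); i++) {
		if (items[i].objectid != 5)
			break;
		if (items[i].type > XATTR_ITEM_KEY)
			break;               /* past the xattr range */
		if (items[i].type < XATTR_ITEM_KEY)
			continue;            /* the old "break" here lost xattrs */
		printf("xattr item at slot %zu\n", i);
	}
	return 0;
}
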
1506     diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
1507     index 51cb02da75d9..fe2c982764e7 100644
1508     --- a/fs/ceph/mds_client.c
1509     +++ b/fs/ceph/mds_client.c
1510     @@ -1935,7 +1935,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1511    
1512     len = sizeof(*head) +
1513     pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
1514     - sizeof(struct timespec);
1515     + sizeof(struct ceph_timespec);
1516    
1517     /* calculate (max) length for cap releases */
1518     len += sizeof(struct ceph_mds_request_release) *
1519     diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
1520     index c711be8d6a3c..9c8d23316da1 100644
1521     --- a/fs/debugfs/inode.c
1522     +++ b/fs/debugfs/inode.c
1523     @@ -271,8 +271,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
1524     dput(dentry);
1525     dentry = ERR_PTR(-EEXIST);
1526     }
1527     - if (IS_ERR(dentry))
1528     +
1529     + if (IS_ERR(dentry)) {
1530     mutex_unlock(&d_inode(parent)->i_mutex);
1531     + simple_release_fs(&debugfs_mount, &debugfs_mount_count);
1532     + }
1533     +
1534     return dentry;
1535     }
1536    
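
Note on the fs/debugfs/inode.c hunk above: start_creating() pins the debugfs mount (simple_pin_fs()) before looking up the dentry, and the error path previously returned without dropping that pin, leaking a mount reference on every failed create. A toy sketch of the pairing, with pin()/unpin() standing in for simple_pin_fs()/simple_release_fs() and malloc() for the lookup:

#include <stdio.h>
#include <stdlib.h>

/* pin()/unpin() stand in for simple_pin_fs()/simple_release_fs(). */
static int pin_count;

static int pin(void)    { pin_count++; return 0; }
static void unpin(void) { pin_count--; }

/* Shape of start_creating() after the fix: every exit that does not
 * hand a dentry back also drops the pin taken on entry. */
static void *start_creating(int fail)
{
	void *dentry;

	if (pin())
		return NULL;

	dentry = fail ? NULL : malloc(16);  /* stands in for lookup_one_len() */
	if (!dentry)
		unpin();                    /* the fix: release on error too */
	return dentry;
}

int main(void)
{
	start_creating(1);                  /* simulated lookup failure */
	printf("pin_count=%d (0: the failed call leaked nothing)\n", pin_count);
	return 0;
}
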
1537     diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
1538     index 45731558138c..2fab243a4c9e 100644
1539     --- a/fs/ext4/crypto.c
1540     +++ b/fs/ext4/crypto.c
1541     @@ -411,7 +411,13 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
1542     ext4_lblk_t lblk = ex->ee_block;
1543     ext4_fsblk_t pblk = ext4_ext_pblock(ex);
1544     unsigned int len = ext4_ext_get_actual_len(ex);
1545     - int err = 0;
1546     + int ret, err = 0;
1547     +
1548     +#if 0
1549     + ext4_msg(inode->i_sb, KERN_CRIT,
1550     + "ext4_encrypted_zeroout ino %lu lblk %u len %u",
1551     + (unsigned long) inode->i_ino, lblk, len);
1552     +#endif
1553    
1554     BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE);
1555    
1556     @@ -437,17 +443,26 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
1557     goto errout;
1558     }
1559     bio->bi_bdev = inode->i_sb->s_bdev;
1560     - bio->bi_iter.bi_sector = pblk;
1561     - err = bio_add_page(bio, ciphertext_page,
1562     + bio->bi_iter.bi_sector =
1563     + pblk << (inode->i_sb->s_blocksize_bits - 9);
1564     + ret = bio_add_page(bio, ciphertext_page,
1565     inode->i_sb->s_blocksize, 0);
1566     - if (err) {
1567     + if (ret != inode->i_sb->s_blocksize) {
1568     + /* should never happen! */
1569     + ext4_msg(inode->i_sb, KERN_ERR,
1570     + "bio_add_page failed: %d", ret);
1571     + WARN_ON(1);
1572     bio_put(bio);
1573     + err = -EIO;
1574     goto errout;
1575     }
1576     err = submit_bio_wait(WRITE, bio);
1577     + if ((err == 0) && bio->bi_error)
1578     + err = -EIO;
1579     bio_put(bio);
1580     if (err)
1581     goto errout;
1582     + lblk++; pblk++;
1583     }
1584     err = 0;
1585     errout:
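
Note on the fs/ext4/crypto.c hunk above: it fixes two bugs in one loop. bio->bi_iter.bi_sector is in 512-byte sectors, so the filesystem block number has to be shifted left by (blocksize_bits - 9); and bio_add_page() returns the number of bytes added rather than an errno, so success means "returned length equals the block size". A tiny sketch of the unit conversion, with made-up numbers:

#include <stdio.h>

int main(void)
{
	unsigned long long pblk = 1000;    /* filesystem block number (made up) */
	unsigned int blocksize_bits = 12;  /* 4096-byte blocks */

	/* bi_sector counts 512-byte sectors: shift by (blocksize_bits - 9). */
	unsigned long long sector = pblk << (blocksize_bits - 9);

	printf("block %llu (4 KiB units) -> sector %llu (512 B units)\n",
	       pblk, sector);
	return 0;
}
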
1586     diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
1587     index d41843181818..e770c1ee4613 100644
1588     --- a/fs/ext4/ext4_jbd2.c
1589     +++ b/fs/ext4/ext4_jbd2.c
1590     @@ -88,13 +88,13 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
1591     return 0;
1592     }
1593    
1594     + err = handle->h_err;
1595     if (!handle->h_transaction) {
1596     - err = jbd2_journal_stop(handle);
1597     - return handle->h_err ? handle->h_err : err;
1598     + rc = jbd2_journal_stop(handle);
1599     + return err ? err : rc;
1600     }
1601    
1602     sb = handle->h_transaction->t_journal->j_private;
1603     - err = handle->h_err;
1604     rc = jbd2_journal_stop(handle);
1605    
1606     if (!err)
1607     diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
1608     index 2553aa8b608d..7f486e350d15 100644
1609     --- a/fs/ext4/extents.c
1610     +++ b/fs/ext4/extents.c
1611     @@ -3558,6 +3558,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
1612     max_zeroout = sbi->s_extent_max_zeroout_kb >>
1613     (inode->i_sb->s_blocksize_bits - 10);
1614    
1615     + if (ext4_encrypted_inode(inode))
1616     + max_zeroout = 0;
1617     +
1618     /* If extent is less than s_max_zeroout_kb, zeroout directly */
1619     if (max_zeroout && (ee_len <= max_zeroout)) {
1620     err = ext4_ext_zeroout(inode, ex);
1621     diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
1622     index 84ba4d2b3a35..17fbe3882b8e 100644
1623     --- a/fs/ext4/page-io.c
1624     +++ b/fs/ext4/page-io.c
1625     @@ -425,6 +425,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
1626     struct buffer_head *bh, *head;
1627     int ret = 0;
1628     int nr_submitted = 0;
1629     + int nr_to_submit = 0;
1630    
1631     blocksize = 1 << inode->i_blkbits;
1632    
1633     @@ -477,11 +478,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
1634     unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
1635     }
1636     set_buffer_async_write(bh);
1637     + nr_to_submit++;
1638     } while ((bh = bh->b_this_page) != head);
1639    
1640     bh = head = page_buffers(page);
1641    
1642     - if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
1643     + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) &&
1644     + nr_to_submit) {
1645     data_page = ext4_encrypt(inode, page);
1646     if (IS_ERR(data_page)) {
1647     ret = PTR_ERR(data_page);
1648     diff --git a/fs/ext4/super.c b/fs/ext4/super.c
1649     index a63c7b0a10cf..df84bd256c9f 100644
1650     --- a/fs/ext4/super.c
1651     +++ b/fs/ext4/super.c
1652     @@ -394,9 +394,13 @@ static void ext4_handle_error(struct super_block *sb)
1653     smp_wmb();
1654     sb->s_flags |= MS_RDONLY;
1655     }
1656     - if (test_opt(sb, ERRORS_PANIC))
1657     + if (test_opt(sb, ERRORS_PANIC)) {
1658     + if (EXT4_SB(sb)->s_journal &&
1659     + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
1660     + return;
1661     panic("EXT4-fs (device %s): panic forced after error\n",
1662     sb->s_id);
1663     + }
1664     }
1665    
1666     #define ext4_error_ratelimit(sb) \
1667     @@ -585,8 +589,12 @@ void __ext4_abort(struct super_block *sb, const char *function,
1668     jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
1669     save_error_info(sb, function, line);
1670     }
1671     - if (test_opt(sb, ERRORS_PANIC))
1672     + if (test_opt(sb, ERRORS_PANIC)) {
1673     + if (EXT4_SB(sb)->s_journal &&
1674     + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
1675     + return;
1676     panic("EXT4-fs panic from previous error\n");
1677     + }
1678     }
1679    
1680     void __ext4_msg(struct super_block *sb,
1681     diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
1682     index 8270fe9e3641..37023d0bdae4 100644
1683     --- a/fs/jbd2/journal.c
1684     +++ b/fs/jbd2/journal.c
1685     @@ -2071,8 +2071,12 @@ static void __journal_abort_soft (journal_t *journal, int errno)
1686    
1687     __jbd2_journal_abort_hard(journal);
1688    
1689     - if (errno)
1690     + if (errno) {
1691     jbd2_journal_update_sb_errno(journal);
1692     + write_lock(&journal->j_state_lock);
1693     + journal->j_flags |= JBD2_REC_ERR;
1694     + write_unlock(&journal->j_state_lock);
1695     + }
1696     }
1697    
1698     /**
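
Note on the jbd2/ext4 hunks above: __journal_abort_soft() now sets JBD2_REC_ERR after writing the errno into the journal superblock, and the errors=panic paths in ext4 refuse to panic until that flag is set. Without the ordering, the panic could outrun the superblock write and the error would be lost across the reboot. A single-threaded toy model of that ordering (all names hypothetical):

#include <stdio.h>
#include <stdbool.h>

/* Toy journal mirroring the JBD2_REC_ERR idea. */
struct journal { int sb_errno; bool rec_err; };

/* __journal_abort_soft(): record the errno, then mark it recorded. */
static void journal_abort(struct journal *j, int errno_)
{
	j->sb_errno = errno_;  /* stands in for jbd2_journal_update_sb_errno() */
	j->rec_err = true;     /* JBD2_REC_ERR: panic may now proceed */
}

/* ext4_handle_error() with errors=panic: don't panic before the error
 * is on disk, or the filesystem looks clean again after reboot. */
static void handle_error(struct journal *j)
{
	if (!j->rec_err) {
		printf("panic deferred: errno not yet recorded\n");
		return;
	}
	printf("panic now: errno %d survives the reboot\n", j->sb_errno);
}

int main(void)
{
	struct journal j = { 0, false };

	handle_error(&j);       /* too early */
	journal_abort(&j, -5);  /* -EIO reaches the journal superblock */
	handle_error(&j);       /* safe to panic */
	return 0;
}
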
1699     diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
1700     index 326d9e10d833..ffdf9b9e88ab 100644
1701     --- a/fs/nfs/inode.c
1702     +++ b/fs/nfs/inode.c
1703     @@ -1824,7 +1824,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1704     if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
1705     nfsi->attr_gencount = fattr->gencount;
1706     }
1707     - invalid &= ~NFS_INO_INVALID_ATTR;
1708     +
1709     + /* Don't declare attrcache up to date if there were no attrs! */
1710     + if (fattr->valid != 0)
1711     + invalid &= ~NFS_INO_INVALID_ATTR;
1712     +
1713     /* Don't invalidate the data if we were to blame */
1714     if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1715     || S_ISLNK(inode->i_mode)))
1716     diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
1717     index 223bedda64ae..10410e8b5853 100644
1718     --- a/fs/nfs/nfs4client.c
1719     +++ b/fs/nfs/nfs4client.c
1720     @@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
1721     return ret;
1722     idr_preload(GFP_KERNEL);
1723     spin_lock(&nn->nfs_client_lock);
1724     - ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT);
1725     + ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT);
1726     if (ret >= 0)
1727     clp->cl_cb_ident = ret;
1728     spin_unlock(&nn->nfs_client_lock);
1729     diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
1730     index 8abe27165ad0..abf5caea20c9 100644
1731     --- a/fs/nfs/pnfs.c
1732     +++ b/fs/nfs/pnfs.c
1733     @@ -872,33 +872,38 @@ send_layoutget(struct pnfs_layout_hdr *lo,
1734    
1735     dprintk("--> %s\n", __func__);
1736    
1737     - lgp = kzalloc(sizeof(*lgp), gfp_flags);
1738     - if (lgp == NULL)
1739     - return NULL;
1740     + /*
1741     + * Synchronously retrieve layout information from server and
1742     + * store in lseg. If we race with a concurrent seqid morphing
1743     + * op, then re-send the LAYOUTGET.
1744     + */
1745     + do {
1746     + lgp = kzalloc(sizeof(*lgp), gfp_flags);
1747     + if (lgp == NULL)
1748     + return NULL;
1749     +
1750     + i_size = i_size_read(ino);
1751     +
1752     + lgp->args.minlength = PAGE_CACHE_SIZE;
1753     + if (lgp->args.minlength > range->length)
1754     + lgp->args.minlength = range->length;
1755     + if (range->iomode == IOMODE_READ) {
1756     + if (range->offset >= i_size)
1757     + lgp->args.minlength = 0;
1758     + else if (i_size - range->offset < lgp->args.minlength)
1759     + lgp->args.minlength = i_size - range->offset;
1760     + }
1761     + lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
1762     + lgp->args.range = *range;
1763     + lgp->args.type = server->pnfs_curr_ld->id;
1764     + lgp->args.inode = ino;
1765     + lgp->args.ctx = get_nfs_open_context(ctx);
1766     + lgp->gfp_flags = gfp_flags;
1767     + lgp->cred = lo->plh_lc_cred;
1768    
1769     - i_size = i_size_read(ino);
1770     + lseg = nfs4_proc_layoutget(lgp, gfp_flags);
1771     + } while (lseg == ERR_PTR(-EAGAIN));
1772    
1773     - lgp->args.minlength = PAGE_CACHE_SIZE;
1774     - if (lgp->args.minlength > range->length)
1775     - lgp->args.minlength = range->length;
1776     - if (range->iomode == IOMODE_READ) {
1777     - if (range->offset >= i_size)
1778     - lgp->args.minlength = 0;
1779     - else if (i_size - range->offset < lgp->args.minlength)
1780     - lgp->args.minlength = i_size - range->offset;
1781     - }
1782     - lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
1783     - lgp->args.range = *range;
1784     - lgp->args.type = server->pnfs_curr_ld->id;
1785     - lgp->args.inode = ino;
1786     - lgp->args.ctx = get_nfs_open_context(ctx);
1787     - lgp->gfp_flags = gfp_flags;
1788     - lgp->cred = lo->plh_lc_cred;
1789     -
1790     - /* Synchronously retrieve layout information from server and
1791     - * store in lseg.
1792     - */
1793     - lseg = nfs4_proc_layoutget(lgp, gfp_flags);
1794     if (IS_ERR(lseg)) {
1795     switch (PTR_ERR(lseg)) {
1796     case -ENOMEM:
1797     @@ -1687,6 +1692,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1798     /* existing state ID, make sure the sequence number matches. */
1799     if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
1800     dprintk("%s forget reply due to sequence\n", __func__);
1801     + status = -EAGAIN;
1802     goto out_forget_reply;
1803     }
1804     pnfs_set_layout_stateid(lo, &res->stateid, false);
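
Note on the fs/nfs/pnfs.c hunk above: pnfs_layout_process() now reports -EAGAIN when it forgets a reply that lost a stateid-seqid race, and send_layoutget() rebuilds the LAYOUTGET arguments from scratch and re-sends inside a do/while loop. A stripped-down sketch of that retry shape — layoutget() below is a stub that fails twice, purely for illustration:

#include <stdio.h>
#include <errno.h>

/* Stub for nfs4_proc_layoutget(): pretend the first two replies are
 * forgotten because of a seqid race, then one sticks. */
static int layoutget(int *attempts)
{
	return (++*attempts < 3) ? -EAGAIN : 0;
}

int main(void)
{
	int attempts = 0, ret;

	/* send_layoutget() shape after the fix: rebuild the request and
	 * re-send for as long as the reply loses the seqid race. */
	do {
		ret = layoutget(&attempts);
	} while (ret == -EAGAIN);

	printf("layout obtained after %d attempt(s)\n", attempts);
	return 0;
}
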
1805     diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
1806     index 0f1d5691b795..0dea0c254ddf 100644
1807     --- a/fs/nfsd/nfs4state.c
1808     +++ b/fs/nfsd/nfs4state.c
1809     @@ -765,16 +765,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s)
1810     s->sc_type = 0;
1811     }
1812    
1813     -static void
1814     +/**
1815     + * nfs4_get_existing_delegation - Discover if this delegation already exists
1816     + * @clp: a pointer to the nfs4_client we're granting a delegation to
1817     + * @fp: a pointer to the nfs4_file we're granting a delegation on
1818     + *
1819     + * Return:
1820     + * On success: NULL if an existing delegation was not found.
1821     + *
1822     + * On error: -EAGAIN if one was previously granted to this nfs4_client
1823     + * for this nfs4_file.
1824     + *
1825     + */
1826     +
1827     +static int
1828     +nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
1829     +{
1830     + struct nfs4_delegation *searchdp = NULL;
1831     + struct nfs4_client *searchclp = NULL;
1832     +
1833     + lockdep_assert_held(&state_lock);
1834     + lockdep_assert_held(&fp->fi_lock);
1835     +
1836     + list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
1837     + searchclp = searchdp->dl_stid.sc_client;
1838     + if (clp == searchclp) {
1839     + return -EAGAIN;
1840     + }
1841     + }
1842     + return 0;
1843     +}
1844     +
1845     +/**
1846     + * hash_delegation_locked - Add a delegation to the appropriate lists
1847     + * @dp: a pointer to the nfs4_delegation we are adding.
1848     + * @fp: a pointer to the nfs4_file we're granting a delegation on
1849     + *
1850     + * Return:
1851     + * On success: NULL if the delegation was successfully hashed.
1852     + *
1853     + * On error: -EAGAIN if one was previously granted to this
1854     + * nfs4_client for this nfs4_file. Delegation is not hashed.
1855     + *
1856     + */
1857     +
1858     +static int
1859     hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
1860     {
1861     + int status;
1862     + struct nfs4_client *clp = dp->dl_stid.sc_client;
1863     +
1864     lockdep_assert_held(&state_lock);
1865     lockdep_assert_held(&fp->fi_lock);
1866    
1867     + status = nfs4_get_existing_delegation(clp, fp);
1868     + if (status)
1869     + return status;
1870     + ++fp->fi_delegees;
1871     atomic_inc(&dp->dl_stid.sc_count);
1872     dp->dl_stid.sc_type = NFS4_DELEG_STID;
1873     list_add(&dp->dl_perfile, &fp->fi_delegations);
1874     - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
1875     + list_add(&dp->dl_perclnt, &clp->cl_delegations);
1876     + return 0;
1877     }
1878    
1879     static bool
1880     @@ -3360,6 +3412,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
1881     stp->st_access_bmap = 0;
1882     stp->st_deny_bmap = 0;
1883     stp->st_openstp = NULL;
1884     + init_rwsem(&stp->st_rwsem);
1885     spin_lock(&oo->oo_owner.so_client->cl_lock);
1886     list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
1887     spin_lock(&fp->fi_lock);
1888     @@ -3945,6 +3998,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
1889     return fl;
1890     }
1891    
1892     +/**
1893     + * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer
1894     + * @dp: a pointer to the nfs4_delegation we're adding.
1895     + *
1896     + * Return:
1897     + * On success: Return code will be 0 on success.
1898     + *
1899     + * On error: -EAGAIN if there was an existing delegation.
1900     + * nonzero if there is an error in other cases.
1901     + *
1902     + */
1903     +
1904     static int nfs4_setlease(struct nfs4_delegation *dp)
1905     {
1906     struct nfs4_file *fp = dp->dl_stid.sc_file;
1907     @@ -3976,16 +4041,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
1908     goto out_unlock;
1909     /* Race breaker */
1910     if (fp->fi_deleg_file) {
1911     - status = 0;
1912     - ++fp->fi_delegees;
1913     - hash_delegation_locked(dp, fp);
1914     + status = hash_delegation_locked(dp, fp);
1915     goto out_unlock;
1916     }
1917     fp->fi_deleg_file = filp;
1918     - fp->fi_delegees = 1;
1919     - hash_delegation_locked(dp, fp);
1920     + fp->fi_delegees = 0;
1921     + status = hash_delegation_locked(dp, fp);
1922     spin_unlock(&fp->fi_lock);
1923     spin_unlock(&state_lock);
1924     + if (status) {
1925     + /* Should never happen, this is a new fi_deleg_file */
1926     + WARN_ON_ONCE(1);
1927     + goto out_fput;
1928     + }
1929     return 0;
1930     out_unlock:
1931     spin_unlock(&fp->fi_lock);
1932     @@ -4005,6 +4073,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
1933     if (fp->fi_had_conflict)
1934     return ERR_PTR(-EAGAIN);
1935    
1936     + spin_lock(&state_lock);
1937     + spin_lock(&fp->fi_lock);
1938     + status = nfs4_get_existing_delegation(clp, fp);
1939     + spin_unlock(&fp->fi_lock);
1940     + spin_unlock(&state_lock);
1941     +
1942     + if (status)
1943     + return ERR_PTR(status);
1944     +
1945     dp = alloc_init_deleg(clp, fh, odstate);
1946     if (!dp)
1947     return ERR_PTR(-ENOMEM);
1948     @@ -4023,9 +4100,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
1949     status = -EAGAIN;
1950     goto out_unlock;
1951     }
1952     - ++fp->fi_delegees;
1953     - hash_delegation_locked(dp, fp);
1954     - status = 0;
1955     + status = hash_delegation_locked(dp, fp);
1956     out_unlock:
1957     spin_unlock(&fp->fi_lock);
1958     spin_unlock(&state_lock);
1959     @@ -4187,15 +4262,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1960     */
1961     if (stp) {
1962     /* Stateid was found, this is an OPEN upgrade */
1963     + down_read(&stp->st_rwsem);
1964     status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
1965     - if (status)
1966     + if (status) {
1967     + up_read(&stp->st_rwsem);
1968     goto out;
1969     + }
1970     } else {
1971     stp = open->op_stp;
1972     open->op_stp = NULL;
1973     init_open_stateid(stp, fp, open);
1974     + down_read(&stp->st_rwsem);
1975     status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
1976     if (status) {
1977     + up_read(&stp->st_rwsem);
1978     release_open_stateid(stp);
1979     goto out;
1980     }
1981     @@ -4207,6 +4287,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1982     }
1983     update_stateid(&stp->st_stid.sc_stateid);
1984     memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
1985     + up_read(&stp->st_rwsem);
1986    
1987     if (nfsd4_has_session(&resp->cstate)) {
1988     if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
1989     @@ -4819,10 +4900,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
1990     * revoked delegations are kept only for free_stateid.
1991     */
1992     return nfserr_bad_stateid;
1993     + down_write(&stp->st_rwsem);
1994     status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
1995     - if (status)
1996     - return status;
1997     - return nfs4_check_fh(current_fh, &stp->st_stid);
1998     + if (status == nfs_ok)
1999     + status = nfs4_check_fh(current_fh, &stp->st_stid);
2000     + if (status != nfs_ok)
2001     + up_write(&stp->st_rwsem);
2002     + return status;
2003     }
2004    
2005     /*
2006     @@ -4869,6 +4953,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
2007     return status;
2008     oo = openowner(stp->st_stateowner);
2009     if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
2010     + up_write(&stp->st_rwsem);
2011     nfs4_put_stid(&stp->st_stid);
2012     return nfserr_bad_stateid;
2013     }
2014     @@ -4899,11 +4984,14 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2015     goto out;
2016     oo = openowner(stp->st_stateowner);
2017     status = nfserr_bad_stateid;
2018     - if (oo->oo_flags & NFS4_OO_CONFIRMED)
2019     + if (oo->oo_flags & NFS4_OO_CONFIRMED) {
2020     + up_write(&stp->st_rwsem);
2021     goto put_stateid;
2022     + }
2023     oo->oo_flags |= NFS4_OO_CONFIRMED;
2024     update_stateid(&stp->st_stid.sc_stateid);
2025     memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
2026     + up_write(&stp->st_rwsem);
2027     dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
2028     __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
2029    
2030     @@ -4982,6 +5070,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
2031     memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
2032     status = nfs_ok;
2033     put_stateid:
2034     + up_write(&stp->st_rwsem);
2035     nfs4_put_stid(&stp->st_stid);
2036     out:
2037     nfsd4_bump_seqid(cstate, status);
2038     @@ -5035,6 +5124,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2039     goto out;
2040     update_stateid(&stp->st_stid.sc_stateid);
2041     memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
2042     + up_write(&stp->st_rwsem);
2043    
2044     nfsd4_close_open_stateid(stp);
2045    
2046     @@ -5260,6 +5350,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
2047     stp->st_access_bmap = 0;
2048     stp->st_deny_bmap = open_stp->st_deny_bmap;
2049     stp->st_openstp = open_stp;
2050     + init_rwsem(&stp->st_rwsem);
2051     list_add(&stp->st_locks, &open_stp->st_locks);
2052     list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
2053     spin_lock(&fp->fi_lock);
2054     @@ -5428,6 +5519,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2055     &open_stp, nn);
2056     if (status)
2057     goto out;
2058     + up_write(&open_stp->st_rwsem);
2059     open_sop = openowner(open_stp->st_stateowner);
2060     status = nfserr_bad_stateid;
2061     if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
2062     @@ -5435,6 +5527,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2063     goto out;
2064     status = lookup_or_create_lock_state(cstate, open_stp, lock,
2065     &lock_stp, &new);
2066     + if (status == nfs_ok)
2067     + down_write(&lock_stp->st_rwsem);
2068     } else {
2069     status = nfs4_preprocess_seqid_op(cstate,
2070     lock->lk_old_lock_seqid,
2071     @@ -5540,6 +5634,8 @@ out:
2072     seqid_mutating_err(ntohl(status)))
2073     lock_sop->lo_owner.so_seqid++;
2074    
2075     + up_write(&lock_stp->st_rwsem);
2076     +
2077     /*
2078     * If this is a new, never-before-used stateid, and we are
2079     * returning an error, then just go ahead and release it.
2080     @@ -5709,6 +5805,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2081     fput:
2082     fput(filp);
2083     put_stateid:
2084     + up_write(&stp->st_rwsem);
2085     nfs4_put_stid(&stp->st_stid);
2086     out:
2087     nfsd4_bump_seqid(cstate, status);
2088     diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
2089     index 583ffc13cae2..31bde12feefe 100644
2090     --- a/fs/nfsd/state.h
2091     +++ b/fs/nfsd/state.h
2092     @@ -534,15 +534,16 @@ struct nfs4_file {
2093     * Better suggestions welcome.
2094     */
2095     struct nfs4_ol_stateid {
2096     - struct nfs4_stid st_stid; /* must be first field */
2097     - struct list_head st_perfile;
2098     - struct list_head st_perstateowner;
2099     - struct list_head st_locks;
2100     - struct nfs4_stateowner * st_stateowner;
2101     - struct nfs4_clnt_odstate * st_clnt_odstate;
2102     - unsigned char st_access_bmap;
2103     - unsigned char st_deny_bmap;
2104     - struct nfs4_ol_stateid * st_openstp;
2105     + struct nfs4_stid st_stid;
2106     + struct list_head st_perfile;
2107     + struct list_head st_perstateowner;
2108     + struct list_head st_locks;
2109     + struct nfs4_stateowner *st_stateowner;
2110     + struct nfs4_clnt_odstate *st_clnt_odstate;
2111     + unsigned char st_access_bmap;
2112     + unsigned char st_deny_bmap;
2113     + struct nfs4_ol_stateid *st_openstp;
2114     + struct rw_semaphore st_rwsem;
2115     };
2116    
2117     static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
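
Note on the nfsd hunks above: st_rwsem is added to nfs4_ol_stateid so that OPEN and OPEN upgrade run under the read side while seqid-mutating operations (OPEN_CONFIRM, OPEN_DOWNGRADE, CLOSE, LOCK) take the write side, keeping stateid bumps from interleaving. A userspace analogy using pthread_rwlock_t — single-threaded and only to show the lock discipline; the fields are stand-ins, and the real update_stateid() has its own serialization rules. Build with -pthread:

#include <stdio.h>
#include <pthread.h>

/* Toy stateid carrying the rw_semaphore added by this series. */
struct ol_stateid {
	pthread_rwlock_t rwsem;  /* stands in for struct rw_semaphore st_rwsem */
	unsigned int generation;
};

/* OPEN and OPEN upgrade run under the read side... */
static void open_path(struct ol_stateid *stp)
{
	pthread_rwlock_rdlock(&stp->rwsem);
	stp->generation++;       /* stands in for update_stateid() */
	pthread_rwlock_unlock(&stp->rwsem);
}

/* ...while seqid-mutating ops take the write side, so they never
 * interleave with an in-flight OPEN. */
static void seqid_op(struct ol_stateid *stp)
{
	pthread_rwlock_wrlock(&stp->rwsem);
	stp->generation++;
	pthread_rwlock_unlock(&stp->rwsem);
}

int main(void)
{
	struct ol_stateid stp = { PTHREAD_RWLOCK_INITIALIZER, 0 };

	open_path(&stp);
	seqid_op(&stp);
	printf("generation=%u\n", stp.generation);
	return 0;
}
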
2118     diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
2119     index b7dfac226b1e..12bfa9ca5583 100644
2120     --- a/fs/ocfs2/namei.c
2121     +++ b/fs/ocfs2/namei.c
2122     @@ -374,6 +374,8 @@ static int ocfs2_mknod(struct inode *dir,
2123     mlog_errno(status);
2124     goto leave;
2125     }
2126     + /* update inode->i_mode after mask with "umask". */
2127     + inode->i_mode = mode;
2128    
2129     handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
2130     S_ISDIR(mode),
2131     diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
2132     index f1f32af6d9b9..3e4ff3f1d314 100644
2133     --- a/include/linux/ipv6.h
2134     +++ b/include/linux/ipv6.h
2135     @@ -227,7 +227,7 @@ struct ipv6_pinfo {
2136     struct ipv6_ac_socklist *ipv6_ac_list;
2137     struct ipv6_fl_socklist __rcu *ipv6_fl_list;
2138    
2139     - struct ipv6_txoptions *opt;
2140     + struct ipv6_txoptions __rcu *opt;
2141     struct sk_buff *pktoptions;
2142     struct sk_buff *rxpmtu;
2143     struct inet6_cork cork;
2144     diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
2145     index df07e78487d5..1abeb820a630 100644
2146     --- a/include/linux/jbd2.h
2147     +++ b/include/linux/jbd2.h
2148     @@ -1046,6 +1046,7 @@ struct journal_s
2149     #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
2150     * data write error in ordered
2151     * mode */
2152     +#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */
2153    
2154     /*
2155     * Function declarations for the journaling transaction and buffer
2156     diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
2157     index dd2097455a2e..1565324eb620 100644
2158     --- a/include/linux/mlx5/mlx5_ifc.h
2159     +++ b/include/linux/mlx5/mlx5_ifc.h
2160     @@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
2161     u8 lro_cap[0x1];
2162     u8 lro_psh_flag[0x1];
2163     u8 lro_time_stamp[0x1];
2164     - u8 reserved_0[0x6];
2165     + u8 reserved_0[0x3];
2166     + u8 self_lb_en_modifiable[0x1];
2167     + u8 reserved_1[0x2];
2168     u8 max_lso_cap[0x5];
2169     - u8 reserved_1[0x4];
2170     + u8 reserved_2[0x4];
2171     u8 rss_ind_tbl_cap[0x4];
2172     - u8 reserved_2[0x3];
2173     + u8 reserved_3[0x3];
2174     u8 tunnel_lso_const_out_ip_id[0x1];
2175     - u8 reserved_3[0x2];
2176     + u8 reserved_4[0x2];
2177     u8 tunnel_statless_gre[0x1];
2178     u8 tunnel_stateless_vxlan[0x1];
2179    
2180     - u8 reserved_4[0x20];
2181     + u8 reserved_5[0x20];
2182    
2183     - u8 reserved_5[0x10];
2184     + u8 reserved_6[0x10];
2185     u8 lro_min_mss_size[0x10];
2186    
2187     - u8 reserved_6[0x120];
2188     + u8 reserved_7[0x120];
2189    
2190     u8 lro_timer_supported_periods[4][0x20];
2191    
2192     - u8 reserved_7[0x600];
2193     + u8 reserved_8[0x600];
2194     };
2195    
2196     struct mlx5_ifc_roce_cap_bits {
2197     @@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits {
2198     };
2199    
2200     struct mlx5_ifc_modify_tir_bitmask_bits {
2201     - u8 reserved[0x20];
2202     + u8 reserved_0[0x20];
2203    
2204     - u8 reserved1[0x1f];
2205     + u8 reserved_1[0x1b];
2206     + u8 self_lb_en[0x1];
2207     + u8 reserved_2[0x3];
2208     u8 lro[0x1];
2209     };
2210    
2211     diff --git a/include/net/af_unix.h b/include/net/af_unix.h
2212     index b36d837c701e..2a91a0561a47 100644
2213     --- a/include/net/af_unix.h
2214     +++ b/include/net/af_unix.h
2215     @@ -62,6 +62,7 @@ struct unix_sock {
2216     #define UNIX_GC_CANDIDATE 0
2217     #define UNIX_GC_MAYBE_CYCLE 1
2218     struct socket_wq peer_wq;
2219     + wait_queue_t peer_wake;
2220     };
2221    
2222     static inline struct unix_sock *unix_sk(const struct sock *sk)
2223     diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
2224     index aaf9700fc9e5..fb961a576abe 100644
2225     --- a/include/net/ip6_fib.h
2226     +++ b/include/net/ip6_fib.h
2227     @@ -167,7 +167,8 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
2228    
2229     static inline u32 rt6_get_cookie(const struct rt6_info *rt)
2230     {
2231     - if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE))
2232     + if (rt->rt6i_flags & RTF_PCPU ||
2233     + (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
2234     rt = (struct rt6_info *)(rt->dst.from);
2235    
2236     return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
2237     diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
2238     index fa915fa0f703..d49a8f8fae45 100644
2239     --- a/include/net/ip6_tunnel.h
2240     +++ b/include/net/ip6_tunnel.h
2241     @@ -90,11 +90,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
2242     err = ip6_local_out_sk(sk, skb);
2243    
2244     if (net_xmit_eval(err) == 0) {
2245     - struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
2246     + struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
2247     u64_stats_update_begin(&tstats->syncp);
2248     tstats->tx_bytes += pkt_len;
2249     tstats->tx_packets++;
2250     u64_stats_update_end(&tstats->syncp);
2251     + put_cpu_ptr(tstats);
2252     } else {
2253     stats->tx_errors++;
2254     stats->tx_aborted_errors++;
2255     diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
2256     index f6dafec9102c..62a750a6a8f8 100644
2257     --- a/include/net/ip_tunnels.h
2258     +++ b/include/net/ip_tunnels.h
2259     @@ -287,12 +287,13 @@ static inline void iptunnel_xmit_stats(int err,
2260     struct pcpu_sw_netstats __percpu *stats)
2261     {
2262     if (err > 0) {
2263     - struct pcpu_sw_netstats *tstats = this_cpu_ptr(stats);
2264     + struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats);
2265    
2266     u64_stats_update_begin(&tstats->syncp);
2267     tstats->tx_bytes += err;
2268     tstats->tx_packets++;
2269     u64_stats_update_end(&tstats->syncp);
2270     + put_cpu_ptr(tstats);
2271     } else if (err < 0) {
2272     err_stats->tx_errors++;
2273     err_stats->tx_aborted_errors++;
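
Note on the ip6_tunnel.h/ip_tunnels.h hunks above: these xmit-stats helpers can run with preemption enabled, so a this_cpu_ptr() result may be stale by the time it is written through, updating another CPU's pcpu_sw_netstats (and its u64_stats syncp) concurrently with that CPU. get_cpu_ptr()/put_cpu_ptr() keep the task on one CPU for the update. A toy model of the stale-pointer hazard — current_cpu and the "pinned" flag below only simulate migration and preemption-disable, they are not kernel semantics:

#include <stdio.h>

/* Toy per-CPU stats: one slot per CPU; current_cpu simulates where the
 * task is running and can change under us, like preemption + migration. */
struct stats { unsigned long tx_packets; };
static struct stats per_cpu[4];
static int current_cpu;

/* this_cpu_ptr() style: nothing prevents a migration after the lookup. */
static struct stats *this_cpu_ptr_(void)
{
	return &per_cpu[current_cpu];
}

/* get_cpu_ptr() style: migration is blocked until put_cpu_ptr_(). */
static int pinned;
static struct stats *get_cpu_ptr_(void) { pinned = 1; return &per_cpu[current_cpu]; }
static void put_cpu_ptr_(void)          { pinned = 0; }

int main(void)
{
	struct stats *s = this_cpu_ptr_();
	current_cpu = 2;    /* "preempted and migrated" between lookup and use */
	s->tx_packets++;    /* updates CPU 0's slot while running on CPU 2 */

	struct stats *p = get_cpu_ptr_();  /* pinned: cannot migrate here */
	p->tx_packets++;
	put_cpu_ptr_();

	printf("cpu0=%lu cpu2=%lu\n", per_cpu[0].tx_packets, per_cpu[2].tx_packets);
	return 0;
}
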
2274     diff --git a/include/net/ipv6.h b/include/net/ipv6.h
2275     index 711cca428cc8..b14e1581c477 100644
2276     --- a/include/net/ipv6.h
2277     +++ b/include/net/ipv6.h
2278     @@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock;
2279     */
2280    
2281     struct ipv6_txoptions {
2282     + atomic_t refcnt;
2283     /* Length of this structure */
2284     int tot_len;
2285    
2286     @@ -217,7 +218,7 @@ struct ipv6_txoptions {
2287     struct ipv6_opt_hdr *dst0opt;
2288     struct ipv6_rt_hdr *srcrt; /* Routing Header */
2289     struct ipv6_opt_hdr *dst1opt;
2290     -
2291     + struct rcu_head rcu;
2292     /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */
2293     };
2294    
2295     @@ -252,6 +253,24 @@ struct ipv6_fl_socklist {
2296     struct rcu_head rcu;
2297     };
2298    
2299     +static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
2300     +{
2301     + struct ipv6_txoptions *opt;
2302     +
2303     + rcu_read_lock();
2304     + opt = rcu_dereference(np->opt);
2305     + if (opt && !atomic_inc_not_zero(&opt->refcnt))
2306     + opt = NULL;
2307     + rcu_read_unlock();
2308     + return opt;
2309     +}
2310     +
2311     +static inline void txopt_put(struct ipv6_txoptions *opt)
2312     +{
2313     + if (opt && atomic_dec_and_test(&opt->refcnt))
2314     + kfree_rcu(opt, rcu);
2315     +}
2316     +
2317     struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label);
2318     struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
2319     struct ip6_flowlabel *fl,
2320     @@ -490,6 +509,7 @@ struct ip6_create_arg {
2321     u32 user;
2322     const struct in6_addr *src;
2323     const struct in6_addr *dst;
2324     + int iif;
2325     u8 ecn;
2326     };
2327    
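
Note on the include/net/ipv6.h hunk above: np->opt becomes RCU-managed, and readers take a counted reference through txopt_get(). The atomic_inc_not_zero() step means a reader racing with the final txopt_put() sees NULL rather than a freed object. A userspace C11 sketch of the same get/put shape — free() stands in for kfree_rcu(), which in the kernel additionally defers the free past in-flight RCU readers:

#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

struct txoptions { atomic_int refcnt; int tot_len; };

static struct txoptions *txopt_get(struct txoptions *opt)
{
	if (!opt)
		return NULL;
	/* atomic_inc_not_zero(): take a reference only if one still exists. */
	int old = atomic_load(&opt->refcnt);
	do {
		if (old == 0)
			return NULL;    /* lost the race with the last put */
	} while (!atomic_compare_exchange_weak(&opt->refcnt, &old, old + 1));
	return opt;
}

static void txopt_put(struct txoptions *opt)
{
	if (opt && atomic_fetch_sub(&opt->refcnt, 1) == 1)
		free(opt);              /* kernel: kfree_rcu(opt, rcu) */
}

int main(void)
{
	struct txoptions *opt = malloc(sizeof(*opt));
	atomic_init(&opt->refcnt, 1);
	opt->tot_len = 40;

	struct txoptions *ref = txopt_get(opt);
	printf("got ref, tot_len=%d\n", ref->tot_len);
	txopt_put(ref);                 /* reader's reference */
	txopt_put(opt);                 /* owner's reference: frees */
	return 0;
}
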
2328     diff --git a/include/net/ndisc.h b/include/net/ndisc.h
2329     index aba5695fadb0..b3a7751251b4 100644
2330     --- a/include/net/ndisc.h
2331     +++ b/include/net/ndisc.h
2332     @@ -182,8 +182,7 @@ int ndisc_rcv(struct sk_buff *skb);
2333    
2334     void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
2335     const struct in6_addr *solicit,
2336     - const struct in6_addr *daddr, const struct in6_addr *saddr,
2337     - struct sk_buff *oskb);
2338     + const struct in6_addr *daddr, const struct in6_addr *saddr);
2339    
2340     void ndisc_send_rs(struct net_device *dev,
2341     const struct in6_addr *saddr, const struct in6_addr *daddr);
2342     diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
2343     index 444faa89a55f..f1ad8f8fd4f1 100644
2344     --- a/include/net/sch_generic.h
2345     +++ b/include/net/sch_generic.h
2346     @@ -61,6 +61,9 @@ struct Qdisc {
2347     */
2348     #define TCQ_F_WARN_NONWC (1 << 16)
2349     #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */
2350     +#define TCQ_F_NOPARENT 0x40 /* root of its hierarchy :
2351     + * qdisc_tree_decrease_qlen() should stop.
2352     + */
2353     u32 limit;
2354     const struct Qdisc_ops *ops;
2355     struct qdisc_size_table __rcu *stab;
2356     diff --git a/include/net/switchdev.h b/include/net/switchdev.h
2357     index 319baab3b48e..731c40e34bf2 100644
2358     --- a/include/net/switchdev.h
2359     +++ b/include/net/switchdev.h
2360     @@ -272,7 +272,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
2361     struct net_device *filter_dev,
2362     int idx)
2363     {
2364     - return -EOPNOTSUPP;
2365     + return idx;
2366     }
2367    
2368     static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
2369     diff --git a/kernel/.gitignore b/kernel/.gitignore
2370     index 790d83c7d160..b3097bde4e9c 100644
2371     --- a/kernel/.gitignore
2372     +++ b/kernel/.gitignore
2373     @@ -5,4 +5,3 @@ config_data.h
2374     config_data.gz
2375     timeconst.h
2376     hz.bc
2377     -x509_certificate_list
2378     diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
2379     index 29ace107f236..7a0decf47110 100644
2380     --- a/kernel/bpf/arraymap.c
2381     +++ b/kernel/bpf/arraymap.c
2382     @@ -104,7 +104,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
2383     /* all elements already exist */
2384     return -EEXIST;
2385    
2386     - memcpy(array->value + array->elem_size * index, value, array->elem_size);
2387     + memcpy(array->value + array->elem_size * index, value, map->value_size);
2388     return 0;
2389     }
2390    
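
Note on the kernel/bpf/arraymap.c hunk above: array->elem_size is the value size rounded up to 8 bytes for alignment, so copying elem_size bytes reads past the end of the value buffer supplied by the caller; the fix copies exactly map->value_size. A small demonstration of the size mismatch, with made-up sizes:

#include <stdio.h>
#include <string.h>

#define ROUND_UP(x, a) (((x) + (a) - 1) / (a) * (a))

int main(void)
{
	unsigned int value_size = 6;                        /* what userspace supplies */
	unsigned int elem_size  = ROUND_UP(value_size, 8);  /* internal stride */

	char value[6] = "hello";   /* only value_size bytes are valid */
	char array[16];

	/* memcpy(array, value, elem_size) would read 2 bytes past 'value';
	 * the fix copies exactly map->value_size. */
	memcpy(array, value, value_size);

	printf("elem_size=%u value_size=%u copied=%u\n",
	       elem_size, value_size, value_size);
	return 0;
}
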
2391     diff --git a/net/core/neighbour.c b/net/core/neighbour.c
2392     index 2b515ba7e94f..c169bba44e05 100644
2393     --- a/net/core/neighbour.c
2394     +++ b/net/core/neighbour.c
2395     @@ -2215,7 +2215,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2396     ndm->ndm_pad2 = 0;
2397     ndm->ndm_flags = pn->flags | NTF_PROXY;
2398     ndm->ndm_type = RTN_UNICAST;
2399     - ndm->ndm_ifindex = pn->dev->ifindex;
2400     + ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2401     ndm->ndm_state = NUD_NONE;
2402    
2403     if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2404     @@ -2290,7 +2290,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2405     if (h > s_h)
2406     s_idx = 0;
2407     for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2408     - if (dev_net(n->dev) != net)
2409     + if (pneigh_net(n) != net)
2410     continue;
2411     if (idx < s_idx)
2412     goto next;
2413     diff --git a/net/core/scm.c b/net/core/scm.c
2414     index 3b6899b7d810..8a1741b14302 100644
2415     --- a/net/core/scm.c
2416     +++ b/net/core/scm.c
2417     @@ -305,6 +305,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
2418     err = put_user(cmlen, &cm->cmsg_len);
2419     if (!err) {
2420     cmlen = CMSG_SPACE(i*sizeof(int));
2421     + if (msg->msg_controllen < cmlen)
2422     + cmlen = msg->msg_controllen;
2423     msg->msg_control += cmlen;
2424     msg->msg_controllen -= cmlen;
2425     }
2426     diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
2427     index 5165571f397a..a0490508d213 100644
2428     --- a/net/dccp/ipv6.c
2429     +++ b/net/dccp/ipv6.c
2430     @@ -202,7 +202,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
2431     security_req_classify_flow(req, flowi6_to_flowi(&fl6));
2432    
2433    
2434     - final_p = fl6_update_dst(&fl6, np->opt, &final);
2435     + rcu_read_lock();
2436     + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
2437     + rcu_read_unlock();
2438    
2439     dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
2440     if (IS_ERR(dst)) {
2441     @@ -219,7 +221,10 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
2442     &ireq->ir_v6_loc_addr,
2443     &ireq->ir_v6_rmt_addr);
2444     fl6.daddr = ireq->ir_v6_rmt_addr;
2445     - err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
2446     + rcu_read_lock();
2447     + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
2448     + np->tclass);
2449     + rcu_read_unlock();
2450     err = net_xmit_eval(err);
2451     }
2452    
2453     @@ -415,6 +420,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
2454     {
2455     struct inet_request_sock *ireq = inet_rsk(req);
2456     struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
2457     + struct ipv6_txoptions *opt;
2458     struct inet_sock *newinet;
2459     struct dccp6_sock *newdp6;
2460     struct sock *newsk;
2461     @@ -534,13 +540,15 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
2462     * Yes, keeping reference count would be much more clever, but we make
2463     * one more one thing there: reattach optmem to newsk.
2464     */
2465     - if (np->opt != NULL)
2466     - newnp->opt = ipv6_dup_options(newsk, np->opt);
2467     -
2468     + opt = rcu_dereference(np->opt);
2469     + if (opt) {
2470     + opt = ipv6_dup_options(newsk, opt);
2471     + RCU_INIT_POINTER(newnp->opt, opt);
2472     + }
2473     inet_csk(newsk)->icsk_ext_hdr_len = 0;
2474     - if (newnp->opt != NULL)
2475     - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
2476     - newnp->opt->opt_flen);
2477     + if (opt)
2478     + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
2479     + opt->opt_flen;
2480    
2481     dccp_sync_mss(newsk, dst_mtu(dst));
2482    
2483     @@ -793,6 +801,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
2484     struct ipv6_pinfo *np = inet6_sk(sk);
2485     struct dccp_sock *dp = dccp_sk(sk);
2486     struct in6_addr *saddr = NULL, *final_p, final;
2487     + struct ipv6_txoptions *opt;
2488     struct flowi6 fl6;
2489     struct dst_entry *dst;
2490     int addr_type;
2491     @@ -892,7 +901,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
2492     fl6.fl6_sport = inet->inet_sport;
2493     security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
2494    
2495     - final_p = fl6_update_dst(&fl6, np->opt, &final);
2496     + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
2497     + final_p = fl6_update_dst(&fl6, opt, &final);
2498    
2499     dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
2500     if (IS_ERR(dst)) {
2501     @@ -912,9 +922,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
2502     __ip6_dst_store(sk, dst, NULL, NULL);
2503    
2504     icsk->icsk_ext_hdr_len = 0;
2505     - if (np->opt != NULL)
2506     - icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
2507     - np->opt->opt_nflen);
2508     + if (opt)
2509     + icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
2510    
2511     inet->inet_dport = usin->sin6_port;
2512    
2513     diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
2514     index 8e8203d5c520..ef7e2c4342cb 100644
2515     --- a/net/ipv4/ipmr.c
2516     +++ b/net/ipv4/ipmr.c
2517     @@ -134,7 +134,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2518     struct mfc_cache *c, struct rtmsg *rtm);
2519     static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
2520     int cmd);
2521     -static void mroute_clean_tables(struct mr_table *mrt);
2522     +static void mroute_clean_tables(struct mr_table *mrt, bool all);
2523     static void ipmr_expire_process(unsigned long arg);
2524    
2525     #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
2526     @@ -350,7 +350,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
2527     static void ipmr_free_table(struct mr_table *mrt)
2528     {
2529     del_timer_sync(&mrt->ipmr_expire_timer);
2530     - mroute_clean_tables(mrt);
2531     + mroute_clean_tables(mrt, true);
2532     kfree(mrt);
2533     }
2534    
2535     @@ -1208,7 +1208,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
2536     * Close the multicast socket, and clear the vif tables etc
2537     */
2538    
2539     -static void mroute_clean_tables(struct mr_table *mrt)
2540     +static void mroute_clean_tables(struct mr_table *mrt, bool all)
2541     {
2542     int i;
2543     LIST_HEAD(list);
2544     @@ -1217,8 +1217,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
2545     /* Shut down all active vif entries */
2546    
2547     for (i = 0; i < mrt->maxvif; i++) {
2548     - if (!(mrt->vif_table[i].flags & VIFF_STATIC))
2549     - vif_delete(mrt, i, 0, &list);
2550     + if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
2551     + continue;
2552     + vif_delete(mrt, i, 0, &list);
2553     }
2554     unregister_netdevice_many(&list);
2555    
2556     @@ -1226,7 +1227,7 @@ static void mroute_clean_tables(struct mr_table *mrt)
2557    
2558     for (i = 0; i < MFC_LINES; i++) {
2559     list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
2560     - if (c->mfc_flags & MFC_STATIC)
2561     + if (!all && (c->mfc_flags & MFC_STATIC))
2562     continue;
2563     list_del_rcu(&c->list);
2564     mroute_netlink_event(mrt, c, RTM_DELROUTE);
2565     @@ -1261,7 +1262,7 @@ static void mrtsock_destruct(struct sock *sk)
2566     NETCONFA_IFINDEX_ALL,
2567     net->ipv4.devconf_all);
2568     RCU_INIT_POINTER(mrt->mroute_sk, NULL);
2569     - mroute_clean_tables(mrt);
2570     + mroute_clean_tables(mrt, false);
2571     }
2572     }
2573     rtnl_unlock();
2574     diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
2575     index a8f515bb19c4..0a2b61dbcd4e 100644
2576     --- a/net/ipv4/tcp_input.c
2577     +++ b/net/ipv4/tcp_input.c
2578     @@ -4457,19 +4457,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
2579     int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
2580     {
2581     struct sk_buff *skb;
2582     + int err = -ENOMEM;
2583     + int data_len = 0;
2584     bool fragstolen;
2585    
2586     if (size == 0)
2587     return 0;
2588    
2589     - skb = alloc_skb(size, sk->sk_allocation);
2590     + if (size > PAGE_SIZE) {
2591     + int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
2592     +
2593     + data_len = npages << PAGE_SHIFT;
2594     + size = data_len + (size & ~PAGE_MASK);
2595     + }
2596     + skb = alloc_skb_with_frags(size - data_len, data_len,
2597     + PAGE_ALLOC_COSTLY_ORDER,
2598     + &err, sk->sk_allocation);
2599     if (!skb)
2600     goto err;
2601    
2602     + skb_put(skb, size - data_len);
2603     + skb->data_len = data_len;
2604     + skb->len = size;
2605     +
2606     if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
2607     goto err_free;
2608    
2609     - if (memcpy_from_msg(skb_put(skb, size), msg, size))
2610     + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2611     + if (err)
2612     goto err_free;
2613    
2614     TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
2615     @@ -4485,7 +4500,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
2616     err_free:
2617     kfree_skb(skb);
2618     err:
2619     - return -ENOMEM;
2620     + return err;
2621     +
2622     }
2623    
2624     static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
2625     @@ -5643,6 +5659,7 @@ discard:
2626     }
2627    
2628     tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
2629     + tp->copied_seq = tp->rcv_nxt;
2630     tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
2631    
2632     /* RFC1323: The window in SYN & SYN/ACK segments is
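
Note on the net/ipv4/tcp_input.c hunks above: tcp_send_rcvq() used a single linear alloc_skb(size), which fails for large writes once memory is fragmented. The fix carves the buffer into up to MAX_SKB_FRAGS whole pages (skb frags) plus a sub-page linear head via alloc_skb_with_frags(), and returns the real error instead of a blanket -ENOMEM. The split arithmetic, with the page constants hard-coded for illustration:

#include <stdio.h>

#define PAGE_SHIFT     12
#define PAGE_SIZE      (1UL << PAGE_SHIFT)
#define PAGE_MASK      (~(PAGE_SIZE - 1))
#define MAX_SKB_FRAGS  17UL

int main(void)
{
	unsigned long size = 70000;  /* bytes queued by the caller (made up) */
	unsigned long data_len = 0;

	if (size > PAGE_SIZE) {
		unsigned long npages = size >> PAGE_SHIFT;
		if (npages > MAX_SKB_FRAGS)
			npages = MAX_SKB_FRAGS;
		data_len = npages << PAGE_SHIFT;        /* whole pages -> frags */
		size = data_len + (size & ~PAGE_MASK);  /* remainder -> linear head */
	}
	printf("linear=%lu frag_bytes=%lu total=%lu\n",
	       size - data_len, data_len, size);
	return 0;
}
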
2633     diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
2634     index 93898e093d4e..a7739c83aa84 100644
2635     --- a/net/ipv4/tcp_ipv4.c
2636     +++ b/net/ipv4/tcp_ipv4.c
2637     @@ -922,7 +922,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
2638     }
2639    
2640     md5sig = rcu_dereference_protected(tp->md5sig_info,
2641     - sock_owned_by_user(sk));
2642     + sock_owned_by_user(sk) ||
2643     + lockdep_is_held(&sk->sk_lock.slock));
2644     if (!md5sig) {
2645     md5sig = kmalloc(sizeof(*md5sig), gfp);
2646     if (!md5sig)
2647     diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
2648     index 7149ebc820c7..04f0a052b524 100644
2649     --- a/net/ipv4/tcp_timer.c
2650     +++ b/net/ipv4/tcp_timer.c
2651     @@ -176,6 +176,18 @@ static int tcp_write_timeout(struct sock *sk)
2652     syn_set = true;
2653     } else {
2654     if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
2655     + /* Some middle-boxes may black-hole Fast Open _after_
2656     + * the handshake. Therefore we conservatively disable
2657     + * Fast Open on this path on recurring timeouts with
2658     + * few or zero bytes acked after Fast Open.
2659     + */
2660     + if (tp->syn_data_acked &&
2661     + tp->bytes_acked <= tp->rx_opt.mss_clamp) {
2662     + tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
2663     + if (icsk->icsk_retransmits == sysctl_tcp_retries1)
2664     + NET_INC_STATS_BH(sock_net(sk),
2665     + LINUX_MIB_TCPFASTOPENACTIVEFAIL);
2666     + }
2667     /* Black hole detection */
2668     tcp_mtu_probing(icsk, sk);
2669    
2670     diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
2671     index dd00828863a0..3939dd290c44 100644
2672     --- a/net/ipv6/addrconf.c
2673     +++ b/net/ipv6/addrconf.c
2674     @@ -3628,7 +3628,7 @@ static void addrconf_dad_work(struct work_struct *w)
2675    
2676     /* send a neighbour solicitation for our addr */
2677     addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
2678     - ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL);
2679     + ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
2680     out:
2681     in6_ifa_put(ifp);
2682     rtnl_unlock();
2683     diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
2684     index 44bb66bde0e2..38d66ddfb937 100644
2685     --- a/net/ipv6/af_inet6.c
2686     +++ b/net/ipv6/af_inet6.c
2687     @@ -428,9 +428,11 @@ void inet6_destroy_sock(struct sock *sk)
2688    
2689     /* Free tx options */
2690    
2691     - opt = xchg(&np->opt, NULL);
2692     - if (opt)
2693     - sock_kfree_s(sk, opt, opt->tot_len);
2694     + opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
2695     + if (opt) {
2696     + atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
2697     + txopt_put(opt);
2698     + }
2699     }
2700     EXPORT_SYMBOL_GPL(inet6_destroy_sock);
2701    
2702     @@ -659,7 +661,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
2703     fl6.fl6_sport = inet->inet_sport;
2704     security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
2705    
2706     - final_p = fl6_update_dst(&fl6, np->opt, &final);
2707     + rcu_read_lock();
2708     + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
2709     + &final);
2710     + rcu_read_unlock();
2711    
2712     dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
2713     if (IS_ERR(dst)) {
2714     diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
2715     index 9aadd57808a5..a42a673aa547 100644
2716     --- a/net/ipv6/datagram.c
2717     +++ b/net/ipv6/datagram.c
2718     @@ -167,8 +167,10 @@ ipv4_connected:
2719    
2720     security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
2721    
2722     - opt = flowlabel ? flowlabel->opt : np->opt;
2723     + rcu_read_lock();
2724     + opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
2725     final_p = fl6_update_dst(&fl6, opt, &final);
2726     + rcu_read_unlock();
2727    
2728     dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
2729     err = 0;
2730     diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
2731     index ce203b0402be..ea7c4d64a00a 100644
2732     --- a/net/ipv6/exthdrs.c
2733     +++ b/net/ipv6/exthdrs.c
2734     @@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
2735     *((char **)&opt2->dst1opt) += dif;
2736     if (opt2->srcrt)
2737     *((char **)&opt2->srcrt) += dif;
2738     + atomic_set(&opt2->refcnt, 1);
2739     }
2740     return opt2;
2741     }
2742     @@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
2743     return ERR_PTR(-ENOBUFS);
2744    
2745     memset(opt2, 0, tot_len);
2746     -
2747     + atomic_set(&opt2->refcnt, 1);
2748     opt2->tot_len = tot_len;
2749     p = (char *)(opt2 + 1);
2750    
2751     diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
2752     index 6927f3fb5597..9beed302eb36 100644
2753     --- a/net/ipv6/inet6_connection_sock.c
2754     +++ b/net/ipv6/inet6_connection_sock.c
2755     @@ -77,7 +77,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
2756     memset(fl6, 0, sizeof(*fl6));
2757     fl6->flowi6_proto = IPPROTO_TCP;
2758     fl6->daddr = ireq->ir_v6_rmt_addr;
2759     - final_p = fl6_update_dst(fl6, np->opt, &final);
2760     + rcu_read_lock();
2761     + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
2762     + rcu_read_unlock();
2763     fl6->saddr = ireq->ir_v6_loc_addr;
2764     fl6->flowi6_oif = ireq->ir_iif;
2765     fl6->flowi6_mark = ireq->ir_mark;
2766     @@ -207,7 +209,9 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
2767     fl6->fl6_dport = inet->inet_dport;
2768     security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
2769    
2770     - final_p = fl6_update_dst(fl6, np->opt, &final);
2771     + rcu_read_lock();
2772     + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
2773     + rcu_read_unlock();
2774    
2775     dst = __inet6_csk_dst_check(sk, np->dst_cookie);
2776     if (!dst) {
2777     @@ -240,7 +244,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
2778     /* Restore final destination back after routing done */
2779     fl6.daddr = sk->sk_v6_daddr;
2780    
2781     - res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
2782     + res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
2783     + np->tclass);
2784     rcu_read_unlock();
2785     return res;
2786     }
2787     diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
2788     index eabffbb89795..137fca42aaa6 100644
2789     --- a/net/ipv6/ip6_tunnel.c
2790     +++ b/net/ipv6/ip6_tunnel.c
2791     @@ -177,7 +177,7 @@ void ip6_tnl_dst_reset(struct ip6_tnl *t)
2792     int i;
2793    
2794     for_each_possible_cpu(i)
2795     - ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL);
2796     + ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
2797     }
2798     EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
2799    
2800     diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
2801     index 0e004cc42a22..35eee72ab4af 100644
2802     --- a/net/ipv6/ip6mr.c
2803     +++ b/net/ipv6/ip6mr.c
2804     @@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2805     int cmd);
2806     static int ip6mr_rtm_dumproute(struct sk_buff *skb,
2807     struct netlink_callback *cb);
2808     -static void mroute_clean_tables(struct mr6_table *mrt);
2809     +static void mroute_clean_tables(struct mr6_table *mrt, bool all);
2810     static void ipmr_expire_process(unsigned long arg);
2811    
2812     #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
2813     @@ -334,7 +334,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
2814     static void ip6mr_free_table(struct mr6_table *mrt)
2815     {
2816     del_timer_sync(&mrt->ipmr_expire_timer);
2817     - mroute_clean_tables(mrt);
2818     + mroute_clean_tables(mrt, true);
2819     kfree(mrt);
2820     }
2821    
2822     @@ -1542,7 +1542,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
2823     * Close the multicast socket, and clear the vif tables etc
2824     */
2825    
2826     -static void mroute_clean_tables(struct mr6_table *mrt)
2827     +static void mroute_clean_tables(struct mr6_table *mrt, bool all)
2828     {
2829     int i;
2830     LIST_HEAD(list);
2831     @@ -1552,8 +1552,9 @@ static void mroute_clean_tables(struct mr6_table *mrt)
2832     * Shut down all active vif entries
2833     */
2834     for (i = 0; i < mrt->maxvif; i++) {
2835     - if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
2836     - mif6_delete(mrt, i, &list);
2837     + if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
2838     + continue;
2839     + mif6_delete(mrt, i, &list);
2840     }
2841     unregister_netdevice_many(&list);
2842    
2843     @@ -1562,7 +1563,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
2844     */
2845     for (i = 0; i < MFC6_LINES; i++) {
2846     list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
2847     - if (c->mfc_flags & MFC_STATIC)
2848     + if (!all && (c->mfc_flags & MFC_STATIC))
2849     continue;
2850     write_lock_bh(&mrt_lock);
2851     list_del(&c->list);
2852     @@ -1625,7 +1626,7 @@ int ip6mr_sk_done(struct sock *sk)
2853     net->ipv6.devconf_all);
2854     write_unlock_bh(&mrt_lock);
2855    
2856     - mroute_clean_tables(mrt);
2857     + mroute_clean_tables(mrt, false);
2858     err = 0;
2859     break;
2860     }
2861     diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
2862     index 63e6956917c9..4449ad1f8114 100644
2863     --- a/net/ipv6/ipv6_sockglue.c
2864     +++ b/net/ipv6/ipv6_sockglue.c
2865     @@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
2866     icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
2867     }
2868     }
2869     - opt = xchg(&inet6_sk(sk)->opt, opt);
2870     + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
2871     + opt);
2872     sk_dst_reset(sk);
2873    
2874     return opt;
2875     @@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
2876     sk->sk_socket->ops = &inet_dgram_ops;
2877     sk->sk_family = PF_INET;
2878     }
2879     - opt = xchg(&np->opt, NULL);
2880     - if (opt)
2881     - sock_kfree_s(sk, opt, opt->tot_len);
2882     + opt = xchg((__force struct ipv6_txoptions **)&np->opt,
2883     + NULL);
2884     + if (opt) {
2885     + atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
2886     + txopt_put(opt);
2887     + }
2888     pktopt = xchg(&np->pktoptions, NULL);
2889     kfree_skb(pktopt);
2890    
2891     @@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
2892     if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
2893     break;
2894    
2895     - opt = ipv6_renew_options(sk, np->opt, optname,
2896     + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
2897     + opt = ipv6_renew_options(sk, opt, optname,
2898     (struct ipv6_opt_hdr __user *)optval,
2899     optlen);
2900     if (IS_ERR(opt)) {
2901     @@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
2902     retv = 0;
2903     opt = ipv6_update_options(sk, opt);
2904     sticky_done:
2905     - if (opt)
2906     - sock_kfree_s(sk, opt, opt->tot_len);
2907     + if (opt) {
2908     + atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
2909     + txopt_put(opt);
2910     + }
2911     break;
2912     }
2913    
2914     @@ -486,6 +493,7 @@ sticky_done:
2915     break;
2916    
2917     memset(opt, 0, sizeof(*opt));
2918     + atomic_set(&opt->refcnt, 1);
2919     opt->tot_len = sizeof(*opt) + optlen;
2920     retv = -EFAULT;
2921     if (copy_from_user(opt+1, optval, optlen))
2922     @@ -502,8 +510,10 @@ update:
2923     retv = 0;
2924     opt = ipv6_update_options(sk, opt);
2925     done:
2926     - if (opt)
2927     - sock_kfree_s(sk, opt, opt->tot_len);
2928     + if (opt) {
2929     + atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
2930     + txopt_put(opt);
2931     + }
2932     break;
2933     }
2934     case IPV6_UNICAST_HOPS:
2935     @@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
2936     case IPV6_RTHDR:
2937     case IPV6_DSTOPTS:
2938     {
2939     + struct ipv6_txoptions *opt;
2940    
2941     lock_sock(sk);
2942     - len = ipv6_getsockopt_sticky(sk, np->opt,
2943     - optname, optval, len);
2944     + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
2945     + len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
2946     release_sock(sk);
2947     /* check if ipv6_getsockopt_sticky() returns err code */
2948     if (len < 0)
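
The ipv6_sockglue.c hunks above, together with the txopt_get()/txopt_put() conversions in raw.c, udp.c and l2tp_ip6.c further down, move IPv6 tx options from sock_kfree_s() to reference counting, so a sender can keep using an options block that a concurrent setsockopt() swaps out. A minimal userspace sketch of that get/put discipline, assuming plain malloc/free storage (txopts, txopts_get and txopts_put are illustrative names, not the kernel helpers):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct txopts {
        atomic_int refcnt;
        int tot_len;
        /* option payload would follow here */
    };

    static struct txopts *txopts_get(struct txopts *o)
    {
        if (o)
            atomic_fetch_add(&o->refcnt, 1);  /* take a reference */
        return o;
    }

    static void txopts_put(struct txopts *o)
    {
        /* dropping the last reference frees the block */
        if (o && atomic_fetch_sub(&o->refcnt, 1) == 1)
            free(o);
    }

A writer that swaps the pointer out calls txopts_put() on the old block; readers bracket their use with txopts_get()/txopts_put(), so neither side frees memory the other still holds.
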
2949     diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
2950     index 083b2927fc67..41e3b5ee8d0b 100644
2951     --- a/net/ipv6/mcast.c
2952     +++ b/net/ipv6/mcast.c
2953     @@ -1651,7 +1651,6 @@ out:
2954     if (!err) {
2955     ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
2956     ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
2957     - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
2958     } else {
2959     IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
2960     }
2961     @@ -2014,7 +2013,6 @@ out:
2962     if (!err) {
2963     ICMP6MSGOUT_INC_STATS(net, idev, type);
2964     ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
2965     - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
2966     } else
2967     IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
2968    
2969     diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
2970     index 64a71354b069..9ad46cd7930d 100644
2971     --- a/net/ipv6/ndisc.c
2972     +++ b/net/ipv6/ndisc.c
2973     @@ -553,8 +553,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
2974    
2975     void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
2976     const struct in6_addr *solicit,
2977     - const struct in6_addr *daddr, const struct in6_addr *saddr,
2978     - struct sk_buff *oskb)
2979     + const struct in6_addr *daddr, const struct in6_addr *saddr)
2980     {
2981     struct sk_buff *skb;
2982     struct in6_addr addr_buf;
2983     @@ -590,9 +589,6 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
2984     ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
2985     dev->dev_addr);
2986    
2987     - if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb)
2988     - skb_dst_copy(skb, oskb);
2989     -
2990     ndisc_send_skb(skb, daddr, saddr);
2991     }
2992    
2993     @@ -679,12 +675,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
2994     "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
2995     __func__, target);
2996     }
2997     - ndisc_send_ns(dev, neigh, target, target, saddr, skb);
2998     + ndisc_send_ns(dev, neigh, target, target, saddr);
2999     } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
3000     neigh_app_ns(neigh);
3001     } else {
3002     addrconf_addr_solict_mult(target, &mcaddr);
3003     - ndisc_send_ns(dev, NULL, target, &mcaddr, saddr, skb);
3004     + ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
3005     }
3006     }
3007    
3008     diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
3009     index c7196ad1d69f..dc50143f50f2 100644
3010     --- a/net/ipv6/netfilter/nf_conntrack_reasm.c
3011     +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
3012     @@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data)
3013     /* Creation primitives. */
3014     static inline struct frag_queue *fq_find(struct net *net, __be32 id,
3015     u32 user, struct in6_addr *src,
3016     - struct in6_addr *dst, u8 ecn)
3017     + struct in6_addr *dst, int iif, u8 ecn)
3018     {
3019     struct inet_frag_queue *q;
3020     struct ip6_create_arg arg;
3021     @@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
3022     arg.user = user;
3023     arg.src = src;
3024     arg.dst = dst;
3025     + arg.iif = iif;
3026     arg.ecn = ecn;
3027    
3028     local_bh_disable();
3029     @@ -603,7 +604,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
3030     fhdr = (struct frag_hdr *)skb_transport_header(clone);
3031    
3032     fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
3033     - ip6_frag_ecn(hdr));
3034     + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
3035     if (fq == NULL) {
3036     pr_debug("Can't find and can't create new queue\n");
3037     goto ret_orig;
3038     diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
3039     index fdbada1569a3..fe977299551e 100644
3040     --- a/net/ipv6/raw.c
3041     +++ b/net/ipv6/raw.c
3042     @@ -732,6 +732,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
3043    
3044     static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
3045     {
3046     + struct ipv6_txoptions *opt_to_free = NULL;
3047     struct ipv6_txoptions opt_space;
3048     DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
3049     struct in6_addr *daddr, *final_p, final;
3050     @@ -838,8 +839,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
3051     if (!(opt->opt_nflen|opt->opt_flen))
3052     opt = NULL;
3053     }
3054     - if (!opt)
3055     - opt = np->opt;
3056     + if (!opt) {
3057     + opt = txopt_get(np);
3058     + opt_to_free = opt;
3059     + }
3060     if (flowlabel)
3061     opt = fl6_merge_options(&opt_space, flowlabel, opt);
3062     opt = ipv6_fixup_options(&opt_space, opt);
3063     @@ -905,6 +908,7 @@ done:
3064     dst_release(dst);
3065     out:
3066     fl6_sock_release(flowlabel);
3067     + txopt_put(opt_to_free);
3068     return err < 0 ? err : len;
3069     do_confirm:
3070     dst_confirm(dst);
3071     diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
3072     index f1159bb76e0a..04013a910ce5 100644
3073     --- a/net/ipv6/reassembly.c
3074     +++ b/net/ipv6/reassembly.c
3075     @@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
3076     return fq->id == arg->id &&
3077     fq->user == arg->user &&
3078     ipv6_addr_equal(&fq->saddr, arg->src) &&
3079     - ipv6_addr_equal(&fq->daddr, arg->dst);
3080     + ipv6_addr_equal(&fq->daddr, arg->dst) &&
3081     + (arg->iif == fq->iif ||
3082     + !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
3083     + IPV6_ADDR_LINKLOCAL)));
3084     }
3085     EXPORT_SYMBOL(ip6_frag_match);
3086    
3087     @@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data)
3088    
3089     static struct frag_queue *
3090     fq_find(struct net *net, __be32 id, const struct in6_addr *src,
3091     - const struct in6_addr *dst, u8 ecn)
3092     + const struct in6_addr *dst, int iif, u8 ecn)
3093     {
3094     struct inet_frag_queue *q;
3095     struct ip6_create_arg arg;
3096     @@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
3097     arg.user = IP6_DEFRAG_LOCAL_DELIVER;
3098     arg.src = src;
3099     arg.dst = dst;
3100     + arg.iif = iif;
3101     arg.ecn = ecn;
3102    
3103     hash = inet6_hash_frag(id, src, dst);
3104     @@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
3105     }
3106    
3107     fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
3108     - ip6_frag_ecn(hdr));
3109     + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
3110     if (fq) {
3111     int ret;
3112    
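
The fq_find()/ip6_frag_match() changes in this file and in nf_conntrack_reasm.c above key fragment queues on the inbound interface as well, but only for link-local or multicast destinations, where the same address may legitimately exist on several links. A hedged sketch of the widened predicate with simplified types (frag_key and dst_is_scoped are stand-ins; the kernel compares full source/destination addresses and calls ipv6_addr_type()):

    #include <stdbool.h>

    struct frag_key {
        unsigned id, user;
        int iif;              /* inbound interface index */
        bool dst_is_scoped;   /* stand-in for a MULTICAST or LINKLOCAL dst */
    };

    static bool frag_match(const struct frag_key *q, const struct frag_key *arg)
    {
        if (q->id != arg->id || q->user != arg->user)
            return false;
        /* the interface must match only when the destination is scoped */
        return q->iif == arg->iif || !arg->dst_is_scoped;
    }
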
3113     diff --git a/net/ipv6/route.c b/net/ipv6/route.c
3114     index 946880ad48ac..fd0e6746d0cf 100644
3115     --- a/net/ipv6/route.c
3116     +++ b/net/ipv6/route.c
3117     @@ -403,6 +403,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
3118     }
3119     }
3120    
3121     +static bool __rt6_check_expired(const struct rt6_info *rt)
3122     +{
3123     + if (rt->rt6i_flags & RTF_EXPIRES)
3124     + return time_after(jiffies, rt->dst.expires);
3125     + else
3126     + return false;
3127     +}
3128     +
3129     static bool rt6_check_expired(const struct rt6_info *rt)
3130     {
3131     if (rt->rt6i_flags & RTF_EXPIRES) {
3132     @@ -538,7 +546,7 @@ static void rt6_probe_deferred(struct work_struct *w)
3133     container_of(w, struct __rt6_probe_work, work);
3134    
3135     addrconf_addr_solict_mult(&work->target, &mcaddr);
3136     - ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
3137     + ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
3138     dev_put(work->dev);
3139     kfree(work);
3140     }
3141     @@ -1270,7 +1278,8 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
3142    
3143     static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
3144     {
3145     - if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3146     + if (!__rt6_check_expired(rt) &&
3147     + rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3148     rt6_check((struct rt6_info *)(rt->dst.from), cookie))
3149     return &rt->dst;
3150     else
3151     @@ -1290,7 +1299,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
3152    
3153     rt6_dst_from_metrics_check(rt);
3154    
3155     - if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
3156     + if (rt->rt6i_flags & RTF_PCPU ||
3157     + (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
3158     return rt6_dst_from_check(rt, cookie);
3159     else
3160     return rt6_check(rt, cookie);
3161     @@ -1340,6 +1350,12 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
3162     rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
3163     }
3164    
3165     +static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
3166     +{
3167     + return !(rt->rt6i_flags & RTF_CACHE) &&
3168     + (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
3169     +}
3170     +
3171     static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
3172     const struct ipv6hdr *iph, u32 mtu)
3173     {
3174     @@ -1353,7 +1369,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
3175     if (mtu >= dst_mtu(dst))
3176     return;
3177    
3178     - if (rt6->rt6i_flags & RTF_CACHE) {
3179     + if (!rt6_cache_allowed_for_pmtu(rt6)) {
3180     rt6_do_update_pmtu(rt6, mtu);
3181     } else {
3182     const struct in6_addr *daddr, *saddr;
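
__rt6_check_expired() above leans on the kernel's wraparound-safe jiffies comparison: time_after(a, b) stays correct even when the counter wraps, because it tests the signed difference rather than the raw values. The idiom, restated as standalone C (time_after_ul and route are illustrative, not kernel types):

    /* wraparound-safe "a is later than b", the time_after() idiom */
    static int time_after_ul(unsigned long a, unsigned long b)
    {
        return (long)(b - a) < 0;
    }

    struct route { unsigned long expires; int has_expiry; };

    static int route_expired(const struct route *rt, unsigned long now)
    {
        return rt->has_expiry && time_after_ul(now, rt->expires);
    }
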
3183     diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
3184     index 0909f4e0d53c..f30bfdcdea54 100644
3185     --- a/net/ipv6/syncookies.c
3186     +++ b/net/ipv6/syncookies.c
3187     @@ -225,7 +225,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
3188     memset(&fl6, 0, sizeof(fl6));
3189     fl6.flowi6_proto = IPPROTO_TCP;
3190     fl6.daddr = ireq->ir_v6_rmt_addr;
3191     - final_p = fl6_update_dst(&fl6, np->opt, &final);
3192     + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
3193     fl6.saddr = ireq->ir_v6_loc_addr;
3194     fl6.flowi6_oif = sk->sk_bound_dev_if;
3195     fl6.flowi6_mark = ireq->ir_mark;
3196     diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
3197     index 97d9314ea361..9e9b77bd2d0a 100644
3198     --- a/net/ipv6/tcp_ipv6.c
3199     +++ b/net/ipv6/tcp_ipv6.c
3200     @@ -120,6 +120,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
3201     struct ipv6_pinfo *np = inet6_sk(sk);
3202     struct tcp_sock *tp = tcp_sk(sk);
3203     struct in6_addr *saddr = NULL, *final_p, final;
3204     + struct ipv6_txoptions *opt;
3205     struct flowi6 fl6;
3206     struct dst_entry *dst;
3207     int addr_type;
3208     @@ -235,7 +236,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
3209     fl6.fl6_dport = usin->sin6_port;
3210     fl6.fl6_sport = inet->inet_sport;
3211    
3212     - final_p = fl6_update_dst(&fl6, np->opt, &final);
3213     + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
3214     + final_p = fl6_update_dst(&fl6, opt, &final);
3215    
3216     security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
3217    
3218     @@ -263,9 +265,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
3219     tcp_fetch_timewait_stamp(sk, dst);
3220    
3221     icsk->icsk_ext_hdr_len = 0;
3222     - if (np->opt)
3223     - icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
3224     - np->opt->opt_nflen);
3225     + if (opt)
3226     + icsk->icsk_ext_hdr_len = opt->opt_flen +
3227     + opt->opt_nflen;
3228    
3229     tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
3230    
3231     @@ -461,7 +463,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
3232     fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
3233    
3234     skb_set_queue_mapping(skb, queue_mapping);
3235     - err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
3236     + err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
3237     + np->tclass);
3238     err = net_xmit_eval(err);
3239     }
3240    
3241     @@ -991,6 +994,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
3242     struct inet_request_sock *ireq;
3243     struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
3244     struct tcp6_sock *newtcp6sk;
3245     + struct ipv6_txoptions *opt;
3246     struct inet_sock *newinet;
3247     struct tcp_sock *newtp;
3248     struct sock *newsk;
3249     @@ -1126,13 +1130,15 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
3250     but we make one more one thing there: reattach optmem
3251     to newsk.
3252     */
3253     - if (np->opt)
3254     - newnp->opt = ipv6_dup_options(newsk, np->opt);
3255     -
3256     + opt = rcu_dereference(np->opt);
3257     + if (opt) {
3258     + opt = ipv6_dup_options(newsk, opt);
3259     + RCU_INIT_POINTER(newnp->opt, opt);
3260     + }
3261     inet_csk(newsk)->icsk_ext_hdr_len = 0;
3262     - if (newnp->opt)
3263     - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
3264     - newnp->opt->opt_flen);
3265     + if (opt)
3266     + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
3267     + opt->opt_flen;
3268    
3269     tcp_ca_openreq_child(newsk, dst);
3270    
3271     diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
3272     index 0aba654f5b91..8379fc2f4b1d 100644
3273     --- a/net/ipv6/udp.c
3274     +++ b/net/ipv6/udp.c
3275     @@ -1107,6 +1107,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
3276     DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
3277     struct in6_addr *daddr, *final_p, final;
3278     struct ipv6_txoptions *opt = NULL;
3279     + struct ipv6_txoptions *opt_to_free = NULL;
3280     struct ip6_flowlabel *flowlabel = NULL;
3281     struct flowi6 fl6;
3282     struct dst_entry *dst;
3283     @@ -1260,8 +1261,10 @@ do_udp_sendmsg:
3284     opt = NULL;
3285     connected = 0;
3286     }
3287     - if (!opt)
3288     - opt = np->opt;
3289     + if (!opt) {
3290     + opt = txopt_get(np);
3291     + opt_to_free = opt;
3292     + }
3293     if (flowlabel)
3294     opt = fl6_merge_options(&opt_space, flowlabel, opt);
3295     opt = ipv6_fixup_options(&opt_space, opt);
3296     @@ -1370,6 +1373,7 @@ release_dst:
3297     out:
3298     dst_release(dst);
3299     fl6_sock_release(flowlabel);
3300     + txopt_put(opt_to_free);
3301     if (!err)
3302     return len;
3303     /*
3304     diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
3305     index d1ded3777815..0ce9da948ad7 100644
3306     --- a/net/l2tp/l2tp_ip6.c
3307     +++ b/net/l2tp/l2tp_ip6.c
3308     @@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
3309     DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
3310     struct in6_addr *daddr, *final_p, final;
3311     struct ipv6_pinfo *np = inet6_sk(sk);
3312     + struct ipv6_txoptions *opt_to_free = NULL;
3313     struct ipv6_txoptions *opt = NULL;
3314     struct ip6_flowlabel *flowlabel = NULL;
3315     struct dst_entry *dst = NULL;
3316     @@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
3317     opt = NULL;
3318     }
3319    
3320     - if (opt == NULL)
3321     - opt = np->opt;
3322     + if (!opt) {
3323     + opt = txopt_get(np);
3324     + opt_to_free = opt;
3325     + }
3326     if (flowlabel)
3327     opt = fl6_merge_options(&opt_space, flowlabel, opt);
3328     opt = ipv6_fixup_options(&opt_space, opt);
3329     @@ -631,6 +634,7 @@ done:
3330     dst_release(dst);
3331     out:
3332     fl6_sock_release(flowlabel);
3333     + txopt_put(opt_to_free);
3334    
3335     return err < 0 ? err : len;
3336    
3337     diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
3338     index a7a80a6b77b0..653d073bae45 100644
3339     --- a/net/openvswitch/dp_notify.c
3340     +++ b/net/openvswitch/dp_notify.c
3341     @@ -58,7 +58,7 @@ void ovs_dp_notify_wq(struct work_struct *work)
3342     struct hlist_node *n;
3343    
3344     hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
3345     - if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
3346     + if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL)
3347     continue;
3348    
3349     if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH))
3350     diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
3351     index f7e8dcce7ada..ac14c488669c 100644
3352     --- a/net/openvswitch/vport-netdev.c
3353     +++ b/net/openvswitch/vport-netdev.c
3354     @@ -180,9 +180,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
3355     if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
3356     ovs_netdev_detach_dev(vport);
3357    
3358     - /* Early release so we can unregister the device */
3359     + /* We can be invoked by both explicit vport deletion and
3360     + * underlying netdev deregistration; delete the link only
3361     + * if it's not already shutting down.
3362     + */
3363     + if (vport->dev->reg_state == NETREG_REGISTERED)
3364     + rtnl_delete_link(vport->dev);
3365     dev_put(vport->dev);
3366     - rtnl_delete_link(vport->dev);
3367     vport->dev = NULL;
3368     rtnl_unlock();
3369    
3370     diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
3371     index 27b2898f275c..4695a36eeca3 100644
3372     --- a/net/packet/af_packet.c
3373     +++ b/net/packet/af_packet.c
3374     @@ -1741,6 +1741,20 @@ static void fanout_release(struct sock *sk)
3375     kfree_rcu(po->rollover, rcu);
3376     }
3377    
3378     +static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
3379     + struct sk_buff *skb)
3380     +{
3381     + /* Earlier code assumed this would be a VLAN pkt, double-check
3382     + * this now that we have the actual packet in hand. We can only
3383     + * do this check on Ethernet devices.
3384     + */
3385     + if (unlikely(dev->type != ARPHRD_ETHER))
3386     + return false;
3387     +
3388     + skb_reset_mac_header(skb);
3389     + return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
3390     +}
3391     +
3392     static const struct proto_ops packet_ops;
3393    
3394     static const struct proto_ops packet_ops_spkt;
3395     @@ -1902,18 +1916,10 @@ retry:
3396     goto retry;
3397     }
3398    
3399     - if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
3400     - /* Earlier code assumed this would be a VLAN pkt,
3401     - * double-check this now that we have the actual
3402     - * packet in hand.
3403     - */
3404     - struct ethhdr *ehdr;
3405     - skb_reset_mac_header(skb);
3406     - ehdr = eth_hdr(skb);
3407     - if (ehdr->h_proto != htons(ETH_P_8021Q)) {
3408     - err = -EMSGSIZE;
3409     - goto out_unlock;
3410     - }
3411     + if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
3412     + !packet_extra_vlan_len_allowed(dev, skb)) {
3413     + err = -EMSGSIZE;
3414     + goto out_unlock;
3415     }
3416    
3417     skb->protocol = proto;
3418     @@ -2332,6 +2338,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len)
3419     return false;
3420     }
3421    
3422     +static void tpacket_set_protocol(const struct net_device *dev,
3423     + struct sk_buff *skb)
3424     +{
3425     + if (dev->type == ARPHRD_ETHER) {
3426     + skb_reset_mac_header(skb);
3427     + skb->protocol = eth_hdr(skb)->h_proto;
3428     + }
3429     +}
3430     +
3431     static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
3432     void *frame, struct net_device *dev, int size_max,
3433     __be16 proto, unsigned char *addr, int hlen)
3434     @@ -2368,8 +2383,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
3435     skb_reserve(skb, hlen);
3436     skb_reset_network_header(skb);
3437    
3438     - if (!packet_use_direct_xmit(po))
3439     - skb_probe_transport_header(skb, 0);
3440     if (unlikely(po->tp_tx_has_off)) {
3441     int off_min, off_max, off;
3442     off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
3443     @@ -2415,6 +2428,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
3444     dev->hard_header_len);
3445     if (unlikely(err))
3446     return err;
3447     + if (!skb->protocol)
3448     + tpacket_set_protocol(dev, skb);
3449    
3450     data += dev->hard_header_len;
3451     to_write -= dev->hard_header_len;
3452     @@ -2449,6 +2464,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
3453     len = ((to_write > len_max) ? len_max : to_write);
3454     }
3455    
3456     + skb_probe_transport_header(skb, 0);
3457     +
3458     return tp_len;
3459     }
3460    
3461     @@ -2493,12 +2510,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
3462     if (unlikely(!(dev->flags & IFF_UP)))
3463     goto out_put;
3464    
3465     - reserve = dev->hard_header_len + VLAN_HLEN;
3466     + if (po->sk.sk_socket->type == SOCK_RAW)
3467     + reserve = dev->hard_header_len;
3468     size_max = po->tx_ring.frame_size
3469     - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
3470    
3471     - if (size_max > dev->mtu + reserve)
3472     - size_max = dev->mtu + reserve;
3473     + if (size_max > dev->mtu + reserve + VLAN_HLEN)
3474     + size_max = dev->mtu + reserve + VLAN_HLEN;
3475    
3476     do {
3477     ph = packet_current_frame(po, &po->tx_ring,
3478     @@ -2525,18 +2543,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
3479     tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
3480     addr, hlen);
3481     if (likely(tp_len >= 0) &&
3482     - tp_len > dev->mtu + dev->hard_header_len) {
3483     - struct ethhdr *ehdr;
3484     - /* Earlier code assumed this would be a VLAN pkt,
3485     - * double-check this now that we have the actual
3486     - * packet in hand.
3487     - */
3488     + tp_len > dev->mtu + reserve &&
3489     + !packet_extra_vlan_len_allowed(dev, skb))
3490     + tp_len = -EMSGSIZE;
3491    
3492     - skb_reset_mac_header(skb);
3493     - ehdr = eth_hdr(skb);
3494     - if (ehdr->h_proto != htons(ETH_P_8021Q))
3495     - tp_len = -EMSGSIZE;
3496     - }
3497     if (unlikely(tp_len < 0)) {
3498     if (po->tp_loss) {
3499     __packet_set_status(po, ph,
3500     @@ -2757,18 +2767,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
3501    
3502     sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
3503    
3504     - if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
3505     - /* Earlier code assumed this would be a VLAN pkt,
3506     - * double-check this now that we have the actual
3507     - * packet in hand.
3508     - */
3509     - struct ethhdr *ehdr;
3510     - skb_reset_mac_header(skb);
3511     - ehdr = eth_hdr(skb);
3512     - if (ehdr->h_proto != htons(ETH_P_8021Q)) {
3513     - err = -EMSGSIZE;
3514     - goto out_free;
3515     - }
3516     + if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
3517     + !packet_extra_vlan_len_allowed(dev, skb)) {
3518     + err = -EMSGSIZE;
3519     + goto out_free;
3520     }
3521    
3522     skb->protocol = proto;
3523     @@ -2799,8 +2801,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
3524     len += vnet_hdr_len;
3525     }
3526    
3527     - if (!packet_use_direct_xmit(po))
3528     - skb_probe_transport_header(skb, reserve);
3529     + skb_probe_transport_header(skb, reserve);
3530     +
3531     if (unlikely(extra_len == 4))
3532     skb->no_fcs = 1;
3533    
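
The af_packet.c changes collapse three copies of the oversized-frame test into packet_extra_vlan_len_allowed(): a frame longer than MTU plus the link-layer header is tolerated only when it actually carries an 802.1Q tag, and that can only be checked on Ethernet devices. A self-contained sketch of the predicate (constants inlined, eth_hdr() replaced by a plain struct read; eth_frame_hdr is an illustrative name):

    #include <stdbool.h>
    #include <stdint.h>
    #include <arpa/inet.h>   /* htons */

    #define ARPHRD_ETHER 1
    #define ETH_P_8021Q  0x8100

    struct eth_frame_hdr { uint8_t dst[6], src[6]; uint16_t proto; };

    static bool extra_vlan_len_allowed(int dev_type, const void *frame)
    {
        const struct eth_frame_hdr *eh = frame;

        if (dev_type != ARPHRD_ETHER)   /* only Ethernet can carry the tag */
            return false;
        return eh->proto == htons(ETH_P_8021Q);
    }
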
3534     diff --git a/net/rds/connection.c b/net/rds/connection.c
3535     index 49adeef8090c..9b2de5e67d79 100644
3536     --- a/net/rds/connection.c
3537     +++ b/net/rds/connection.c
3538     @@ -190,12 +190,6 @@ new_conn:
3539     }
3540     }
3541    
3542     - if (trans == NULL) {
3543     - kmem_cache_free(rds_conn_slab, conn);
3544     - conn = ERR_PTR(-ENODEV);
3545     - goto out;
3546     - }
3547     -
3548     conn->c_trans = trans;
3549    
3550     ret = trans->conn_alloc(conn, gfp);
3551     diff --git a/net/rds/send.c b/net/rds/send.c
3552     index 4df61a515b83..859de6f32521 100644
3553     --- a/net/rds/send.c
3554     +++ b/net/rds/send.c
3555     @@ -1009,11 +1009,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
3556     release_sock(sk);
3557     }
3558    
3559     - /* racing with another thread binding seems ok here */
3560     + lock_sock(sk);
3561     if (daddr == 0 || rs->rs_bound_addr == 0) {
3562     + release_sock(sk);
3563     ret = -ENOTCONN; /* XXX not a great errno */
3564     goto out;
3565     }
3566     + release_sock(sk);
3567    
3568     if (payload_len > rds_sk_sndbuf(rs)) {
3569     ret = -EMSGSIZE;
3570     diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
3571     index f43c8f33f09e..7ec667dd4ce1 100644
3572     --- a/net/sched/sch_api.c
3573     +++ b/net/sched/sch_api.c
3574     @@ -253,7 +253,8 @@ int qdisc_set_default(const char *name)
3575     }
3576    
3577     /* We know handle. Find qdisc among all qdisc's attached to device
3578     - (root qdisc, all its children, children of children etc.)
3579     + * (root qdisc, all its children, children of children etc.)
3580     + * Note: caller either uses rtnl or rcu_read_lock()
3581     */
3582    
3583     static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
3584     @@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
3585     root->handle == handle)
3586     return root;
3587    
3588     - list_for_each_entry(q, &root->list, list) {
3589     + list_for_each_entry_rcu(q, &root->list, list) {
3590     if (q->handle == handle)
3591     return q;
3592     }
3593     @@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q)
3594     struct Qdisc *root = qdisc_dev(q)->qdisc;
3595    
3596     WARN_ON_ONCE(root == &noop_qdisc);
3597     - list_add_tail(&q->list, &root->list);
3598     + ASSERT_RTNL();
3599     + list_add_tail_rcu(&q->list, &root->list);
3600     }
3601     }
3602     EXPORT_SYMBOL(qdisc_list_add);
3603    
3604     void qdisc_list_del(struct Qdisc *q)
3605     {
3606     - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
3607     - list_del(&q->list);
3608     + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
3609     + ASSERT_RTNL();
3610     + list_del_rcu(&q->list);
3611     + }
3612     }
3613     EXPORT_SYMBOL(qdisc_list_del);
3614    
3615     @@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
3616     if (n == 0)
3617     return;
3618     drops = max_t(int, n, 0);
3619     + rcu_read_lock();
3620     while ((parentid = sch->parent)) {
3621     if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
3622     - return;
3623     + break;
3624    
3625     + if (sch->flags & TCQ_F_NOPARENT)
3626     + break;
3627     + /* TODO: perform the search on a per txq basis */
3628     sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
3629     if (sch == NULL) {
3630     - WARN_ON(parentid != TC_H_ROOT);
3631     - return;
3632     + WARN_ON_ONCE(parentid != TC_H_ROOT);
3633     + break;
3634     }
3635     cops = sch->ops->cl_ops;
3636     if (cops->qlen_notify) {
3637     @@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
3638     sch->q.qlen -= n;
3639     __qdisc_qstats_drop(sch, drops);
3640     }
3641     + rcu_read_unlock();
3642     }
3643     EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
3644    
3645     @@ -941,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
3646     }
3647     lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
3648     if (!netif_is_multiqueue(dev))
3649     - sch->flags |= TCQ_F_ONETXQUEUE;
3650     + sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
3651     }
3652    
3653     sch->handle = handle;
3654     diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
3655     index cb5d4ad32946..e82a1ad80aa5 100644
3656     --- a/net/sched/sch_generic.c
3657     +++ b/net/sched/sch_generic.c
3658     @@ -737,7 +737,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
3659     return;
3660     }
3661     if (!netif_is_multiqueue(dev))
3662     - qdisc->flags |= TCQ_F_ONETXQUEUE;
3663     + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
3664     dev_queue->qdisc_sleeping = qdisc;
3665     }
3666    
3667     diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
3668     index f3cbaecd283a..3e82f047caaf 100644
3669     --- a/net/sched/sch_mq.c
3670     +++ b/net/sched/sch_mq.c
3671     @@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
3672     if (qdisc == NULL)
3673     goto err;
3674     priv->qdiscs[ntx] = qdisc;
3675     - qdisc->flags |= TCQ_F_ONETXQUEUE;
3676     + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
3677     }
3678    
3679     sch->flags |= TCQ_F_MQROOT;
3680     @@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
3681    
3682     *old = dev_graft_qdisc(dev_queue, new);
3683     if (new)
3684     - new->flags |= TCQ_F_ONETXQUEUE;
3685     + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
3686     if (dev->flags & IFF_UP)
3687     dev_activate(dev);
3688     return 0;
3689     diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
3690     index 3811a745452c..ad70ecf57ce7 100644
3691     --- a/net/sched/sch_mqprio.c
3692     +++ b/net/sched/sch_mqprio.c
3693     @@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
3694     goto err;
3695     }
3696     priv->qdiscs[i] = qdisc;
3697     - qdisc->flags |= TCQ_F_ONETXQUEUE;
3698     + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
3699     }
3700    
3701     /* If the mqprio options indicate that hardware should own
3702     @@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
3703     *old = dev_graft_qdisc(dev_queue, new);
3704    
3705     if (new)
3706     - new->flags |= TCQ_F_ONETXQUEUE;
3707     + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
3708    
3709     if (dev->flags & IFF_UP)
3710     dev_activate(dev);
3711     diff --git a/net/sctp/auth.c b/net/sctp/auth.c
3712     index 4f15b7d730e1..1543e39f47c3 100644
3713     --- a/net/sctp/auth.c
3714     +++ b/net/sctp/auth.c
3715     @@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
3716     if (!has_sha1)
3717     return -EINVAL;
3718    
3719     - memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0],
3720     - hmacs->shmac_num_idents * sizeof(__u16));
3721     + for (i = 0; i < hmacs->shmac_num_idents; i++)
3722     + ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
3723     ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
3724     hmacs->shmac_num_idents * sizeof(__u16));
3725     return 0;
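
The sctp/auth.c hunk is an endianness fix: hmac_ids lives in wire format, so each 16-bit ident must go through htons() instead of being memcpy()'d in host order (a no-op on big-endian machines, wrong on little-endian ones). The same conversion as a minimal standalone loop (put_ids_be is an illustrative name):

    #include <arpa/inet.h>
    #include <stddef.h>
    #include <stdint.h>

    /* copy host-order ids into a network-order (wire format) array */
    static void put_ids_be(uint16_t *wire, const uint16_t *host, size_t n)
    {
        for (size_t i = 0; i < n; i++)
            wire[i] = htons(host[i]);  /* memcpy() would keep host order */
    }
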
3726     diff --git a/net/sctp/socket.c b/net/sctp/socket.c
3727     index 17bef01b9aa3..3ec88be0faec 100644
3728     --- a/net/sctp/socket.c
3729     +++ b/net/sctp/socket.c
3730     @@ -7375,6 +7375,13 @@ struct proto sctp_prot = {
3731    
3732     #if IS_ENABLED(CONFIG_IPV6)
3733    
3734     +#include <net/transp_v6.h>
3735     +static void sctp_v6_destroy_sock(struct sock *sk)
3736     +{
3737     + sctp_destroy_sock(sk);
3738     + inet6_destroy_sock(sk);
3739     +}
3740     +
3741     struct proto sctpv6_prot = {
3742     .name = "SCTPv6",
3743     .owner = THIS_MODULE,
3744     @@ -7384,7 +7391,7 @@ struct proto sctpv6_prot = {
3745     .accept = sctp_accept,
3746     .ioctl = sctp_ioctl,
3747     .init = sctp_init_sock,
3748     - .destroy = sctp_destroy_sock,
3749     + .destroy = sctp_v6_destroy_sock,
3750     .shutdown = sctp_shutdown,
3751     .setsockopt = sctp_setsockopt,
3752     .getsockopt = sctp_getsockopt,
3753     diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
3754     index cd7c5f131e72..86f2e7c44694 100644
3755     --- a/net/tipc/udp_media.c
3756     +++ b/net/tipc/udp_media.c
3757     @@ -159,8 +159,11 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
3758     struct sk_buff *clone;
3759     struct rtable *rt;
3760    
3761     - if (skb_headroom(skb) < UDP_MIN_HEADROOM)
3762     - pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
3763     + if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
3764     + err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
3765     + if (err)
3766     + goto tx_error;
3767     + }
3768    
3769     clone = skb_clone(skb, GFP_ATOMIC);
3770     skb_set_inner_protocol(clone, htons(ETH_P_TIPC));
3771     diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
3772     index 94f658235fb4..128b0982c96b 100644
3773     --- a/net/unix/af_unix.c
3774     +++ b/net/unix/af_unix.c
3775     @@ -326,6 +326,118 @@ found:
3776     return s;
3777     }
3778    
3779     +/* Support code for asymmetrically connected dgram sockets
3780     + *
3781     + * If a datagram socket is connected to a socket not itself connected
3782     + * to the first socket (eg, /dev/log), clients may only enqueue more
3783     + * messages if the present receive queue of the server socket is not
3784     + * "too large". This means there's a second writeability condition
3785     + * poll and sendmsg need to test. The dgram recv code will do a wake
3786     + * up on the peer_wait wait queue of a socket upon reception of a
3787     + * datagram which needs to be propagated to sleeping would-be writers
3788     + * since these might not have sent anything so far. This can't be
3789     + * accomplished via poll_wait because the lifetime of the server
3790     + * socket might be less than that of its clients if these break their
3791     + * association with it or if the server socket is closed while clients
3792     + * are still connected to it and there's no way to inform "a polling
3793     + * implementation" that it should let go of a certain wait queue
3794     + *
3795     + * In order to propagate a wake up, a wait_queue_t of the client
3796     + * socket is enqueued on the peer_wait queue of the server socket
3797     + * whose wake function does a wake_up on the ordinary client socket
3798     + * wait queue. This connection is established whenever a write (or
3799     + * poll for write) hit the flow control condition and broken when the
3800     + * association to the server socket is dissolved or after a wake up
3801     + * was relayed.
3802     + */
3803     +
3804     +static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
3805     + void *key)
3806     +{
3807     + struct unix_sock *u;
3808     + wait_queue_head_t *u_sleep;
3809     +
3810     + u = container_of(q, struct unix_sock, peer_wake);
3811     +
3812     + __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
3813     + q);
3814     + u->peer_wake.private = NULL;
3815     +
3816     + /* relaying can only happen while the wq still exists */
3817     + u_sleep = sk_sleep(&u->sk);
3818     + if (u_sleep)
3819     + wake_up_interruptible_poll(u_sleep, key);
3820     +
3821     + return 0;
3822     +}
3823     +
3824     +static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
3825     +{
3826     + struct unix_sock *u, *u_other;
3827     + int rc;
3828     +
3829     + u = unix_sk(sk);
3830     + u_other = unix_sk(other);
3831     + rc = 0;
3832     + spin_lock(&u_other->peer_wait.lock);
3833     +
3834     + if (!u->peer_wake.private) {
3835     + u->peer_wake.private = other;
3836     + __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
3837     +
3838     + rc = 1;
3839     + }
3840     +
3841     + spin_unlock(&u_other->peer_wait.lock);
3842     + return rc;
3843     +}
3844     +
3845     +static void unix_dgram_peer_wake_disconnect(struct sock *sk,
3846     + struct sock *other)
3847     +{
3848     + struct unix_sock *u, *u_other;
3849     +
3850     + u = unix_sk(sk);
3851     + u_other = unix_sk(other);
3852     + spin_lock(&u_other->peer_wait.lock);
3853     +
3854     + if (u->peer_wake.private == other) {
3855     + __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
3856     + u->peer_wake.private = NULL;
3857     + }
3858     +
3859     + spin_unlock(&u_other->peer_wait.lock);
3860     +}
3861     +
3862     +static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
3863     + struct sock *other)
3864     +{
3865     + unix_dgram_peer_wake_disconnect(sk, other);
3866     + wake_up_interruptible_poll(sk_sleep(sk),
3867     + POLLOUT |
3868     + POLLWRNORM |
3869     + POLLWRBAND);
3870     +}
3871     +
3872     +/* preconditions:
3873     + * - unix_peer(sk) == other
3874     + * - association is stable
3875     + */
3876     +static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
3877     +{
3878     + int connected;
3879     +
3880     + connected = unix_dgram_peer_wake_connect(sk, other);
3881     +
3882     + if (unix_recvq_full(other))
3883     + return 1;
3884     +
3885     + if (connected)
3886     + unix_dgram_peer_wake_disconnect(sk, other);
3887     +
3888     + return 0;
3889     +}
3890     +
3891     static inline int unix_writable(struct sock *sk)
3892     {
3893     return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
3894     @@ -430,6 +542,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
3895     skpair->sk_state_change(skpair);
3896     sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
3897     }
3898     +
3899     + unix_dgram_peer_wake_disconnect(sk, skpair);
3900     sock_put(skpair); /* It may now die */
3901     unix_peer(sk) = NULL;
3902     }
3903     @@ -440,6 +554,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
3904     if (state == TCP_LISTEN)
3905     unix_release_sock(skb->sk, 1);
3906     /* passed fds are erased in the kfree_skb hook */
3907     + UNIXCB(skb).consumed = skb->len;
3908     kfree_skb(skb);
3909     }
3910    
3911     @@ -664,6 +779,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
3912     INIT_LIST_HEAD(&u->link);
3913     mutex_init(&u->readlock); /* single task reading lock */
3914     init_waitqueue_head(&u->peer_wait);
3915     + init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
3916     unix_insert_socket(unix_sockets_unbound(sk), sk);
3917     out:
3918     if (sk == NULL)
3919     @@ -1031,6 +1147,8 @@ restart:
3920     if (unix_peer(sk)) {
3921     struct sock *old_peer = unix_peer(sk);
3922     unix_peer(sk) = other;
3923     + unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
3924     +
3925     unix_state_double_unlock(sk, other);
3926    
3927     if (other != old_peer)
3928     @@ -1432,6 +1550,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
3929     return err;
3930     }
3931    
3932     +static bool unix_passcred_enabled(const struct socket *sock,
3933     + const struct sock *other)
3934     +{
3935     + return test_bit(SOCK_PASSCRED, &sock->flags) ||
3936     + !other->sk_socket ||
3937     + test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
3938     +}
3939     +
3940     /*
3941     * Some apps rely on write() giving SCM_CREDENTIALS
3942     * We include credentials if source or destination socket
3943     @@ -1442,14 +1568,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
3944     {
3945     if (UNIXCB(skb).pid)
3946     return;
3947     - if (test_bit(SOCK_PASSCRED, &sock->flags) ||
3948     - !other->sk_socket ||
3949     - test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
3950     + if (unix_passcred_enabled(sock, other)) {
3951     UNIXCB(skb).pid = get_pid(task_tgid(current));
3952     current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
3953     }
3954     }
3955    
3956     +static int maybe_init_creds(struct scm_cookie *scm,
3957     + struct socket *socket,
3958     + const struct sock *other)
3959     +{
3960     + int err;
3961     + struct msghdr msg = { .msg_controllen = 0 };
3962     +
3963     + err = scm_send(socket, &msg, scm, false);
3964     + if (err)
3965     + return err;
3966     +
3967     + if (unix_passcred_enabled(socket, other)) {
3968     + scm->pid = get_pid(task_tgid(current));
3969     + current_uid_gid(&scm->creds.uid, &scm->creds.gid);
3970     + }
3971     + return err;
3972     +}
3973     +
3974     +static bool unix_skb_scm_eq(struct sk_buff *skb,
3975     + struct scm_cookie *scm)
3976     +{
3977     + const struct unix_skb_parms *u = &UNIXCB(skb);
3978     +
3979     + return u->pid == scm->pid &&
3980     + uid_eq(u->uid, scm->creds.uid) &&
3981     + gid_eq(u->gid, scm->creds.gid) &&
3982     + unix_secdata_eq(scm, skb);
3983     +}
3984     +
3985     /*
3986     * Send AF_UNIX data.
3987     */
3988     @@ -1470,6 +1623,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
3989     struct scm_cookie scm;
3990     int max_level;
3991     int data_len = 0;
3992     + int sk_locked;
3993    
3994     wait_for_unix_gc();
3995     err = scm_send(sock, msg, &scm, false);
3996     @@ -1548,12 +1702,14 @@ restart:
3997     goto out_free;
3998     }
3999    
4000     + sk_locked = 0;
4001     unix_state_lock(other);
4002     +restart_locked:
4003     err = -EPERM;
4004     if (!unix_may_send(sk, other))
4005     goto out_unlock;
4006    
4007     - if (sock_flag(other, SOCK_DEAD)) {
4008     + if (unlikely(sock_flag(other, SOCK_DEAD))) {
4009     /*
4010     * Check with 1003.1g - what should
4011     * datagram error
4012     @@ -1561,10 +1717,14 @@ restart:
4013     unix_state_unlock(other);
4014     sock_put(other);
4015    
4016     + if (!sk_locked)
4017     + unix_state_lock(sk);
4018     +
4019     err = 0;
4020     - unix_state_lock(sk);
4021     if (unix_peer(sk) == other) {
4022     unix_peer(sk) = NULL;
4023     + unix_dgram_peer_wake_disconnect_wakeup(sk, other);
4024     +
4025     unix_state_unlock(sk);
4026    
4027     unix_dgram_disconnected(sk, other);
4028     @@ -1590,21 +1750,38 @@ restart:
4029     goto out_unlock;
4030     }
4031    
4032     - if (unix_peer(other) != sk && unix_recvq_full(other)) {
4033     - if (!timeo) {
4034     - err = -EAGAIN;
4035     - goto out_unlock;
4036     + if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
4037     + if (timeo) {
4038     + timeo = unix_wait_for_peer(other, timeo);
4039     +
4040     + err = sock_intr_errno(timeo);
4041     + if (signal_pending(current))
4042     + goto out_free;
4043     +
4044     + goto restart;
4045     }
4046    
4047     - timeo = unix_wait_for_peer(other, timeo);
4048     + if (!sk_locked) {
4049     + unix_state_unlock(other);
4050     + unix_state_double_lock(sk, other);
4051     + }
4052    
4053     - err = sock_intr_errno(timeo);
4054     - if (signal_pending(current))
4055     - goto out_free;
4056     + if (unix_peer(sk) != other ||
4057     + unix_dgram_peer_wake_me(sk, other)) {
4058     + err = -EAGAIN;
4059     + sk_locked = 1;
4060     + goto out_unlock;
4061     + }
4062    
4063     - goto restart;
4064     + if (!sk_locked) {
4065     + sk_locked = 1;
4066     + goto restart_locked;
4067     + }
4068     }
4069    
4070     + if (unlikely(sk_locked))
4071     + unix_state_unlock(sk);
4072     +
4073     if (sock_flag(other, SOCK_RCVTSTAMP))
4074     __net_timestamp(skb);
4075     maybe_add_creds(skb, sock, other);
4076     @@ -1618,6 +1795,8 @@ restart:
4077     return len;
4078    
4079     out_unlock:
4080     + if (sk_locked)
4081     + unix_state_unlock(sk);
4082     unix_state_unlock(other);
4083     out_free:
4084     kfree_skb(skb);
4085     @@ -1739,8 +1918,10 @@ out_err:
4086     static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
4087     int offset, size_t size, int flags)
4088     {
4089     - int err = 0;
4090     - bool send_sigpipe = true;
4091     + int err;
4092     + bool send_sigpipe = false;
4093     + bool init_scm = true;
4094     + struct scm_cookie scm;
4095     struct sock *other, *sk = socket->sk;
4096     struct sk_buff *skb, *newskb = NULL, *tail = NULL;
4097    
4098     @@ -1758,7 +1939,7 @@ alloc_skb:
4099     newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
4100     &err, 0);
4101     if (!newskb)
4102     - return err;
4103     + goto err;
4104     }
4105    
4106     /* we must acquire readlock as we modify already present
4107     @@ -1767,12 +1948,12 @@ alloc_skb:
4108     err = mutex_lock_interruptible(&unix_sk(other)->readlock);
4109     if (err) {
4110     err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
4111     - send_sigpipe = false;
4112     goto err;
4113     }
4114    
4115     if (sk->sk_shutdown & SEND_SHUTDOWN) {
4116     err = -EPIPE;
4117     + send_sigpipe = true;
4118     goto err_unlock;
4119     }
4120    
4121     @@ -1781,23 +1962,34 @@ alloc_skb:
4122     if (sock_flag(other, SOCK_DEAD) ||
4123     other->sk_shutdown & RCV_SHUTDOWN) {
4124     err = -EPIPE;
4125     + send_sigpipe = true;
4126     goto err_state_unlock;
4127     }
4128    
4129     + if (init_scm) {
4130     + err = maybe_init_creds(&scm, socket, other);
4131     + if (err)
4132     + goto err_state_unlock;
4133     + init_scm = false;
4134     + }
4135     +
4136     skb = skb_peek_tail(&other->sk_receive_queue);
4137     if (tail && tail == skb) {
4138     skb = newskb;
4139     - } else if (!skb) {
4140     - if (newskb)
4141     + } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
4142     + if (newskb) {
4143     skb = newskb;
4144     - else
4145     + } else {
4146     + tail = skb;
4147     goto alloc_skb;
4148     + }
4149     } else if (newskb) {
4150     /* this is fast path, we don't necessarily need to
4151     * call to kfree_skb even though with newskb == NULL
4152     * this - does no harm
4153     */
4154     consume_skb(newskb);
4155     + newskb = NULL;
4156     }
4157    
4158     if (skb_append_pagefrags(skb, page, offset, size)) {
4159     @@ -1810,14 +2002,20 @@ alloc_skb:
4160     skb->truesize += size;
4161     atomic_add(size, &sk->sk_wmem_alloc);
4162    
4163     - if (newskb)
4164     + if (newskb) {
4165     + err = unix_scm_to_skb(&scm, skb, false);
4166     + if (err)
4167     + goto err_state_unlock;
4168     + spin_lock(&other->sk_receive_queue.lock);
4169     __skb_queue_tail(&other->sk_receive_queue, newskb);
4170     + spin_unlock(&other->sk_receive_queue.lock);
4171     + }
4172    
4173     unix_state_unlock(other);
4174     mutex_unlock(&unix_sk(other)->readlock);
4175    
4176     other->sk_data_ready(other);
4177     -
4178     + scm_destroy(&scm);
4179     return size;
4180    
4181     err_state_unlock:
4182     @@ -1828,6 +2026,8 @@ err:
4183     kfree_skb(newskb);
4184     if (send_sigpipe && !(flags & MSG_NOSIGNAL))
4185     send_sig(SIGPIPE, current, 0);
4186     + if (!init_scm)
4187     + scm_destroy(&scm);
4188     return err;
4189     }
4190    
4191     @@ -2071,6 +2271,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
4192    
4193     do {
4194     int chunk;
4195     + bool drop_skb;
4196     struct sk_buff *skb, *last;
4197    
4198     unix_state_lock(sk);
4199     @@ -2130,10 +2331,7 @@ unlock:
4200    
4201     if (check_creds) {
4202     /* Never glue messages from different writers */
4203     - if ((UNIXCB(skb).pid != scm.pid) ||
4204     - !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
4205     - !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
4206     - !unix_secdata_eq(&scm, skb))
4207     + if (!unix_skb_scm_eq(skb, &scm))
4208     break;
4209     } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
4210     /* Copy credentials */
4211     @@ -2151,7 +2349,11 @@ unlock:
4212     }
4213    
4214     chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
4215     + skb_get(skb);
4216     chunk = state->recv_actor(skb, skip, chunk, state);
4217     + drop_skb = !unix_skb_len(skb);
4218     + /* skb is only safe to use if !drop_skb */
4219     + consume_skb(skb);
4220     if (chunk < 0) {
4221     if (copied == 0)
4222     copied = -EFAULT;
4223     @@ -2160,6 +2362,18 @@ unlock:
4224     copied += chunk;
4225     size -= chunk;
4226    
4227     + if (drop_skb) {
4228     + /* the skb was touched by a concurrent reader;
4229     + * we should not expect anything from this skb
4230     + * anymore and assume it invalid - we can be
4231     + * sure it was dropped from the socket queue
4232     + *
4233     + * let's report a short read
4234     + */
4235     + err = 0;
4236     + break;
4237     + }
4238     +
4239     /* Mark read part of skb as used */
4240     if (!(flags & MSG_PEEK)) {
4241     UNIXCB(skb).consumed += chunk;
4242     @@ -2453,14 +2667,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
4243     return mask;
4244    
4245     writable = unix_writable(sk);
4246     - other = unix_peer_get(sk);
4247     - if (other) {
4248     - if (unix_peer(other) != sk) {
4249     - sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
4250     - if (unix_recvq_full(other))
4251     - writable = 0;
4252     - }
4253     - sock_put(other);
4254     + if (writable) {
4255     + unix_state_lock(sk);
4256     +
4257     + other = unix_peer(sk);
4258     + if (other && unix_peer(other) != sk &&
4259     + unix_recvq_full(other) &&
4260     + unix_dgram_peer_wake_me(sk, other))
4261     + writable = 0;
4262     +
4263     + unix_state_unlock(sk);
4264     }
4265    
4266     if (writable)
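
The AF_UNIX rework above is the classic register-then-recheck pattern: a would-be dgram writer first hooks itself onto the peer's wait queue, then re-tests the queue-full condition, so a wakeup racing with the test cannot be lost; if the queue drained in the meantime, the hook is undone. The control flow of unix_dgram_peer_wake_me(), as a hedged sketch (hook_peer_wakeup, unhook_peer_wakeup and queue_full are stand-ins for the locking and wait-queue plumbing):

    #include <stdbool.h>

    bool hook_peer_wakeup(void *sk, void *other);   /* true if newly hooked */
    void unhook_peer_wakeup(void *sk, void *other);
    bool queue_full(const void *other);

    /* nonzero: caller must back off (EAGAIN/sleep); the hook stays armed
     * so the peer's next dequeue will wake us */
    static int peer_wake_me(void *sk, void *other)
    {
        bool hooked = hook_peer_wakeup(sk, other);

        if (queue_full(other))          /* recheck AFTER registering */
            return 1;

        if (hooked)                     /* raced: queue already drained */
            unhook_peer_wakeup(sk, other);
        return 0;
    }
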
4267     diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
4268     index edfc1b8d553e..656ce39bddbc 100644
4269     --- a/sound/pci/Kconfig
4270     +++ b/sound/pci/Kconfig
4271     @@ -25,7 +25,7 @@ config SND_ALS300
4272     select SND_PCM
4273     select SND_AC97_CODEC
4274     select SND_OPL3_LIB
4275     - select ZONE_DMA
4276     + depends on ZONE_DMA
4277     help
4278     Say 'Y' or 'M' to include support for Avance Logic ALS300/ALS300+
4279    
4280     @@ -50,7 +50,7 @@ config SND_ALI5451
4281     tristate "ALi M5451 PCI Audio Controller"
4282     select SND_MPU401_UART
4283     select SND_AC97_CODEC
4284     - select ZONE_DMA
4285     + depends on ZONE_DMA
4286     help
4287     Say Y here to include support for the integrated AC97 sound
4288     device on motherboards using the ALi M5451 Audio Controller
4289     @@ -155,7 +155,7 @@ config SND_AZT3328
4290     select SND_PCM
4291     select SND_RAWMIDI
4292     select SND_AC97_CODEC
4293     - select ZONE_DMA
4294     + depends on ZONE_DMA
4295     help
4296     Say Y here to include support for Aztech AZF3328 (PCI168)
4297     soundcards.
4298     @@ -463,7 +463,7 @@ config SND_EMU10K1
4299     select SND_HWDEP
4300     select SND_RAWMIDI
4301     select SND_AC97_CODEC
4302     - select ZONE_DMA
4303     + depends on ZONE_DMA
4304     help
4305     Say Y to include support for Sound Blaster PCI 512, Live!,
4306     Audigy and E-mu APS (partially supported) soundcards.
4307     @@ -479,7 +479,7 @@ config SND_EMU10K1X
4308     tristate "Emu10k1X (Dell OEM Version)"
4309     select SND_AC97_CODEC
4310     select SND_RAWMIDI
4311     - select ZONE_DMA
4312     + depends on ZONE_DMA
4313     help
4314     Say Y here to include support for the Dell OEM version of the
4315     Sound Blaster Live!.
4316     @@ -513,7 +513,7 @@ config SND_ES1938
4317     select SND_OPL3_LIB
4318     select SND_MPU401_UART
4319     select SND_AC97_CODEC
4320     - select ZONE_DMA
4321     + depends on ZONE_DMA
4322     help
4323     Say Y here to include support for soundcards based on ESS Solo-1
4324     (ES1938, ES1946, ES1969) chips.
4325     @@ -525,7 +525,7 @@ config SND_ES1968
4326     tristate "ESS ES1968/1978 (Maestro-1/2/2E)"
4327     select SND_MPU401_UART
4328     select SND_AC97_CODEC
4329     - select ZONE_DMA
4330     + depends on ZONE_DMA
4331     help
4332     Say Y here to include support for soundcards based on ESS Maestro
4333     1/2/2E chips.
4334     @@ -612,7 +612,7 @@ config SND_ICE1712
4335     select SND_MPU401_UART
4336     select SND_AC97_CODEC
4337     select BITREVERSE
4338     - select ZONE_DMA
4339     + depends on ZONE_DMA
4340     help
4341     Say Y here to include support for soundcards based on the
4342     ICE1712 (Envy24) chip.
4343     @@ -700,7 +700,7 @@ config SND_LX6464ES
4344     config SND_MAESTRO3
4345     tristate "ESS Allegro/Maestro3"
4346     select SND_AC97_CODEC
4347     - select ZONE_DMA
4348     + depends on ZONE_DMA
4349     help
4350     Say Y here to include support for soundcards based on ESS Maestro 3
4351     (Allegro) chips.
4352     @@ -806,7 +806,7 @@ config SND_SIS7019
4353     tristate "SiS 7019 Audio Accelerator"
4354     depends on X86_32
4355     select SND_AC97_CODEC
4356     - select ZONE_DMA
4357     + depends on ZONE_DMA
4358     help
4359     Say Y here to include support for the SiS 7019 Audio Accelerator.
4360    
4361     @@ -818,7 +818,7 @@ config SND_SONICVIBES
4362     select SND_OPL3_LIB
4363     select SND_MPU401_UART
4364     select SND_AC97_CODEC
4365     - select ZONE_DMA
4366     + depends on ZONE_DMA
4367     help
4368     Say Y here to include support for soundcards based on the S3
4369     SonicVibes chip.
4370     @@ -830,7 +830,7 @@ config SND_TRIDENT
4371     tristate "Trident 4D-Wave DX/NX; SiS 7018"
4372     select SND_MPU401_UART
4373     select SND_AC97_CODEC
4374     - select ZONE_DMA
4375     + depends on ZONE_DMA
4376     help
4377     Say Y here to include support for soundcards based on Trident
4378     4D-Wave DX/NX or SiS 7018 chips.
4379     diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
4380     index acbfbe087ee8..f22f5c409447 100644
4381     --- a/sound/pci/hda/patch_hdmi.c
4382     +++ b/sound/pci/hda/patch_hdmi.c
4383     @@ -50,8 +50,9 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
4384     #define is_haswell(codec) ((codec)->core.vendor_id == 0x80862807)
4385     #define is_broadwell(codec) ((codec)->core.vendor_id == 0x80862808)
4386     #define is_skylake(codec) ((codec)->core.vendor_id == 0x80862809)
4387     +#define is_broxton(codec) ((codec)->core.vendor_id == 0x8086280a)
4388     #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
4389     - || is_skylake(codec))
4390     + || is_skylake(codec) || is_broxton(codec))
4391    
4392     #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
4393     #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
4394     diff --git a/tools/net/Makefile b/tools/net/Makefile
4395     index ee577ea03ba5..ddf888010652 100644
4396     --- a/tools/net/Makefile
4397     +++ b/tools/net/Makefile
4398     @@ -4,6 +4,9 @@ CC = gcc
4399     LEX = flex
4400     YACC = bison
4401    
4402     +CFLAGS += -Wall -O2
4403     +CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
4404     +
4405     %.yacc.c: %.y
4406     $(YACC) -o $@ -d $<
4407    
4408     @@ -12,15 +15,13 @@ YACC = bison
4409    
4410     all : bpf_jit_disasm bpf_dbg bpf_asm
4411    
4412     -bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm'
4413     +bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
4414     bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
4415     bpf_jit_disasm : bpf_jit_disasm.o
4416    
4417     -bpf_dbg : CFLAGS = -Wall -O2
4418     bpf_dbg : LDLIBS = -lreadline
4419     bpf_dbg : bpf_dbg.o
4420    
4421     -bpf_asm : CFLAGS = -Wall -O2 -I.
4422     bpf_asm : LDLIBS =
4423     bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
4424     bpf_exp.lex.o : bpf_exp.yacc.c