Annotation of /trunk/kernel-alx/patches-4.9/0271-4.9.172-all-fixes.patch
Parent Directory | Revision Log
Revision 3348 -
(hide annotations)
(download)
Tue Jun 18 09:42:01 2019 UTC (5 years, 3 months ago) by niro
File size: 92276 byte(s)
Tue Jun 18 09:42:01 2019 UTC (5 years, 3 months ago) by niro
File size: 92276 byte(s)
-linux-4.9.172
1 | niro | 3348 | diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt |
2 | index c708a50b060e..a1472b48ee22 100644 | ||
3 | --- a/Documentation/kernel-parameters.txt | ||
4 | +++ b/Documentation/kernel-parameters.txt | ||
5 | @@ -2758,6 +2758,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | ||
6 | |||
7 | nohugeiomap [KNL,x86] Disable kernel huge I/O mappings. | ||
8 | |||
9 | + nospectre_v1 [PPC] Disable mitigations for Spectre Variant 1 (bounds | ||
10 | + check bypass). With this option data leaks are possible | ||
11 | + in the system. | ||
12 | + | ||
13 | nosmt [KNL,S390] Disable symmetric multithreading (SMT). | ||
14 | Equivalent to smt=1. | ||
15 | |||
16 | @@ -2765,7 +2769,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | ||
17 | nosmt=force: Force disable SMT, cannot be undone | ||
18 | via the sysfs control file. | ||
19 | |||
20 | - nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 | ||
21 | + nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2 | ||
22 | (indirect branch prediction) vulnerability. System may | ||
23 | allow data leaks with this option, which is equivalent | ||
24 | to spectre_v2=off. | ||
25 | diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt | ||
26 | index dbdc4130e149..0335285f3918 100644 | ||
27 | --- a/Documentation/networking/ip-sysctl.txt | ||
28 | +++ b/Documentation/networking/ip-sysctl.txt | ||
29 | @@ -405,6 +405,7 @@ tcp_min_rtt_wlen - INTEGER | ||
30 | minimum RTT when it is moved to a longer path (e.g., due to traffic | ||
31 | engineering). A longer window makes the filter more resistant to RTT | ||
32 | inflations such as transient congestion. The unit is seconds. | ||
33 | + Possible values: 0 - 86400 (1 day) | ||
34 | Default: 300 | ||
35 | |||
36 | tcp_moderate_rcvbuf - BOOLEAN | ||
37 | diff --git a/Makefile b/Makefile | ||
38 | index dbdef749e1c8..75cba5fbdb46 100644 | ||
39 | --- a/Makefile | ||
40 | +++ b/Makefile | ||
41 | @@ -1,6 +1,6 @@ | ||
42 | VERSION = 4 | ||
43 | PATCHLEVEL = 9 | ||
44 | -SUBLEVEL = 171 | ||
45 | +SUBLEVEL = 172 | ||
46 | EXTRAVERSION = | ||
47 | NAME = Roaring Lionus | ||
48 | |||
49 | diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S | ||
50 | index 2d7f2bb0d66a..a67ed746b0e3 100644 | ||
51 | --- a/arch/arm/boot/compressed/head.S | ||
52 | +++ b/arch/arm/boot/compressed/head.S | ||
53 | @@ -1383,7 +1383,21 @@ ENTRY(efi_stub_entry) | ||
54 | |||
55 | @ Preserve return value of efi_entry() in r4 | ||
56 | mov r4, r0 | ||
57 | - bl cache_clean_flush | ||
58 | + | ||
59 | + @ our cache maintenance code relies on CP15 barrier instructions | ||
60 | + @ but since we arrived here with the MMU and caches configured | ||
61 | + @ by UEFI, we must check that the CP15BEN bit is set in SCTLR. | ||
62 | + @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in | ||
63 | + @ the enable path will be executed on v7+ only. | ||
64 | + mrc p15, 0, r1, c1, c0, 0 @ read SCTLR | ||
65 | + tst r1, #(1 << 5) @ CP15BEN bit set? | ||
66 | + bne 0f | ||
67 | + orr r1, r1, #(1 << 5) @ CP15 barrier instructions | ||
68 | + mcr p15, 0, r1, c1, c0, 0 @ write SCTLR | ||
69 | + ARM( .inst 0xf57ff06f @ v7+ isb ) | ||
70 | + THUMB( isb ) | ||
71 | + | ||
72 | +0: bl cache_clean_flush | ||
73 | bl cache_off | ||
74 | |||
75 | @ Set parameters for booting zImage according to boot protocol | ||
76 | diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S | ||
77 | index 7913a5cf6806..b9c788790c0f 100644 | ||
78 | --- a/arch/mips/kernel/scall64-o32.S | ||
79 | +++ b/arch/mips/kernel/scall64-o32.S | ||
80 | @@ -125,7 +125,7 @@ trace_a_syscall: | ||
81 | subu t1, v0, __NR_O32_Linux | ||
82 | move a1, v0 | ||
83 | bnez t1, 1f /* __NR_syscall at offset 0 */ | ||
84 | - lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ | ||
85 | + ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ | ||
86 | .set pop | ||
87 | |||
88 | 1: jal syscall_trace_enter | ||
89 | diff --git a/drivers/block/loop.c b/drivers/block/loop.c | ||
90 | index 28ce17405aab..9f840d9fdfcb 100644 | ||
91 | --- a/drivers/block/loop.c | ||
92 | +++ b/drivers/block/loop.c | ||
93 | @@ -82,7 +82,6 @@ | ||
94 | |||
95 | static DEFINE_IDR(loop_index_idr); | ||
96 | static DEFINE_MUTEX(loop_index_mutex); | ||
97 | -static DEFINE_MUTEX(loop_ctl_mutex); | ||
98 | |||
99 | static int max_part; | ||
100 | static int part_shift; | ||
101 | @@ -1034,7 +1033,7 @@ static int loop_clr_fd(struct loop_device *lo) | ||
102 | */ | ||
103 | if (atomic_read(&lo->lo_refcnt) > 1) { | ||
104 | lo->lo_flags |= LO_FLAGS_AUTOCLEAR; | ||
105 | - mutex_unlock(&loop_ctl_mutex); | ||
106 | + mutex_unlock(&lo->lo_ctl_mutex); | ||
107 | return 0; | ||
108 | } | ||
109 | |||
110 | @@ -1083,12 +1082,12 @@ static int loop_clr_fd(struct loop_device *lo) | ||
111 | if (!part_shift) | ||
112 | lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; | ||
113 | loop_unprepare_queue(lo); | ||
114 | - mutex_unlock(&loop_ctl_mutex); | ||
115 | + mutex_unlock(&lo->lo_ctl_mutex); | ||
116 | /* | ||
117 | - * Need not hold loop_ctl_mutex to fput backing file. | ||
118 | - * Calling fput holding loop_ctl_mutex triggers a circular | ||
119 | + * Need not hold lo_ctl_mutex to fput backing file. | ||
120 | + * Calling fput holding lo_ctl_mutex triggers a circular | ||
121 | * lock dependency possibility warning as fput can take | ||
122 | - * bd_mutex which is usually taken before loop_ctl_mutex. | ||
123 | + * bd_mutex which is usually taken before lo_ctl_mutex. | ||
124 | */ | ||
125 | fput(filp); | ||
126 | return 0; | ||
127 | @@ -1351,7 +1350,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, | ||
128 | struct loop_device *lo = bdev->bd_disk->private_data; | ||
129 | int err; | ||
130 | |||
131 | - mutex_lock_nested(&loop_ctl_mutex, 1); | ||
132 | + mutex_lock_nested(&lo->lo_ctl_mutex, 1); | ||
133 | switch (cmd) { | ||
134 | case LOOP_SET_FD: | ||
135 | err = loop_set_fd(lo, mode, bdev, arg); | ||
136 | @@ -1360,7 +1359,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, | ||
137 | err = loop_change_fd(lo, bdev, arg); | ||
138 | break; | ||
139 | case LOOP_CLR_FD: | ||
140 | - /* loop_clr_fd would have unlocked loop_ctl_mutex on success */ | ||
141 | + /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ | ||
142 | err = loop_clr_fd(lo); | ||
143 | if (!err) | ||
144 | goto out_unlocked; | ||
145 | @@ -1396,7 +1395,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, | ||
146 | default: | ||
147 | err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; | ||
148 | } | ||
149 | - mutex_unlock(&loop_ctl_mutex); | ||
150 | + mutex_unlock(&lo->lo_ctl_mutex); | ||
151 | |||
152 | out_unlocked: | ||
153 | return err; | ||
154 | @@ -1529,16 +1528,16 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, | ||
155 | |||
156 | switch(cmd) { | ||
157 | case LOOP_SET_STATUS: | ||
158 | - mutex_lock(&loop_ctl_mutex); | ||
159 | + mutex_lock(&lo->lo_ctl_mutex); | ||
160 | err = loop_set_status_compat( | ||
161 | lo, (const struct compat_loop_info __user *) arg); | ||
162 | - mutex_unlock(&loop_ctl_mutex); | ||
163 | + mutex_unlock(&lo->lo_ctl_mutex); | ||
164 | break; | ||
165 | case LOOP_GET_STATUS: | ||
166 | - mutex_lock(&loop_ctl_mutex); | ||
167 | + mutex_lock(&lo->lo_ctl_mutex); | ||
168 | err = loop_get_status_compat( | ||
169 | lo, (struct compat_loop_info __user *) arg); | ||
170 | - mutex_unlock(&loop_ctl_mutex); | ||
171 | + mutex_unlock(&lo->lo_ctl_mutex); | ||
172 | break; | ||
173 | case LOOP_SET_CAPACITY: | ||
174 | case LOOP_CLR_FD: | ||
175 | @@ -1582,7 +1581,7 @@ static void __lo_release(struct loop_device *lo) | ||
176 | if (atomic_dec_return(&lo->lo_refcnt)) | ||
177 | return; | ||
178 | |||
179 | - mutex_lock(&loop_ctl_mutex); | ||
180 | + mutex_lock(&lo->lo_ctl_mutex); | ||
181 | if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { | ||
182 | /* | ||
183 | * In autoclear mode, stop the loop thread | ||
184 | @@ -1599,7 +1598,7 @@ static void __lo_release(struct loop_device *lo) | ||
185 | loop_flush(lo); | ||
186 | } | ||
187 | |||
188 | - mutex_unlock(&loop_ctl_mutex); | ||
189 | + mutex_unlock(&lo->lo_ctl_mutex); | ||
190 | } | ||
191 | |||
192 | static void lo_release(struct gendisk *disk, fmode_t mode) | ||
193 | @@ -1645,10 +1644,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data) | ||
194 | struct loop_device *lo = ptr; | ||
195 | struct loop_func_table *xfer = data; | ||
196 | |||
197 | - mutex_lock(&loop_ctl_mutex); | ||
198 | + mutex_lock(&lo->lo_ctl_mutex); | ||
199 | if (lo->lo_encryption == xfer) | ||
200 | loop_release_xfer(lo); | ||
201 | - mutex_unlock(&loop_ctl_mutex); | ||
202 | + mutex_unlock(&lo->lo_ctl_mutex); | ||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | @@ -1814,6 +1813,7 @@ static int loop_add(struct loop_device **l, int i) | ||
207 | if (!part_shift) | ||
208 | disk->flags |= GENHD_FL_NO_PART_SCAN; | ||
209 | disk->flags |= GENHD_FL_EXT_DEVT; | ||
210 | + mutex_init(&lo->lo_ctl_mutex); | ||
211 | atomic_set(&lo->lo_refcnt, 0); | ||
212 | lo->lo_number = i; | ||
213 | spin_lock_init(&lo->lo_lock); | ||
214 | @@ -1926,19 +1926,19 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, | ||
215 | ret = loop_lookup(&lo, parm); | ||
216 | if (ret < 0) | ||
217 | break; | ||
218 | - mutex_lock(&loop_ctl_mutex); | ||
219 | + mutex_lock(&lo->lo_ctl_mutex); | ||
220 | if (lo->lo_state != Lo_unbound) { | ||
221 | ret = -EBUSY; | ||
222 | - mutex_unlock(&loop_ctl_mutex); | ||
223 | + mutex_unlock(&lo->lo_ctl_mutex); | ||
224 | break; | ||
225 | } | ||
226 | if (atomic_read(&lo->lo_refcnt) > 0) { | ||
227 | ret = -EBUSY; | ||
228 | - mutex_unlock(&loop_ctl_mutex); | ||
229 | + mutex_unlock(&lo->lo_ctl_mutex); | ||
230 | break; | ||
231 | } | ||
232 | lo->lo_disk->private_data = NULL; | ||
233 | - mutex_unlock(&loop_ctl_mutex); | ||
234 | + mutex_unlock(&lo->lo_ctl_mutex); | ||
235 | idr_remove(&loop_index_idr, lo->lo_number); | ||
236 | loop_remove(lo); | ||
237 | break; | ||
238 | diff --git a/drivers/block/loop.h b/drivers/block/loop.h | ||
239 | index a923e74495ce..60f0fd2c0c65 100644 | ||
240 | --- a/drivers/block/loop.h | ||
241 | +++ b/drivers/block/loop.h | ||
242 | @@ -55,6 +55,7 @@ struct loop_device { | ||
243 | |||
244 | spinlock_t lo_lock; | ||
245 | int lo_state; | ||
246 | + struct mutex lo_ctl_mutex; | ||
247 | struct kthread_worker worker; | ||
248 | struct task_struct *worker_task; | ||
249 | bool use_dio; | ||
250 | diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c | ||
251 | index d032032337e7..f37a6ef4f544 100644 | ||
252 | --- a/drivers/dma/sh/rcar-dmac.c | ||
253 | +++ b/drivers/dma/sh/rcar-dmac.c | ||
254 | @@ -1311,6 +1311,7 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan, | ||
255 | enum dma_status status; | ||
256 | unsigned long flags; | ||
257 | unsigned int residue; | ||
258 | + bool cyclic; | ||
259 | |||
260 | status = dma_cookie_status(chan, cookie, txstate); | ||
261 | if (status == DMA_COMPLETE || !txstate) | ||
262 | @@ -1318,10 +1319,11 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan, | ||
263 | |||
264 | spin_lock_irqsave(&rchan->lock, flags); | ||
265 | residue = rcar_dmac_chan_get_residue(rchan, cookie); | ||
266 | + cyclic = rchan->desc.running ? rchan->desc.running->cyclic : false; | ||
267 | spin_unlock_irqrestore(&rchan->lock, flags); | ||
268 | |||
269 | /* if there's no residue, the cookie is complete */ | ||
270 | - if (!residue) | ||
271 | + if (!residue && !cyclic) | ||
272 | return DMA_COMPLETE; | ||
273 | |||
274 | dma_set_residue(txstate, residue); | ||
275 | diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c | ||
276 | index c7e6c9839c9a..51d34e7275ab 100644 | ||
277 | --- a/drivers/gpu/drm/vc4/vc4_crtc.c | ||
278 | +++ b/drivers/gpu/drm/vc4/vc4_crtc.c | ||
279 | @@ -846,7 +846,7 @@ static void | ||
280 | vc4_crtc_reset(struct drm_crtc *crtc) | ||
281 | { | ||
282 | if (crtc->state) | ||
283 | - __drm_atomic_helper_crtc_destroy_state(crtc->state); | ||
284 | + vc4_crtc_destroy_state(crtc, crtc->state); | ||
285 | |||
286 | crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); | ||
287 | if (crtc->state) | ||
288 | diff --git a/drivers/hwtracing/intel_th/gth.c b/drivers/hwtracing/intel_th/gth.c | ||
289 | index b0502e2782c1..98a4cb5d4993 100644 | ||
290 | --- a/drivers/hwtracing/intel_th/gth.c | ||
291 | +++ b/drivers/hwtracing/intel_th/gth.c | ||
292 | @@ -605,7 +605,7 @@ static void intel_th_gth_unassign(struct intel_th_device *thdev, | ||
293 | othdev->output.port = -1; | ||
294 | othdev->output.active = false; | ||
295 | gth->output[port].output = NULL; | ||
296 | - for (master = 0; master < TH_CONFIGURABLE_MASTERS; master++) | ||
297 | + for (master = 0; master <= TH_CONFIGURABLE_MASTERS; master++) | ||
298 | if (gth->master[master] == port) | ||
299 | gth->master[master] = -1; | ||
300 | spin_unlock(&gth->gth_lock); | ||
301 | diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c | ||
302 | index 46b64970058e..49d55a0322f6 100644 | ||
303 | --- a/drivers/infiniband/sw/rdmavt/mr.c | ||
304 | +++ b/drivers/infiniband/sw/rdmavt/mr.c | ||
305 | @@ -497,11 +497,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) | ||
306 | if (unlikely(mapped_segs == mr->mr.max_segs)) | ||
307 | return -ENOMEM; | ||
308 | |||
309 | - if (mr->mr.length == 0) { | ||
310 | - mr->mr.user_base = addr; | ||
311 | - mr->mr.iova = addr; | ||
312 | - } | ||
313 | - | ||
314 | m = mapped_segs / RVT_SEGSZ; | ||
315 | n = mapped_segs % RVT_SEGSZ; | ||
316 | mr->mr.map[m]->segs[n].vaddr = (void *)addr; | ||
317 | @@ -518,17 +513,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) | ||
318 | * @sg_nents: number of entries in sg | ||
319 | * @sg_offset: offset in bytes into sg | ||
320 | * | ||
321 | + * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages. | ||
322 | + * | ||
323 | * Return: number of sg elements mapped to the memory region | ||
324 | */ | ||
325 | int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, | ||
326 | int sg_nents, unsigned int *sg_offset) | ||
327 | { | ||
328 | struct rvt_mr *mr = to_imr(ibmr); | ||
329 | + int ret; | ||
330 | |||
331 | mr->mr.length = 0; | ||
332 | mr->mr.page_shift = PAGE_SHIFT; | ||
333 | - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, | ||
334 | - rvt_set_page); | ||
335 | + ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page); | ||
336 | + mr->mr.user_base = ibmr->iova; | ||
337 | + mr->mr.iova = ibmr->iova; | ||
338 | + mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; | ||
339 | + mr->mr.length = (size_t)ibmr->length; | ||
340 | + return ret; | ||
341 | } | ||
342 | |||
343 | /** | ||
344 | @@ -559,6 +561,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, | ||
345 | ibmr->rkey = key; | ||
346 | mr->mr.lkey = key; | ||
347 | mr->mr.access_flags = access; | ||
348 | + mr->mr.iova = ibmr->iova; | ||
349 | atomic_set(&mr->mr.lkey_invalid, 0); | ||
350 | |||
351 | return 0; | ||
352 | diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c | ||
353 | index f798f427a46f..275f957604f7 100644 | ||
354 | --- a/drivers/input/rmi4/rmi_f11.c | ||
355 | +++ b/drivers/input/rmi4/rmi_f11.c | ||
356 | @@ -1198,7 +1198,7 @@ static int rmi_f11_initialize(struct rmi_function *fn) | ||
357 | ctrl->ctrl0_11[11] = ctrl->ctrl0_11[11] & ~BIT(0); | ||
358 | |||
359 | rc = f11_write_control_regs(fn, &f11->sens_query, | ||
360 | - &f11->dev_controls, fn->fd.query_base_addr); | ||
361 | + &f11->dev_controls, fn->fd.control_base_addr); | ||
362 | if (rc) | ||
363 | dev_warn(&fn->dev, "Failed to write control registers\n"); | ||
364 | |||
365 | diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c | ||
366 | index 2aae6f88dca0..a52663745051 100644 | ||
367 | --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c | ||
368 | +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c | ||
369 | @@ -58,6 +58,8 @@ static int __init fm10k_init_module(void) | ||
370 | /* create driver workqueue */ | ||
371 | fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, | ||
372 | fm10k_driver_name); | ||
373 | + if (!fm10k_workqueue) | ||
374 | + return -ENOMEM; | ||
375 | |||
376 | fm10k_dbg_init(); | ||
377 | |||
378 | diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | ||
379 | index d5e8ac86c195..54872f8f2f7d 100644 | ||
380 | --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | ||
381 | +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | ||
382 | @@ -1365,7 +1365,7 @@ static int mlx5e_get_module_info(struct net_device *netdev, | ||
383 | break; | ||
384 | case MLX5_MODULE_ID_SFP: | ||
385 | modinfo->type = ETH_MODULE_SFF_8472; | ||
386 | - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; | ||
387 | + modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH; | ||
388 | break; | ||
389 | default: | ||
390 | netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", | ||
391 | diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c | ||
392 | index 43d7c8378fb4..0bad09d06206 100644 | ||
393 | --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c | ||
394 | +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c | ||
395 | @@ -368,10 +368,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, | ||
396 | size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; | ||
397 | |||
398 | i2c_addr = MLX5_I2C_ADDR_LOW; | ||
399 | - if (offset >= MLX5_EEPROM_PAGE_LENGTH) { | ||
400 | - i2c_addr = MLX5_I2C_ADDR_HIGH; | ||
401 | - offset -= MLX5_EEPROM_PAGE_LENGTH; | ||
402 | - } | ||
403 | |||
404 | MLX5_SET(mcia_reg, in, l, 0); | ||
405 | MLX5_SET(mcia_reg, in, module, module_num); | ||
406 | diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c | ||
407 | index cc847e0cac2d..e3ed70a24029 100644 | ||
408 | --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c | ||
409 | +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c | ||
410 | @@ -2059,11 +2059,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, | ||
411 | if (err) | ||
412 | return err; | ||
413 | |||
414 | + mlxsw_sp_port->link.autoneg = autoneg; | ||
415 | + | ||
416 | if (!netif_running(dev)) | ||
417 | return 0; | ||
418 | |||
419 | - mlxsw_sp_port->link.autoneg = autoneg; | ||
420 | - | ||
421 | mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); | ||
422 | mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); | ||
423 | |||
424 | diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | ||
425 | index b46b56ad7517..2c04a0739fd6 100644 | ||
426 | --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | ||
427 | +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | ||
428 | @@ -1796,8 +1796,6 @@ static int stmmac_open(struct net_device *dev) | ||
429 | struct stmmac_priv *priv = netdev_priv(dev); | ||
430 | int ret; | ||
431 | |||
432 | - stmmac_check_ether_addr(priv); | ||
433 | - | ||
434 | if (priv->hw->pcs != STMMAC_PCS_RGMII && | ||
435 | priv->hw->pcs != STMMAC_PCS_TBI && | ||
436 | priv->hw->pcs != STMMAC_PCS_RTBI) { | ||
437 | @@ -3355,6 +3353,8 @@ int stmmac_dvr_probe(struct device *device, | ||
438 | if (ret) | ||
439 | goto error_hw_init; | ||
440 | |||
441 | + stmmac_check_ether_addr(priv); | ||
442 | + | ||
443 | ndev->netdev_ops = &stmmac_netdev_ops; | ||
444 | |||
445 | ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | | ||
446 | diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c | ||
447 | index cfd81eb1b532..ddceed3c5a4a 100644 | ||
448 | --- a/drivers/net/slip/slhc.c | ||
449 | +++ b/drivers/net/slip/slhc.c | ||
450 | @@ -153,7 +153,7 @@ out_fail: | ||
451 | void | ||
452 | slhc_free(struct slcompress *comp) | ||
453 | { | ||
454 | - if ( comp == NULLSLCOMPR ) | ||
455 | + if ( IS_ERR_OR_NULL(comp) ) | ||
456 | return; | ||
457 | |||
458 | if ( comp->tstate != NULLSLSTATE ) | ||
459 | diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c | ||
460 | index b8874faaa813..3eb6d48c3148 100644 | ||
461 | --- a/drivers/net/team/team.c | ||
462 | +++ b/drivers/net/team/team.c | ||
463 | @@ -1163,6 +1163,12 @@ static int team_port_add(struct team *team, struct net_device *port_dev) | ||
464 | return -EINVAL; | ||
465 | } | ||
466 | |||
467 | + if (netdev_has_upper_dev(dev, port_dev)) { | ||
468 | + netdev_err(dev, "Device %s is already an upper device of the team interface\n", | ||
469 | + portname); | ||
470 | + return -EBUSY; | ||
471 | + } | ||
472 | + | ||
473 | if (port_dev->features & NETIF_F_VLAN_CHALLENGED && | ||
474 | vlan_uses_dev(dev)) { | ||
475 | netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n", | ||
476 | diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c | ||
477 | index e9d6cf146fcc..c17b254e4f64 100644 | ||
478 | --- a/drivers/usb/core/driver.c | ||
479 | +++ b/drivers/usb/core/driver.c | ||
480 | @@ -1888,14 +1888,11 @@ int usb_runtime_idle(struct device *dev) | ||
481 | return -EBUSY; | ||
482 | } | ||
483 | |||
484 | -int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) | ||
485 | +static int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) | ||
486 | { | ||
487 | struct usb_hcd *hcd = bus_to_hcd(udev->bus); | ||
488 | int ret = -EPERM; | ||
489 | |||
490 | - if (enable && !udev->usb2_hw_lpm_allowed) | ||
491 | - return 0; | ||
492 | - | ||
493 | if (hcd->driver->set_usb2_hw_lpm) { | ||
494 | ret = hcd->driver->set_usb2_hw_lpm(hcd, udev, enable); | ||
495 | if (!ret) | ||
496 | @@ -1905,6 +1902,24 @@ int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) | ||
497 | return ret; | ||
498 | } | ||
499 | |||
500 | +int usb_enable_usb2_hardware_lpm(struct usb_device *udev) | ||
501 | +{ | ||
502 | + if (!udev->usb2_hw_lpm_capable || | ||
503 | + !udev->usb2_hw_lpm_allowed || | ||
504 | + udev->usb2_hw_lpm_enabled) | ||
505 | + return 0; | ||
506 | + | ||
507 | + return usb_set_usb2_hardware_lpm(udev, 1); | ||
508 | +} | ||
509 | + | ||
510 | +int usb_disable_usb2_hardware_lpm(struct usb_device *udev) | ||
511 | +{ | ||
512 | + if (!udev->usb2_hw_lpm_enabled) | ||
513 | + return 0; | ||
514 | + | ||
515 | + return usb_set_usb2_hardware_lpm(udev, 0); | ||
516 | +} | ||
517 | + | ||
518 | #endif /* CONFIG_PM */ | ||
519 | |||
520 | struct bus_type usb_bus_type = { | ||
521 | diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c | ||
522 | index 7b6919086539..8fddb94f1874 100644 | ||
523 | --- a/drivers/usb/core/hub.c | ||
524 | +++ b/drivers/usb/core/hub.c | ||
525 | @@ -3168,8 +3168,7 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) | ||
526 | } | ||
527 | |||
528 | /* disable USB2 hardware LPM */ | ||
529 | - if (udev->usb2_hw_lpm_enabled == 1) | ||
530 | - usb_set_usb2_hardware_lpm(udev, 0); | ||
531 | + usb_disable_usb2_hardware_lpm(udev); | ||
532 | |||
533 | if (usb_disable_ltm(udev)) { | ||
534 | dev_err(&udev->dev, "Failed to disable LTM before suspend\n."); | ||
535 | @@ -3215,8 +3214,7 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) | ||
536 | usb_enable_ltm(udev); | ||
537 | err_ltm: | ||
538 | /* Try to enable USB2 hardware LPM again */ | ||
539 | - if (udev->usb2_hw_lpm_capable == 1) | ||
540 | - usb_set_usb2_hardware_lpm(udev, 1); | ||
541 | + usb_enable_usb2_hardware_lpm(udev); | ||
542 | |||
543 | if (udev->do_remote_wakeup) | ||
544 | (void) usb_disable_remote_wakeup(udev); | ||
545 | @@ -3499,8 +3497,7 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg) | ||
546 | hub_port_logical_disconnect(hub, port1); | ||
547 | } else { | ||
548 | /* Try to enable USB2 hardware LPM */ | ||
549 | - if (udev->usb2_hw_lpm_capable == 1) | ||
550 | - usb_set_usb2_hardware_lpm(udev, 1); | ||
551 | + usb_enable_usb2_hardware_lpm(udev); | ||
552 | |||
553 | /* Try to enable USB3 LTM and LPM */ | ||
554 | usb_enable_ltm(udev); | ||
555 | @@ -4337,7 +4334,7 @@ static void hub_set_initial_usb2_lpm_policy(struct usb_device *udev) | ||
556 | if ((udev->bos->ext_cap->bmAttributes & cpu_to_le32(USB_BESL_SUPPORT)) || | ||
557 | connect_type == USB_PORT_CONNECT_TYPE_HARD_WIRED) { | ||
558 | udev->usb2_hw_lpm_allowed = 1; | ||
559 | - usb_set_usb2_hardware_lpm(udev, 1); | ||
560 | + usb_enable_usb2_hardware_lpm(udev); | ||
561 | } | ||
562 | } | ||
563 | |||
564 | @@ -5481,8 +5478,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) | ||
565 | /* Disable USB2 hardware LPM. | ||
566 | * It will be re-enabled by the enumeration process. | ||
567 | */ | ||
568 | - if (udev->usb2_hw_lpm_enabled == 1) | ||
569 | - usb_set_usb2_hardware_lpm(udev, 0); | ||
570 | + usb_disable_usb2_hardware_lpm(udev); | ||
571 | |||
572 | /* Disable LPM and LTM while we reset the device and reinstall the alt | ||
573 | * settings. Device-initiated LPM settings, and system exit latency | ||
574 | @@ -5592,7 +5588,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) | ||
575 | |||
576 | done: | ||
577 | /* Now that the alt settings are re-installed, enable LTM and LPM. */ | ||
578 | - usb_set_usb2_hardware_lpm(udev, 1); | ||
579 | + usb_enable_usb2_hardware_lpm(udev); | ||
580 | usb_unlocked_enable_lpm(udev); | ||
581 | usb_enable_ltm(udev); | ||
582 | usb_release_bos_descriptor(udev); | ||
583 | diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c | ||
584 | index c0c5d5b3ec40..0e6ab0a17c08 100644 | ||
585 | --- a/drivers/usb/core/message.c | ||
586 | +++ b/drivers/usb/core/message.c | ||
587 | @@ -1181,8 +1181,7 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0) | ||
588 | dev->actconfig->interface[i] = NULL; | ||
589 | } | ||
590 | |||
591 | - if (dev->usb2_hw_lpm_enabled == 1) | ||
592 | - usb_set_usb2_hardware_lpm(dev, 0); | ||
593 | + usb_disable_usb2_hardware_lpm(dev); | ||
594 | usb_unlocked_disable_lpm(dev); | ||
595 | usb_disable_ltm(dev); | ||
596 | |||
597 | diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c | ||
598 | index c953a0f1c695..1a232b4ffe71 100644 | ||
599 | --- a/drivers/usb/core/sysfs.c | ||
600 | +++ b/drivers/usb/core/sysfs.c | ||
601 | @@ -494,7 +494,10 @@ static ssize_t usb2_hardware_lpm_store(struct device *dev, | ||
602 | |||
603 | if (!ret) { | ||
604 | udev->usb2_hw_lpm_allowed = value; | ||
605 | - ret = usb_set_usb2_hardware_lpm(udev, value); | ||
606 | + if (value) | ||
607 | + ret = usb_enable_usb2_hardware_lpm(udev); | ||
608 | + else | ||
609 | + ret = usb_disable_usb2_hardware_lpm(udev); | ||
610 | } | ||
611 | |||
612 | usb_unlock_device(udev); | ||
613 | diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h | ||
614 | index 53318126ed91..6b2f11544283 100644 | ||
615 | --- a/drivers/usb/core/usb.h | ||
616 | +++ b/drivers/usb/core/usb.h | ||
617 | @@ -84,7 +84,8 @@ extern int usb_remote_wakeup(struct usb_device *dev); | ||
618 | extern int usb_runtime_suspend(struct device *dev); | ||
619 | extern int usb_runtime_resume(struct device *dev); | ||
620 | extern int usb_runtime_idle(struct device *dev); | ||
621 | -extern int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable); | ||
622 | +extern int usb_enable_usb2_hardware_lpm(struct usb_device *udev); | ||
623 | +extern int usb_disable_usb2_hardware_lpm(struct usb_device *udev); | ||
624 | |||
625 | #else | ||
626 | |||
627 | @@ -104,7 +105,12 @@ static inline int usb_autoresume_device(struct usb_device *udev) | ||
628 | return 0; | ||
629 | } | ||
630 | |||
631 | -static inline int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) | ||
632 | +static inline int usb_enable_usb2_hardware_lpm(struct usb_device *udev) | ||
633 | +{ | ||
634 | + return 0; | ||
635 | +} | ||
636 | + | ||
637 | +static inline int usb_disable_usb2_hardware_lpm(struct usb_device *udev) | ||
638 | { | ||
639 | return 0; | ||
640 | } | ||
641 | diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c | ||
642 | index cec25691cbae..2ffc7fe8da52 100644 | ||
643 | --- a/fs/ceph/dir.c | ||
644 | +++ b/fs/ceph/dir.c | ||
645 | @@ -1471,6 +1471,7 @@ void ceph_dentry_lru_del(struct dentry *dn) | ||
646 | unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) | ||
647 | { | ||
648 | struct ceph_inode_info *dci = ceph_inode(dir); | ||
649 | + unsigned hash; | ||
650 | |||
651 | switch (dci->i_dir_layout.dl_dir_hash) { | ||
652 | case 0: /* for backward compat */ | ||
653 | @@ -1478,8 +1479,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) | ||
654 | return dn->d_name.hash; | ||
655 | |||
656 | default: | ||
657 | - return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, | ||
658 | + spin_lock(&dn->d_lock); | ||
659 | + hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash, | ||
660 | dn->d_name.name, dn->d_name.len); | ||
661 | + spin_unlock(&dn->d_lock); | ||
662 | + return hash; | ||
663 | } | ||
664 | } | ||
665 | |||
666 | diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c | ||
667 | index 6cbd0d805c9d..67cb9d078bfa 100644 | ||
668 | --- a/fs/ceph/mds_client.c | ||
669 | +++ b/fs/ceph/mds_client.c | ||
670 | @@ -1187,6 +1187,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | ||
671 | list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); | ||
672 | ci->i_prealloc_cap_flush = NULL; | ||
673 | } | ||
674 | + | ||
675 | + if (drop && | ||
676 | + ci->i_wrbuffer_ref_head == 0 && | ||
677 | + ci->i_wr_ref == 0 && | ||
678 | + ci->i_dirty_caps == 0 && | ||
679 | + ci->i_flushing_caps == 0) { | ||
680 | + ceph_put_snap_context(ci->i_head_snapc); | ||
681 | + ci->i_head_snapc = NULL; | ||
682 | + } | ||
683 | } | ||
684 | spin_unlock(&ci->i_ceph_lock); | ||
685 | while (!list_empty(&to_remove)) { | ||
686 | diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c | ||
687 | index 411e9df0d40e..3a76ae001360 100644 | ||
688 | --- a/fs/ceph/snap.c | ||
689 | +++ b/fs/ceph/snap.c | ||
690 | @@ -563,7 +563,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | ||
691 | old_snapc = NULL; | ||
692 | |||
693 | update_snapc: | ||
694 | - if (ci->i_head_snapc) { | ||
695 | + if (ci->i_wrbuffer_ref_head == 0 && | ||
696 | + ci->i_wr_ref == 0 && | ||
697 | + ci->i_dirty_caps == 0 && | ||
698 | + ci->i_flushing_caps == 0) { | ||
699 | + ci->i_head_snapc = NULL; | ||
700 | + } else { | ||
701 | ci->i_head_snapc = ceph_get_snap_context(new_snapc); | ||
702 | dout(" new snapc is %p\n", new_snapc); | ||
703 | } | ||
704 | diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c | ||
705 | index a8a2fc9ae056..786f67bee43a 100644 | ||
706 | --- a/fs/cifs/inode.c | ||
707 | +++ b/fs/cifs/inode.c | ||
708 | @@ -1722,6 +1722,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, | ||
709 | if (rc == 0 || rc != -EBUSY) | ||
710 | goto do_rename_exit; | ||
711 | |||
712 | + /* Don't fall back to using SMB on SMB 2+ mount */ | ||
713 | + if (server->vals->protocol_id != 0) | ||
714 | + goto do_rename_exit; | ||
715 | + | ||
716 | /* open-file renames don't work across directories */ | ||
717 | if (to_dentry->d_parent != from_dentry->d_parent) | ||
718 | goto do_rename_exit; | ||
719 | diff --git a/fs/nfs/super.c b/fs/nfs/super.c | ||
720 | index 659ad12e33ba..42c31587a936 100644 | ||
721 | --- a/fs/nfs/super.c | ||
722 | +++ b/fs/nfs/super.c | ||
723 | @@ -2047,7 +2047,8 @@ static int nfs23_validate_mount_data(void *options, | ||
724 | memcpy(sap, &data->addr, sizeof(data->addr)); | ||
725 | args->nfs_server.addrlen = sizeof(data->addr); | ||
726 | args->nfs_server.port = ntohs(data->addr.sin_port); | ||
727 | - if (!nfs_verify_server_address(sap)) | ||
728 | + if (sap->sa_family != AF_INET || | ||
729 | + !nfs_verify_server_address(sap)) | ||
730 | goto out_no_address; | ||
731 | |||
732 | if (!(data->flags & NFS_MOUNT_TCP)) | ||
733 | diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c | ||
734 | index 3069cd46ea66..8d842282111b 100644 | ||
735 | --- a/fs/nfsd/nfs4callback.c | ||
736 | +++ b/fs/nfsd/nfs4callback.c | ||
737 | @@ -934,8 +934,9 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) | ||
738 | cb->cb_seq_status = 1; | ||
739 | cb->cb_status = 0; | ||
740 | if (minorversion) { | ||
741 | - if (!nfsd41_cb_get_slot(clp, task)) | ||
742 | + if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task)) | ||
743 | return; | ||
744 | + cb->cb_holds_slot = true; | ||
745 | } | ||
746 | rpc_call_start(task); | ||
747 | } | ||
748 | @@ -962,6 +963,9 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback | ||
749 | return true; | ||
750 | } | ||
751 | |||
752 | + if (!cb->cb_holds_slot) | ||
753 | + goto need_restart; | ||
754 | + | ||
755 | switch (cb->cb_seq_status) { | ||
756 | case 0: | ||
757 | /* | ||
758 | @@ -999,6 +1003,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback | ||
759 | cb->cb_seq_status); | ||
760 | } | ||
761 | |||
762 | + cb->cb_holds_slot = false; | ||
763 | clear_bit(0, &clp->cl_cb_slot_busy); | ||
764 | rpc_wake_up_next(&clp->cl_cb_waitq); | ||
765 | dprintk("%s: freed slot, new seqid=%d\n", __func__, | ||
766 | @@ -1206,6 +1211,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, | ||
767 | cb->cb_seq_status = 1; | ||
768 | cb->cb_status = 0; | ||
769 | cb->cb_need_restart = false; | ||
770 | + cb->cb_holds_slot = false; | ||
771 | } | ||
772 | |||
773 | void nfsd4_run_cb(struct nfsd4_callback *cb) | ||
774 | diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h | ||
775 | index 86aa92d200e1..133d8bf62a5c 100644 | ||
776 | --- a/fs/nfsd/state.h | ||
777 | +++ b/fs/nfsd/state.h | ||
778 | @@ -69,6 +69,7 @@ struct nfsd4_callback { | ||
779 | int cb_seq_status; | ||
780 | int cb_status; | ||
781 | bool cb_need_restart; | ||
782 | + bool cb_holds_slot; | ||
783 | }; | ||
784 | |||
785 | struct nfsd4_callback_ops { | ||
786 | diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c | ||
787 | index 6f30cf8ef7a1..5b32c054df71 100644 | ||
788 | --- a/fs/proc/proc_sysctl.c | ||
789 | +++ b/fs/proc/proc_sysctl.c | ||
790 | @@ -1604,9 +1604,11 @@ static void drop_sysctl_table(struct ctl_table_header *header) | ||
791 | if (--header->nreg) | ||
792 | return; | ||
793 | |||
794 | - if (parent) | ||
795 | + if (parent) { | ||
796 | put_links(header); | ||
797 | - start_unregistering(header); | ||
798 | + start_unregistering(header); | ||
799 | + } | ||
800 | + | ||
801 | if (!--header->count) | ||
802 | kfree_rcu(header, rcu); | ||
803 | |||
804 | diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h | ||
805 | index a3812e9c8fee..c2c724abde57 100644 | ||
806 | --- a/include/net/inet_frag.h | ||
807 | +++ b/include/net/inet_frag.h | ||
808 | @@ -76,8 +76,8 @@ struct inet_frag_queue { | ||
809 | struct timer_list timer; | ||
810 | spinlock_t lock; | ||
811 | atomic_t refcnt; | ||
812 | - struct sk_buff *fragments; /* Used in IPv6. */ | ||
813 | - struct rb_root rb_fragments; /* Used in IPv4. */ | ||
814 | + struct sk_buff *fragments; /* used in 6lowpan IPv6. */ | ||
815 | + struct rb_root rb_fragments; /* Used in IPv4/IPv6. */ | ||
816 | struct sk_buff *fragments_tail; | ||
817 | struct sk_buff *last_run_head; | ||
818 | ktime_t stamp; | ||
819 | @@ -152,4 +152,16 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, long val) | ||
820 | |||
821 | extern const u8 ip_frag_ecn_table[16]; | ||
822 | |||
823 | +/* Return values of inet_frag_queue_insert() */ | ||
824 | +#define IPFRAG_OK 0 | ||
825 | +#define IPFRAG_DUP 1 | ||
826 | +#define IPFRAG_OVERLAP 2 | ||
827 | +int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, | ||
828 | + int offset, int end); | ||
829 | +void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, | ||
830 | + struct sk_buff *parent); | ||
831 | +void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, | ||
832 | + void *reasm_data); | ||
833 | +struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q); | ||
834 | + | ||
835 | #endif | ||
836 | diff --git a/include/net/ipv6.h b/include/net/ipv6.h | ||
837 | index 7cb100d25bb5..168009eef5e4 100644 | ||
838 | --- a/include/net/ipv6.h | ||
839 | +++ b/include/net/ipv6.h | ||
840 | @@ -511,35 +511,6 @@ static inline bool ipv6_prefix_equal(const struct in6_addr *addr1, | ||
841 | } | ||
842 | #endif | ||
843 | |||
844 | -struct inet_frag_queue; | ||
845 | - | ||
846 | -enum ip6_defrag_users { | ||
847 | - IP6_DEFRAG_LOCAL_DELIVER, | ||
848 | - IP6_DEFRAG_CONNTRACK_IN, | ||
849 | - __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX, | ||
850 | - IP6_DEFRAG_CONNTRACK_OUT, | ||
851 | - __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX, | ||
852 | - IP6_DEFRAG_CONNTRACK_BRIDGE_IN, | ||
853 | - __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, | ||
854 | -}; | ||
855 | - | ||
856 | -void ip6_frag_init(struct inet_frag_queue *q, const void *a); | ||
857 | -extern const struct rhashtable_params ip6_rhash_params; | ||
858 | - | ||
859 | -/* | ||
860 | - * Equivalent of ipv4 struct ip | ||
861 | - */ | ||
862 | -struct frag_queue { | ||
863 | - struct inet_frag_queue q; | ||
864 | - | ||
865 | - int iif; | ||
866 | - unsigned int csum; | ||
867 | - __u16 nhoffset; | ||
868 | - u8 ecn; | ||
869 | -}; | ||
870 | - | ||
871 | -void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq); | ||
872 | - | ||
873 | static inline bool ipv6_addr_any(const struct in6_addr *a) | ||
874 | { | ||
875 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 | ||
876 | diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h | ||
877 | new file mode 100644 | ||
878 | index 000000000000..28aa9b30aece | ||
879 | --- /dev/null | ||
880 | +++ b/include/net/ipv6_frag.h | ||
881 | @@ -0,0 +1,111 @@ | ||
882 | +/* SPDX-License-Identifier: GPL-2.0 */ | ||
883 | +#ifndef _IPV6_FRAG_H | ||
884 | +#define _IPV6_FRAG_H | ||
885 | +#include <linux/kernel.h> | ||
886 | +#include <net/addrconf.h> | ||
887 | +#include <net/ipv6.h> | ||
888 | +#include <net/inet_frag.h> | ||
889 | + | ||
890 | +enum ip6_defrag_users { | ||
891 | + IP6_DEFRAG_LOCAL_DELIVER, | ||
892 | + IP6_DEFRAG_CONNTRACK_IN, | ||
893 | + __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX, | ||
894 | + IP6_DEFRAG_CONNTRACK_OUT, | ||
895 | + __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX, | ||
896 | + IP6_DEFRAG_CONNTRACK_BRIDGE_IN, | ||
897 | + __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, | ||
898 | +}; | ||
899 | + | ||
900 | +/* | ||
901 | + * Equivalent of ipv4 struct ipq | ||
902 | + */ | ||
903 | +struct frag_queue { | ||
904 | + struct inet_frag_queue q; | ||
905 | + | ||
906 | + int iif; | ||
907 | + __u16 nhoffset; | ||
908 | + u8 ecn; | ||
909 | +}; | ||
910 | + | ||
911 | +#if IS_ENABLED(CONFIG_IPV6) | ||
912 | +static inline void ip6frag_init(struct inet_frag_queue *q, const void *a) | ||
913 | +{ | ||
914 | + struct frag_queue *fq = container_of(q, struct frag_queue, q); | ||
915 | + const struct frag_v6_compare_key *key = a; | ||
916 | + | ||
917 | + q->key.v6 = *key; | ||
918 | + fq->ecn = 0; | ||
919 | +} | ||
920 | + | ||
921 | +static inline u32 ip6frag_key_hashfn(const void *data, u32 len, u32 seed) | ||
922 | +{ | ||
923 | + return jhash2(data, | ||
924 | + sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); | ||
925 | +} | ||
926 | + | ||
927 | +static inline u32 ip6frag_obj_hashfn(const void *data, u32 len, u32 seed) | ||
928 | +{ | ||
929 | + const struct inet_frag_queue *fq = data; | ||
930 | + | ||
931 | + return jhash2((const u32 *)&fq->key.v6, | ||
932 | + sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); | ||
933 | +} | ||
934 | + | ||
935 | +static inline int | ||
936 | +ip6frag_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) | ||
937 | +{ | ||
938 | + const struct frag_v6_compare_key *key = arg->key; | ||
939 | + const struct inet_frag_queue *fq = ptr; | ||
940 | + | ||
941 | + return !!memcmp(&fq->key, key, sizeof(*key)); | ||
942 | +} | ||
943 | + | ||
944 | +static inline void | ||
945 | +ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) | ||
946 | +{ | ||
947 | + struct net_device *dev = NULL; | ||
948 | + struct sk_buff *head; | ||
949 | + | ||
950 | + rcu_read_lock(); | ||
951 | + spin_lock(&fq->q.lock); | ||
952 | + | ||
953 | + if (fq->q.flags & INET_FRAG_COMPLETE) | ||
954 | + goto out; | ||
955 | + | ||
956 | + inet_frag_kill(&fq->q); | ||
957 | + | ||
958 | + dev = dev_get_by_index_rcu(net, fq->iif); | ||
959 | + if (!dev) | ||
960 | + goto out; | ||
961 | + | ||
962 | + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); | ||
963 | + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); | ||
964 | + | ||
965 | + /* Don't send error if the first segment did not arrive. */ | ||
966 | + if (!(fq->q.flags & INET_FRAG_FIRST_IN)) | ||
967 | + goto out; | ||
968 | + | ||
969 | + /* sk_buff::dev and sk_buff::rbnode are unionized. So we | ||
970 | + * pull the head out of the tree in order to be able to | ||
971 | + * deal with head->dev. | ||
972 | + */ | ||
973 | + head = inet_frag_pull_head(&fq->q); | ||
974 | + if (!head) | ||
975 | + goto out; | ||
976 | + | ||
977 | + head->dev = dev; | ||
978 | + skb_get(head); | ||
979 | + spin_unlock(&fq->q.lock); | ||
980 | + | ||
981 | + icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); | ||
982 | + kfree_skb(head); | ||
983 | + goto out_rcu_unlock; | ||
984 | + | ||
985 | +out: | ||
986 | + spin_unlock(&fq->q.lock); | ||
987 | +out_rcu_unlock: | ||
988 | + rcu_read_unlock(); | ||
989 | + inet_frag_put(&fq->q); | ||
990 | +} | ||
991 | +#endif | ||
992 | +#endif | ||
993 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | ||
994 | index 4b1e0669740c..f0c9b6925687 100644 | ||
995 | --- a/kernel/sched/fair.c | ||
996 | +++ b/kernel/sched/fair.c | ||
997 | @@ -1925,6 +1925,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period) | ||
998 | if (p->last_task_numa_placement) { | ||
999 | delta = runtime - p->last_sum_exec_runtime; | ||
1000 | *period = now - p->last_task_numa_placement; | ||
1001 | + | ||
1002 | + /* Avoid time going backwards, prevent potential divide error: */ | ||
1003 | + if (unlikely((s64)*period < 0)) | ||
1004 | + *period = 0; | ||
1005 | } else { | ||
1006 | delta = p->se.avg.load_sum / p->se.load.weight; | ||
1007 | *period = LOAD_AVG_MAX; | ||
1008 | diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c | ||
1009 | index 5473dcaaca8d..2cfe11e1190b 100644 | ||
1010 | --- a/kernel/trace/ring_buffer.c | ||
1011 | +++ b/kernel/trace/ring_buffer.c | ||
1012 | @@ -701,7 +701,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) | ||
1013 | |||
1014 | preempt_disable_notrace(); | ||
1015 | time = rb_time_stamp(buffer); | ||
1016 | - preempt_enable_no_resched_notrace(); | ||
1017 | + preempt_enable_notrace(); | ||
1018 | |||
1019 | return time; | ||
1020 | } | ||
1021 | diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c | ||
1022 | index d4773939c054..a2d8bd68c16e 100644 | ||
1023 | --- a/kernel/trace/trace.c | ||
1024 | +++ b/kernel/trace/trace.c | ||
1025 | @@ -500,8 +500,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, | ||
1026 | * not modified. | ||
1027 | */ | ||
1028 | pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); | ||
1029 | - if (!pid_list) | ||
1030 | + if (!pid_list) { | ||
1031 | + trace_parser_put(&parser); | ||
1032 | return -ENOMEM; | ||
1033 | + } | ||
1034 | |||
1035 | pid_list->pid_max = READ_ONCE(pid_max); | ||
1036 | |||
1037 | @@ -511,6 +513,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, | ||
1038 | |||
1039 | pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); | ||
1040 | if (!pid_list->pids) { | ||
1041 | + trace_parser_put(&parser); | ||
1042 | kfree(pid_list); | ||
1043 | return -ENOMEM; | ||
1044 | } | ||
1045 | diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c | ||
1046 | index c7e5aaf2eeb8..142ccaae9c7b 100644 | ||
1047 | --- a/net/bridge/netfilter/ebtables.c | ||
1048 | +++ b/net/bridge/netfilter/ebtables.c | ||
1049 | @@ -2056,7 +2056,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, | ||
1050 | if (match_kern) | ||
1051 | match_kern->match_size = ret; | ||
1052 | |||
1053 | - if (WARN_ON(type == EBT_COMPAT_TARGET && size_left)) | ||
1054 | + /* rule should have no remaining data after target */ | ||
1055 | + if (type == EBT_COMPAT_TARGET && size_left) | ||
1056 | return -EINVAL; | ||
1057 | |||
1058 | match32 = (struct compat_ebt_entry_mwt *) buf; | ||
1059 | diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c | ||
1060 | index aab1e2dfdfca..c01df341b5f6 100644 | ||
1061 | --- a/net/ieee802154/6lowpan/reassembly.c | ||
1062 | +++ b/net/ieee802154/6lowpan/reassembly.c | ||
1063 | @@ -25,7 +25,7 @@ | ||
1064 | |||
1065 | #include <net/ieee802154_netdev.h> | ||
1066 | #include <net/6lowpan.h> | ||
1067 | -#include <net/ipv6.h> | ||
1068 | +#include <net/ipv6_frag.h> | ||
1069 | #include <net/inet_frag.h> | ||
1070 | |||
1071 | #include "6lowpan_i.h" | ||
1072 | diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c | ||
1073 | index 0fb49dedc9fb..2325cd3454a6 100644 | ||
1074 | --- a/net/ipv4/inet_fragment.c | ||
1075 | +++ b/net/ipv4/inet_fragment.c | ||
1076 | @@ -24,6 +24,62 @@ | ||
1077 | #include <net/sock.h> | ||
1078 | #include <net/inet_frag.h> | ||
1079 | #include <net/inet_ecn.h> | ||
1080 | +#include <net/ip.h> | ||
1081 | +#include <net/ipv6.h> | ||
1082 | + | ||
1083 | +/* Use skb->cb to track consecutive/adjacent fragments coming at | ||
1084 | + * the end of the queue. Nodes in the rb-tree queue will | ||
1085 | + * contain "runs" of one or more adjacent fragments. | ||
1086 | + * | ||
1087 | + * Invariants: | ||
1088 | + * - next_frag is NULL at the tail of a "run"; | ||
1089 | + * - the head of a "run" has the sum of all fragment lengths in frag_run_len. | ||
1090 | + */ | ||
1091 | +struct ipfrag_skb_cb { | ||
1092 | + union { | ||
1093 | + struct inet_skb_parm h4; | ||
1094 | + struct inet6_skb_parm h6; | ||
1095 | + }; | ||
1096 | + struct sk_buff *next_frag; | ||
1097 | + int frag_run_len; | ||
1098 | +}; | ||
1099 | + | ||
1100 | +#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) | ||
1101 | + | ||
1102 | +static void fragcb_clear(struct sk_buff *skb) | ||
1103 | +{ | ||
1104 | + RB_CLEAR_NODE(&skb->rbnode); | ||
1105 | + FRAG_CB(skb)->next_frag = NULL; | ||
1106 | + FRAG_CB(skb)->frag_run_len = skb->len; | ||
1107 | +} | ||
1108 | + | ||
1109 | +/* Append skb to the last "run". */ | ||
1110 | +static void fragrun_append_to_last(struct inet_frag_queue *q, | ||
1111 | + struct sk_buff *skb) | ||
1112 | +{ | ||
1113 | + fragcb_clear(skb); | ||
1114 | + | ||
1115 | + FRAG_CB(q->last_run_head)->frag_run_len += skb->len; | ||
1116 | + FRAG_CB(q->fragments_tail)->next_frag = skb; | ||
1117 | + q->fragments_tail = skb; | ||
1118 | +} | ||
1119 | + | ||
1120 | +/* Create a new "run" with the skb. */ | ||
1121 | +static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb) | ||
1122 | +{ | ||
1123 | + BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); | ||
1124 | + fragcb_clear(skb); | ||
1125 | + | ||
1126 | + if (q->last_run_head) | ||
1127 | + rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, | ||
1128 | + &q->last_run_head->rbnode.rb_right); | ||
1129 | + else | ||
1130 | + rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); | ||
1131 | + rb_insert_color(&skb->rbnode, &q->rb_fragments); | ||
1132 | + | ||
1133 | + q->fragments_tail = skb; | ||
1134 | + q->last_run_head = skb; | ||
1135 | +} | ||
1136 | |||
1137 | /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements | ||
1138 | * Value : 0xff if frame should be dropped. | ||
1139 | @@ -122,6 +178,28 @@ static void inet_frag_destroy_rcu(struct rcu_head *head) | ||
1140 | kmem_cache_free(f->frags_cachep, q); | ||
1141 | } | ||
1142 | |||
1143 | +unsigned int inet_frag_rbtree_purge(struct rb_root *root) | ||
1144 | +{ | ||
1145 | + struct rb_node *p = rb_first(root); | ||
1146 | + unsigned int sum = 0; | ||
1147 | + | ||
1148 | + while (p) { | ||
1149 | + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); | ||
1150 | + | ||
1151 | + p = rb_next(p); | ||
1152 | + rb_erase(&skb->rbnode, root); | ||
1153 | + while (skb) { | ||
1154 | + struct sk_buff *next = FRAG_CB(skb)->next_frag; | ||
1155 | + | ||
1156 | + sum += skb->truesize; | ||
1157 | + kfree_skb(skb); | ||
1158 | + skb = next; | ||
1159 | + } | ||
1160 | + } | ||
1161 | + return sum; | ||
1162 | +} | ||
1163 | +EXPORT_SYMBOL(inet_frag_rbtree_purge); | ||
1164 | + | ||
1165 | void inet_frag_destroy(struct inet_frag_queue *q) | ||
1166 | { | ||
1167 | struct sk_buff *fp; | ||
1168 | @@ -223,3 +301,218 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) | ||
1169 | return fq; | ||
1170 | } | ||
1171 | EXPORT_SYMBOL(inet_frag_find); | ||
1172 | + | ||
1173 | +int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, | ||
1174 | + int offset, int end) | ||
1175 | +{ | ||
1176 | + struct sk_buff *last = q->fragments_tail; | ||
1177 | + | ||
1178 | + /* RFC5722, Section 4, amended by Errata ID : 3089 | ||
1179 | + * When reassembling an IPv6 datagram, if | ||
1180 | + * one or more its constituent fragments is determined to be an | ||
1181 | + * overlapping fragment, the entire datagram (and any constituent | ||
1182 | + * fragments) MUST be silently discarded. | ||
1183 | + * | ||
1184 | + * Duplicates, however, should be ignored (i.e. skb dropped, but the | ||
1185 | + * queue/fragments kept for later reassembly). | ||
1186 | + */ | ||
1187 | + if (!last) | ||
1188 | + fragrun_create(q, skb); /* First fragment. */ | ||
1189 | + else if (last->ip_defrag_offset + last->len < end) { | ||
1190 | + /* This is the common case: skb goes to the end. */ | ||
1191 | + /* Detect and discard overlaps. */ | ||
1192 | + if (offset < last->ip_defrag_offset + last->len) | ||
1193 | + return IPFRAG_OVERLAP; | ||
1194 | + if (offset == last->ip_defrag_offset + last->len) | ||
1195 | + fragrun_append_to_last(q, skb); | ||
1196 | + else | ||
1197 | + fragrun_create(q, skb); | ||
1198 | + } else { | ||
1199 | + /* Binary search. Note that skb can become the first fragment, | ||
1200 | + * but not the last (covered above). | ||
1201 | + */ | ||
1202 | + struct rb_node **rbn, *parent; | ||
1203 | + | ||
1204 | + rbn = &q->rb_fragments.rb_node; | ||
1205 | + do { | ||
1206 | + struct sk_buff *curr; | ||
1207 | + int curr_run_end; | ||
1208 | + | ||
1209 | + parent = *rbn; | ||
1210 | + curr = rb_to_skb(parent); | ||
1211 | + curr_run_end = curr->ip_defrag_offset + | ||
1212 | + FRAG_CB(curr)->frag_run_len; | ||
1213 | + if (end <= curr->ip_defrag_offset) | ||
1214 | + rbn = &parent->rb_left; | ||
1215 | + else if (offset >= curr_run_end) | ||
1216 | + rbn = &parent->rb_right; | ||
1217 | + else if (offset >= curr->ip_defrag_offset && | ||
1218 | + end <= curr_run_end) | ||
1219 | + return IPFRAG_DUP; | ||
1220 | + else | ||
1221 | + return IPFRAG_OVERLAP; | ||
1222 | + } while (*rbn); | ||
1223 | + /* Here we have parent properly set, and rbn pointing to | ||
1224 | + * one of its NULL left/right children. Insert skb. | ||
1225 | + */ | ||
1226 | + fragcb_clear(skb); | ||
1227 | + rb_link_node(&skb->rbnode, parent, rbn); | ||
1228 | + rb_insert_color(&skb->rbnode, &q->rb_fragments); | ||
1229 | + } | ||
1230 | + | ||
1231 | + skb->ip_defrag_offset = offset; | ||
1232 | + | ||
1233 | + return IPFRAG_OK; | ||
1234 | +} | ||
1235 | +EXPORT_SYMBOL(inet_frag_queue_insert); | ||
1236 | + | ||
1237 | +void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, | ||
1238 | + struct sk_buff *parent) | ||
1239 | +{ | ||
1240 | + struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); | ||
1241 | + struct sk_buff **nextp; | ||
1242 | + int delta; | ||
1243 | + | ||
1244 | + if (head != skb) { | ||
1245 | + fp = skb_clone(skb, GFP_ATOMIC); | ||
1246 | + if (!fp) | ||
1247 | + return NULL; | ||
1248 | + FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; | ||
1249 | + if (RB_EMPTY_NODE(&skb->rbnode)) | ||
1250 | + FRAG_CB(parent)->next_frag = fp; | ||
1251 | + else | ||
1252 | + rb_replace_node(&skb->rbnode, &fp->rbnode, | ||
1253 | + &q->rb_fragments); | ||
1254 | + if (q->fragments_tail == skb) | ||
1255 | + q->fragments_tail = fp; | ||
1256 | + skb_morph(skb, head); | ||
1257 | + FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; | ||
1258 | + rb_replace_node(&head->rbnode, &skb->rbnode, | ||
1259 | + &q->rb_fragments); | ||
1260 | + consume_skb(head); | ||
1261 | + head = skb; | ||
1262 | + } | ||
1263 | + WARN_ON(head->ip_defrag_offset != 0); | ||
1264 | + | ||
1265 | + delta = -head->truesize; | ||
1266 | + | ||
1267 | + /* Head of list must not be cloned. */ | ||
1268 | + if (skb_unclone(head, GFP_ATOMIC)) | ||
1269 | + return NULL; | ||
1270 | + | ||
1271 | + delta += head->truesize; | ||
1272 | + if (delta) | ||
1273 | + add_frag_mem_limit(q->net, delta); | ||
1274 | + | ||
1275 | + /* If the first fragment is fragmented itself, we split | ||
1276 | + * it to two chunks: the first with data and paged part | ||
1277 | + * and the second, holding only fragments. | ||
1278 | + */ | ||
1279 | + if (skb_has_frag_list(head)) { | ||
1280 | + struct sk_buff *clone; | ||
1281 | + int i, plen = 0; | ||
1282 | + | ||
1283 | + clone = alloc_skb(0, GFP_ATOMIC); | ||
1284 | + if (!clone) | ||
1285 | + return NULL; | ||
1286 | + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; | ||
1287 | + skb_frag_list_init(head); | ||
1288 | + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) | ||
1289 | + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); | ||
1290 | + clone->data_len = head->data_len - plen; | ||
1291 | + clone->len = clone->data_len; | ||
1292 | + head->truesize += clone->truesize; | ||
1293 | + clone->csum = 0; | ||
1294 | + clone->ip_summed = head->ip_summed; | ||
1295 | + add_frag_mem_limit(q->net, clone->truesize); | ||
1296 | + skb_shinfo(head)->frag_list = clone; | ||
1297 | + nextp = &clone->next; | ||
1298 | + } else { | ||
1299 | + nextp = &skb_shinfo(head)->frag_list; | ||
1300 | + } | ||
1301 | + | ||
1302 | + return nextp; | ||
1303 | +} | ||
1304 | +EXPORT_SYMBOL(inet_frag_reasm_prepare); | ||
1305 | + | ||
1306 | +void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, | ||
1307 | + void *reasm_data) | ||
1308 | +{ | ||
1309 | + struct sk_buff **nextp = (struct sk_buff **)reasm_data; | ||
1310 | + struct rb_node *rbn; | ||
1311 | + struct sk_buff *fp; | ||
1312 | + | ||
1313 | + skb_push(head, head->data - skb_network_header(head)); | ||
1314 | + | ||
1315 | + /* Traverse the tree in order, to build frag_list. */ | ||
1316 | + fp = FRAG_CB(head)->next_frag; | ||
1317 | + rbn = rb_next(&head->rbnode); | ||
1318 | + rb_erase(&head->rbnode, &q->rb_fragments); | ||
1319 | + while (rbn || fp) { | ||
1320 | + /* fp points to the next sk_buff in the current run; | ||
1321 | + * rbn points to the next run. | ||
1322 | + */ | ||
1323 | + /* Go through the current run. */ | ||
1324 | + while (fp) { | ||
1325 | + *nextp = fp; | ||
1326 | + nextp = &fp->next; | ||
1327 | + fp->prev = NULL; | ||
1328 | + memset(&fp->rbnode, 0, sizeof(fp->rbnode)); | ||
1329 | + fp->sk = NULL; | ||
1330 | + head->data_len += fp->len; | ||
1331 | + head->len += fp->len; | ||
1332 | + if (head->ip_summed != fp->ip_summed) | ||
1333 | + head->ip_summed = CHECKSUM_NONE; | ||
1334 | + else if (head->ip_summed == CHECKSUM_COMPLETE) | ||
1335 | + head->csum = csum_add(head->csum, fp->csum); | ||
1336 | + head->truesize += fp->truesize; | ||
1337 | + fp = FRAG_CB(fp)->next_frag; | ||
1338 | + } | ||
1339 | + /* Move to the next run. */ | ||
1340 | + if (rbn) { | ||
1341 | + struct rb_node *rbnext = rb_next(rbn); | ||
1342 | + | ||
1343 | + fp = rb_to_skb(rbn); | ||
1344 | + rb_erase(rbn, &q->rb_fragments); | ||
1345 | + rbn = rbnext; | ||
1346 | + } | ||
1347 | + } | ||
1348 | + sub_frag_mem_limit(q->net, head->truesize); | ||
1349 | + | ||
1350 | + *nextp = NULL; | ||
1351 | + head->next = NULL; | ||
1352 | + head->prev = NULL; | ||
1353 | + head->tstamp = q->stamp; | ||
1354 | +} | ||
1355 | +EXPORT_SYMBOL(inet_frag_reasm_finish); | ||
1356 | + | ||
1357 | +struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q) | ||
1358 | +{ | ||
1359 | + struct sk_buff *head; | ||
1360 | + | ||
1361 | + if (q->fragments) { | ||
1362 | + head = q->fragments; | ||
1363 | + q->fragments = head->next; | ||
1364 | + } else { | ||
1365 | + struct sk_buff *skb; | ||
1366 | + | ||
1367 | + head = skb_rb_first(&q->rb_fragments); | ||
1368 | + if (!head) | ||
1369 | + return NULL; | ||
1370 | + skb = FRAG_CB(head)->next_frag; | ||
1371 | + if (skb) | ||
1372 | + rb_replace_node(&head->rbnode, &skb->rbnode, | ||
1373 | + &q->rb_fragments); | ||
1374 | + else | ||
1375 | + rb_erase(&head->rbnode, &q->rb_fragments); | ||
1376 | + memset(&head->rbnode, 0, sizeof(head->rbnode)); | ||
1377 | + barrier(); | ||
1378 | + } | ||
1379 | + if (head == q->fragments_tail) | ||
1380 | + q->fragments_tail = NULL; | ||
1381 | + | ||
1382 | + sub_frag_mem_limit(q->net, head->truesize); | ||
1383 | + | ||
1384 | + return head; | ||
1385 | +} | ||
1386 | +EXPORT_SYMBOL(inet_frag_pull_head); | ||
1387 | diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c | ||
1388 | index c7334d1e392a..6e9ba9dfb5b2 100644 | ||
1389 | --- a/net/ipv4/ip_fragment.c | ||
1390 | +++ b/net/ipv4/ip_fragment.c | ||
1391 | @@ -56,57 +56,6 @@ | ||
1392 | */ | ||
1393 | static const char ip_frag_cache_name[] = "ip4-frags"; | ||
1394 | |||
1395 | -/* Use skb->cb to track consecutive/adjacent fragments coming at | ||
1396 | - * the end of the queue. Nodes in the rb-tree queue will | ||
1397 | - * contain "runs" of one or more adjacent fragments. | ||
1398 | - * | ||
1399 | - * Invariants: | ||
1400 | - * - next_frag is NULL at the tail of a "run"; | ||
1401 | - * - the head of a "run" has the sum of all fragment lengths in frag_run_len. | ||
1402 | - */ | ||
1403 | -struct ipfrag_skb_cb { | ||
1404 | - struct inet_skb_parm h; | ||
1405 | - struct sk_buff *next_frag; | ||
1406 | - int frag_run_len; | ||
1407 | -}; | ||
1408 | - | ||
1409 | -#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) | ||
1410 | - | ||
1411 | -static void ip4_frag_init_run(struct sk_buff *skb) | ||
1412 | -{ | ||
1413 | - BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); | ||
1414 | - | ||
1415 | - FRAG_CB(skb)->next_frag = NULL; | ||
1416 | - FRAG_CB(skb)->frag_run_len = skb->len; | ||
1417 | -} | ||
1418 | - | ||
1419 | -/* Append skb to the last "run". */ | ||
1420 | -static void ip4_frag_append_to_last_run(struct inet_frag_queue *q, | ||
1421 | - struct sk_buff *skb) | ||
1422 | -{ | ||
1423 | - RB_CLEAR_NODE(&skb->rbnode); | ||
1424 | - FRAG_CB(skb)->next_frag = NULL; | ||
1425 | - | ||
1426 | - FRAG_CB(q->last_run_head)->frag_run_len += skb->len; | ||
1427 | - FRAG_CB(q->fragments_tail)->next_frag = skb; | ||
1428 | - q->fragments_tail = skb; | ||
1429 | -} | ||
1430 | - | ||
1431 | -/* Create a new "run" with the skb. */ | ||
1432 | -static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb) | ||
1433 | -{ | ||
1434 | - if (q->last_run_head) | ||
1435 | - rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, | ||
1436 | - &q->last_run_head->rbnode.rb_right); | ||
1437 | - else | ||
1438 | - rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); | ||
1439 | - rb_insert_color(&skb->rbnode, &q->rb_fragments); | ||
1440 | - | ||
1441 | - ip4_frag_init_run(skb); | ||
1442 | - q->fragments_tail = skb; | ||
1443 | - q->last_run_head = skb; | ||
1444 | -} | ||
1445 | - | ||
1446 | /* Describe an entry in the "incomplete datagrams" queue. */ | ||
1447 | struct ipq { | ||
1448 | struct inet_frag_queue q; | ||
1449 | @@ -210,27 +159,9 @@ static void ip_expire(unsigned long arg) | ||
1450 | * pull the head out of the tree in order to be able to | ||
1451 | * deal with head->dev. | ||
1452 | */ | ||
1453 | - if (qp->q.fragments) { | ||
1454 | - head = qp->q.fragments; | ||
1455 | - qp->q.fragments = head->next; | ||
1456 | - } else { | ||
1457 | - head = skb_rb_first(&qp->q.rb_fragments); | ||
1458 | - if (!head) | ||
1459 | - goto out; | ||
1460 | - if (FRAG_CB(head)->next_frag) | ||
1461 | - rb_replace_node(&head->rbnode, | ||
1462 | - &FRAG_CB(head)->next_frag->rbnode, | ||
1463 | - &qp->q.rb_fragments); | ||
1464 | - else | ||
1465 | - rb_erase(&head->rbnode, &qp->q.rb_fragments); | ||
1466 | - memset(&head->rbnode, 0, sizeof(head->rbnode)); | ||
1467 | - barrier(); | ||
1468 | - } | ||
1469 | - if (head == qp->q.fragments_tail) | ||
1470 | - qp->q.fragments_tail = NULL; | ||
1471 | - | ||
1472 | - sub_frag_mem_limit(qp->q.net, head->truesize); | ||
1473 | - | ||
1474 | + head = inet_frag_pull_head(&qp->q); | ||
1475 | + if (!head) | ||
1476 | + goto out; | ||
1477 | head->dev = dev_get_by_index_rcu(net, qp->iif); | ||
1478 | if (!head->dev) | ||
1479 | goto out; | ||
1480 | @@ -343,12 +274,10 @@ static int ip_frag_reinit(struct ipq *qp) | ||
1481 | static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | ||
1482 | { | ||
1483 | struct net *net = container_of(qp->q.net, struct net, ipv4.frags); | ||
1484 | - struct rb_node **rbn, *parent; | ||
1485 | - struct sk_buff *skb1, *prev_tail; | ||
1486 | - int ihl, end, skb1_run_end; | ||
1487 | + int ihl, end, flags, offset; | ||
1488 | + struct sk_buff *prev_tail; | ||
1489 | struct net_device *dev; | ||
1490 | unsigned int fragsize; | ||
1491 | - int flags, offset; | ||
1492 | int err = -ENOENT; | ||
1493 | u8 ecn; | ||
1494 | |||
1495 | @@ -380,7 +309,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | ||
1496 | */ | ||
1497 | if (end < qp->q.len || | ||
1498 | ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len)) | ||
1499 | - goto err; | ||
1500 | + goto discard_qp; | ||
1501 | qp->q.flags |= INET_FRAG_LAST_IN; | ||
1502 | qp->q.len = end; | ||
1503 | } else { | ||
1504 | @@ -392,82 +321,33 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | ||
1505 | if (end > qp->q.len) { | ||
1506 | /* Some bits beyond end -> corruption. */ | ||
1507 | if (qp->q.flags & INET_FRAG_LAST_IN) | ||
1508 | - goto err; | ||
1509 | + goto discard_qp; | ||
1510 | qp->q.len = end; | ||
1511 | } | ||
1512 | } | ||
1513 | if (end == offset) | ||
1514 | - goto err; | ||
1515 | + goto discard_qp; | ||
1516 | |||
1517 | err = -ENOMEM; | ||
1518 | if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) | ||
1519 | - goto err; | ||
1520 | + goto discard_qp; | ||
1521 | |||
1522 | err = pskb_trim_rcsum(skb, end - offset); | ||
1523 | if (err) | ||
1524 | - goto err; | ||
1525 | + goto discard_qp; | ||
1526 | |||
1527 | /* Note : skb->rbnode and skb->dev share the same location. */ | ||
1528 | dev = skb->dev; | ||
1529 | /* Makes sure compiler wont do silly aliasing games */ | ||
1530 | barrier(); | ||
1531 | |||
1532 | - /* RFC5722, Section 4, amended by Errata ID : 3089 | ||
1533 | - * When reassembling an IPv6 datagram, if | ||
1534 | - * one or more its constituent fragments is determined to be an | ||
1535 | - * overlapping fragment, the entire datagram (and any constituent | ||
1536 | - * fragments) MUST be silently discarded. | ||
1537 | - * | ||
1538 | - * We do the same here for IPv4 (and increment an snmp counter) but | ||
1539 | - * we do not want to drop the whole queue in response to a duplicate | ||
1540 | - * fragment. | ||
1541 | - */ | ||
1542 | - | ||
1543 | - err = -EINVAL; | ||
1544 | - /* Find out where to put this fragment. */ | ||
1545 | prev_tail = qp->q.fragments_tail; | ||
1546 | - if (!prev_tail) | ||
1547 | - ip4_frag_create_run(&qp->q, skb); /* First fragment. */ | ||
1548 | - else if (prev_tail->ip_defrag_offset + prev_tail->len < end) { | ||
1549 | - /* This is the common case: skb goes to the end. */ | ||
1550 | - /* Detect and discard overlaps. */ | ||
1551 | - if (offset < prev_tail->ip_defrag_offset + prev_tail->len) | ||
1552 | - goto discard_qp; | ||
1553 | - if (offset == prev_tail->ip_defrag_offset + prev_tail->len) | ||
1554 | - ip4_frag_append_to_last_run(&qp->q, skb); | ||
1555 | - else | ||
1556 | - ip4_frag_create_run(&qp->q, skb); | ||
1557 | - } else { | ||
1558 | - /* Binary search. Note that skb can become the first fragment, | ||
1559 | - * but not the last (covered above). | ||
1560 | - */ | ||
1561 | - rbn = &qp->q.rb_fragments.rb_node; | ||
1562 | - do { | ||
1563 | - parent = *rbn; | ||
1564 | - skb1 = rb_to_skb(parent); | ||
1565 | - skb1_run_end = skb1->ip_defrag_offset + | ||
1566 | - FRAG_CB(skb1)->frag_run_len; | ||
1567 | - if (end <= skb1->ip_defrag_offset) | ||
1568 | - rbn = &parent->rb_left; | ||
1569 | - else if (offset >= skb1_run_end) | ||
1570 | - rbn = &parent->rb_right; | ||
1571 | - else if (offset >= skb1->ip_defrag_offset && | ||
1572 | - end <= skb1_run_end) | ||
1573 | - goto err; /* No new data, potential duplicate */ | ||
1574 | - else | ||
1575 | - goto discard_qp; /* Found an overlap */ | ||
1576 | - } while (*rbn); | ||
1577 | - /* Here we have parent properly set, and rbn pointing to | ||
1578 | - * one of its NULL left/right children. Insert skb. | ||
1579 | - */ | ||
1580 | - ip4_frag_init_run(skb); | ||
1581 | - rb_link_node(&skb->rbnode, parent, rbn); | ||
1582 | - rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); | ||
1583 | - } | ||
1584 | + err = inet_frag_queue_insert(&qp->q, skb, offset, end); | ||
1585 | + if (err) | ||
1586 | + goto insert_error; | ||
1587 | |||
1588 | if (dev) | ||
1589 | qp->iif = dev->ifindex; | ||
1590 | - skb->ip_defrag_offset = offset; | ||
1591 | |||
1592 | qp->q.stamp = skb->tstamp; | ||
1593 | qp->q.meat += skb->len; | ||
1594 | @@ -492,15 +372,24 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | ||
1595 | skb->_skb_refdst = 0UL; | ||
1596 | err = ip_frag_reasm(qp, skb, prev_tail, dev); | ||
1597 | skb->_skb_refdst = orefdst; | ||
1598 | + if (err) | ||
1599 | + inet_frag_kill(&qp->q); | ||
1600 | return err; | ||
1601 | } | ||
1602 | |||
1603 | skb_dst_drop(skb); | ||
1604 | return -EINPROGRESS; | ||
1605 | |||
1606 | +insert_error: | ||
1607 | + if (err == IPFRAG_DUP) { | ||
1608 | + kfree_skb(skb); | ||
1609 | + return -EINVAL; | ||
1610 | + } | ||
1611 | + err = -EINVAL; | ||
1612 | + __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); | ||
1613 | discard_qp: | ||
1614 | inet_frag_kill(&qp->q); | ||
1615 | - __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); | ||
1616 | + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); | ||
1617 | err: | ||
1618 | kfree_skb(skb); | ||
1619 | return err; | ||
1620 | @@ -512,12 +401,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, | ||
1621 | { | ||
1622 | struct net *net = container_of(qp->q.net, struct net, ipv4.frags); | ||
1623 | struct iphdr *iph; | ||
1624 | - struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments); | ||
1625 | - struct sk_buff **nextp; /* To build frag_list. */ | ||
1626 | - struct rb_node *rbn; | ||
1627 | - int len; | ||
1628 | - int ihlen; | ||
1629 | - int err; | ||
1630 | + void *reasm_data; | ||
1631 | + int len, err; | ||
1632 | u8 ecn; | ||
1633 | |||
1634 | ipq_kill(qp); | ||
1635 | @@ -527,111 +412,23 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, | ||
1636 | err = -EINVAL; | ||
1637 | goto out_fail; | ||
1638 | } | ||
1639 | - /* Make the one we just received the head. */ | ||
1640 | - if (head != skb) { | ||
1641 | - fp = skb_clone(skb, GFP_ATOMIC); | ||
1642 | - if (!fp) | ||
1643 | - goto out_nomem; | ||
1644 | - FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; | ||
1645 | - if (RB_EMPTY_NODE(&skb->rbnode)) | ||
1646 | - FRAG_CB(prev_tail)->next_frag = fp; | ||
1647 | - else | ||
1648 | - rb_replace_node(&skb->rbnode, &fp->rbnode, | ||
1649 | - &qp->q.rb_fragments); | ||
1650 | - if (qp->q.fragments_tail == skb) | ||
1651 | - qp->q.fragments_tail = fp; | ||
1652 | - skb_morph(skb, head); | ||
1653 | - FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; | ||
1654 | - rb_replace_node(&head->rbnode, &skb->rbnode, | ||
1655 | - &qp->q.rb_fragments); | ||
1656 | - consume_skb(head); | ||
1657 | - head = skb; | ||
1658 | - } | ||
1659 | - | ||
1660 | - WARN_ON(head->ip_defrag_offset != 0); | ||
1661 | |||
1662 | - /* Allocate a new buffer for the datagram. */ | ||
1663 | - ihlen = ip_hdrlen(head); | ||
1664 | - len = ihlen + qp->q.len; | ||
1665 | + /* Make the one we just received the head. */ | ||
1666 | + reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail); | ||
1667 | + if (!reasm_data) | ||
1668 | + goto out_nomem; | ||
1669 | |||
1670 | + len = ip_hdrlen(skb) + qp->q.len; | ||
1671 | err = -E2BIG; | ||
1672 | if (len > 65535) | ||
1673 | goto out_oversize; | ||
1674 | |||
1675 | - /* Head of list must not be cloned. */ | ||
1676 | - if (skb_unclone(head, GFP_ATOMIC)) | ||
1677 | - goto out_nomem; | ||
1678 | - | ||
1679 | - /* If the first fragment is fragmented itself, we split | ||
1680 | - * it to two chunks: the first with data and paged part | ||
1681 | - * and the second, holding only fragments. */ | ||
1682 | - if (skb_has_frag_list(head)) { | ||
1683 | - struct sk_buff *clone; | ||
1684 | - int i, plen = 0; | ||
1685 | - | ||
1686 | - clone = alloc_skb(0, GFP_ATOMIC); | ||
1687 | - if (!clone) | ||
1688 | - goto out_nomem; | ||
1689 | - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; | ||
1690 | - skb_frag_list_init(head); | ||
1691 | - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) | ||
1692 | - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); | ||
1693 | - clone->len = clone->data_len = head->data_len - plen; | ||
1694 | - head->truesize += clone->truesize; | ||
1695 | - clone->csum = 0; | ||
1696 | - clone->ip_summed = head->ip_summed; | ||
1697 | - add_frag_mem_limit(qp->q.net, clone->truesize); | ||
1698 | - skb_shinfo(head)->frag_list = clone; | ||
1699 | - nextp = &clone->next; | ||
1700 | - } else { | ||
1701 | - nextp = &skb_shinfo(head)->frag_list; | ||
1702 | - } | ||
1703 | + inet_frag_reasm_finish(&qp->q, skb, reasm_data); | ||
1704 | |||
1705 | - skb_push(head, head->data - skb_network_header(head)); | ||
1706 | + skb->dev = dev; | ||
1707 | + IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size); | ||
1708 | |||
1709 | - /* Traverse the tree in order, to build frag_list. */ | ||
1710 | - fp = FRAG_CB(head)->next_frag; | ||
1711 | - rbn = rb_next(&head->rbnode); | ||
1712 | - rb_erase(&head->rbnode, &qp->q.rb_fragments); | ||
1713 | - while (rbn || fp) { | ||
1714 | - /* fp points to the next sk_buff in the current run; | ||
1715 | - * rbn points to the next run. | ||
1716 | - */ | ||
1717 | - /* Go through the current run. */ | ||
1718 | - while (fp) { | ||
1719 | - *nextp = fp; | ||
1720 | - nextp = &fp->next; | ||
1721 | - fp->prev = NULL; | ||
1722 | - memset(&fp->rbnode, 0, sizeof(fp->rbnode)); | ||
1723 | - fp->sk = NULL; | ||
1724 | - head->data_len += fp->len; | ||
1725 | - head->len += fp->len; | ||
1726 | - if (head->ip_summed != fp->ip_summed) | ||
1727 | - head->ip_summed = CHECKSUM_NONE; | ||
1728 | - else if (head->ip_summed == CHECKSUM_COMPLETE) | ||
1729 | - head->csum = csum_add(head->csum, fp->csum); | ||
1730 | - head->truesize += fp->truesize; | ||
1731 | - fp = FRAG_CB(fp)->next_frag; | ||
1732 | - } | ||
1733 | - /* Move to the next run. */ | ||
1734 | - if (rbn) { | ||
1735 | - struct rb_node *rbnext = rb_next(rbn); | ||
1736 | - | ||
1737 | - fp = rb_to_skb(rbn); | ||
1738 | - rb_erase(rbn, &qp->q.rb_fragments); | ||
1739 | - rbn = rbnext; | ||
1740 | - } | ||
1741 | - } | ||
1742 | - sub_frag_mem_limit(qp->q.net, head->truesize); | ||
1743 | - | ||
1744 | - *nextp = NULL; | ||
1745 | - head->next = NULL; | ||
1746 | - head->prev = NULL; | ||
1747 | - head->dev = dev; | ||
1748 | - head->tstamp = qp->q.stamp; | ||
1749 | - IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size); | ||
1750 | - | ||
1751 | - iph = ip_hdr(head); | ||
1752 | + iph = ip_hdr(skb); | ||
1753 | iph->tot_len = htons(len); | ||
1754 | iph->tos |= ecn; | ||
1755 | |||
1756 | @@ -644,7 +441,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, | ||
1757 | * from one very small df-fragment and one large non-df frag. | ||
1758 | */ | ||
1759 | if (qp->max_df_size == qp->q.max_size) { | ||
1760 | - IPCB(head)->flags |= IPSKB_FRAG_PMTU; | ||
1761 | + IPCB(skb)->flags |= IPSKB_FRAG_PMTU; | ||
1762 | iph->frag_off = htons(IP_DF); | ||
1763 | } else { | ||
1764 | iph->frag_off = 0; | ||
1765 | @@ -742,28 +539,6 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) | ||
1766 | } | ||
1767 | EXPORT_SYMBOL(ip_check_defrag); | ||
1768 | |||
1769 | -unsigned int inet_frag_rbtree_purge(struct rb_root *root) | ||
1770 | -{ | ||
1771 | - struct rb_node *p = rb_first(root); | ||
1772 | - unsigned int sum = 0; | ||
1773 | - | ||
1774 | - while (p) { | ||
1775 | - struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); | ||
1776 | - | ||
1777 | - p = rb_next(p); | ||
1778 | - rb_erase(&skb->rbnode, root); | ||
1779 | - while (skb) { | ||
1780 | - struct sk_buff *next = FRAG_CB(skb)->next_frag; | ||
1781 | - | ||
1782 | - sum += skb->truesize; | ||
1783 | - kfree_skb(skb); | ||
1784 | - skb = next; | ||
1785 | - } | ||
1786 | - } | ||
1787 | - return sum; | ||
1788 | -} | ||
1789 | -EXPORT_SYMBOL(inet_frag_rbtree_purge); | ||
1790 | - | ||
1791 | #ifdef CONFIG_SYSCTL | ||
1792 | static int dist_min; | ||
1793 | |||
1794 | diff --git a/net/ipv4/route.c b/net/ipv4/route.c | ||
1795 | index 0e2cf9634541..02c49857b5a7 100644 | ||
1796 | --- a/net/ipv4/route.c | ||
1797 | +++ b/net/ipv4/route.c | ||
1798 | @@ -1168,25 +1168,39 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | ||
1799 | return dst; | ||
1800 | } | ||
1801 | |||
1802 | -static void ipv4_link_failure(struct sk_buff *skb) | ||
1803 | +static void ipv4_send_dest_unreach(struct sk_buff *skb) | ||
1804 | { | ||
1805 | struct ip_options opt; | ||
1806 | - struct rtable *rt; | ||
1807 | int res; | ||
1808 | |||
1809 | /* Recompile ip options since IPCB may not be valid anymore. | ||
1810 | + * Also check we have a reasonable ipv4 header. | ||
1811 | */ | ||
1812 | - memset(&opt, 0, sizeof(opt)); | ||
1813 | - opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); | ||
1814 | + if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) || | ||
1815 | + ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) | ||
1816 | + return; | ||
1817 | |||
1818 | - rcu_read_lock(); | ||
1819 | - res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); | ||
1820 | - rcu_read_unlock(); | ||
1821 | + memset(&opt, 0, sizeof(opt)); | ||
1822 | + if (ip_hdr(skb)->ihl > 5) { | ||
1823 | + if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4)) | ||
1824 | + return; | ||
1825 | + opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); | ||
1826 | |||
1827 | - if (res) | ||
1828 | - return; | ||
1829 | + rcu_read_lock(); | ||
1830 | + res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); | ||
1831 | + rcu_read_unlock(); | ||
1832 | |||
1833 | + if (res) | ||
1834 | + return; | ||
1835 | + } | ||
1836 | __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); | ||
1837 | +} | ||
1838 | + | ||
1839 | +static void ipv4_link_failure(struct sk_buff *skb) | ||
1840 | +{ | ||
1841 | + struct rtable *rt; | ||
1842 | + | ||
1843 | + ipv4_send_dest_unreach(skb); | ||
1844 | |||
1845 | rt = skb_rtable(skb); | ||
1846 | if (rt) | ||
1847 | diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c | ||
1848 | index 024ab833557d..85713adf2770 100644 | ||
1849 | --- a/net/ipv4/sysctl_net_ipv4.c | ||
1850 | +++ b/net/ipv4/sysctl_net_ipv4.c | ||
1851 | @@ -41,6 +41,7 @@ static int tcp_syn_retries_min = 1; | ||
1852 | static int tcp_syn_retries_max = MAX_TCP_SYNCNT; | ||
1853 | static int ip_ping_group_range_min[] = { 0, 0 }; | ||
1854 | static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; | ||
1855 | +static int one_day_secs = 24 * 3600; | ||
1856 | |||
1857 | /* Update system visible IP port range */ | ||
1858 | static void set_local_port_range(struct net *net, int range[2]) | ||
1859 | @@ -460,7 +461,9 @@ static struct ctl_table ipv4_table[] = { | ||
1860 | .data = &sysctl_tcp_min_rtt_wlen, | ||
1861 | .maxlen = sizeof(int), | ||
1862 | .mode = 0644, | ||
1863 | - .proc_handler = proc_dointvec | ||
1864 | + .proc_handler = proc_dointvec_minmax, | ||
1865 | + .extra1 = &zero, | ||
1866 | + .extra2 = &one_day_secs | ||
1867 | }, | ||
1868 | { | ||
1869 | .procname = "tcp_low_latency", | ||
1870 | diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c | ||
1871 | index e46185377981..1e1fa99b3243 100644 | ||
1872 | --- a/net/ipv6/netfilter/nf_conntrack_reasm.c | ||
1873 | +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c | ||
1874 | @@ -33,9 +33,8 @@ | ||
1875 | |||
1876 | #include <net/sock.h> | ||
1877 | #include <net/snmp.h> | ||
1878 | -#include <net/inet_frag.h> | ||
1879 | +#include <net/ipv6_frag.h> | ||
1880 | |||
1881 | -#include <net/ipv6.h> | ||
1882 | #include <net/protocol.h> | ||
1883 | #include <net/transp_v6.h> | ||
1884 | #include <net/rawv6.h> | ||
1885 | @@ -52,14 +51,6 @@ | ||
1886 | |||
1887 | static const char nf_frags_cache_name[] = "nf-frags"; | ||
1888 | |||
1889 | -struct nf_ct_frag6_skb_cb | ||
1890 | -{ | ||
1891 | - struct inet6_skb_parm h; | ||
1892 | - int offset; | ||
1893 | -}; | ||
1894 | - | ||
1895 | -#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb)) | ||
1896 | - | ||
1897 | static struct inet_frags nf_frags; | ||
1898 | |||
1899 | #ifdef CONFIG_SYSCTL | ||
1900 | @@ -145,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) | ||
1901 | } | ||
1902 | #endif | ||
1903 | |||
1904 | +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, | ||
1905 | + struct sk_buff *prev_tail, struct net_device *dev); | ||
1906 | + | ||
1907 | static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) | ||
1908 | { | ||
1909 | return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); | ||
1910 | @@ -158,7 +152,7 @@ static void nf_ct_frag6_expire(unsigned long data) | ||
1911 | fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); | ||
1912 | net = container_of(fq->q.net, struct net, nf_frag.frags); | ||
1913 | |||
1914 | - ip6_expire_frag_queue(net, fq); | ||
1915 | + ip6frag_expire_frag_queue(net, fq); | ||
1916 | } | ||
1917 | |||
1918 | /* Creation primitives. */ | ||
1919 | @@ -185,9 +179,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, | ||
1920 | static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, | ||
1921 | const struct frag_hdr *fhdr, int nhoff) | ||
1922 | { | ||
1923 | - struct sk_buff *prev, *next; | ||
1924 | unsigned int payload_len; | ||
1925 | - int offset, end; | ||
1926 | + struct net_device *dev; | ||
1927 | + struct sk_buff *prev; | ||
1928 | + int offset, end, err; | ||
1929 | u8 ecn; | ||
1930 | |||
1931 | if (fq->q.flags & INET_FRAG_COMPLETE) { | ||
1932 | @@ -262,55 +257,19 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, | ||
1933 | goto err; | ||
1934 | } | ||
1935 | |||
1936 | - /* Find out which fragments are in front and at the back of us | ||
1937 | - * in the chain of fragments so far. We must know where to put | ||
1938 | - * this fragment, right? | ||
1939 | - */ | ||
1940 | + /* Note : skb->rbnode and skb->dev share the same location. */ | ||
1941 | + dev = skb->dev; | ||
1942 | + /* Makes sure compiler wont do silly aliasing games */ | ||
1943 | + barrier(); | ||
1944 | + | ||
1945 | prev = fq->q.fragments_tail; | ||
1946 | - if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) { | ||
1947 | - next = NULL; | ||
1948 | - goto found; | ||
1949 | - } | ||
1950 | - prev = NULL; | ||
1951 | - for (next = fq->q.fragments; next != NULL; next = next->next) { | ||
1952 | - if (NFCT_FRAG6_CB(next)->offset >= offset) | ||
1953 | - break; /* bingo! */ | ||
1954 | - prev = next; | ||
1955 | - } | ||
1956 | + err = inet_frag_queue_insert(&fq->q, skb, offset, end); | ||
1957 | + if (err) | ||
1958 | + goto insert_error; | ||
1959 | |||
1960 | -found: | ||
1961 | - /* RFC5722, Section 4: | ||
1962 | - * When reassembling an IPv6 datagram, if | ||
1963 | - * one or more its constituent fragments is determined to be an | ||
1964 | - * overlapping fragment, the entire datagram (and any constituent | ||
1965 | - * fragments, including those not yet received) MUST be silently | ||
1966 | - * discarded. | ||
1967 | - */ | ||
1968 | + if (dev) | ||
1969 | + fq->iif = dev->ifindex; | ||
1970 | |||
1971 | - /* Check for overlap with preceding fragment. */ | ||
1972 | - if (prev && | ||
1973 | - (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset) | ||
1974 | - goto discard_fq; | ||
1975 | - | ||
1976 | - /* Look for overlap with succeeding segment. */ | ||
1977 | - if (next && NFCT_FRAG6_CB(next)->offset < end) | ||
1978 | - goto discard_fq; | ||
1979 | - | ||
1980 | - NFCT_FRAG6_CB(skb)->offset = offset; | ||
1981 | - | ||
1982 | - /* Insert this fragment in the chain of fragments. */ | ||
1983 | - skb->next = next; | ||
1984 | - if (!next) | ||
1985 | - fq->q.fragments_tail = skb; | ||
1986 | - if (prev) | ||
1987 | - prev->next = skb; | ||
1988 | - else | ||
1989 | - fq->q.fragments = skb; | ||
1990 | - | ||
1991 | - if (skb->dev) { | ||
1992 | - fq->iif = skb->dev->ifindex; | ||
1993 | - skb->dev = NULL; | ||
1994 | - } | ||
1995 | fq->q.stamp = skb->tstamp; | ||
1996 | fq->q.meat += skb->len; | ||
1997 | fq->ecn |= ecn; | ||
1998 | @@ -326,11 +285,25 @@ found: | ||
1999 | fq->q.flags |= INET_FRAG_FIRST_IN; | ||
2000 | } | ||
2001 | |||
2002 | - return 0; | ||
2003 | + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && | ||
2004 | + fq->q.meat == fq->q.len) { | ||
2005 | + unsigned long orefdst = skb->_skb_refdst; | ||
2006 | |||
2007 | -discard_fq: | ||
2008 | + skb->_skb_refdst = 0UL; | ||
2009 | + err = nf_ct_frag6_reasm(fq, skb, prev, dev); | ||
2010 | + skb->_skb_refdst = orefdst; | ||
2011 | + return err; | ||
2012 | + } | ||
2013 | + | ||
2014 | + skb_dst_drop(skb); | ||
2015 | + return -EINPROGRESS; | ||
2016 | + | ||
2017 | +insert_error: | ||
2018 | + if (err == IPFRAG_DUP) | ||
2019 | + goto err; | ||
2020 | inet_frag_kill(&fq->q); | ||
2021 | err: | ||
2022 | + skb_dst_drop(skb); | ||
2023 | return -EINVAL; | ||
2024 | } | ||
2025 | |||
2026 | @@ -340,141 +313,67 @@ err: | ||
2027 | * It is called with locked fq, and caller must check that | ||
2028 | * queue is eligible for reassembly i.e. it is not COMPLETE, | ||
2029 | * the last and the first frames arrived and all the bits are here. | ||
2030 | - * | ||
2031 | - * returns true if *prev skb has been transformed into the reassembled | ||
2032 | - * skb, false otherwise. | ||
2033 | */ | ||
2034 | -static bool | ||
2035 | -nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) | ||
2036 | +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, | ||
2037 | + struct sk_buff *prev_tail, struct net_device *dev) | ||
2038 | { | ||
2039 | - struct sk_buff *fp, *head = fq->q.fragments; | ||
2040 | - int payload_len; | ||
2041 | + void *reasm_data; | ||
2042 | + int payload_len; | ||
2043 | u8 ecn; | ||
2044 | |||
2045 | inet_frag_kill(&fq->q); | ||
2046 | |||
2047 | - WARN_ON(head == NULL); | ||
2048 | - WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); | ||
2049 | - | ||
2050 | ecn = ip_frag_ecn_table[fq->ecn]; | ||
2051 | if (unlikely(ecn == 0xff)) | ||
2052 | - return false; | ||
2053 | + goto err; | ||
2054 | |||
2055 | - /* Unfragmented part is taken from the first segment. */ | ||
2056 | - payload_len = ((head->data - skb_network_header(head)) - | ||
2057 | + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); | ||
2058 | + if (!reasm_data) | ||
2059 | + goto err; | ||
2060 | + | ||
2061 | + payload_len = ((skb->data - skb_network_header(skb)) - | ||
2062 | sizeof(struct ipv6hdr) + fq->q.len - | ||
2063 | sizeof(struct frag_hdr)); | ||
2064 | if (payload_len > IPV6_MAXPLEN) { | ||
2065 | net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", | ||
2066 | payload_len); | ||
2067 | - return false; | ||
2068 | - } | ||
2069 | - | ||
2070 | - /* Head of list must not be cloned. */ | ||
2071 | - if (skb_unclone(head, GFP_ATOMIC)) | ||
2072 | - return false; | ||
2073 | - | ||
2074 | - /* If the first fragment is fragmented itself, we split | ||
2075 | - * it to two chunks: the first with data and paged part | ||
2076 | - * and the second, holding only fragments. */ | ||
2077 | - if (skb_has_frag_list(head)) { | ||
2078 | - struct sk_buff *clone; | ||
2079 | - int i, plen = 0; | ||
2080 | - | ||
2081 | - clone = alloc_skb(0, GFP_ATOMIC); | ||
2082 | - if (clone == NULL) | ||
2083 | - return false; | ||
2084 | - | ||
2085 | - clone->next = head->next; | ||
2086 | - head->next = clone; | ||
2087 | - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; | ||
2088 | - skb_frag_list_init(head); | ||
2089 | - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) | ||
2090 | - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); | ||
2091 | - clone->len = clone->data_len = head->data_len - plen; | ||
2092 | - head->data_len -= clone->len; | ||
2093 | - head->len -= clone->len; | ||
2094 | - clone->csum = 0; | ||
2095 | - clone->ip_summed = head->ip_summed; | ||
2096 | - | ||
2097 | - add_frag_mem_limit(fq->q.net, clone->truesize); | ||
2098 | - } | ||
2099 | - | ||
2100 | - /* morph head into last received skb: prev. | ||
2101 | - * | ||
2102 | - * This allows callers of ipv6 conntrack defrag to continue | ||
2103 | - * to use the last skb(frag) passed into the reasm engine. | ||
2104 | - * The last skb frag 'silently' turns into the full reassembled skb. | ||
2105 | - * | ||
2106 | - * Since prev is also part of q->fragments we have to clone it first. | ||
2107 | - */ | ||
2108 | - if (head != prev) { | ||
2109 | - struct sk_buff *iter; | ||
2110 | - | ||
2111 | - fp = skb_clone(prev, GFP_ATOMIC); | ||
2112 | - if (!fp) | ||
2113 | - return false; | ||
2114 | - | ||
2115 | - fp->next = prev->next; | ||
2116 | - | ||
2117 | - iter = head; | ||
2118 | - while (iter) { | ||
2119 | - if (iter->next == prev) { | ||
2120 | - iter->next = fp; | ||
2121 | - break; | ||
2122 | - } | ||
2123 | - iter = iter->next; | ||
2124 | - } | ||
2125 | - | ||
2126 | - skb_morph(prev, head); | ||
2127 | - prev->next = head->next; | ||
2128 | - consume_skb(head); | ||
2129 | - head = prev; | ||
2130 | + goto err; | ||
2131 | } | ||
2132 | |||
2133 | /* We have to remove fragment header from datagram and to relocate | ||
2134 | * header in order to calculate ICV correctly. */ | ||
2135 | - skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; | ||
2136 | - memmove(head->head + sizeof(struct frag_hdr), head->head, | ||
2137 | - (head->data - head->head) - sizeof(struct frag_hdr)); | ||
2138 | - head->mac_header += sizeof(struct frag_hdr); | ||
2139 | - head->network_header += sizeof(struct frag_hdr); | ||
2140 | - | ||
2141 | - skb_shinfo(head)->frag_list = head->next; | ||
2142 | - skb_reset_transport_header(head); | ||
2143 | - skb_push(head, head->data - skb_network_header(head)); | ||
2144 | - | ||
2145 | - for (fp = head->next; fp; fp = fp->next) { | ||
2146 | - head->data_len += fp->len; | ||
2147 | - head->len += fp->len; | ||
2148 | - if (head->ip_summed != fp->ip_summed) | ||
2149 | - head->ip_summed = CHECKSUM_NONE; | ||
2150 | - else if (head->ip_summed == CHECKSUM_COMPLETE) | ||
2151 | - head->csum = csum_add(head->csum, fp->csum); | ||
2152 | - head->truesize += fp->truesize; | ||
2153 | - fp->sk = NULL; | ||
2154 | - } | ||
2155 | - sub_frag_mem_limit(fq->q.net, head->truesize); | ||
2156 | + skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0]; | ||
2157 | + memmove(skb->head + sizeof(struct frag_hdr), skb->head, | ||
2158 | + (skb->data - skb->head) - sizeof(struct frag_hdr)); | ||
2159 | + skb->mac_header += sizeof(struct frag_hdr); | ||
2160 | + skb->network_header += sizeof(struct frag_hdr); | ||
2161 | + | ||
2162 | + skb_reset_transport_header(skb); | ||
2163 | |||
2164 | - head->ignore_df = 1; | ||
2165 | - head->next = NULL; | ||
2166 | - head->dev = dev; | ||
2167 | - head->tstamp = fq->q.stamp; | ||
2168 | - ipv6_hdr(head)->payload_len = htons(payload_len); | ||
2169 | - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); | ||
2170 | - IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; | ||
2171 | + inet_frag_reasm_finish(&fq->q, skb, reasm_data); | ||
2172 | + | ||
2173 | + skb->ignore_df = 1; | ||
2174 | + skb->dev = dev; | ||
2175 | + ipv6_hdr(skb)->payload_len = htons(payload_len); | ||
2176 | + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); | ||
2177 | + IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; | ||
2178 | |||
2179 | /* Yes, and fold redundant checksum back. 8) */ | ||
2180 | - if (head->ip_summed == CHECKSUM_COMPLETE) | ||
2181 | - head->csum = csum_partial(skb_network_header(head), | ||
2182 | - skb_network_header_len(head), | ||
2183 | - head->csum); | ||
2184 | + if (skb->ip_summed == CHECKSUM_COMPLETE) | ||
2185 | + skb->csum = csum_partial(skb_network_header(skb), | ||
2186 | + skb_network_header_len(skb), | ||
2187 | + skb->csum); | ||
2188 | |||
2189 | fq->q.fragments = NULL; | ||
2190 | fq->q.rb_fragments = RB_ROOT; | ||
2191 | fq->q.fragments_tail = NULL; | ||
2192 | + fq->q.last_run_head = NULL; | ||
2193 | + | ||
2194 | + return 0; | ||
2195 | |||
2196 | - return true; | ||
2197 | +err: | ||
2198 | + inet_frag_kill(&fq->q); | ||
2199 | + return -EINVAL; | ||
2200 | } | ||
2201 | |||
2202 | /* | ||
2203 | @@ -543,7 +442,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) | ||
2204 | int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) | ||
2205 | { | ||
2206 | u16 savethdr = skb->transport_header; | ||
2207 | - struct net_device *dev = skb->dev; | ||
2208 | int fhoff, nhoff, ret; | ||
2209 | struct frag_hdr *fhdr; | ||
2210 | struct frag_queue *fq; | ||
2211 | @@ -566,10 +464,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) | ||
2212 | hdr = ipv6_hdr(skb); | ||
2213 | fhdr = (struct frag_hdr *)skb_transport_header(skb); | ||
2214 | |||
2215 | - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && | ||
2216 | - fhdr->frag_off & htons(IP6_MF)) | ||
2217 | - return -EINVAL; | ||
2218 | - | ||
2219 | skb_orphan(skb); | ||
2220 | fq = fq_find(net, fhdr->identification, user, hdr, | ||
2221 | skb->dev ? skb->dev->ifindex : 0); | ||
2222 | @@ -581,24 +475,17 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) | ||
2223 | spin_lock_bh(&fq->q.lock); | ||
2224 | |||
2225 | ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff); | ||
2226 | - if (ret < 0) { | ||
2227 | - if (ret == -EPROTO) { | ||
2228 | - skb->transport_header = savethdr; | ||
2229 | - ret = 0; | ||
2230 | - } | ||
2231 | - goto out_unlock; | ||
2232 | + if (ret == -EPROTO) { | ||
2233 | + skb->transport_header = savethdr; | ||
2234 | + ret = 0; | ||
2235 | } | ||
2236 | |||
2237 | /* after queue has assumed skb ownership, only 0 or -EINPROGRESS | ||
2238 | * must be returned. | ||
2239 | */ | ||
2240 | - ret = -EINPROGRESS; | ||
2241 | - if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && | ||
2242 | - fq->q.meat == fq->q.len && | ||
2243 | - nf_ct_frag6_reasm(fq, skb, dev)) | ||
2244 | - ret = 0; | ||
2245 | + if (ret) | ||
2246 | + ret = -EINPROGRESS; | ||
2247 | |||
2248 | -out_unlock: | ||
2249 | spin_unlock_bh(&fq->q.lock); | ||
2250 | inet_frag_put(&fq->q); | ||
2251 | return ret; | ||
2252 | @@ -634,16 +521,24 @@ static struct pernet_operations nf_ct_net_ops = { | ||
2253 | .exit = nf_ct_net_exit, | ||
2254 | }; | ||
2255 | |||
2256 | +static const struct rhashtable_params nfct_rhash_params = { | ||
2257 | + .head_offset = offsetof(struct inet_frag_queue, node), | ||
2258 | + .hashfn = ip6frag_key_hashfn, | ||
2259 | + .obj_hashfn = ip6frag_obj_hashfn, | ||
2260 | + .obj_cmpfn = ip6frag_obj_cmpfn, | ||
2261 | + .automatic_shrinking = true, | ||
2262 | +}; | ||
2263 | + | ||
2264 | int nf_ct_frag6_init(void) | ||
2265 | { | ||
2266 | int ret = 0; | ||
2267 | |||
2268 | - nf_frags.constructor = ip6_frag_init; | ||
2269 | + nf_frags.constructor = ip6frag_init; | ||
2270 | nf_frags.destructor = NULL; | ||
2271 | nf_frags.qsize = sizeof(struct frag_queue); | ||
2272 | nf_frags.frag_expire = nf_ct_frag6_expire; | ||
2273 | nf_frags.frags_cache_name = nf_frags_cache_name; | ||
2274 | - nf_frags.rhash_params = ip6_rhash_params; | ||
2275 | + nf_frags.rhash_params = nfct_rhash_params; | ||
2276 | ret = inet_frags_init(&nf_frags); | ||
2277 | if (ret) | ||
2278 | goto out; | ||
2279 | diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | ||
2280 | index f06b0471f39f..c4070e9c4260 100644 | ||
2281 | --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | ||
2282 | +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | ||
2283 | @@ -14,8 +14,7 @@ | ||
2284 | #include <linux/skbuff.h> | ||
2285 | #include <linux/icmp.h> | ||
2286 | #include <linux/sysctl.h> | ||
2287 | -#include <net/ipv6.h> | ||
2288 | -#include <net/inet_frag.h> | ||
2289 | +#include <net/ipv6_frag.h> | ||
2290 | |||
2291 | #include <linux/netfilter_ipv6.h> | ||
2292 | #include <linux/netfilter_bridge.h> | ||
2293 | diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c | ||
2294 | index 74ffbcb306a6..4aed9c45a91a 100644 | ||
2295 | --- a/net/ipv6/reassembly.c | ||
2296 | +++ b/net/ipv6/reassembly.c | ||
2297 | @@ -57,18 +57,11 @@ | ||
2298 | #include <net/rawv6.h> | ||
2299 | #include <net/ndisc.h> | ||
2300 | #include <net/addrconf.h> | ||
2301 | -#include <net/inet_frag.h> | ||
2302 | +#include <net/ipv6_frag.h> | ||
2303 | #include <net/inet_ecn.h> | ||
2304 | |||
2305 | static const char ip6_frag_cache_name[] = "ip6-frags"; | ||
2306 | |||
2307 | -struct ip6frag_skb_cb { | ||
2308 | - struct inet6_skb_parm h; | ||
2309 | - int offset; | ||
2310 | -}; | ||
2311 | - | ||
2312 | -#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb)) | ||
2313 | - | ||
2314 | static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) | ||
2315 | { | ||
2316 | return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); | ||
2317 | @@ -76,63 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) | ||
2318 | |||
2319 | static struct inet_frags ip6_frags; | ||
2320 | |||
2321 | -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, | ||
2322 | - struct net_device *dev); | ||
2323 | - | ||
2324 | -void ip6_frag_init(struct inet_frag_queue *q, const void *a) | ||
2325 | -{ | ||
2326 | - struct frag_queue *fq = container_of(q, struct frag_queue, q); | ||
2327 | - const struct frag_v6_compare_key *key = a; | ||
2328 | - | ||
2329 | - q->key.v6 = *key; | ||
2330 | - fq->ecn = 0; | ||
2331 | -} | ||
2332 | -EXPORT_SYMBOL(ip6_frag_init); | ||
2333 | - | ||
2334 | -void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) | ||
2335 | -{ | ||
2336 | - struct net_device *dev = NULL; | ||
2337 | - struct sk_buff *head; | ||
2338 | - | ||
2339 | - rcu_read_lock(); | ||
2340 | - spin_lock(&fq->q.lock); | ||
2341 | - | ||
2342 | - if (fq->q.flags & INET_FRAG_COMPLETE) | ||
2343 | - goto out; | ||
2344 | - | ||
2345 | - inet_frag_kill(&fq->q); | ||
2346 | - | ||
2347 | - dev = dev_get_by_index_rcu(net, fq->iif); | ||
2348 | - if (!dev) | ||
2349 | - goto out; | ||
2350 | - | ||
2351 | - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); | ||
2352 | - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); | ||
2353 | - | ||
2354 | - /* Don't send error if the first segment did not arrive. */ | ||
2355 | - head = fq->q.fragments; | ||
2356 | - if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) | ||
2357 | - goto out; | ||
2358 | - | ||
2359 | - /* But use as source device on which LAST ARRIVED | ||
2360 | - * segment was received. And do not use fq->dev | ||
2361 | - * pointer directly, device might already disappeared. | ||
2362 | - */ | ||
2363 | - head->dev = dev; | ||
2364 | - skb_get(head); | ||
2365 | - spin_unlock(&fq->q.lock); | ||
2366 | - | ||
2367 | - icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); | ||
2368 | - kfree_skb(head); | ||
2369 | - goto out_rcu_unlock; | ||
2370 | - | ||
2371 | -out: | ||
2372 | - spin_unlock(&fq->q.lock); | ||
2373 | -out_rcu_unlock: | ||
2374 | - rcu_read_unlock(); | ||
2375 | - inet_frag_put(&fq->q); | ||
2376 | -} | ||
2377 | -EXPORT_SYMBOL(ip6_expire_frag_queue); | ||
2378 | +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, | ||
2379 | + struct sk_buff *prev_tail, struct net_device *dev); | ||
2380 | |||
2381 | static void ip6_frag_expire(unsigned long data) | ||
2382 | { | ||
2383 | @@ -142,7 +80,7 @@ static void ip6_frag_expire(unsigned long data) | ||
2384 | fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); | ||
2385 | net = container_of(fq->q.net, struct net, ipv6.frags); | ||
2386 | |||
2387 | - ip6_expire_frag_queue(net, fq); | ||
2388 | + ip6frag_expire_frag_queue(net, fq); | ||
2389 | } | ||
2390 | |||
2391 | static struct frag_queue * | ||
2392 | @@ -169,27 +107,29 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) | ||
2393 | } | ||
2394 | |||
2395 | static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, | ||
2396 | - struct frag_hdr *fhdr, int nhoff) | ||
2397 | + struct frag_hdr *fhdr, int nhoff, | ||
2398 | + u32 *prob_offset) | ||
2399 | { | ||
2400 | - struct sk_buff *prev, *next; | ||
2401 | - struct net_device *dev; | ||
2402 | - int offset, end; | ||
2403 | struct net *net = dev_net(skb_dst(skb)->dev); | ||
2404 | + int offset, end, fragsize; | ||
2405 | + struct sk_buff *prev_tail; | ||
2406 | + struct net_device *dev; | ||
2407 | + int err = -ENOENT; | ||
2408 | u8 ecn; | ||
2409 | |||
2410 | if (fq->q.flags & INET_FRAG_COMPLETE) | ||
2411 | goto err; | ||
2412 | |||
2413 | + err = -EINVAL; | ||
2414 | offset = ntohs(fhdr->frag_off) & ~0x7; | ||
2415 | end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - | ||
2416 | ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); | ||
2417 | |||
2418 | if ((unsigned int)end > IPV6_MAXPLEN) { | ||
2419 | - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | ||
2420 | - IPSTATS_MIB_INHDRERRORS); | ||
2421 | - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, | ||
2422 | - ((u8 *)&fhdr->frag_off - | ||
2423 | - skb_network_header(skb))); | ||
2424 | + *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb); | ||
2425 | + /* note that if prob_offset is set, the skb is freed elsewhere, | ||
2426 | + * we do not free it here. | ||
2427 | + */ | ||
2428 | return -1; | ||
2429 | } | ||
2430 | |||
2431 | @@ -209,7 +149,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, | ||
2432 | */ | ||
2433 | if (end < fq->q.len || | ||
2434 | ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) | ||
2435 | - goto err; | ||
2436 | + goto discard_fq; | ||
2437 | fq->q.flags |= INET_FRAG_LAST_IN; | ||
2438 | fq->q.len = end; | ||
2439 | } else { | ||
2440 | @@ -220,84 +160,51 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, | ||
2441 | /* RFC2460 says always send parameter problem in | ||
2442 | * this case. -DaveM | ||
2443 | */ | ||
2444 | - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | ||
2445 | - IPSTATS_MIB_INHDRERRORS); | ||
2446 | - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, | ||
2447 | - offsetof(struct ipv6hdr, payload_len)); | ||
2448 | + *prob_offset = offsetof(struct ipv6hdr, payload_len); | ||
2449 | return -1; | ||
2450 | } | ||
2451 | if (end > fq->q.len) { | ||
2452 | /* Some bits beyond end -> corruption. */ | ||
2453 | if (fq->q.flags & INET_FRAG_LAST_IN) | ||
2454 | - goto err; | ||
2455 | + goto discard_fq; | ||
2456 | fq->q.len = end; | ||
2457 | } | ||
2458 | } | ||
2459 | |||
2460 | if (end == offset) | ||
2461 | - goto err; | ||
2462 | + goto discard_fq; | ||
2463 | |||
2464 | + err = -ENOMEM; | ||
2465 | /* Point into the IP datagram 'data' part. */ | ||
2466 | if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) | ||
2467 | - goto err; | ||
2468 | - | ||
2469 | - if (pskb_trim_rcsum(skb, end - offset)) | ||
2470 | - goto err; | ||
2471 | - | ||
2472 | - /* Find out which fragments are in front and at the back of us | ||
2473 | - * in the chain of fragments so far. We must know where to put | ||
2474 | - * this fragment, right? | ||
2475 | - */ | ||
2476 | - prev = fq->q.fragments_tail; | ||
2477 | - if (!prev || FRAG6_CB(prev)->offset < offset) { | ||
2478 | - next = NULL; | ||
2479 | - goto found; | ||
2480 | - } | ||
2481 | - prev = NULL; | ||
2482 | - for (next = fq->q.fragments; next != NULL; next = next->next) { | ||
2483 | - if (FRAG6_CB(next)->offset >= offset) | ||
2484 | - break; /* bingo! */ | ||
2485 | - prev = next; | ||
2486 | - } | ||
2487 | - | ||
2488 | -found: | ||
2489 | - /* RFC5722, Section 4, amended by Errata ID : 3089 | ||
2490 | - * When reassembling an IPv6 datagram, if | ||
2491 | - * one or more its constituent fragments is determined to be an | ||
2492 | - * overlapping fragment, the entire datagram (and any constituent | ||
2493 | - * fragments) MUST be silently discarded. | ||
2494 | - */ | ||
2495 | - | ||
2496 | - /* Check for overlap with preceding fragment. */ | ||
2497 | - if (prev && | ||
2498 | - (FRAG6_CB(prev)->offset + prev->len) > offset) | ||
2499 | goto discard_fq; | ||
2500 | |||
2501 | - /* Look for overlap with succeeding segment. */ | ||
2502 | - if (next && FRAG6_CB(next)->offset < end) | ||
2503 | + err = pskb_trim_rcsum(skb, end - offset); | ||
2504 | + if (err) | ||
2505 | goto discard_fq; | ||
2506 | |||
2507 | - FRAG6_CB(skb)->offset = offset; | ||
2508 | + /* Note : skb->rbnode and skb->dev share the same location. */ | ||
2509 | + dev = skb->dev; | ||
2510 | + /* Makes sure compiler wont do silly aliasing games */ | ||
2511 | + barrier(); | ||
2512 | |||
2513 | - /* Insert this fragment in the chain of fragments. */ | ||
2514 | - skb->next = next; | ||
2515 | - if (!next) | ||
2516 | - fq->q.fragments_tail = skb; | ||
2517 | - if (prev) | ||
2518 | - prev->next = skb; | ||
2519 | - else | ||
2520 | - fq->q.fragments = skb; | ||
2521 | + prev_tail = fq->q.fragments_tail; | ||
2522 | + err = inet_frag_queue_insert(&fq->q, skb, offset, end); | ||
2523 | + if (err) | ||
2524 | + goto insert_error; | ||
2525 | |||
2526 | - dev = skb->dev; | ||
2527 | - if (dev) { | ||
2528 | + if (dev) | ||
2529 | fq->iif = dev->ifindex; | ||
2530 | - skb->dev = NULL; | ||
2531 | - } | ||
2532 | + | ||
2533 | fq->q.stamp = skb->tstamp; | ||
2534 | fq->q.meat += skb->len; | ||
2535 | fq->ecn |= ecn; | ||
2536 | add_frag_mem_limit(fq->q.net, skb->truesize); | ||
2537 | |||
2538 | + fragsize = -skb_network_offset(skb) + skb->len; | ||
2539 | + if (fragsize > fq->q.max_size) | ||
2540 | + fq->q.max_size = fragsize; | ||
2541 | + | ||
2542 | /* The first fragment. | ||
2543 | * nhoffset is obtained from the first fragment, of course. | ||
2544 | */ | ||
2545 | @@ -308,44 +215,48 @@ found: | ||
2546 | |||
2547 | if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && | ||
2548 | fq->q.meat == fq->q.len) { | ||
2549 | - int res; | ||
2550 | unsigned long orefdst = skb->_skb_refdst; | ||
2551 | |||
2552 | skb->_skb_refdst = 0UL; | ||
2553 | - res = ip6_frag_reasm(fq, prev, dev); | ||
2554 | + err = ip6_frag_reasm(fq, skb, prev_tail, dev); | ||
2555 | skb->_skb_refdst = orefdst; | ||
2556 | - return res; | ||
2557 | + return err; | ||
2558 | } | ||
2559 | |||
2560 | skb_dst_drop(skb); | ||
2561 | - return -1; | ||
2562 | + return -EINPROGRESS; | ||
2563 | |||
2564 | +insert_error: | ||
2565 | + if (err == IPFRAG_DUP) { | ||
2566 | + kfree_skb(skb); | ||
2567 | + return -EINVAL; | ||
2568 | + } | ||
2569 | + err = -EINVAL; | ||
2570 | + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | ||
2571 | + IPSTATS_MIB_REASM_OVERLAPS); | ||
2572 | discard_fq: | ||
2573 | inet_frag_kill(&fq->q); | ||
2574 | -err: | ||
2575 | __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | ||
2576 | IPSTATS_MIB_REASMFAILS); | ||
2577 | +err: | ||
2578 | kfree_skb(skb); | ||
2579 | - return -1; | ||
2580 | + return err; | ||
2581 | } | ||
2582 | |||
2583 | /* | ||
2584 | * Check if this packet is complete. | ||
2585 | - * Returns NULL on failure by any reason, and pointer | ||
2586 | - * to current nexthdr field in reassembled frame. | ||
2587 | * | ||
2588 | * It is called with locked fq, and caller must check that | ||
2589 | * queue is eligible for reassembly i.e. it is not COMPLETE, | ||
2590 | * the last and the first frames arrived and all the bits are here. | ||
2591 | */ | ||
2592 | -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, | ||
2593 | - struct net_device *dev) | ||
2594 | +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, | ||
2595 | + struct sk_buff *prev_tail, struct net_device *dev) | ||
2596 | { | ||
2597 | struct net *net = container_of(fq->q.net, struct net, ipv6.frags); | ||
2598 | - struct sk_buff *fp, *head = fq->q.fragments; | ||
2599 | - int payload_len; | ||
2600 | unsigned int nhoff; | ||
2601 | - int sum_truesize; | ||
2602 | + void *reasm_data; | ||
2603 | + int payload_len; | ||
2604 | u8 ecn; | ||
2605 | |||
2606 | inet_frag_kill(&fq->q); | ||
2607 | @@ -354,113 +265,40 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, | ||
2608 | if (unlikely(ecn == 0xff)) | ||
2609 | goto out_fail; | ||
2610 | |||
2611 | - /* Make the one we just received the head. */ | ||
2612 | - if (prev) { | ||
2613 | - head = prev->next; | ||
2614 | - fp = skb_clone(head, GFP_ATOMIC); | ||
2615 | - | ||
2616 | - if (!fp) | ||
2617 | - goto out_oom; | ||
2618 | - | ||
2619 | - fp->next = head->next; | ||
2620 | - if (!fp->next) | ||
2621 | - fq->q.fragments_tail = fp; | ||
2622 | - prev->next = fp; | ||
2623 | - | ||
2624 | - skb_morph(head, fq->q.fragments); | ||
2625 | - head->next = fq->q.fragments->next; | ||
2626 | - | ||
2627 | - consume_skb(fq->q.fragments); | ||
2628 | - fq->q.fragments = head; | ||
2629 | - } | ||
2630 | - | ||
2631 | - WARN_ON(head == NULL); | ||
2632 | - WARN_ON(FRAG6_CB(head)->offset != 0); | ||
2633 | + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); | ||
2634 | + if (!reasm_data) | ||
2635 | + goto out_oom; | ||
2636 | |||
2637 | - /* Unfragmented part is taken from the first segment. */ | ||
2638 | - payload_len = ((head->data - skb_network_header(head)) - | ||
2639 | + payload_len = ((skb->data - skb_network_header(skb)) - | ||
2640 | sizeof(struct ipv6hdr) + fq->q.len - | ||
2641 | sizeof(struct frag_hdr)); | ||
2642 | if (payload_len > IPV6_MAXPLEN) | ||
2643 | goto out_oversize; | ||
2644 | |||
2645 | - /* Head of list must not be cloned. */ | ||
2646 | - if (skb_unclone(head, GFP_ATOMIC)) | ||
2647 | - goto out_oom; | ||
2648 | - | ||
2649 | - /* If the first fragment is fragmented itself, we split | ||
2650 | - * it to two chunks: the first with data and paged part | ||
2651 | - * and the second, holding only fragments. */ | ||
2652 | - if (skb_has_frag_list(head)) { | ||
2653 | - struct sk_buff *clone; | ||
2654 | - int i, plen = 0; | ||
2655 | - | ||
2656 | - clone = alloc_skb(0, GFP_ATOMIC); | ||
2657 | - if (!clone) | ||
2658 | - goto out_oom; | ||
2659 | - clone->next = head->next; | ||
2660 | - head->next = clone; | ||
2661 | - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; | ||
2662 | - skb_frag_list_init(head); | ||
2663 | - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) | ||
2664 | - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); | ||
2665 | - clone->len = clone->data_len = head->data_len - plen; | ||
2666 | - head->data_len -= clone->len; | ||
2667 | - head->len -= clone->len; | ||
2668 | - clone->csum = 0; | ||
2669 | - clone->ip_summed = head->ip_summed; | ||
2670 | - add_frag_mem_limit(fq->q.net, clone->truesize); | ||
2671 | - } | ||
2672 | - | ||
2673 | /* We have to remove fragment header from datagram and to relocate | ||
2674 | * header in order to calculate ICV correctly. */ | ||
2675 | nhoff = fq->nhoffset; | ||
2676 | - skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; | ||
2677 | - memmove(head->head + sizeof(struct frag_hdr), head->head, | ||
2678 | - (head->data - head->head) - sizeof(struct frag_hdr)); | ||
2679 | - if (skb_mac_header_was_set(head)) | ||
2680 | - head->mac_header += sizeof(struct frag_hdr); | ||
2681 | - head->network_header += sizeof(struct frag_hdr); | ||
2682 | - | ||
2683 | - skb_reset_transport_header(head); | ||
2684 | - skb_push(head, head->data - skb_network_header(head)); | ||
2685 | - | ||
2686 | - sum_truesize = head->truesize; | ||
2687 | - for (fp = head->next; fp;) { | ||
2688 | - bool headstolen; | ||
2689 | - int delta; | ||
2690 | - struct sk_buff *next = fp->next; | ||
2691 | - | ||
2692 | - sum_truesize += fp->truesize; | ||
2693 | - if (head->ip_summed != fp->ip_summed) | ||
2694 | - head->ip_summed = CHECKSUM_NONE; | ||
2695 | - else if (head->ip_summed == CHECKSUM_COMPLETE) | ||
2696 | - head->csum = csum_add(head->csum, fp->csum); | ||
2697 | - | ||
2698 | - if (skb_try_coalesce(head, fp, &headstolen, &delta)) { | ||
2699 | - kfree_skb_partial(fp, headstolen); | ||
2700 | - } else { | ||
2701 | - if (!skb_shinfo(head)->frag_list) | ||
2702 | - skb_shinfo(head)->frag_list = fp; | ||
2703 | - head->data_len += fp->len; | ||
2704 | - head->len += fp->len; | ||
2705 | - head->truesize += fp->truesize; | ||
2706 | - } | ||
2707 | - fp = next; | ||
2708 | - } | ||
2709 | - sub_frag_mem_limit(fq->q.net, sum_truesize); | ||
2710 | + skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0]; | ||
2711 | + memmove(skb->head + sizeof(struct frag_hdr), skb->head, | ||
2712 | + (skb->data - skb->head) - sizeof(struct frag_hdr)); | ||
2713 | + if (skb_mac_header_was_set(skb)) | ||
2714 | + skb->mac_header += sizeof(struct frag_hdr); | ||
2715 | + skb->network_header += sizeof(struct frag_hdr); | ||
2716 | |||
2717 | - head->next = NULL; | ||
2718 | - head->dev = dev; | ||
2719 | - head->tstamp = fq->q.stamp; | ||
2720 | - ipv6_hdr(head)->payload_len = htons(payload_len); | ||
2721 | - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); | ||
2722 | - IP6CB(head)->nhoff = nhoff; | ||
2723 | - IP6CB(head)->flags |= IP6SKB_FRAGMENTED; | ||
2724 | + skb_reset_transport_header(skb); | ||
2725 | + | ||
2726 | + inet_frag_reasm_finish(&fq->q, skb, reasm_data); | ||
2727 | + | ||
2728 | + skb->dev = dev; | ||
2729 | + ipv6_hdr(skb)->payload_len = htons(payload_len); | ||
2730 | + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); | ||
2731 | + IP6CB(skb)->nhoff = nhoff; | ||
2732 | + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; | ||
2733 | + IP6CB(skb)->frag_max_size = fq->q.max_size; | ||
2734 | |||
2735 | /* Yes, and fold redundant checksum back. 8) */ | ||
2736 | - skb_postpush_rcsum(head, skb_network_header(head), | ||
2737 | - skb_network_header_len(head)); | ||
2738 | + skb_postpush_rcsum(skb, skb_network_header(skb), | ||
2739 | + skb_network_header_len(skb)); | ||
2740 | |||
2741 | rcu_read_lock(); | ||
2742 | __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); | ||
2743 | @@ -468,6 +306,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, | ||
2744 | fq->q.fragments = NULL; | ||
2745 | fq->q.rb_fragments = RB_ROOT; | ||
2746 | fq->q.fragments_tail = NULL; | ||
2747 | + fq->q.last_run_head = NULL; | ||
2748 | return 1; | ||
2749 | |||
2750 | out_oversize: | ||
2751 | @@ -479,6 +318,7 @@ out_fail: | ||
2752 | rcu_read_lock(); | ||
2753 | __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); | ||
2754 | rcu_read_unlock(); | ||
2755 | + inet_frag_kill(&fq->q); | ||
2756 | return -1; | ||
2757 | } | ||
2758 | |||
2759 | @@ -517,22 +357,26 @@ static int ipv6_frag_rcv(struct sk_buff *skb) | ||
2760 | return 1; | ||
2761 | } | ||
2762 | |||
2763 | - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && | ||
2764 | - fhdr->frag_off & htons(IP6_MF)) | ||
2765 | - goto fail_hdr; | ||
2766 | - | ||
2767 | iif = skb->dev ? skb->dev->ifindex : 0; | ||
2768 | fq = fq_find(net, fhdr->identification, hdr, iif); | ||
2769 | if (fq) { | ||
2770 | + u32 prob_offset = 0; | ||
2771 | int ret; | ||
2772 | |||
2773 | spin_lock(&fq->q.lock); | ||
2774 | |||
2775 | fq->iif = iif; | ||
2776 | - ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); | ||
2777 | + ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff, | ||
2778 | + &prob_offset); | ||
2779 | |||
2780 | spin_unlock(&fq->q.lock); | ||
2781 | inet_frag_put(&fq->q); | ||
2782 | + if (prob_offset) { | ||
2783 | + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | ||
2784 | + IPSTATS_MIB_INHDRERRORS); | ||
2785 | + /* icmpv6_param_prob() calls kfree_skb(skb) */ | ||
2786 | + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset); | ||
2787 | + } | ||
2788 | return ret; | ||
2789 | } | ||
2790 | |||
2791 | @@ -700,42 +544,19 @@ static struct pernet_operations ip6_frags_ops = { | ||
2792 | .exit = ipv6_frags_exit_net, | ||
2793 | }; | ||
2794 | |||
2795 | -static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed) | ||
2796 | -{ | ||
2797 | - return jhash2(data, | ||
2798 | - sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); | ||
2799 | -} | ||
2800 | - | ||
2801 | -static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed) | ||
2802 | -{ | ||
2803 | - const struct inet_frag_queue *fq = data; | ||
2804 | - | ||
2805 | - return jhash2((const u32 *)&fq->key.v6, | ||
2806 | - sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); | ||
2807 | -} | ||
2808 | - | ||
2809 | -static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) | ||
2810 | -{ | ||
2811 | - const struct frag_v6_compare_key *key = arg->key; | ||
2812 | - const struct inet_frag_queue *fq = ptr; | ||
2813 | - | ||
2814 | - return !!memcmp(&fq->key, key, sizeof(*key)); | ||
2815 | -} | ||
2816 | - | ||
2817 | -const struct rhashtable_params ip6_rhash_params = { | ||
2818 | +static const struct rhashtable_params ip6_rhash_params = { | ||
2819 | .head_offset = offsetof(struct inet_frag_queue, node), | ||
2820 | - .hashfn = ip6_key_hashfn, | ||
2821 | - .obj_hashfn = ip6_obj_hashfn, | ||
2822 | - .obj_cmpfn = ip6_obj_cmpfn, | ||
2823 | + .hashfn = ip6frag_key_hashfn, | ||
2824 | + .obj_hashfn = ip6frag_obj_hashfn, | ||
2825 | + .obj_cmpfn = ip6frag_obj_cmpfn, | ||
2826 | .automatic_shrinking = true, | ||
2827 | }; | ||
2828 | -EXPORT_SYMBOL(ip6_rhash_params); | ||
2829 | |||
2830 | int __init ipv6_frag_init(void) | ||
2831 | { | ||
2832 | int ret; | ||
2833 | |||
2834 | - ip6_frags.constructor = ip6_frag_init; | ||
2835 | + ip6_frags.constructor = ip6frag_init; | ||
2836 | ip6_frags.destructor = NULL; | ||
2837 | ip6_frags.qsize = sizeof(struct frag_queue); | ||
2838 | ip6_frags.frag_expire = ip6_frag_expire; | ||
2839 | diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c | ||
2840 | index f135814c34ad..02d6f38f7869 100644 | ||
2841 | --- a/net/openvswitch/conntrack.c | ||
2842 | +++ b/net/openvswitch/conntrack.c | ||
2843 | @@ -23,6 +23,7 @@ | ||
2844 | #include <net/netfilter/nf_conntrack_seqadj.h> | ||
2845 | #include <net/netfilter/nf_conntrack_zones.h> | ||
2846 | #include <net/netfilter/ipv6/nf_defrag_ipv6.h> | ||
2847 | +#include <net/ipv6_frag.h> | ||
2848 | |||
2849 | #ifdef CONFIG_NF_NAT_NEEDED | ||
2850 | #include <linux/netfilter/nf_nat.h> | ||
2851 | diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c | ||
2852 | index 4fe8f4fec4ee..da84d6b2f72c 100644 | ||
2853 | --- a/net/rds/ib_fmr.c | ||
2854 | +++ b/net/rds/ib_fmr.c | ||
2855 | @@ -44,6 +44,17 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) | ||
2856 | else | ||
2857 | pool = rds_ibdev->mr_1m_pool; | ||
2858 | |||
2859 | + if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) | ||
2860 | + queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); | ||
2861 | + | ||
2862 | + /* Switch pools if one of the pool is reaching upper limit */ | ||
2863 | + if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) { | ||
2864 | + if (pool->pool_type == RDS_IB_MR_8K_POOL) | ||
2865 | + pool = rds_ibdev->mr_1m_pool; | ||
2866 | + else | ||
2867 | + pool = rds_ibdev->mr_8k_pool; | ||
2868 | + } | ||
2869 | + | ||
2870 | ibmr = rds_ib_try_reuse_ibmr(pool); | ||
2871 | if (ibmr) | ||
2872 | return ibmr; | ||
2873 | diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c | ||
2874 | index 977f69886c00..91b53d462fc0 100644 | ||
2875 | --- a/net/rds/ib_rdma.c | ||
2876 | +++ b/net/rds/ib_rdma.c | ||
2877 | @@ -442,9 +442,6 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) | ||
2878 | struct rds_ib_mr *ibmr = NULL; | ||
2879 | int iter = 0; | ||
2880 | |||
2881 | - if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10) | ||
2882 | - queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); | ||
2883 | - | ||
2884 | while (1) { | ||
2885 | ibmr = rds_ib_reuse_mr(pool); | ||
2886 | if (ibmr) | ||
2887 | diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c | ||
2888 | index cab50ece6f3d..cdcc0fea9f5a 100644 | ||
2889 | --- a/net/sunrpc/cache.c | ||
2890 | +++ b/net/sunrpc/cache.c | ||
2891 | @@ -54,6 +54,7 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) | ||
2892 | h->last_refresh = now; | ||
2893 | } | ||
2894 | |||
2895 | +static inline int cache_is_valid(struct cache_head *h); | ||
2896 | static void cache_fresh_locked(struct cache_head *head, time_t expiry, | ||
2897 | struct cache_detail *detail); | ||
2898 | static void cache_fresh_unlocked(struct cache_head *head, | ||
2899 | @@ -100,6 +101,8 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, | ||
2900 | if (cache_is_expired(detail, tmp)) { | ||
2901 | hlist_del_init(&tmp->cache_list); | ||
2902 | detail->entries --; | ||
2903 | + if (cache_is_valid(tmp) == -EAGAIN) | ||
2904 | + set_bit(CACHE_NEGATIVE, &tmp->flags); | ||
2905 | cache_fresh_locked(tmp, 0, detail); | ||
2906 | freeme = tmp; | ||
2907 | break; | ||
2908 | diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c | ||
2909 | index d947b8210399..0cf9403b4c44 100644 | ||
2910 | --- a/net/tipc/netlink_compat.c | ||
2911 | +++ b/net/tipc/netlink_compat.c | ||
2912 | @@ -262,8 +262,14 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, | ||
2913 | if (msg->rep_type) | ||
2914 | tipc_tlv_init(msg->rep, msg->rep_type); | ||
2915 | |||
2916 | - if (cmd->header) | ||
2917 | - (*cmd->header)(msg); | ||
2918 | + if (cmd->header) { | ||
2919 | + err = (*cmd->header)(msg); | ||
2920 | + if (err) { | ||
2921 | + kfree_skb(msg->rep); | ||
2922 | + msg->rep = NULL; | ||
2923 | + return err; | ||
2924 | + } | ||
2925 | + } | ||
2926 | |||
2927 | arg = nlmsg_new(0, GFP_KERNEL); | ||
2928 | if (!arg) { | ||
2929 | @@ -388,7 +394,12 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, | ||
2930 | if (!bearer) | ||
2931 | return -EMSGSIZE; | ||
2932 | |||
2933 | - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); | ||
2934 | + len = TLV_GET_DATA_LEN(msg->req); | ||
2935 | + len -= offsetof(struct tipc_bearer_config, name); | ||
2936 | + if (len <= 0) | ||
2937 | + return -EINVAL; | ||
2938 | + | ||
2939 | + len = min_t(int, len, TIPC_MAX_BEARER_NAME); | ||
2940 | if (!string_is_valid(b->name, len)) | ||
2941 | return -EINVAL; | ||
2942 | |||
2943 | @@ -757,7 +768,12 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, | ||
2944 | |||
2945 | lc = (struct tipc_link_config *)TLV_DATA(msg->req); | ||
2946 | |||
2947 | - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); | ||
2948 | + len = TLV_GET_DATA_LEN(msg->req); | ||
2949 | + len -= offsetof(struct tipc_link_config, name); | ||
2950 | + if (len <= 0) | ||
2951 | + return -EINVAL; | ||
2952 | + | ||
2953 | + len = min_t(int, len, TIPC_MAX_LINK_NAME); | ||
2954 | if (!string_is_valid(lc->name, len)) | ||
2955 | return -EINVAL; | ||
2956 | |||
2957 | diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c | ||
2958 | index 9c07c76c504d..cc4b4abb2759 100644 | ||
2959 | --- a/net/vmw_vsock/virtio_transport_common.c | ||
2960 | +++ b/net/vmw_vsock/virtio_transport_common.c | ||
2961 | @@ -601,6 +601,8 @@ static int virtio_transport_reset(struct vsock_sock *vsk, | ||
2962 | */ | ||
2963 | static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) | ||
2964 | { | ||
2965 | + const struct virtio_transport *t; | ||
2966 | + struct virtio_vsock_pkt *reply; | ||
2967 | struct virtio_vsock_pkt_info info = { | ||
2968 | .op = VIRTIO_VSOCK_OP_RST, | ||
2969 | .type = le16_to_cpu(pkt->hdr.type), | ||
2970 | @@ -611,15 +613,21 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) | ||
2971 | if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) | ||
2972 | return 0; | ||
2973 | |||
2974 | - pkt = virtio_transport_alloc_pkt(&info, 0, | ||
2975 | - le64_to_cpu(pkt->hdr.dst_cid), | ||
2976 | - le32_to_cpu(pkt->hdr.dst_port), | ||
2977 | - le64_to_cpu(pkt->hdr.src_cid), | ||
2978 | - le32_to_cpu(pkt->hdr.src_port)); | ||
2979 | - if (!pkt) | ||
2980 | + reply = virtio_transport_alloc_pkt(&info, 0, | ||
2981 | + le64_to_cpu(pkt->hdr.dst_cid), | ||
2982 | + le32_to_cpu(pkt->hdr.dst_port), | ||
2983 | + le64_to_cpu(pkt->hdr.src_cid), | ||
2984 | + le32_to_cpu(pkt->hdr.src_port)); | ||
2985 | + if (!reply) | ||
2986 | return -ENOMEM; | ||
2987 | |||
2988 | - return virtio_transport_get_ops()->send_pkt(pkt); | ||
2989 | + t = virtio_transport_get_ops(); | ||
2990 | + if (!t) { | ||
2991 | + virtio_transport_free_pkt(reply); | ||
2992 | + return -ENOTCONN; | ||
2993 | + } | ||
2994 | + | ||
2995 | + return t->send_pkt(reply); | ||
2996 | } | ||
2997 | |||
2998 | static void virtio_transport_wait_close(struct sock *sk, long timeout) | ||
2999 | diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include | ||
3000 | index 7f430778f418..558dea61db11 100644 | ||
3001 | --- a/scripts/Kbuild.include | ||
3002 | +++ b/scripts/Kbuild.include | ||
3003 | @@ -166,9 +166,7 @@ cc-ldoption = $(call try-run,\ | ||
3004 | |||
3005 | # ld-option | ||
3006 | # Usage: LDFLAGS += $(call ld-option, -X) | ||
3007 | -ld-option = $(call try-run,\ | ||
3008 | - $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -x c /dev/null -c -o "$$TMPO"; \ | ||
3009 | - $(LD) $(LDFLAGS) $(1) "$$TMPO" -o "$$TMP",$(1),$(2)) | ||
3010 | +ld-option = $(call try-run, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2)) | ||
3011 | |||
3012 | # ar-option | ||
3013 | # Usage: KBUILD_ARFLAGS := $(call ar-option,D) |