Annotation of /trunk/kernel-alx/patches-4.4/0155-4.4.56-all-fixes.patch
Revision 2891
Mon Mar 27 13:49:27 2017 UTC (7 years, 6 months ago) by niro
File size: 62180 byte(s)
linux-4.4.56
1 | niro | 2891 | diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt |
2 | deleted file mode 100644 | ||
3 | index 54f10478e8e3..000000000000 | ||
4 | --- a/Documentation/networking/netlink_mmap.txt | ||
5 | +++ /dev/null | ||
6 | @@ -1,332 +0,0 @@ | ||
7 | -This file documents how to use memory mapped I/O with netlink. | ||
8 | - | ||
9 | -Author: Patrick McHardy <kaber@trash.net> | ||
10 | - | ||
11 | -Overview | ||
12 | --------- | ||
13 | - | ||
14 | -Memory mapped netlink I/O can be used to increase throughput and decrease | ||
15 | -overhead of unicast receive and transmit operations. Some netlink subsystems | ||
16 | -require high throughput, these are mainly the netfilter subsystems | ||
17 | -nfnetlink_queue and nfnetlink_log, but it can also help speed up large | ||
18 | -dump operations of f.i. the routing database. | ||
19 | - | ||
20 | -Memory mapped netlink I/O used two circular ring buffers for RX and TX which | ||
21 | -are mapped into the processes address space. | ||
22 | - | ||
23 | -The RX ring is used by the kernel to directly construct netlink messages into | ||
24 | -user-space memory without copying them as done with regular socket I/O, | ||
25 | -additionally as long as the ring contains messages no recvmsg() or poll() | ||
26 | -syscalls have to be issued by user-space to get more message. | ||
27 | - | ||
28 | -The TX ring is used to process messages directly from user-space memory, the | ||
29 | -kernel processes all messages contained in the ring using a single sendmsg() | ||
30 | -call. | ||
31 | - | ||
32 | -Usage overview | ||
33 | --------------- | ||
34 | - | ||
35 | -In order to use memory mapped netlink I/O, user-space needs three main changes: | ||
36 | - | ||
37 | -- ring setup | ||
38 | -- conversion of the RX path to get messages from the ring instead of recvmsg() | ||
39 | -- conversion of the TX path to construct messages into the ring | ||
40 | - | ||
41 | -Ring setup is done using setsockopt() to provide the ring parameters to the | ||
42 | -kernel, then a call to mmap() to map the ring into the processes address space: | ||
43 | - | ||
44 | -- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &params, sizeof(params)); | ||
45 | -- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &params, sizeof(params)); | ||
46 | -- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) | ||
47 | - | ||
48 | -Usage of either ring is optional, but even if only the RX ring is used the | ||
49 | -mapping still needs to be writable in order to update the frame status after | ||
50 | -processing. | ||
51 | - | ||
52 | -Conversion of the reception path involves calling poll() on the file | ||
53 | -descriptor, once the socket is readable the frames from the ring are | ||
54 | -processed in order until no more messages are available, as indicated by | ||
55 | -a status word in the frame header. | ||
56 | - | ||
57 | -On kernel side, in order to make use of memory mapped I/O on receive, the | ||
58 | -originating netlink subsystem needs to support memory mapped I/O, otherwise | ||
59 | -it will use an allocated socket buffer as usual and the contents will be | ||
60 | -copied to the ring on transmission, nullifying most of the performance gains. | ||
61 | -Dumps of kernel databases automatically support memory mapped I/O. | ||
62 | - | ||
63 | -Conversion of the transmit path involves changing message construction to | ||
64 | -use memory from the TX ring instead of (usually) a buffer declared on the | ||
65 | -stack and setting up the frame header appropriately. Optionally poll() can | ||
66 | -be used to wait for free frames in the TX ring. | ||
67 | - | ||
68 | -Structured and definitions for using memory mapped I/O are contained in | ||
69 | -<linux/netlink.h>. | ||
70 | - | ||
71 | -RX and TX rings | ||
72 | ----------------- | ||
73 | - | ||
74 | -Each ring contains a number of continuous memory blocks, containing frames of | ||
75 | -fixed size dependent on the parameters used for ring setup. | ||
76 | - | ||
77 | -Ring: [ block 0 ] | ||
78 | - [ frame 0 ] | ||
79 | - [ frame 1 ] | ||
80 | - [ block 1 ] | ||
81 | - [ frame 2 ] | ||
82 | - [ frame 3 ] | ||
83 | - ... | ||
84 | - [ block n ] | ||
85 | - [ frame 2 * n ] | ||
86 | - [ frame 2 * n + 1 ] | ||
87 | - | ||
88 | -The blocks are only visible to the kernel, from the point of view of user-space | ||
89 | -the ring just contains the frames in a continuous memory zone. | ||
90 | - | ||
91 | -The ring parameters used for setting up the ring are defined as follows: | ||
92 | - | ||
93 | -struct nl_mmap_req { | ||
94 | - unsigned int nm_block_size; | ||
95 | - unsigned int nm_block_nr; | ||
96 | - unsigned int nm_frame_size; | ||
97 | - unsigned int nm_frame_nr; | ||
98 | -}; | ||
99 | - | ||
100 | -Frames are grouped into blocks, where each block is a continuous region of memory | ||
101 | -and holds nm_block_size / nm_frame_size frames. The total number of frames in | ||
102 | -the ring is nm_frame_nr. The following invariants hold: | ||
103 | - | ||
104 | -- frames_per_block = nm_block_size / nm_frame_size | ||
105 | - | ||
106 | -- nm_frame_nr = frames_per_block * nm_block_nr | ||
107 | - | ||
108 | -Some parameters are constrained, specifically: | ||
109 | - | ||
110 | -- nm_block_size must be a multiple of the architectures memory page size. | ||
111 | - The getpagesize() function can be used to get the page size. | ||
112 | - | ||
113 | -- nm_frame_size must be equal or larger to NL_MMAP_HDRLEN, IOW a frame must be | ||
114 | - able to hold at least the frame header | ||
115 | - | ||
116 | -- nm_frame_size must be smaller or equal to nm_block_size | ||
117 | - | ||
118 | -- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT | ||
119 | - | ||
120 | -- nm_frame_nr must equal the actual number of frames as specified above. | ||
121 | - | ||
122 | -When the kernel can't allocate physically continuous memory for a ring block, | ||
123 | -it will fall back to use physically discontinuous memory. This might affect | ||
124 | -performance negatively, in order to avoid this the nm_frame_size parameter | ||
125 | -should be chosen to be as small as possible for the required frame size and | ||
126 | -the number of blocks should be increased instead. | ||
127 | - | ||
128 | -Ring frames | ||
129 | ------------- | ||
130 | - | ||
131 | -Each frames contain a frame header, consisting of a synchronization word and some | ||
132 | -meta-data, and the message itself. | ||
133 | - | ||
134 | -Frame: [ header message ] | ||
135 | - | ||
136 | -The frame header is defined as follows: | ||
137 | - | ||
138 | -struct nl_mmap_hdr { | ||
139 | - unsigned int nm_status; | ||
140 | - unsigned int nm_len; | ||
141 | - __u32 nm_group; | ||
142 | - /* credentials */ | ||
143 | - __u32 nm_pid; | ||
144 | - __u32 nm_uid; | ||
145 | - __u32 nm_gid; | ||
146 | -}; | ||
147 | - | ||
148 | -- nm_status is used for synchronizing processing between the kernel and user- | ||
149 | - space and specifies ownership of the frame as well as the operation to perform | ||
150 | - | ||
151 | -- nm_len contains the length of the message contained in the data area | ||
152 | - | ||
153 | -- nm_group specified the destination multicast group of message | ||
154 | - | ||
155 | -- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending | ||
156 | - process. These values correspond to the data available using SOCK_PASSCRED in | ||
157 | - the SCM_CREDENTIALS cmsg. | ||
158 | - | ||
159 | -The possible values in the status word are: | ||
160 | - | ||
161 | -- NL_MMAP_STATUS_UNUSED: | ||
162 | - RX ring: frame belongs to the kernel and contains no message | ||
163 | - for user-space. Approriate action is to invoke poll() | ||
164 | - to wait for new messages. | ||
165 | - | ||
166 | - TX ring: frame belongs to user-space and can be used for | ||
167 | - message construction. | ||
168 | - | ||
169 | -- NL_MMAP_STATUS_RESERVED: | ||
170 | - RX ring only: frame is currently used by the kernel for message | ||
171 | - construction and contains no valid message yet. | ||
172 | - Appropriate action is to invoke poll() to wait for | ||
173 | - new messages. | ||
174 | - | ||
175 | -- NL_MMAP_STATUS_VALID: | ||
176 | - RX ring: frame contains a valid message. Approriate action is | ||
177 | - to process the message and release the frame back to | ||
178 | - the kernel by setting the status to | ||
179 | - NL_MMAP_STATUS_UNUSED or queue the frame by setting the | ||
180 | - status to NL_MMAP_STATUS_SKIP. | ||
181 | - | ||
182 | - TX ring: the frame contains a valid message from user-space to | ||
183 | - be processed by the kernel. After completing processing | ||
184 | - the kernel will release the frame back to user-space by | ||
185 | - setting the status to NL_MMAP_STATUS_UNUSED. | ||
186 | - | ||
187 | -- NL_MMAP_STATUS_COPY: | ||
188 | - RX ring only: a message is ready to be processed but could not be | ||
189 | - stored in the ring, either because it exceeded the | ||
190 | - frame size or because the originating subsystem does | ||
191 | - not support memory mapped I/O. Appropriate action is | ||
192 | - to invoke recvmsg() to receive the message and release | ||
193 | - the frame back to the kernel by setting the status to | ||
194 | - NL_MMAP_STATUS_UNUSED. | ||
195 | - | ||
196 | -- NL_MMAP_STATUS_SKIP: | ||
197 | - RX ring only: user-space queued the message for later processing, but | ||
198 | - processed some messages following it in the ring. The | ||
199 | - kernel should skip this frame when looking for unused | ||
200 | - frames. | ||
201 | - | ||
202 | -The data area of a frame begins at a offset of NL_MMAP_HDRLEN relative to the | ||
203 | -frame header. | ||
204 | - | ||
205 | -TX limitations | ||
206 | --------------- | ||
207 | - | ||
208 | -As of Jan 2015 the message is always copied from the ring frame to an | ||
209 | -allocated buffer due to unresolved security concerns. | ||
210 | -See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX."). | ||
211 | - | ||
212 | -Example | ||
213 | -------- | ||
214 | - | ||
215 | -Ring setup: | ||
216 | - | ||
217 | - unsigned int block_size = 16 * getpagesize(); | ||
218 | - struct nl_mmap_req req = { | ||
219 | - .nm_block_size = block_size, | ||
220 | - .nm_block_nr = 64, | ||
221 | - .nm_frame_size = 16384, | ||
222 | - .nm_frame_nr = 64 * block_size / 16384, | ||
223 | - }; | ||
224 | - unsigned int ring_size; | ||
225 | - void *rx_ring, *tx_ring; | ||
226 | - | ||
227 | - /* Configure ring parameters */ | ||
228 | - if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) | ||
229 | - exit(1); | ||
230 | - if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0) | ||
231 | - exit(1) | ||
232 | - | ||
233 | - /* Calculate size of each individual ring */ | ||
234 | - ring_size = req.nm_block_nr * req.nm_block_size; | ||
235 | - | ||
236 | - /* Map RX/TX rings. The TX ring is located after the RX ring */ | ||
237 | - rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE, | ||
238 | - MAP_SHARED, fd, 0); | ||
239 | - if ((long)rx_ring == -1L) | ||
240 | - exit(1); | ||
241 | - tx_ring = rx_ring + ring_size: | ||
242 | - | ||
243 | -Message reception: | ||
244 | - | ||
245 | -This example assumes some ring parameters of the ring setup are available. | ||
246 | - | ||
247 | - unsigned int frame_offset = 0; | ||
248 | - struct nl_mmap_hdr *hdr; | ||
249 | - struct nlmsghdr *nlh; | ||
250 | - unsigned char buf[16384]; | ||
251 | - ssize_t len; | ||
252 | - | ||
253 | - while (1) { | ||
254 | - struct pollfd pfds[1]; | ||
255 | - | ||
256 | - pfds[0].fd = fd; | ||
257 | - pfds[0].events = POLLIN | POLLERR; | ||
258 | - pfds[0].revents = 0; | ||
259 | - | ||
260 | - if (poll(pfds, 1, -1) < 0 && errno != -EINTR) | ||
261 | - exit(1); | ||
262 | - | ||
263 | - /* Check for errors. Error handling omitted */ | ||
264 | - if (pfds[0].revents & POLLERR) | ||
265 | - <handle error> | ||
266 | - | ||
267 | - /* If no new messages, poll again */ | ||
268 | - if (!(pfds[0].revents & POLLIN)) | ||
269 | - continue; | ||
270 | - | ||
271 | - /* Process all frames */ | ||
272 | - while (1) { | ||
273 | - /* Get next frame header */ | ||
274 | - hdr = rx_ring + frame_offset; | ||
275 | - | ||
276 | - if (hdr->nm_status == NL_MMAP_STATUS_VALID) { | ||
277 | - /* Regular memory mapped frame */ | ||
278 | - nlh = (void *)hdr + NL_MMAP_HDRLEN; | ||
279 | - len = hdr->nm_len; | ||
280 | - | ||
281 | - /* Release empty message immediately. May happen | ||
282 | - * on error during message construction. | ||
283 | - */ | ||
284 | - if (len == 0) | ||
285 | - goto release; | ||
286 | - } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) { | ||
287 | - /* Frame queued to socket receive queue */ | ||
288 | - len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); | ||
289 | - if (len <= 0) | ||
290 | - break; | ||
291 | - nlh = buf; | ||
292 | - } else | ||
293 | - /* No more messages to process, continue polling */ | ||
294 | - break; | ||
295 | - | ||
296 | - process_msg(nlh); | ||
297 | -release: | ||
298 | - /* Release frame back to the kernel */ | ||
299 | - hdr->nm_status = NL_MMAP_STATUS_UNUSED; | ||
300 | - | ||
301 | - /* Advance frame offset to next frame */ | ||
302 | - frame_offset = (frame_offset + frame_size) % ring_size; | ||
303 | - } | ||
304 | - } | ||
305 | - | ||
306 | -Message transmission: | ||
307 | - | ||
308 | -This example assumes some ring parameters of the ring setup are available. | ||
309 | -A single message is constructed and transmitted, to send multiple messages | ||
310 | -at once they would be constructed in consecutive frames before a final call | ||
311 | -to sendto(). | ||
312 | - | ||
313 | - unsigned int frame_offset = 0; | ||
314 | - struct nl_mmap_hdr *hdr; | ||
315 | - struct nlmsghdr *nlh; | ||
316 | - struct sockaddr_nl addr = { | ||
317 | - .nl_family = AF_NETLINK, | ||
318 | - }; | ||
319 | - | ||
320 | - hdr = tx_ring + frame_offset; | ||
321 | - if (hdr->nm_status != NL_MMAP_STATUS_UNUSED) | ||
322 | - /* No frame available. Use poll() to avoid. */ | ||
323 | - exit(1); | ||
324 | - | ||
325 | - nlh = (void *)hdr + NL_MMAP_HDRLEN; | ||
326 | - | ||
327 | - /* Build message */ | ||
328 | - build_message(nlh); | ||
329 | - | ||
330 | - /* Fill frame header: length and status need to be set */ | ||
331 | - hdr->nm_len = nlh->nlmsg_len; | ||
332 | - hdr->nm_status = NL_MMAP_STATUS_VALID; | ||
333 | - | ||
334 | - if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0) | ||
335 | - exit(1); | ||
336 | - | ||
337 | - /* Advance frame offset to next frame */ | ||
338 | - frame_offset = (frame_offset + frame_size) % ring_size; | ||
339 | diff --git a/Makefile b/Makefile | ||
340 | index d9cc21df444d..cf9303a5d621 100644 | ||
341 | --- a/Makefile | ||
342 | +++ b/Makefile | ||
343 | @@ -1,6 +1,6 @@ | ||
344 | VERSION = 4 | ||
345 | PATCHLEVEL = 4 | ||
346 | -SUBLEVEL = 55 | ||
347 | +SUBLEVEL = 56 | ||
348 | EXTRAVERSION = | ||
349 | NAME = Blurry Fish Butt | ||
350 | |||
351 | diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c | ||
352 | index 1a8256dd6729..5b2f2306fbcc 100644 | ||
353 | --- a/arch/x86/kernel/cpu/perf_event.c | ||
354 | +++ b/arch/x86/kernel/cpu/perf_event.c | ||
355 | @@ -1996,8 +1996,8 @@ static int x86_pmu_event_init(struct perf_event *event) | ||
356 | |||
357 | static void refresh_pce(void *ignored) | ||
358 | { | ||
359 | - if (current->mm) | ||
360 | - load_mm_cr4(current->mm); | ||
361 | + if (current->active_mm) | ||
362 | + load_mm_cr4(current->active_mm); | ||
363 | } | ||
364 | |||
365 | static void x86_pmu_event_mapped(struct perf_event *event) | ||
366 | diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c | ||
367 | index f129a9af6357..b6b0077da1af 100644 | ||
368 | --- a/arch/x86/kernel/head64.c | ||
369 | +++ b/arch/x86/kernel/head64.c | ||
370 | @@ -4,6 +4,7 @@ | ||
371 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | ||
372 | */ | ||
373 | |||
374 | +#define DISABLE_BRANCH_PROFILING | ||
375 | #include <linux/init.h> | ||
376 | #include <linux/linkage.h> | ||
377 | #include <linux/types.h> | ||
378 | diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c | ||
379 | index d470cf219a2d..4e5ac46adc9d 100644 | ||
380 | --- a/arch/x86/mm/kasan_init_64.c | ||
381 | +++ b/arch/x86/mm/kasan_init_64.c | ||
382 | @@ -1,3 +1,4 @@ | ||
383 | +#define DISABLE_BRANCH_PROFILING | ||
384 | #define pr_fmt(fmt) "kasan: " fmt | ||
385 | #include <linux/bootmem.h> | ||
386 | #include <linux/kasan.h> | ||
387 | diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c | ||
388 | index d6b619667f1a..349aecbc210a 100644 | ||
389 | --- a/drivers/net/vrf.c | ||
390 | +++ b/drivers/net/vrf.c | ||
391 | @@ -345,6 +345,7 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) | ||
392 | |||
393 | static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) | ||
394 | { | ||
395 | + int len = skb->len; | ||
396 | netdev_tx_t ret = is_ip_tx_frame(skb, dev); | ||
397 | |||
398 | if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { | ||
399 | @@ -352,7 +353,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) | ||
400 | |||
401 | u64_stats_update_begin(&dstats->syncp); | ||
402 | dstats->tx_pkts++; | ||
403 | - dstats->tx_bytes += skb->len; | ||
404 | + dstats->tx_bytes += len; | ||
405 | u64_stats_update_end(&dstats->syncp); | ||
406 | } else { | ||
407 | this_cpu_inc(dev->dstats->tx_drps); | ||
408 | diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c | ||
409 | index 6fa8e165878e..590750ab6564 100644 | ||
410 | --- a/drivers/net/vxlan.c | ||
411 | +++ b/drivers/net/vxlan.c | ||
412 | @@ -2600,7 +2600,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) | ||
413 | |||
414 | if (data[IFLA_VXLAN_ID]) { | ||
415 | __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); | ||
416 | - if (id >= VXLAN_VID_MASK) | ||
417 | + if (id >= VXLAN_N_VID) | ||
418 | return -ERANGE; | ||
419 | } | ||
420 | |||
421 | diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c | ||
422 | index 8a9feb341f31..dd561f916f0b 100644 | ||
423 | --- a/fs/ext4/crypto_policy.c | ||
424 | +++ b/fs/ext4/crypto_policy.c | ||
425 | @@ -156,6 +156,12 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent, | ||
426 | WARN_ON(1); /* Should never happen */ | ||
427 | return 0; | ||
428 | } | ||
429 | + | ||
430 | + /* No restrictions on file types which are never encrypted */ | ||
431 | + if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && | ||
432 | + !S_ISLNK(child->i_mode)) | ||
433 | + return 1; | ||
434 | + | ||
435 | /* no restrictions if the parent directory is not encrypted */ | ||
436 | if (!ext4_encrypted_inode(parent)) | ||
437 | return 1; | ||
438 | diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c | ||
439 | index 1fb12f9c97a6..789e2d6724a9 100644 | ||
440 | --- a/fs/ext4/ioctl.c | ||
441 | +++ b/fs/ext4/ioctl.c | ||
442 | @@ -633,8 +633,12 @@ resizefs_out: | ||
443 | if (err) | ||
444 | goto encryption_policy_out; | ||
445 | |||
446 | + mutex_lock(&inode->i_mutex); | ||
447 | + | ||
448 | err = ext4_process_policy(&policy, inode); | ||
449 | |||
450 | + mutex_unlock(&inode->i_mutex); | ||
451 | + | ||
452 | mnt_drop_write_file(filp); | ||
453 | encryption_policy_out: | ||
454 | return err; | ||
455 | diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c | ||
456 | index e504f548b64e..5bbd1989d5e6 100644 | ||
457 | --- a/fs/f2fs/crypto_policy.c | ||
458 | +++ b/fs/f2fs/crypto_policy.c | ||
459 | @@ -149,6 +149,11 @@ int f2fs_is_child_context_consistent_with_parent(struct inode *parent, | ||
460 | BUG_ON(1); | ||
461 | } | ||
462 | |||
463 | + /* No restrictions on file types which are never encrypted */ | ||
464 | + if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && | ||
465 | + !S_ISLNK(child->i_mode)) | ||
466 | + return 1; | ||
467 | + | ||
468 | /* no restrictions if the parent directory is not encrypted */ | ||
469 | if (!f2fs_encrypted_inode(parent)) | ||
470 | return 1; | ||
471 | diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c | ||
472 | index a197215ad52b..4b449d263333 100644 | ||
473 | --- a/fs/f2fs/file.c | ||
474 | +++ b/fs/f2fs/file.c | ||
475 | @@ -1535,12 +1535,19 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) | ||
476 | #ifdef CONFIG_F2FS_FS_ENCRYPTION | ||
477 | struct f2fs_encryption_policy policy; | ||
478 | struct inode *inode = file_inode(filp); | ||
479 | + int err; | ||
480 | |||
481 | if (copy_from_user(&policy, (struct f2fs_encryption_policy __user *)arg, | ||
482 | sizeof(policy))) | ||
483 | return -EFAULT; | ||
484 | |||
485 | - return f2fs_process_policy(&policy, inode); | ||
486 | + mutex_lock(&inode->i_mutex); | ||
487 | + | ||
488 | + err = f2fs_process_policy(&policy, inode); | ||
489 | + | ||
490 | + mutex_unlock(&inode->i_mutex); | ||
491 | + | ||
492 | + return err; | ||
493 | #else | ||
494 | return -EOPNOTSUPP; | ||
495 | #endif | ||
496 | diff --git a/include/linux/dccp.h b/include/linux/dccp.h | ||
497 | index 61d042bbbf60..68449293c4b6 100644 | ||
498 | --- a/include/linux/dccp.h | ||
499 | +++ b/include/linux/dccp.h | ||
500 | @@ -163,6 +163,7 @@ struct dccp_request_sock { | ||
501 | __u64 dreq_isr; | ||
502 | __u64 dreq_gsr; | ||
503 | __be32 dreq_service; | ||
504 | + spinlock_t dreq_lock; | ||
505 | struct list_head dreq_featneg; | ||
506 | __u32 dreq_timestamp_echo; | ||
507 | __u32 dreq_timestamp_time; | ||
508 | diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h | ||
509 | index f095155d8749..0dba4e4ed2be 100644 | ||
510 | --- a/include/uapi/linux/netlink.h | ||
511 | +++ b/include/uapi/linux/netlink.h | ||
512 | @@ -107,8 +107,10 @@ struct nlmsgerr { | ||
513 | #define NETLINK_PKTINFO 3 | ||
514 | #define NETLINK_BROADCAST_ERROR 4 | ||
515 | #define NETLINK_NO_ENOBUFS 5 | ||
516 | +#ifndef __KERNEL__ | ||
517 | #define NETLINK_RX_RING 6 | ||
518 | #define NETLINK_TX_RING 7 | ||
519 | +#endif | ||
520 | #define NETLINK_LISTEN_ALL_NSID 8 | ||
521 | #define NETLINK_LIST_MEMBERSHIPS 9 | ||
522 | #define NETLINK_CAP_ACK 10 | ||
523 | @@ -134,6 +136,7 @@ struct nl_mmap_hdr { | ||
524 | __u32 nm_gid; | ||
525 | }; | ||
526 | |||
527 | +#ifndef __KERNEL__ | ||
528 | enum nl_mmap_status { | ||
529 | NL_MMAP_STATUS_UNUSED, | ||
530 | NL_MMAP_STATUS_RESERVED, | ||
531 | @@ -145,6 +148,7 @@ enum nl_mmap_status { | ||
532 | #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO | ||
533 | #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) | ||
534 | #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) | ||
535 | +#endif | ||
536 | |||
537 | #define NET_MAJOR 36 /* Major 36 is reserved for networking */ | ||
538 | |||
539 | diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h | ||
540 | index f2159d30d1f5..d79399394b46 100644 | ||
541 | --- a/include/uapi/linux/netlink_diag.h | ||
542 | +++ b/include/uapi/linux/netlink_diag.h | ||
543 | @@ -48,6 +48,8 @@ enum { | ||
544 | |||
545 | #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ | ||
546 | #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ | ||
547 | +#ifndef __KERNEL__ | ||
548 | #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ | ||
549 | +#endif | ||
550 | |||
551 | #endif | ||
552 | diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h | ||
553 | index d08c63f3dd6f..0c5d5dd61b6a 100644 | ||
554 | --- a/include/uapi/linux/packet_diag.h | ||
555 | +++ b/include/uapi/linux/packet_diag.h | ||
556 | @@ -64,7 +64,7 @@ struct packet_diag_mclist { | ||
557 | __u32 pdmc_count; | ||
558 | __u16 pdmc_type; | ||
559 | __u16 pdmc_alen; | ||
560 | - __u8 pdmc_addr[MAX_ADDR_LEN]; | ||
561 | + __u8 pdmc_addr[32]; /* MAX_ADDR_LEN */ | ||
562 | }; | ||
563 | |||
564 | struct packet_diag_ring { | ||
565 | diff --git a/kernel/futex.c b/kernel/futex.c | ||
566 | index 9d251dc3ec40..3057dabf726f 100644 | ||
567 | --- a/kernel/futex.c | ||
568 | +++ b/kernel/futex.c | ||
569 | @@ -2690,7 +2690,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | ||
570 | { | ||
571 | struct hrtimer_sleeper timeout, *to = NULL; | ||
572 | struct rt_mutex_waiter rt_waiter; | ||
573 | - struct rt_mutex *pi_mutex = NULL; | ||
574 | struct futex_hash_bucket *hb; | ||
575 | union futex_key key2 = FUTEX_KEY_INIT; | ||
576 | struct futex_q q = futex_q_init; | ||
577 | @@ -2774,6 +2773,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | ||
578 | if (q.pi_state && (q.pi_state->owner != current)) { | ||
579 | spin_lock(q.lock_ptr); | ||
580 | ret = fixup_pi_state_owner(uaddr2, &q, current); | ||
581 | + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) | ||
582 | + rt_mutex_unlock(&q.pi_state->pi_mutex); | ||
583 | /* | ||
584 | * Drop the reference to the pi state which | ||
585 | * the requeue_pi() code acquired for us. | ||
586 | @@ -2782,6 +2783,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | ||
587 | spin_unlock(q.lock_ptr); | ||
588 | } | ||
589 | } else { | ||
590 | + struct rt_mutex *pi_mutex; | ||
591 | + | ||
592 | /* | ||
593 | * We have been woken up by futex_unlock_pi(), a timeout, or a | ||
594 | * signal. futex_unlock_pi() will not destroy the lock_ptr nor | ||
595 | @@ -2805,18 +2808,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | ||
596 | if (res) | ||
597 | ret = (res < 0) ? res : 0; | ||
598 | |||
599 | + /* | ||
600 | + * If fixup_pi_state_owner() faulted and was unable to handle | ||
601 | + * the fault, unlock the rt_mutex and return the fault to | ||
602 | + * userspace. | ||
603 | + */ | ||
604 | + if (ret && rt_mutex_owner(pi_mutex) == current) | ||
605 | + rt_mutex_unlock(pi_mutex); | ||
606 | + | ||
607 | /* Unqueue and drop the lock. */ | ||
608 | unqueue_me_pi(&q); | ||
609 | } | ||
610 | |||
611 | - /* | ||
612 | - * If fixup_pi_state_owner() faulted and was unable to handle the | ||
613 | - * fault, unlock the rt_mutex and return the fault to userspace. | ||
614 | - */ | ||
615 | - if (ret == -EFAULT) { | ||
616 | - if (pi_mutex && rt_mutex_owner(pi_mutex) == current) | ||
617 | - rt_mutex_unlock(pi_mutex); | ||
618 | - } else if (ret == -EINTR) { | ||
619 | + if (ret == -EINTR) { | ||
620 | /* | ||
621 | * We've already been requeued, but cannot restart by calling | ||
622 | * futex_lock_pi() directly. We could restart this syscall, but | ||
623 | diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c | ||
624 | index f7fba74108a9..e24754a0e052 100644 | ||
625 | --- a/net/bridge/br_input.c | ||
626 | +++ b/net/bridge/br_input.c | ||
627 | @@ -29,6 +29,7 @@ EXPORT_SYMBOL(br_should_route_hook); | ||
628 | static int | ||
629 | br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) | ||
630 | { | ||
631 | + br_drop_fake_rtable(skb); | ||
632 | return netif_receive_skb(skb); | ||
633 | } | ||
634 | |||
635 | diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c | ||
636 | index 7ddbe7ec81d6..97fc19f001bf 100644 | ||
637 | --- a/net/bridge/br_netfilter_hooks.c | ||
638 | +++ b/net/bridge/br_netfilter_hooks.c | ||
639 | @@ -516,21 +516,6 @@ static unsigned int br_nf_pre_routing(void *priv, | ||
640 | } | ||
641 | |||
642 | |||
643 | -/* PF_BRIDGE/LOCAL_IN ************************************************/ | ||
644 | -/* The packet is locally destined, which requires a real | ||
645 | - * dst_entry, so detach the fake one. On the way up, the | ||
646 | - * packet would pass through PRE_ROUTING again (which already | ||
647 | - * took place when the packet entered the bridge), but we | ||
648 | - * register an IPv4 PRE_ROUTING 'sabotage' hook that will | ||
649 | - * prevent this from happening. */ | ||
650 | -static unsigned int br_nf_local_in(void *priv, | ||
651 | - struct sk_buff *skb, | ||
652 | - const struct nf_hook_state *state) | ||
653 | -{ | ||
654 | - br_drop_fake_rtable(skb); | ||
655 | - return NF_ACCEPT; | ||
656 | -} | ||
657 | - | ||
658 | /* PF_BRIDGE/FORWARD *************************************************/ | ||
659 | static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) | ||
660 | { | ||
661 | @@ -901,12 +886,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { | ||
662 | .priority = NF_BR_PRI_BRNF, | ||
663 | }, | ||
664 | { | ||
665 | - .hook = br_nf_local_in, | ||
666 | - .pf = NFPROTO_BRIDGE, | ||
667 | - .hooknum = NF_BR_LOCAL_IN, | ||
668 | - .priority = NF_BR_PRI_BRNF, | ||
669 | - }, | ||
670 | - { | ||
671 | .hook = br_nf_forward_ip, | ||
672 | .pf = NFPROTO_BRIDGE, | ||
673 | .hooknum = NF_BR_FORWARD, | ||
674 | diff --git a/net/core/dev.c b/net/core/dev.c | ||
675 | index 08215a85c742..48399d8ce614 100644 | ||
676 | --- a/net/core/dev.c | ||
677 | +++ b/net/core/dev.c | ||
678 | @@ -1677,27 +1677,54 @@ EXPORT_SYMBOL_GPL(net_dec_ingress_queue); | ||
679 | static struct static_key netstamp_needed __read_mostly; | ||
680 | #ifdef HAVE_JUMP_LABEL | ||
681 | static atomic_t netstamp_needed_deferred; | ||
682 | +static atomic_t netstamp_wanted; | ||
683 | static void netstamp_clear(struct work_struct *work) | ||
684 | { | ||
685 | int deferred = atomic_xchg(&netstamp_needed_deferred, 0); | ||
686 | + int wanted; | ||
687 | |||
688 | - while (deferred--) | ||
689 | - static_key_slow_dec(&netstamp_needed); | ||
690 | + wanted = atomic_add_return(deferred, &netstamp_wanted); | ||
691 | + if (wanted > 0) | ||
692 | + static_key_enable(&netstamp_needed); | ||
693 | + else | ||
694 | + static_key_disable(&netstamp_needed); | ||
695 | } | ||
696 | static DECLARE_WORK(netstamp_work, netstamp_clear); | ||
697 | #endif | ||
698 | |||
699 | void net_enable_timestamp(void) | ||
700 | { | ||
701 | +#ifdef HAVE_JUMP_LABEL | ||
702 | + int wanted; | ||
703 | + | ||
704 | + while (1) { | ||
705 | + wanted = atomic_read(&netstamp_wanted); | ||
706 | + if (wanted <= 0) | ||
707 | + break; | ||
708 | + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) | ||
709 | + return; | ||
710 | + } | ||
711 | + atomic_inc(&netstamp_needed_deferred); | ||
712 | + schedule_work(&netstamp_work); | ||
713 | +#else | ||
714 | static_key_slow_inc(&netstamp_needed); | ||
715 | +#endif | ||
716 | } | ||
717 | EXPORT_SYMBOL(net_enable_timestamp); | ||
718 | |||
719 | void net_disable_timestamp(void) | ||
720 | { | ||
721 | #ifdef HAVE_JUMP_LABEL | ||
722 | - /* net_disable_timestamp() can be called from non process context */ | ||
723 | - atomic_inc(&netstamp_needed_deferred); | ||
724 | + int wanted; | ||
725 | + | ||
726 | + while (1) { | ||
727 | + wanted = atomic_read(&netstamp_wanted); | ||
728 | + if (wanted <= 1) | ||
729 | + break; | ||
730 | + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted) | ||
731 | + return; | ||
732 | + } | ||
733 | + atomic_dec(&netstamp_needed_deferred); | ||
734 | schedule_work(&netstamp_work); | ||
735 | #else | ||
736 | static_key_slow_dec(&netstamp_needed); | ||
737 | diff --git a/net/core/skbuff.c b/net/core/skbuff.c | ||
738 | index 4968b5ddea69..73dfd7729bc9 100644 | ||
739 | --- a/net/core/skbuff.c | ||
740 | +++ b/net/core/skbuff.c | ||
741 | @@ -3678,13 +3678,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, | ||
742 | if (!skb_may_tx_timestamp(sk, false)) | ||
743 | return; | ||
744 | |||
745 | - /* take a reference to prevent skb_orphan() from freeing the socket */ | ||
746 | - sock_hold(sk); | ||
747 | - | ||
748 | - *skb_hwtstamps(skb) = *hwtstamps; | ||
749 | - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); | ||
750 | - | ||
751 | - sock_put(sk); | ||
752 | + /* Take a reference to prevent skb_orphan() from freeing the socket, | ||
753 | + * but only if the socket refcount is not zero. | ||
754 | + */ | ||
755 | + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { | ||
756 | + *skb_hwtstamps(skb) = *hwtstamps; | ||
757 | + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); | ||
758 | + sock_put(sk); | ||
759 | + } | ||
760 | } | ||
761 | EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); | ||
762 | |||
763 | @@ -3735,7 +3736,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) | ||
764 | { | ||
765 | struct sock *sk = skb->sk; | ||
766 | struct sock_exterr_skb *serr; | ||
767 | - int err; | ||
768 | + int err = 1; | ||
769 | |||
770 | skb->wifi_acked_valid = 1; | ||
771 | skb->wifi_acked = acked; | ||
772 | @@ -3745,14 +3746,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) | ||
773 | serr->ee.ee_errno = ENOMSG; | ||
774 | serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; | ||
775 | |||
776 | - /* take a reference to prevent skb_orphan() from freeing the socket */ | ||
777 | - sock_hold(sk); | ||
778 | - | ||
779 | - err = sock_queue_err_skb(sk, skb); | ||
780 | + /* Take a reference to prevent skb_orphan() from freeing the socket, | ||
781 | + * but only if the socket refcount is not zero. | ||
782 | + */ | ||
783 | + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { | ||
784 | + err = sock_queue_err_skb(sk, skb); | ||
785 | + sock_put(sk); | ||
786 | + } | ||
787 | if (err) | ||
788 | kfree_skb(skb); | ||
789 | - | ||
790 | - sock_put(sk); | ||
791 | } | ||
792 | EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); | ||
793 | |||
794 | diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c | ||
795 | index f053198e730c..5e3a7302f774 100644 | ||
796 | --- a/net/dccp/ccids/ccid2.c | ||
797 | +++ b/net/dccp/ccids/ccid2.c | ||
798 | @@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk) | ||
799 | for (i = 0; i < hc->tx_seqbufc; i++) | ||
800 | kfree(hc->tx_seqbuf[i]); | ||
801 | hc->tx_seqbufc = 0; | ||
802 | + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); | ||
803 | } | ||
804 | |||
805 | static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | ||
806 | diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c | ||
807 | index 0759f5b9180e..6467bf392e1b 100644 | ||
808 | --- a/net/dccp/ipv4.c | ||
809 | +++ b/net/dccp/ipv4.c | ||
810 | @@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) | ||
811 | |||
812 | switch (type) { | ||
813 | case ICMP_REDIRECT: | ||
814 | - dccp_do_redirect(skb, sk); | ||
815 | + if (!sock_owned_by_user(sk)) | ||
816 | + dccp_do_redirect(skb, sk); | ||
817 | goto out; | ||
818 | case ICMP_SOURCE_QUENCH: | ||
819 | /* Just silently ignore these. */ | ||
820 | diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c | ||
821 | index 27c4e81efa24..8113ad58fcb4 100644 | ||
822 | --- a/net/dccp/ipv6.c | ||
823 | +++ b/net/dccp/ipv6.c | ||
824 | @@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | ||
825 | np = inet6_sk(sk); | ||
826 | |||
827 | if (type == NDISC_REDIRECT) { | ||
828 | - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); | ||
829 | + if (!sock_owned_by_user(sk)) { | ||
830 | + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); | ||
831 | |||
832 | - if (dst) | ||
833 | - dst->ops->redirect(dst, sk, skb); | ||
834 | + if (dst) | ||
835 | + dst->ops->redirect(dst, sk, skb); | ||
836 | + } | ||
837 | goto out; | ||
838 | } | ||
839 | |||
840 | diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c | ||
841 | index 1994f8af646b..68eed344b471 100644 | ||
842 | --- a/net/dccp/minisocks.c | ||
843 | +++ b/net/dccp/minisocks.c | ||
844 | @@ -122,6 +122,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, | ||
845 | /* It is still raw copy of parent, so invalidate | ||
846 | * destructor and make plain sk_free() */ | ||
847 | newsk->sk_destruct = NULL; | ||
848 | + bh_unlock_sock(newsk); | ||
849 | sk_free(newsk); | ||
850 | return NULL; | ||
851 | } | ||
852 | @@ -145,6 +146,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | ||
853 | struct dccp_request_sock *dreq = dccp_rsk(req); | ||
854 | bool own_req; | ||
855 | |||
856 | + /* TCP/DCCP listeners became lockless. | ||
857 | + * DCCP stores complex state in its request_sock, so we need | ||
858 | + * a protection for them, now this code runs without being protected | ||
859 | + * by the parent (listener) lock. | ||
860 | + */ | ||
861 | + spin_lock_bh(&dreq->dreq_lock); | ||
862 | + | ||
863 | /* Check for retransmitted REQUEST */ | ||
864 | if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { | ||
865 | |||
866 | @@ -159,7 +167,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | ||
867 | inet_rtx_syn_ack(sk, req); | ||
868 | } | ||
869 | /* Network Duplicate, discard packet */ | ||
870 | - return NULL; | ||
871 | + goto out; | ||
872 | } | ||
873 | |||
874 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; | ||
875 | @@ -185,20 +193,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | ||
876 | |||
877 | child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, | ||
878 | req, &own_req); | ||
879 | - if (!child) | ||
880 | - goto listen_overflow; | ||
881 | - | ||
882 | - return inet_csk_complete_hashdance(sk, child, req, own_req); | ||
883 | + if (child) { | ||
884 | + child = inet_csk_complete_hashdance(sk, child, req, own_req); | ||
885 | + goto out; | ||
886 | + } | ||
887 | |||
888 | -listen_overflow: | ||
889 | - dccp_pr_debug("listen_overflow!\n"); | ||
890 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; | ||
891 | drop: | ||
892 | if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) | ||
893 | req->rsk_ops->send_reset(sk, skb); | ||
894 | |||
895 | inet_csk_reqsk_queue_drop(sk, req); | ||
896 | - return NULL; | ||
897 | +out: | ||
898 | + spin_unlock_bh(&dreq->dreq_lock); | ||
899 | + return child; | ||
900 | } | ||
901 | |||
902 | EXPORT_SYMBOL_GPL(dccp_check_req); | ||
903 | @@ -249,6 +257,7 @@ int dccp_reqsk_init(struct request_sock *req, | ||
904 | { | ||
905 | struct dccp_request_sock *dreq = dccp_rsk(req); | ||
906 | |||
907 | + spin_lock_init(&dreq->dreq_lock); | ||
908 | inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; | ||
909 | inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); | ||
910 | inet_rsk(req)->acked = 0; | ||
911 | diff --git a/net/ipv4/route.c b/net/ipv4/route.c | ||
912 | index ef2f527a119b..da4d68d78590 100644 | ||
913 | --- a/net/ipv4/route.c | ||
914 | +++ b/net/ipv4/route.c | ||
915 | @@ -1958,6 +1958,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, | ||
916 | { | ||
917 | int res; | ||
918 | |||
919 | + tos &= IPTOS_RT_MASK; | ||
920 | rcu_read_lock(); | ||
921 | |||
922 | /* Multicast recognition logic is moved from route cache to here. | ||
923 | diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c | ||
924 | index b58a38eea059..198fc2314c82 100644 | ||
925 | --- a/net/ipv4/tcp_ipv4.c | ||
926 | +++ b/net/ipv4/tcp_ipv4.c | ||
927 | @@ -271,10 +271,13 @@ EXPORT_SYMBOL(tcp_v4_connect); | ||
928 | */ | ||
929 | void tcp_v4_mtu_reduced(struct sock *sk) | ||
930 | { | ||
931 | - struct dst_entry *dst; | ||
932 | struct inet_sock *inet = inet_sk(sk); | ||
933 | - u32 mtu = tcp_sk(sk)->mtu_info; | ||
934 | + struct dst_entry *dst; | ||
935 | + u32 mtu; | ||
936 | |||
937 | + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) | ||
938 | + return; | ||
939 | + mtu = tcp_sk(sk)->mtu_info; | ||
940 | dst = inet_csk_update_pmtu(sk, mtu); | ||
941 | if (!dst) | ||
942 | return; | ||
943 | @@ -420,7 +423,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | ||
944 | |||
945 | switch (type) { | ||
946 | case ICMP_REDIRECT: | ||
947 | - do_redirect(icmp_skb, sk); | ||
948 | + if (!sock_owned_by_user(sk)) | ||
949 | + do_redirect(icmp_skb, sk); | ||
950 | goto out; | ||
951 | case ICMP_SOURCE_QUENCH: | ||
952 | /* Just silently ignore these. */ | ||
953 | diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c | ||
954 | index 193ba1fa8a9a..ebb34d0c5e80 100644 | ||
955 | --- a/net/ipv4/tcp_timer.c | ||
956 | +++ b/net/ipv4/tcp_timer.c | ||
957 | @@ -223,7 +223,8 @@ void tcp_delack_timer_handler(struct sock *sk) | ||
958 | |||
959 | sk_mem_reclaim_partial(sk); | ||
960 | |||
961 | - if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) | ||
962 | + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || | ||
963 | + !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) | ||
964 | goto out; | ||
965 | |||
966 | if (time_after(icsk->icsk_ack.timeout, jiffies)) { | ||
967 | @@ -504,7 +505,8 @@ void tcp_write_timer_handler(struct sock *sk) | ||
968 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
969 | int event; | ||
970 | |||
971 | - if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) | ||
972 | + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || | ||
973 | + !icsk->icsk_pending) | ||
974 | goto out; | ||
975 | |||
976 | if (time_after(icsk->icsk_timeout, jiffies)) { | ||
977 | diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c | ||
978 | index 34cf46d74554..85bf86458706 100644 | ||
979 | --- a/net/ipv6/ip6_fib.c | ||
980 | +++ b/net/ipv6/ip6_fib.c | ||
981 | @@ -903,6 +903,8 @@ add: | ||
982 | ins = &rt->dst.rt6_next; | ||
983 | iter = *ins; | ||
984 | while (iter) { | ||
985 | + if (iter->rt6i_metric > rt->rt6i_metric) | ||
986 | + break; | ||
987 | if (rt6_qualify_for_ecmp(iter)) { | ||
988 | *ins = iter->dst.rt6_next; | ||
989 | fib6_purge_rt(iter, fn, info->nl_net); | ||
990 | diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c | ||
991 | index 58900c21e4e4..8004532fa882 100644 | ||
992 | --- a/net/ipv6/ip6_output.c | ||
993 | +++ b/net/ipv6/ip6_output.c | ||
994 | @@ -742,13 +742,14 @@ slow_path: | ||
995 | * Fragment the datagram. | ||
996 | */ | ||
997 | |||
998 | - *prevhdr = NEXTHDR_FRAGMENT; | ||
999 | troom = rt->dst.dev->needed_tailroom; | ||
1000 | |||
1001 | /* | ||
1002 | * Keep copying data until we run out. | ||
1003 | */ | ||
1004 | while (left > 0) { | ||
1005 | + u8 *fragnexthdr_offset; | ||
1006 | + | ||
1007 | len = left; | ||
1008 | /* IF: it doesn't fit, use 'mtu' - the data space left */ | ||
1009 | if (len > mtu) | ||
1010 | @@ -793,6 +794,10 @@ slow_path: | ||
1011 | */ | ||
1012 | skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); | ||
1013 | |||
1014 | + fragnexthdr_offset = skb_network_header(frag); | ||
1015 | + fragnexthdr_offset += prevhdr - skb_network_header(skb); | ||
1016 | + *fragnexthdr_offset = NEXTHDR_FRAGMENT; | ||
1017 | + | ||
1018 | /* | ||
1019 | * Build fragment header. | ||
1020 | */ | ||
1021 | diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c | ||
1022 | index 0a8610b33d79..bdcc4d9cedd3 100644 | ||
1023 | --- a/net/ipv6/ip6_vti.c | ||
1024 | +++ b/net/ipv6/ip6_vti.c | ||
1025 | @@ -680,6 +680,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) | ||
1026 | u->link = p->link; | ||
1027 | u->i_key = p->i_key; | ||
1028 | u->o_key = p->o_key; | ||
1029 | + if (u->i_key) | ||
1030 | + u->i_flags |= GRE_KEY; | ||
1031 | + if (u->o_key) | ||
1032 | + u->o_flags |= GRE_KEY; | ||
1033 | u->proto = p->proto; | ||
1034 | |||
1035 | memcpy(u->name, p->name, sizeof(u->name)); | ||
1036 | diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c | ||
1037 | index 76a8c8057a23..1a63c4deef26 100644 | ||
1038 | --- a/net/ipv6/tcp_ipv6.c | ||
1039 | +++ b/net/ipv6/tcp_ipv6.c | ||
1040 | @@ -376,10 +376,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | ||
1041 | np = inet6_sk(sk); | ||
1042 | |||
1043 | if (type == NDISC_REDIRECT) { | ||
1044 | - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); | ||
1045 | + if (!sock_owned_by_user(sk)) { | ||
1046 | + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); | ||
1047 | |||
1048 | - if (dst) | ||
1049 | - dst->ops->redirect(dst, sk, skb); | ||
1050 | + if (dst) | ||
1051 | + dst->ops->redirect(dst, sk, skb); | ||
1052 | + } | ||
1053 | goto out; | ||
1054 | } | ||
1055 | |||
1056 | diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c | ||
1057 | index 445b7cd0826a..48ab93842322 100644 | ||
1058 | --- a/net/l2tp/l2tp_ip.c | ||
1059 | +++ b/net/l2tp/l2tp_ip.c | ||
1060 | @@ -383,7 +383,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) | ||
1061 | drop: | ||
1062 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); | ||
1063 | kfree_skb(skb); | ||
1064 | - return -1; | ||
1065 | + return 0; | ||
1066 | } | ||
1067 | |||
1068 | /* Userspace will call sendmsg() on the tunnel socket to send L2TP | ||
1069 | diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c | ||
1070 | index 881bc2072809..52cfc4478511 100644 | ||
1071 | --- a/net/mpls/af_mpls.c | ||
1072 | +++ b/net/mpls/af_mpls.c | ||
1073 | @@ -1567,6 +1567,7 @@ static void mpls_net_exit(struct net *net) | ||
1074 | for (index = 0; index < platform_labels; index++) { | ||
1075 | struct mpls_route *rt = rtnl_dereference(platform_label[index]); | ||
1076 | RCU_INIT_POINTER(platform_label[index], NULL); | ||
1077 | + mpls_notify_route(net, index, rt, NULL, NULL); | ||
1078 | mpls_rt_free(rt); | ||
1079 | } | ||
1080 | rtnl_unlock(); | ||
1081 | diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig | ||
1082 | index 2c5e95e9bfbd..5d6e8c05b3d4 100644 | ||
1083 | --- a/net/netlink/Kconfig | ||
1084 | +++ b/net/netlink/Kconfig | ||
1085 | @@ -2,15 +2,6 @@ | ||
1086 | # Netlink Sockets | ||
1087 | # | ||
1088 | |||
1089 | -config NETLINK_MMAP | ||
1090 | - bool "NETLINK: mmaped IO" | ||
1091 | - ---help--- | ||
1092 | - This option enables support for memory mapped netlink IO. This | ||
1093 | - reduces overhead by avoiding copying data between kernel- and | ||
1094 | - userspace. | ||
1095 | - | ||
1096 | - If unsure, say N. | ||
1097 | - | ||
1098 | config NETLINK_DIAG | ||
1099 | tristate "NETLINK: socket monitoring interface" | ||
1100 | default n | ||
1101 | diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c | ||
1102 | index 360700a2f46c..8e33019d8e7b 100644 | ||
1103 | --- a/net/netlink/af_netlink.c | ||
1104 | +++ b/net/netlink/af_netlink.c | ||
1105 | @@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, | ||
1106 | |||
1107 | dev_hold(dev); | ||
1108 | |||
1109 | - if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) | ||
1110 | + if (is_vmalloc_addr(skb->head)) | ||
1111 | nskb = netlink_to_full_skb(skb, GFP_ATOMIC); | ||
1112 | else | ||
1113 | nskb = skb_clone(skb, GFP_ATOMIC); | ||
1114 | @@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk) | ||
1115 | wake_up_interruptible(&nlk->wait); | ||
1116 | } | ||
1117 | |||
1118 | -#ifdef CONFIG_NETLINK_MMAP | ||
1119 | -static bool netlink_rx_is_mmaped(struct sock *sk) | ||
1120 | -{ | ||
1121 | - return nlk_sk(sk)->rx_ring.pg_vec != NULL; | ||
1122 | -} | ||
1123 | - | ||
1124 | -static bool netlink_tx_is_mmaped(struct sock *sk) | ||
1125 | -{ | ||
1126 | - return nlk_sk(sk)->tx_ring.pg_vec != NULL; | ||
1127 | -} | ||
1128 | - | ||
1129 | -static __pure struct page *pgvec_to_page(const void *addr) | ||
1130 | -{ | ||
1131 | - if (is_vmalloc_addr(addr)) | ||
1132 | - return vmalloc_to_page(addr); | ||
1133 | - else | ||
1134 | - return virt_to_page(addr); | ||
1135 | -} | ||
1136 | - | ||
1137 | -static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) | ||
1138 | -{ | ||
1139 | - unsigned int i; | ||
1140 | - | ||
1141 | - for (i = 0; i < len; i++) { | ||
1142 | - if (pg_vec[i] != NULL) { | ||
1143 | - if (is_vmalloc_addr(pg_vec[i])) | ||
1144 | - vfree(pg_vec[i]); | ||
1145 | - else | ||
1146 | - free_pages((unsigned long)pg_vec[i], order); | ||
1147 | - } | ||
1148 | - } | ||
1149 | - kfree(pg_vec); | ||
1150 | -} | ||
1151 | - | ||
1152 | -static void *alloc_one_pg_vec_page(unsigned long order) | ||
1153 | -{ | ||
1154 | - void *buffer; | ||
1155 | - gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | | ||
1156 | - __GFP_NOWARN | __GFP_NORETRY; | ||
1157 | - | ||
1158 | - buffer = (void *)__get_free_pages(gfp_flags, order); | ||
1159 | - if (buffer != NULL) | ||
1160 | - return buffer; | ||
1161 | - | ||
1162 | - buffer = vzalloc((1 << order) * PAGE_SIZE); | ||
1163 | - if (buffer != NULL) | ||
1164 | - return buffer; | ||
1165 | - | ||
1166 | - gfp_flags &= ~__GFP_NORETRY; | ||
1167 | - return (void *)__get_free_pages(gfp_flags, order); | ||
1168 | -} | ||
1169 | - | ||
1170 | -static void **alloc_pg_vec(struct netlink_sock *nlk, | ||
1171 | - struct nl_mmap_req *req, unsigned int order) | ||
1172 | -{ | ||
1173 | - unsigned int block_nr = req->nm_block_nr; | ||
1174 | - unsigned int i; | ||
1175 | - void **pg_vec; | ||
1176 | - | ||
1177 | - pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); | ||
1178 | - if (pg_vec == NULL) | ||
1179 | - return NULL; | ||
1180 | - | ||
1181 | - for (i = 0; i < block_nr; i++) { | ||
1182 | - pg_vec[i] = alloc_one_pg_vec_page(order); | ||
1183 | - if (pg_vec[i] == NULL) | ||
1184 | - goto err1; | ||
1185 | - } | ||
1186 | - | ||
1187 | - return pg_vec; | ||
1188 | -err1: | ||
1189 | - free_pg_vec(pg_vec, order, block_nr); | ||
1190 | - return NULL; | ||
1191 | -} | ||
1192 | - | ||
1193 | - | ||
1194 | -static void | ||
1195 | -__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, | ||
1196 | - unsigned int order) | ||
1197 | -{ | ||
1198 | - struct netlink_sock *nlk = nlk_sk(sk); | ||
1199 | - struct sk_buff_head *queue; | ||
1200 | - struct netlink_ring *ring; | ||
1201 | - | ||
1202 | - queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; | ||
1203 | - ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; | ||
1204 | - | ||
1205 | - spin_lock_bh(&queue->lock); | ||
1206 | - | ||
1207 | - ring->frame_max = req->nm_frame_nr - 1; | ||
1208 | - ring->head = 0; | ||
1209 | - ring->frame_size = req->nm_frame_size; | ||
1210 | - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; | ||
1211 | - | ||
1212 | - swap(ring->pg_vec_len, req->nm_block_nr); | ||
1213 | - swap(ring->pg_vec_order, order); | ||
1214 | - swap(ring->pg_vec, pg_vec); | ||
1215 | - | ||
1216 | - __skb_queue_purge(queue); | ||
1217 | - spin_unlock_bh(&queue->lock); | ||
1218 | - | ||
1219 | - WARN_ON(atomic_read(&nlk->mapped)); | ||
1220 | - | ||
1221 | - if (pg_vec) | ||
1222 | - free_pg_vec(pg_vec, order, req->nm_block_nr); | ||
1223 | -} | ||
1224 | - | ||
1225 | -static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, | ||
1226 | - bool tx_ring) | ||
1227 | -{ | ||
1228 | - struct netlink_sock *nlk = nlk_sk(sk); | ||
1229 | - struct netlink_ring *ring; | ||
1230 | - void **pg_vec = NULL; | ||
1231 | - unsigned int order = 0; | ||
1232 | - | ||
1233 | - ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; | ||
1234 | - | ||
1235 | - if (atomic_read(&nlk->mapped)) | ||
1236 | - return -EBUSY; | ||
1237 | - if (atomic_read(&ring->pending)) | ||
1238 | - return -EBUSY; | ||
1239 | - | ||
1240 | - if (req->nm_block_nr) { | ||
1241 | - if (ring->pg_vec != NULL) | ||
1242 | - return -EBUSY; | ||
1243 | - | ||
1244 | - if ((int)req->nm_block_size <= 0) | ||
1245 | - return -EINVAL; | ||
1246 | - if (!PAGE_ALIGNED(req->nm_block_size)) | ||
1247 | - return -EINVAL; | ||
1248 | - if (req->nm_frame_size < NL_MMAP_HDRLEN) | ||
1249 | - return -EINVAL; | ||
1250 | - if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) | ||
1251 | - return -EINVAL; | ||
1252 | - | ||
1253 | - ring->frames_per_block = req->nm_block_size / | ||
1254 | - req->nm_frame_size; | ||
1255 | - if (ring->frames_per_block == 0) | ||
1256 | - return -EINVAL; | ||
1257 | - if (ring->frames_per_block * req->nm_block_nr != | ||
1258 | - req->nm_frame_nr) | ||
1259 | - return -EINVAL; | ||
1260 | - | ||
1261 | - order = get_order(req->nm_block_size); | ||
1262 | - pg_vec = alloc_pg_vec(nlk, req, order); | ||
1263 | - if (pg_vec == NULL) | ||
1264 | - return -ENOMEM; | ||
1265 | - } else { | ||
1266 | - if (req->nm_frame_nr) | ||
1267 | - return -EINVAL; | ||
1268 | - } | ||
1269 | - | ||
1270 | - mutex_lock(&nlk->pg_vec_lock); | ||
1271 | - if (atomic_read(&nlk->mapped) == 0) { | ||
1272 | - __netlink_set_ring(sk, req, tx_ring, pg_vec, order); | ||
1273 | - mutex_unlock(&nlk->pg_vec_lock); | ||
1274 | - return 0; | ||
1275 | - } | ||
1276 | - | ||
1277 | - mutex_unlock(&nlk->pg_vec_lock); | ||
1278 | - | ||
1279 | - if (pg_vec) | ||
1280 | - free_pg_vec(pg_vec, order, req->nm_block_nr); | ||
1281 | - | ||
1282 | - return -EBUSY; | ||
1283 | -} | ||
1284 | - | ||
1285 | -static void netlink_mm_open(struct vm_area_struct *vma) | ||
1286 | -{ | ||
1287 | - struct file *file = vma->vm_file; | ||
1288 | - struct socket *sock = file->private_data; | ||
1289 | - struct sock *sk = sock->sk; | ||
1290 | - | ||
1291 | - if (sk) | ||
1292 | - atomic_inc(&nlk_sk(sk)->mapped); | ||
1293 | -} | ||
1294 | - | ||
1295 | -static void netlink_mm_close(struct vm_area_struct *vma) | ||
1296 | -{ | ||
1297 | - struct file *file = vma->vm_file; | ||
1298 | - struct socket *sock = file->private_data; | ||
1299 | - struct sock *sk = sock->sk; | ||
1300 | - | ||
1301 | - if (sk) | ||
1302 | - atomic_dec(&nlk_sk(sk)->mapped); | ||
1303 | -} | ||
1304 | - | ||
1305 | -static const struct vm_operations_struct netlink_mmap_ops = { | ||
1306 | - .open = netlink_mm_open, | ||
1307 | - .close = netlink_mm_close, | ||
1308 | -}; | ||
1309 | - | ||
1310 | -static int netlink_mmap(struct file *file, struct socket *sock, | ||
1311 | - struct vm_area_struct *vma) | ||
1312 | -{ | ||
1313 | - struct sock *sk = sock->sk; | ||
1314 | - struct netlink_sock *nlk = nlk_sk(sk); | ||
1315 | - struct netlink_ring *ring; | ||
1316 | - unsigned long start, size, expected; | ||
1317 | - unsigned int i; | ||
1318 | - int err = -EINVAL; | ||
1319 | - | ||
1320 | - if (vma->vm_pgoff) | ||
1321 | - return -EINVAL; | ||
1322 | - | ||
1323 | - mutex_lock(&nlk->pg_vec_lock); | ||
1324 | - | ||
1325 | - expected = 0; | ||
1326 | - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { | ||
1327 | - if (ring->pg_vec == NULL) | ||
1328 | - continue; | ||
1329 | - expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; | ||
1330 | - } | ||
1331 | - | ||
1332 | - if (expected == 0) | ||
1333 | - goto out; | ||
1334 | - | ||
1335 | - size = vma->vm_end - vma->vm_start; | ||
1336 | - if (size != expected) | ||
1337 | - goto out; | ||
1338 | - | ||
1339 | - start = vma->vm_start; | ||
1340 | - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { | ||
1341 | - if (ring->pg_vec == NULL) | ||
1342 | - continue; | ||
1343 | - | ||
1344 | - for (i = 0; i < ring->pg_vec_len; i++) { | ||
1345 | - struct page *page; | ||
1346 | - void *kaddr = ring->pg_vec[i]; | ||
1347 | - unsigned int pg_num; | ||
1348 | - | ||
1349 | - for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { | ||
1350 | - page = pgvec_to_page(kaddr); | ||
1351 | - err = vm_insert_page(vma, start, page); | ||
1352 | - if (err < 0) | ||
1353 | - goto out; | ||
1354 | - start += PAGE_SIZE; | ||
1355 | - kaddr += PAGE_SIZE; | ||
1356 | - } | ||
1357 | - } | ||
1358 | - } | ||
1359 | - | ||
1360 | - atomic_inc(&nlk->mapped); | ||
1361 | - vma->vm_ops = &netlink_mmap_ops; | ||
1362 | - err = 0; | ||
1363 | -out: | ||
1364 | - mutex_unlock(&nlk->pg_vec_lock); | ||
1365 | - return err; | ||
1366 | -} | ||
1367 | - | ||
1368 | -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) | ||
1369 | -{ | ||
1370 | -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 | ||
1371 | - struct page *p_start, *p_end; | ||
1372 | - | ||
1373 | - /* First page is flushed through netlink_{get,set}_status */ | ||
1374 | - p_start = pgvec_to_page(hdr + PAGE_SIZE); | ||
1375 | - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); | ||
1376 | - while (p_start <= p_end) { | ||
1377 | - flush_dcache_page(p_start); | ||
1378 | - p_start++; | ||
1379 | - } | ||
1380 | -#endif | ||
1381 | -} | ||
1382 | - | ||
1383 | -static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) | ||
1384 | -{ | ||
1385 | - smp_rmb(); | ||
1386 | - flush_dcache_page(pgvec_to_page(hdr)); | ||
1387 | - return hdr->nm_status; | ||
1388 | -} | ||
1389 | - | ||
1390 | -static void netlink_set_status(struct nl_mmap_hdr *hdr, | ||
1391 | - enum nl_mmap_status status) | ||
1392 | -{ | ||
1393 | - smp_mb(); | ||
1394 | - hdr->nm_status = status; | ||
1395 | - flush_dcache_page(pgvec_to_page(hdr)); | ||
1396 | -} | ||
1397 | - | ||
1398 | -static struct nl_mmap_hdr * | ||
1399 | -__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos) | ||
1400 | -{ | ||
1401 | - unsigned int pg_vec_pos, frame_off; | ||
1402 | - | ||
1403 | - pg_vec_pos = pos / ring->frames_per_block; | ||
1404 | - frame_off = pos % ring->frames_per_block; | ||
1405 | - | ||
1406 | - return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); | ||
1407 | -} | ||
1408 | - | ||
1409 | -static struct nl_mmap_hdr * | ||
1410 | -netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, | ||
1411 | - enum nl_mmap_status status) | ||
1412 | -{ | ||
1413 | - struct nl_mmap_hdr *hdr; | ||
1414 | - | ||
1415 | - hdr = __netlink_lookup_frame(ring, pos); | ||
1416 | - if (netlink_get_status(hdr) != status) | ||
1417 | - return NULL; | ||
1418 | - | ||
1419 | - return hdr; | ||
1420 | -} | ||
1421 | - | ||
1422 | -static struct nl_mmap_hdr * | ||
1423 | -netlink_current_frame(const struct netlink_ring *ring, | ||
1424 | - enum nl_mmap_status status) | ||
1425 | -{ | ||
1426 | - return netlink_lookup_frame(ring, ring->head, status); | ||
1427 | -} | ||
1428 | - | ||
1429 | -static void netlink_increment_head(struct netlink_ring *ring) | ||
1430 | -{ | ||
1431 | - ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0; | ||
1432 | -} | ||
1433 | - | ||
1434 | -static void netlink_forward_ring(struct netlink_ring *ring) | ||
1435 | -{ | ||
1436 | - unsigned int head = ring->head; | ||
1437 | - const struct nl_mmap_hdr *hdr; | ||
1438 | - | ||
1439 | - do { | ||
1440 | - hdr = __netlink_lookup_frame(ring, ring->head); | ||
1441 | - if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) | ||
1442 | - break; | ||
1443 | - if (hdr->nm_status != NL_MMAP_STATUS_SKIP) | ||
1444 | - break; | ||
1445 | - netlink_increment_head(ring); | ||
1446 | - } while (ring->head != head); | ||
1447 | -} | ||
1448 | - | ||
1449 | -static bool netlink_has_valid_frame(struct netlink_ring *ring) | ||
1450 | -{ | ||
1451 | - unsigned int head = ring->head, pos = head; | ||
1452 | - const struct nl_mmap_hdr *hdr; | ||
1453 | - | ||
1454 | - do { | ||
1455 | - hdr = __netlink_lookup_frame(ring, pos); | ||
1456 | - if (hdr->nm_status == NL_MMAP_STATUS_VALID) | ||
1457 | - return true; | ||
1458 | - pos = pos != 0 ? pos - 1 : ring->frame_max; | ||
1459 | - } while (pos != head); | ||
1460 | - | ||
1461 | - return false; | ||
1462 | -} | ||
1463 | - | ||
1464 | -static bool netlink_dump_space(struct netlink_sock *nlk) | ||
1465 | -{ | ||
1466 | - struct netlink_ring *ring = &nlk->rx_ring; | ||
1467 | - struct nl_mmap_hdr *hdr; | ||
1468 | - unsigned int n; | ||
1469 | - | ||
1470 | - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); | ||
1471 | - if (hdr == NULL) | ||
1472 | - return false; | ||
1473 | - | ||
1474 | - n = ring->head + ring->frame_max / 2; | ||
1475 | - if (n > ring->frame_max) | ||
1476 | - n -= ring->frame_max; | ||
1477 | - | ||
1478 | - hdr = __netlink_lookup_frame(ring, n); | ||
1479 | - | ||
1480 | - return hdr->nm_status == NL_MMAP_STATUS_UNUSED; | ||
1481 | -} | ||
1482 | - | ||
1483 | -static unsigned int netlink_poll(struct file *file, struct socket *sock, | ||
1484 | - poll_table *wait) | ||
1485 | -{ | ||
1486 | - struct sock *sk = sock->sk; | ||
1487 | - struct netlink_sock *nlk = nlk_sk(sk); | ||
1488 | - unsigned int mask; | ||
1489 | - int err; | ||
1490 | - | ||
1491 | - if (nlk->rx_ring.pg_vec != NULL) { | ||
1492 | - /* Memory mapped sockets don't call recvmsg(), so flow control | ||
1493 | - * for dumps is performed here. A dump is allowed to continue | ||
1494 | - * if at least half the ring is unused. | ||
1495 | - */ | ||
1496 | - while (nlk->cb_running && netlink_dump_space(nlk)) { | ||
1497 | - err = netlink_dump(sk); | ||
1498 | - if (err < 0) { | ||
1499 | - sk->sk_err = -err; | ||
1500 | - sk->sk_error_report(sk); | ||
1501 | - break; | ||
1502 | - } | ||
1503 | - } | ||
1504 | - netlink_rcv_wake(sk); | ||
1505 | - } | ||
1506 | - | ||
1507 | - mask = datagram_poll(file, sock, wait); | ||
1508 | - | ||
1509 | - /* We could already have received frames in the normal receive | ||
1510 | - * queue, that will show up as NL_MMAP_STATUS_COPY in the ring, | ||
1511 | - * so if mask contains pollin/etc already, there's no point | ||
1512 | - * walking the ring. | ||
1513 | - */ | ||
1514 | - if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) { | ||
1515 | - spin_lock_bh(&sk->sk_receive_queue.lock); | ||
1516 | - if (nlk->rx_ring.pg_vec) { | ||
1517 | - if (netlink_has_valid_frame(&nlk->rx_ring)) | ||
1518 | - mask |= POLLIN | POLLRDNORM; | ||
1519 | - } | ||
1520 | - spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
1521 | - } | ||
1522 | - | ||
1523 | - spin_lock_bh(&sk->sk_write_queue.lock); | ||
1524 | - if (nlk->tx_ring.pg_vec) { | ||
1525 | - if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) | ||
1526 | - mask |= POLLOUT | POLLWRNORM; | ||
1527 | - } | ||
1528 | - spin_unlock_bh(&sk->sk_write_queue.lock); | ||
1529 | - | ||
1530 | - return mask; | ||
1531 | -} | ||
1532 | - | ||
1533 | -static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) | ||
1534 | -{ | ||
1535 | - return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); | ||
1536 | -} | ||
1537 | - | ||
1538 | -static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, | ||
1539 | - struct netlink_ring *ring, | ||
1540 | - struct nl_mmap_hdr *hdr) | ||
1541 | -{ | ||
1542 | - unsigned int size; | ||
1543 | - void *data; | ||
1544 | - | ||
1545 | - size = ring->frame_size - NL_MMAP_HDRLEN; | ||
1546 | - data = (void *)hdr + NL_MMAP_HDRLEN; | ||
1547 | - | ||
1548 | - skb->head = data; | ||
1549 | - skb->data = data; | ||
1550 | - skb_reset_tail_pointer(skb); | ||
1551 | - skb->end = skb->tail + size; | ||
1552 | - skb->len = 0; | ||
1553 | - | ||
1554 | - skb->destructor = netlink_skb_destructor; | ||
1555 | - NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; | ||
1556 | - NETLINK_CB(skb).sk = sk; | ||
1557 | -} | ||
1558 | - | ||
1559 | -static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, | ||
1560 | - u32 dst_portid, u32 dst_group, | ||
1561 | - struct scm_cookie *scm) | ||
1562 | -{ | ||
1563 | - struct netlink_sock *nlk = nlk_sk(sk); | ||
1564 | - struct netlink_ring *ring; | ||
1565 | - struct nl_mmap_hdr *hdr; | ||
1566 | - struct sk_buff *skb; | ||
1567 | - unsigned int maxlen; | ||
1568 | - int err = 0, len = 0; | ||
1569 | - | ||
1570 | - mutex_lock(&nlk->pg_vec_lock); | ||
1571 | - | ||
1572 | - ring = &nlk->tx_ring; | ||
1573 | - maxlen = ring->frame_size - NL_MMAP_HDRLEN; | ||
1574 | - | ||
1575 | - do { | ||
1576 | - unsigned int nm_len; | ||
1577 | - | ||
1578 | - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); | ||
1579 | - if (hdr == NULL) { | ||
1580 | - if (!(msg->msg_flags & MSG_DONTWAIT) && | ||
1581 | - atomic_read(&nlk->tx_ring.pending)) | ||
1582 | - schedule(); | ||
1583 | - continue; | ||
1584 | - } | ||
1585 | - | ||
1586 | - nm_len = ACCESS_ONCE(hdr->nm_len); | ||
1587 | - if (nm_len > maxlen) { | ||
1588 | - err = -EINVAL; | ||
1589 | - goto out; | ||
1590 | - } | ||
1591 | - | ||
1592 | - netlink_frame_flush_dcache(hdr, nm_len); | ||
1593 | - | ||
1594 | - skb = alloc_skb(nm_len, GFP_KERNEL); | ||
1595 | - if (skb == NULL) { | ||
1596 | - err = -ENOBUFS; | ||
1597 | - goto out; | ||
1598 | - } | ||
1599 | - __skb_put(skb, nm_len); | ||
1600 | - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); | ||
1601 | - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); | ||
1602 | - | ||
1603 | - netlink_increment_head(ring); | ||
1604 | - | ||
1605 | - NETLINK_CB(skb).portid = nlk->portid; | ||
1606 | - NETLINK_CB(skb).dst_group = dst_group; | ||
1607 | - NETLINK_CB(skb).creds = scm->creds; | ||
1608 | - | ||
1609 | - err = security_netlink_send(sk, skb); | ||
1610 | - if (err) { | ||
1611 | - kfree_skb(skb); | ||
1612 | - goto out; | ||
1613 | - } | ||
1614 | - | ||
1615 | - if (unlikely(dst_group)) { | ||
1616 | - atomic_inc(&skb->users); | ||
1617 | - netlink_broadcast(sk, skb, dst_portid, dst_group, | ||
1618 | - GFP_KERNEL); | ||
1619 | - } | ||
1620 | - err = netlink_unicast(sk, skb, dst_portid, | ||
1621 | - msg->msg_flags & MSG_DONTWAIT); | ||
1622 | - if (err < 0) | ||
1623 | - goto out; | ||
1624 | - len += err; | ||
1625 | - | ||
1626 | - } while (hdr != NULL || | ||
1627 | - (!(msg->msg_flags & MSG_DONTWAIT) && | ||
1628 | - atomic_read(&nlk->tx_ring.pending))); | ||
1629 | - | ||
1630 | - if (len > 0) | ||
1631 | - err = len; | ||
1632 | -out: | ||
1633 | - mutex_unlock(&nlk->pg_vec_lock); | ||
1634 | - return err; | ||
1635 | -} | ||
1636 | - | ||
1637 | -static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) | ||
1638 | -{ | ||
1639 | - struct nl_mmap_hdr *hdr; | ||
1640 | - | ||
1641 | - hdr = netlink_mmap_hdr(skb); | ||
1642 | - hdr->nm_len = skb->len; | ||
1643 | - hdr->nm_group = NETLINK_CB(skb).dst_group; | ||
1644 | - hdr->nm_pid = NETLINK_CB(skb).creds.pid; | ||
1645 | - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); | ||
1646 | - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); | ||
1647 | - netlink_frame_flush_dcache(hdr, hdr->nm_len); | ||
1648 | - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); | ||
1649 | - | ||
1650 | - NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; | ||
1651 | - kfree_skb(skb); | ||
1652 | -} | ||
1653 | - | ||
1654 | -static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) | ||
1655 | -{ | ||
1656 | - struct netlink_sock *nlk = nlk_sk(sk); | ||
1657 | - struct netlink_ring *ring = &nlk->rx_ring; | ||
1658 | - struct nl_mmap_hdr *hdr; | ||
1659 | - | ||
1660 | - spin_lock_bh(&sk->sk_receive_queue.lock); | ||
1661 | - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); | ||
1662 | - if (hdr == NULL) { | ||
1663 | - spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
1664 | - kfree_skb(skb); | ||
1665 | - netlink_overrun(sk); | ||
1666 | - return; | ||
1667 | - } | ||
1668 | - netlink_increment_head(ring); | ||
1669 | - __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
1670 | - spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
1671 | - | ||
1672 | - hdr->nm_len = skb->len; | ||
1673 | - hdr->nm_group = NETLINK_CB(skb).dst_group; | ||
1674 | - hdr->nm_pid = NETLINK_CB(skb).creds.pid; | ||
1675 | - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); | ||
1676 | - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); | ||
1677 | - netlink_set_status(hdr, NL_MMAP_STATUS_COPY); | ||
1678 | -} | ||
1679 | - | ||
1680 | -#else /* CONFIG_NETLINK_MMAP */ | ||
1681 | -#define netlink_rx_is_mmaped(sk) false | ||
1682 | -#define netlink_tx_is_mmaped(sk) false | ||
1683 | -#define netlink_mmap sock_no_mmap | ||
1684 | -#define netlink_poll datagram_poll | ||
1685 | -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 | ||
1686 | -#endif /* CONFIG_NETLINK_MMAP */ | ||
1687 | - | ||
1688 | static void netlink_skb_destructor(struct sk_buff *skb) | ||
1689 | { | ||
1690 | -#ifdef CONFIG_NETLINK_MMAP | ||
1691 | - struct nl_mmap_hdr *hdr; | ||
1692 | - struct netlink_ring *ring; | ||
1693 | - struct sock *sk; | ||
1694 | - | ||
1695 | - /* If a packet from the kernel to userspace was freed because of an | ||
1696 | - * error without being delivered to userspace, the kernel must reset | ||
1697 | - * the status. In the direction userspace to kernel, the status is | ||
1698 | - * always reset here after the packet was processed and freed. | ||
1699 | - */ | ||
1700 | - if (netlink_skb_is_mmaped(skb)) { | ||
1701 | - hdr = netlink_mmap_hdr(skb); | ||
1702 | - sk = NETLINK_CB(skb).sk; | ||
1703 | - | ||
1704 | - if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { | ||
1705 | - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); | ||
1706 | - ring = &nlk_sk(sk)->tx_ring; | ||
1707 | - } else { | ||
1708 | - if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { | ||
1709 | - hdr->nm_len = 0; | ||
1710 | - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); | ||
1711 | - } | ||
1712 | - ring = &nlk_sk(sk)->rx_ring; | ||
1713 | - } | ||
1714 | - | ||
1715 | - WARN_ON(atomic_read(&ring->pending) == 0); | ||
1716 | - atomic_dec(&ring->pending); | ||
1717 | - sock_put(sk); | ||
1718 | - | ||
1719 | - skb->head = NULL; | ||
1720 | - } | ||
1721 | -#endif | ||
1722 | if (is_vmalloc_addr(skb->head)) { | ||
1723 | if (!skb->cloned || | ||
1724 | !atomic_dec_return(&(skb_shinfo(skb)->dataref))) | ||
1725 | @@ -936,18 +334,6 @@ static void netlink_sock_destruct(struct sock *sk) | ||
1726 | } | ||
1727 | |||
1728 | skb_queue_purge(&sk->sk_receive_queue); | ||
1729 | -#ifdef CONFIG_NETLINK_MMAP | ||
1730 | - if (1) { | ||
1731 | - struct nl_mmap_req req; | ||
1732 | - | ||
1733 | - memset(&req, 0, sizeof(req)); | ||
1734 | - if (nlk->rx_ring.pg_vec) | ||
1735 | - __netlink_set_ring(sk, &req, false, NULL, 0); | ||
1736 | - memset(&req, 0, sizeof(req)); | ||
1737 | - if (nlk->tx_ring.pg_vec) | ||
1738 | - __netlink_set_ring(sk, &req, true, NULL, 0); | ||
1739 | - } | ||
1740 | -#endif /* CONFIG_NETLINK_MMAP */ | ||
1741 | |||
1742 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
1743 | printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); | ||
1744 | @@ -1201,9 +587,6 @@ static int __netlink_create(struct net *net, struct socket *sock, | ||
1745 | mutex_init(nlk->cb_mutex); | ||
1746 | } | ||
1747 | init_waitqueue_head(&nlk->wait); | ||
1748 | -#ifdef CONFIG_NETLINK_MMAP | ||
1749 | - mutex_init(&nlk->pg_vec_lock); | ||
1750 | -#endif | ||
1751 | |||
1752 | sk->sk_destruct = netlink_sock_destruct; | ||
1753 | sk->sk_protocol = protocol; | ||
1754 | @@ -1745,8 +1128,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, | ||
1755 | nlk = nlk_sk(sk); | ||
1756 | |||
1757 | if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | ||
1758 | - test_bit(NETLINK_S_CONGESTED, &nlk->state)) && | ||
1759 | - !netlink_skb_is_mmaped(skb)) { | ||
1760 | + test_bit(NETLINK_S_CONGESTED, &nlk->state))) { | ||
1761 | DECLARE_WAITQUEUE(wait, current); | ||
1762 | if (!*timeo) { | ||
1763 | if (!ssk || netlink_is_kernel(ssk)) | ||
1764 | @@ -1784,14 +1166,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) | ||
1765 | |||
1766 | netlink_deliver_tap(skb); | ||
1767 | |||
1768 | -#ifdef CONFIG_NETLINK_MMAP | ||
1769 | - if (netlink_skb_is_mmaped(skb)) | ||
1770 | - netlink_queue_mmaped_skb(sk, skb); | ||
1771 | - else if (netlink_rx_is_mmaped(sk)) | ||
1772 | - netlink_ring_set_copied(sk, skb); | ||
1773 | - else | ||
1774 | -#endif /* CONFIG_NETLINK_MMAP */ | ||
1775 | - skb_queue_tail(&sk->sk_receive_queue, skb); | ||
1776 | + skb_queue_tail(&sk->sk_receive_queue, skb); | ||
1777 | sk->sk_data_ready(sk); | ||
1778 | return len; | ||
1779 | } | ||
1780 | @@ -1815,9 +1190,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) | ||
1781 | int delta; | ||
1782 | |||
1783 | WARN_ON(skb->sk != NULL); | ||
1784 | - if (netlink_skb_is_mmaped(skb)) | ||
1785 | - return skb; | ||
1786 | - | ||
1787 | delta = skb->end - skb->tail; | ||
1788 | if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) | ||
1789 | return skb; | ||
1790 | @@ -1897,71 +1269,6 @@ struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size, | ||
1791 | unsigned int ldiff, u32 dst_portid, | ||
1792 | gfp_t gfp_mask) | ||
1793 | { | ||
1794 | -#ifdef CONFIG_NETLINK_MMAP | ||
1795 | - unsigned int maxlen, linear_size; | ||
1796 | - struct sock *sk = NULL; | ||
1797 | - struct sk_buff *skb; | ||
1798 | - struct netlink_ring *ring; | ||
1799 | - struct nl_mmap_hdr *hdr; | ||
1800 | - | ||
1801 | - sk = netlink_getsockbyportid(ssk, dst_portid); | ||
1802 | - if (IS_ERR(sk)) | ||
1803 | - goto out; | ||
1804 | - | ||
1805 | - ring = &nlk_sk(sk)->rx_ring; | ||
1806 | - /* fast-path without atomic ops for common case: non-mmaped receiver */ | ||
1807 | - if (ring->pg_vec == NULL) | ||
1808 | - goto out_put; | ||
1809 | - | ||
1810 | - /* We need to account the full linear size needed as a ring | ||
1811 | - * slot cannot have non-linear parts. | ||
1812 | - */ | ||
1813 | - linear_size = size + ldiff; | ||
1814 | - if (ring->frame_size - NL_MMAP_HDRLEN < linear_size) | ||
1815 | - goto out_put; | ||
1816 | - | ||
1817 | - skb = alloc_skb_head(gfp_mask); | ||
1818 | - if (skb == NULL) | ||
1819 | - goto err1; | ||
1820 | - | ||
1821 | - spin_lock_bh(&sk->sk_receive_queue.lock); | ||
1822 | - /* check again under lock */ | ||
1823 | - if (ring->pg_vec == NULL) | ||
1824 | - goto out_free; | ||
1825 | - | ||
1826 | - /* check again under lock */ | ||
1827 | - maxlen = ring->frame_size - NL_MMAP_HDRLEN; | ||
1828 | - if (maxlen < linear_size) | ||
1829 | - goto out_free; | ||
1830 | - | ||
1831 | - netlink_forward_ring(ring); | ||
1832 | - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); | ||
1833 | - if (hdr == NULL) | ||
1834 | - goto err2; | ||
1835 | - | ||
1836 | - netlink_ring_setup_skb(skb, sk, ring, hdr); | ||
1837 | - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); | ||
1838 | - atomic_inc(&ring->pending); | ||
1839 | - netlink_increment_head(ring); | ||
1840 | - | ||
1841 | - spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
1842 | - return skb; | ||
1843 | - | ||
1844 | -err2: | ||
1845 | - kfree_skb(skb); | ||
1846 | - spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
1847 | - netlink_overrun(sk); | ||
1848 | -err1: | ||
1849 | - sock_put(sk); | ||
1850 | - return NULL; | ||
1851 | - | ||
1852 | -out_free: | ||
1853 | - kfree_skb(skb); | ||
1854 | - spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
1855 | -out_put: | ||
1856 | - sock_put(sk); | ||
1857 | -out: | ||
1858 | -#endif | ||
1859 | return alloc_skb(size, gfp_mask); | ||
1860 | } | ||
1861 | EXPORT_SYMBOL_GPL(__netlink_alloc_skb); | ||
1862 | @@ -2242,8 +1549,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, | ||
1863 | if (level != SOL_NETLINK) | ||
1864 | return -ENOPROTOOPT; | ||
1865 | |||
1866 | - if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && | ||
1867 | - optlen >= sizeof(int) && | ||
1868 | + if (optlen >= sizeof(int) && | ||
1869 | get_user(val, (unsigned int __user *)optval)) | ||
1870 | return -EFAULT; | ||
1871 | |||
1872 | @@ -2296,25 +1602,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, | ||
1873 | } | ||
1874 | err = 0; | ||
1875 | break; | ||
1876 | -#ifdef CONFIG_NETLINK_MMAP | ||
1877 | - case NETLINK_RX_RING: | ||
1878 | - case NETLINK_TX_RING: { | ||
1879 | - struct nl_mmap_req req; | ||
1880 | - | ||
1881 | - /* Rings might consume more memory than queue limits, require | ||
1882 | - * CAP_NET_ADMIN. | ||
1883 | - */ | ||
1884 | - if (!capable(CAP_NET_ADMIN)) | ||
1885 | - return -EPERM; | ||
1886 | - if (optlen < sizeof(req)) | ||
1887 | - return -EINVAL; | ||
1888 | - if (copy_from_user(&req, optval, sizeof(req))) | ||
1889 | - return -EFAULT; | ||
1890 | - err = netlink_set_ring(sk, &req, | ||
1891 | - optname == NETLINK_TX_RING); | ||
1892 | - break; | ||
1893 | - } | ||
1894 | -#endif /* CONFIG_NETLINK_MMAP */ | ||
1895 | case NETLINK_LISTEN_ALL_NSID: | ||
1896 | if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) | ||
1897 | return -EPERM; | ||
1898 | @@ -2484,18 +1771,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) | ||
1899 | smp_rmb(); | ||
1900 | } | ||
1901 | |||
1902 | - /* It's a really convoluted way for userland to ask for mmaped | ||
1903 | - * sendmsg(), but that's what we've got... | ||
1904 | - */ | ||
1905 | - if (netlink_tx_is_mmaped(sk) && | ||
1906 | - iter_is_iovec(&msg->msg_iter) && | ||
1907 | - msg->msg_iter.nr_segs == 1 && | ||
1908 | - msg->msg_iter.iov->iov_base == NULL) { | ||
1909 | - err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, | ||
1910 | - &scm); | ||
1911 | - goto out; | ||
1912 | - } | ||
1913 | - | ||
1914 | err = -EMSGSIZE; | ||
1915 | if (len > sk->sk_sndbuf - 32) | ||
1916 | goto out; | ||
1917 | @@ -2812,8 +2087,7 @@ static int netlink_dump(struct sock *sk) | ||
1918 | goto errout_skb; | ||
1919 | } | ||
1920 | |||
1921 | - if (!netlink_rx_is_mmaped(sk) && | ||
1922 | - atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) | ||
1923 | + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) | ||
1924 | goto errout_skb; | ||
1925 | |||
1926 | /* NLMSG_GOODSIZE is small to avoid high order allocations being | ||
1927 | @@ -2902,16 +2176,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, | ||
1928 | struct netlink_sock *nlk; | ||
1929 | int ret; | ||
1930 | |||
1931 | - /* Memory mapped dump requests need to be copied to avoid looping | ||
1932 | - * on the pending state in netlink_mmap_sendmsg() while the CB hold | ||
1933 | - * a reference to the skb. | ||
1934 | - */ | ||
1935 | - if (netlink_skb_is_mmaped(skb)) { | ||
1936 | - skb = skb_copy(skb, GFP_KERNEL); | ||
1937 | - if (skb == NULL) | ||
1938 | - return -ENOBUFS; | ||
1939 | - } else | ||
1940 | - atomic_inc(&skb->users); | ||
1941 | + atomic_inc(&skb->users); | ||
1942 | |||
1943 | sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); | ||
1944 | if (sk == NULL) { | ||
1945 | @@ -3255,7 +2520,7 @@ static const struct proto_ops netlink_ops = { | ||
1946 | .socketpair = sock_no_socketpair, | ||
1947 | .accept = sock_no_accept, | ||
1948 | .getname = netlink_getname, | ||
1949 | - .poll = netlink_poll, | ||
1950 | + .poll = datagram_poll, | ||
1951 | .ioctl = sock_no_ioctl, | ||
1952 | .listen = sock_no_listen, | ||
1953 | .shutdown = sock_no_shutdown, | ||
1954 | @@ -3263,7 +2528,7 @@ static const struct proto_ops netlink_ops = { | ||
1955 | .getsockopt = netlink_getsockopt, | ||
1956 | .sendmsg = netlink_sendmsg, | ||
1957 | .recvmsg = netlink_recvmsg, | ||
1958 | - .mmap = netlink_mmap, | ||
1959 | + .mmap = sock_no_mmap, | ||
1960 | .sendpage = sock_no_sendpage, | ||
1961 | }; | ||
1962 | |||
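With the af_netlink.c hunks above applied, the netlink proto_ops table falls back to the stock datagram_poll and sock_no_mmap helpers, so mapping a netlink socket is simply refused. A small user-space probe of that behaviour (a sketch only, assuming a kernel carrying this patch; the exact errno is whatever sock_no_mmap reports) could look like:

	#include <errno.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <sys/socket.h>
	#include <unistd.h>
	#include <linux/netlink.h>

	int main(void)
	{
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
		void *p;

		if (fd < 0) {
			perror("socket");
			return 1;
		}
		/* With .mmap = sock_no_mmap the kernel rejects this mapping. */
		p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		if (p == MAP_FAILED)
			printf("mmap on netlink socket rejected: %s\n",
			       strerror(errno));
		else
			munmap(p, 4096);
		close(fd);
		return 0;
	}
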
1963 | diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h | ||
1964 | index df32cb92d9fc..ea4600aea6b0 100644 | ||
1965 | --- a/net/netlink/af_netlink.h | ||
1966 | +++ b/net/netlink/af_netlink.h | ||
1967 | @@ -45,12 +45,6 @@ struct netlink_sock { | ||
1968 | int (*netlink_bind)(struct net *net, int group); | ||
1969 | void (*netlink_unbind)(struct net *net, int group); | ||
1970 | struct module *module; | ||
1971 | -#ifdef CONFIG_NETLINK_MMAP | ||
1972 | - struct mutex pg_vec_lock; | ||
1973 | - struct netlink_ring rx_ring; | ||
1974 | - struct netlink_ring tx_ring; | ||
1975 | - atomic_t mapped; | ||
1976 | -#endif /* CONFIG_NETLINK_MMAP */ | ||
1977 | |||
1978 | struct rhash_head node; | ||
1979 | struct rcu_head rcu; | ||
1980 | @@ -62,15 +56,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk) | ||
1981 | return container_of(sk, struct netlink_sock, sk); | ||
1982 | } | ||
1983 | |||
1984 | -static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) | ||
1985 | -{ | ||
1986 | -#ifdef CONFIG_NETLINK_MMAP | ||
1987 | - return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; | ||
1988 | -#else | ||
1989 | - return false; | ||
1990 | -#endif /* CONFIG_NETLINK_MMAP */ | ||
1991 | -} | ||
1992 | - | ||
1993 | struct netlink_table { | ||
1994 | struct rhashtable hash; | ||
1995 | struct hlist_head mc_list; | ||
1996 | diff --git a/net/netlink/diag.c b/net/netlink/diag.c | ||
1997 | index 3ee63a3cff30..8dd836a8dd60 100644 | ||
1998 | --- a/net/netlink/diag.c | ||
1999 | +++ b/net/netlink/diag.c | ||
2000 | @@ -8,41 +8,6 @@ | ||
2001 | |||
2002 | #include "af_netlink.h" | ||
2003 | |||
2004 | -#ifdef CONFIG_NETLINK_MMAP | ||
2005 | -static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, | ||
2006 | - struct sk_buff *nlskb) | ||
2007 | -{ | ||
2008 | - struct netlink_diag_ring ndr; | ||
2009 | - | ||
2010 | - ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; | ||
2011 | - ndr.ndr_block_nr = ring->pg_vec_len; | ||
2012 | - ndr.ndr_frame_size = ring->frame_size; | ||
2013 | - ndr.ndr_frame_nr = ring->frame_max + 1; | ||
2014 | - | ||
2015 | - return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); | ||
2016 | -} | ||
2017 | - | ||
2018 | -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) | ||
2019 | -{ | ||
2020 | - struct netlink_sock *nlk = nlk_sk(sk); | ||
2021 | - int ret; | ||
2022 | - | ||
2023 | - mutex_lock(&nlk->pg_vec_lock); | ||
2024 | - ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); | ||
2025 | - if (!ret) | ||
2026 | - ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, | ||
2027 | - nlskb); | ||
2028 | - mutex_unlock(&nlk->pg_vec_lock); | ||
2029 | - | ||
2030 | - return ret; | ||
2031 | -} | ||
2032 | -#else | ||
2033 | -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) | ||
2034 | -{ | ||
2035 | - return 0; | ||
2036 | -} | ||
2037 | -#endif | ||
2038 | - | ||
2039 | static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) | ||
2040 | { | ||
2041 | struct netlink_sock *nlk = nlk_sk(sk); | ||
2042 | @@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, | ||
2043 | sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) | ||
2044 | goto out_nlmsg_trim; | ||
2045 | |||
2046 | - if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && | ||
2047 | - sk_diag_put_rings_cfg(sk, skb)) | ||
2048 | - goto out_nlmsg_trim; | ||
2049 | - | ||
2050 | nlmsg_end(skb, nlh); | ||
2051 | return 0; | ||
2052 | |||
2053 | diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c | ||
2054 | index d805cd577a60..3975ac809934 100644 | ||
2055 | --- a/net/packet/af_packet.c | ||
2056 | +++ b/net/packet/af_packet.c | ||
2057 | @@ -3021,7 +3021,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, | ||
2058 | int addr_len) | ||
2059 | { | ||
2060 | struct sock *sk = sock->sk; | ||
2061 | - char name[15]; | ||
2062 | + char name[sizeof(uaddr->sa_data) + 1]; | ||
2063 | |||
2064 | /* | ||
2065 | * Check legality | ||
2066 | @@ -3029,7 +3029,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, | ||
2067 | |||
2068 | if (addr_len != sizeof(struct sockaddr)) | ||
2069 | return -EINVAL; | ||
2070 | - strlcpy(name, uaddr->sa_data, sizeof(name)); | ||
2071 | + /* uaddr->sa_data comes from the userspace, it's not guaranteed to be | ||
2072 | + * zero-terminated. | ||
2073 | + */ | ||
2074 | + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); | ||
2075 | + name[sizeof(uaddr->sa_data)] = 0; | ||
2076 | |||
2077 | return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); | ||
2078 | } | ||
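The packet_bind_spkt() change above swaps strlcpy(), which keeps reading its source until it finds a NUL, for a bounded memcpy() plus explicit termination, since sockaddr sa_data arriving from user space need not be NUL-terminated. A minimal standalone sketch of the same idiom (DATA_LEN and the helper are illustrative, not kernel code):

	#include <stdio.h>
	#include <string.h>

	/* Illustrative 14-byte field, like sockaddr.sa_data: not guaranteed
	 * to be NUL-terminated when it arrives from an untrusted source. */
	#define DATA_LEN 14

	/* dst must have room for DATA_LEN + 1 bytes. */
	static void copy_untrusted_name(char *dst, const char src[DATA_LEN])
	{
		/* Copy exactly DATA_LEN bytes and terminate by hand; unlike
		 * strlcpy(), this never reads past src looking for a NUL. */
		memcpy(dst, src, DATA_LEN);
		dst[DATA_LEN] = '\0';
	}

	int main(void)
	{
		char raw[DATA_LEN];
		char name[DATA_LEN + 1];

		memset(raw, 'A', sizeof(raw));	/* deliberately unterminated */
		copy_untrusted_name(name, raw);
		printf("%s\n", name);
		return 0;
	}
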
2079 | diff --git a/net/sched/act_api.c b/net/sched/act_api.c | ||
2080 | index 06e7c4a37245..694a06f1e0d5 100644 | ||
2081 | --- a/net/sched/act_api.c | ||
2082 | +++ b/net/sched/act_api.c | ||
2083 | @@ -820,10 +820,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, | ||
2084 | goto out_module_put; | ||
2085 | |||
2086 | err = a.ops->walk(skb, &dcb, RTM_DELACTION, &a); | ||
2087 | - if (err < 0) | ||
2088 | + if (err <= 0) | ||
2089 | goto out_module_put; | ||
2090 | - if (err == 0) | ||
2091 | - goto noflush_out; | ||
2092 | |||
2093 | nla_nest_end(skb, nest); | ||
2094 | |||
2095 | @@ -840,7 +838,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, | ||
2096 | out_module_put: | ||
2097 | module_put(a.ops->owner); | ||
2098 | err_out: | ||
2099 | -noflush_out: | ||
2100 | kfree_skb(skb); | ||
2101 | return err; | ||
2102 | } | ||
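In the tca_action_flush() hunk above, a walk() result of 0 (nothing to flush) is now folded into the same exit path as an error, so it reaches module_put() like every other return; the removed noflush_out label sat below that call and skipped it, which would leave the action module's reference count elevated. A generic sketch of the underlying cleanup idiom (the get/put helpers are made up for illustration, not the kernel API):

	#include <stdio.h>

	static int refs;		/* stand-in for a module reference count */

	static void fake_get(void) { refs++; }
	static void fake_put(void) { refs--; }

	/* walk_result: <0 error, 0 nothing to do, >0 work done. */
	static int flush(int walk_result)
	{
		int err;

		fake_get();			/* reference taken up front */

		err = walk_result;
		if (err <= 0)			/* error and "nothing to do" ... */
			goto out_put;		/* ... both release the reference */

		/* ... build and send the flush notification here ... */
		err = 0;

	out_put:
		fake_put();
		return err;
	}

	int main(void)
	{
		flush(0);			/* nothing to flush */
		flush(-1);			/* walk failed */
		flush(1);			/* flushed something */
		printf("leaked refs: %d\n", refs);	/* prints 0 with this flow */
		return 0;
	}
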
2103 | diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c | ||
2104 | index bb41699c6c49..7ecb14f3db54 100644 | ||
2105 | --- a/net/sched/act_connmark.c | ||
2106 | +++ b/net/sched/act_connmark.c | ||
2107 | @@ -109,6 +109,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, | ||
2108 | if (ret < 0) | ||
2109 | return ret; | ||
2110 | |||
2111 | + if (!tb[TCA_CONNMARK_PARMS]) | ||
2112 | + return -EINVAL; | ||
2113 | + | ||
2114 | parm = nla_data(tb[TCA_CONNMARK_PARMS]); | ||
2115 | |||
2116 | if (!tcf_hash_check(parm->index, a, bind)) { |