Magellan Linux

Contents of /trunk/kernel-alx/patches-3.4/0119-3.4.20-all-fixes.patch



Revision 1973
Fri Nov 30 10:38:25 2012 UTC by niro
File size: 186405 bytes
-linux 3.4.20
1 diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
2 index 9b1067a..68c5411 100644
3 --- a/Documentation/cgroups/memory.txt
4 +++ b/Documentation/cgroups/memory.txt
5 @@ -466,6 +466,10 @@ Note:
6 5.3 swappiness
7
8 Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
9 +Please note that unlike the global swappiness, the memcg knob set to 0
10 +really prevents any swapping even if swap storage is available. This
11 +might invoke the memcg OOM killer if there are no file pages left to
12 +reclaim.
13
14 Following cgroups' swappiness can't be changed.
15 - root cgroup (uses /proc/sys/vm/swappiness).
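The documentation change above pins down a real behavioural difference: a per-memcg swappiness of 0 now means "never swap this group", even with swap online, leaving only file pages for reclaim. A minimal userspace sketch of poking the knob, assuming a cgroup-v1 memory controller mounted at the conventional path ("mygroup" is a made-up example group):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Path assumes cgroup v1; the group name is hypothetical. */
        const char *knob = "/sys/fs/cgroup/memory/mygroup/memory.swappiness";
        int fd = open(knob, O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* With this patch's caveat: 0 disables swap for the group
         * outright, so reclaim shortfalls end in the memcg OOM killer. */
        if (write(fd, "0", 1) != 1)
                perror("write");
        close(fd);
        return 0;
}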
16 diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h
17 index c369c9d..9ff4444 100644
18 --- a/arch/arm/plat-omap/include/plat/omap-serial.h
19 +++ b/arch/arm/plat-omap/include/plat/omap-serial.h
20 @@ -42,10 +42,10 @@
21 #define OMAP_UART_WER_MOD_WKUP 0X7F
22
23 /* Enable XON/XOFF flow control on output */
24 -#define OMAP_UART_SW_TX 0x8
25 +#define OMAP_UART_SW_TX 0x04
26
27 /* Enable XON/XOFF flow control on input */
28 -#define OMAP_UART_SW_RX 0x2
29 +#define OMAP_UART_SW_RX 0x04
30
31 #define OMAP_UART_SYSC_RESET 0X07
32 #define OMAP_UART_TCR_TRIG 0X0F
33 diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
34 index 60e8866..93fe83e 100644
35 --- a/arch/m68k/include/asm/signal.h
36 +++ b/arch/m68k/include/asm/signal.h
37 @@ -156,7 +156,7 @@ typedef struct sigaltstack {
38 static inline void sigaddset(sigset_t *set, int _sig)
39 {
40 asm ("bfset %0{%1,#1}"
41 - : "+od" (*set)
42 + : "+o" (*set)
43 : "id" ((_sig - 1) ^ 31)
44 : "cc");
45 }
46 @@ -164,7 +164,7 @@ static inline void sigaddset(sigset_t *set, int _sig)
47 static inline void sigdelset(sigset_t *set, int _sig)
48 {
49 asm ("bfclr %0{%1,#1}"
50 - : "+od" (*set)
51 + : "+o" (*set)
52 : "id" ((_sig - 1) ^ 31)
53 : "cc");
54 }
55 @@ -180,7 +180,7 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
56 int ret;
57 asm ("bfextu %1{%2,#1},%0"
58 : "=d" (ret)
59 - : "od" (*set), "id" ((_sig-1) ^ 31)
60 + : "o" (*set), "id" ((_sig-1) ^ 31)
61 : "cc");
62 return ret;
63 }
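The m68k change narrows the asm operand constraint from "od" (offsettable memory or a data register) to "o" (offsettable memory only): the bitfield instructions here must walk the full multi-word sigset in memory, and a compiler exercising the "d" alternative would confine the operation to a single 32-bit register. A hedged illustration of the same constraint idea, deliberately written as x86 AT&T assembly rather than m68k so it compiles on a common host:

#include <stdio.h>

static void set_low_bit(unsigned int *word)
{
        /* "+o" restricts the operand to offsettable memory -- the
         * analogue of dropping the "d" register alternative above. */
        asm ("orl $1, %0" : "+o" (*word));
}

int main(void)
{
        unsigned int w = 4;

        set_low_bit(&w);
        printf("0x%x\n", w);    /* prints 0x5 */
        return 0;
}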
64 diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
65 index 234f1d8..2e0a15b 100644
66 --- a/arch/s390/include/asm/compat.h
67 +++ b/arch/s390/include/asm/compat.h
68 @@ -20,7 +20,7 @@
69 #define PSW32_MASK_CC 0x00003000UL
70 #define PSW32_MASK_PM 0x00000f00UL
71
72 -#define PSW32_MASK_USER 0x00003F00UL
73 +#define PSW32_MASK_USER 0x0000FF00UL
74
75 #define PSW32_ADDR_AMODE 0x80000000UL
76 #define PSW32_ADDR_INSN 0x7FFFFFFFUL
77 diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
78 index aeb77f0..d3750e7 100644
79 --- a/arch/s390/include/asm/ptrace.h
80 +++ b/arch/s390/include/asm/ptrace.h
81 @@ -240,7 +240,7 @@ typedef struct
82 #define PSW_MASK_EA 0x00000000UL
83 #define PSW_MASK_BA 0x00000000UL
84
85 -#define PSW_MASK_USER 0x00003F00UL
86 +#define PSW_MASK_USER 0x0000FF00UL
87
88 #define PSW_ADDR_AMODE 0x80000000UL
89 #define PSW_ADDR_INSN 0x7FFFFFFFUL
90 @@ -269,7 +269,7 @@ typedef struct
91 #define PSW_MASK_EA 0x0000000100000000UL
92 #define PSW_MASK_BA 0x0000000080000000UL
93
94 -#define PSW_MASK_USER 0x00003F0180000000UL
95 +#define PSW_MASK_USER 0x0000FF0180000000UL
96
97 #define PSW_ADDR_AMODE 0x0000000000000000UL
98 #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
99 diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
100 index 28040fd..0bdca3a 100644
101 --- a/arch/s390/kernel/compat_signal.c
102 +++ b/arch/s390/kernel/compat_signal.c
103 @@ -313,6 +313,10 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
104 regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
105 (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 |
106 (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE);
107 + /* Check for invalid user address space control. */
108 + if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
109 + regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
110 + (regs->psw.mask & ~PSW_MASK_ASC);
111 regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN);
112 for (i = 0; i < NUM_GPRS; i++)
113 regs->gprs[i] = (__u64) regs32.gprs[i];
114 @@ -494,7 +498,10 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
115
116 /* Set up registers for signal handler */
117 regs->gprs[15] = (__force __u64) frame;
118 - regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */
119 + /* Force 31 bit amode and default user address space control. */
120 + regs->psw.mask = PSW_MASK_BA |
121 + (psw_user_bits & PSW_MASK_ASC) |
122 + (regs->psw.mask & ~PSW_MASK_ASC);
123 regs->psw.addr = (__force __u64) ka->sa.sa_handler;
124
125 regs->gprs[2] = map_signal(sig);
126 @@ -562,7 +569,10 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
127
128 /* Set up registers for signal handler */
129 regs->gprs[15] = (__force __u64) frame;
130 - regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */
131 + /* Force 31 bit amode and default user address space control. */
132 + regs->psw.mask = PSW_MASK_BA |
133 + (psw_user_bits & PSW_MASK_ASC) |
134 + (regs->psw.mask & ~PSW_MASK_ASC);
135 regs->psw.addr = (__u64) ka->sa.sa_handler;
136
137 regs->gprs[2] = map_signal(sig);
138 diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
139 index f7582b2..74f58e2 100644
140 --- a/arch/s390/kernel/signal.c
141 +++ b/arch/s390/kernel/signal.c
142 @@ -148,6 +148,10 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
143 /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */
144 regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
145 (user_sregs.regs.psw.mask & PSW_MASK_USER);
146 + /* Check for invalid user address space control. */
147 + if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
148 + regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
149 + (regs->psw.mask & ~PSW_MASK_ASC);
150 /* Check for invalid amode */
151 if (regs->psw.mask & PSW_MASK_EA)
152 regs->psw.mask |= PSW_MASK_BA;
153 @@ -294,7 +298,10 @@ static int setup_frame(int sig, struct k_sigaction *ka,
154
155 /* Set up registers for signal handler */
156 regs->gprs[15] = (unsigned long) frame;
157 - regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */
158 + /* Force default amode and default user address space control. */
159 + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
160 + (psw_user_bits & PSW_MASK_ASC) |
161 + (regs->psw.mask & ~PSW_MASK_ASC);
162 regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
163
164 regs->gprs[2] = map_signal(sig);
165 @@ -367,7 +374,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
166
167 /* Set up registers for signal handler */
168 regs->gprs[15] = (unsigned long) frame;
169 - regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */
170 + /* Force default amode and default user address space control. */
171 + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
172 + (psw_user_bits & PSW_MASK_ASC) |
173 + (regs->psw.mask & ~PSW_MASK_ASC);
174 regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
175
176 regs->gprs[2] = map_signal(sig);
177 diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
178 index 65cb06e..4ccf9f5 100644
179 --- a/arch/s390/mm/gup.c
180 +++ b/arch/s390/mm/gup.c
181 @@ -183,7 +183,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
182 addr = start;
183 len = (unsigned long) nr_pages << PAGE_SHIFT;
184 end = start + len;
185 - if (end < start)
186 + if ((end < start) || (end > TASK_SIZE))
187 goto slow_irqon;
188
189 /*
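The extra bound in the s390 fast-gup path matters because get_user_pages_fast() walks page tables without taking mmap_sem; a request whose end wraps or reaches past the user address-space limit has to bail to the locked slow path. The shape of the check as a generic standalone sketch (names invented):

#include <stdio.h>

/* Nonzero iff [start, start + len) neither wraps nor exceeds limit --
 * the combined test the hunk above installs before the fast walk. */
static int user_range_ok(unsigned long start, unsigned long len,
                         unsigned long limit)
{
        unsigned long end = start + len;

        return end >= start && end <= limit;
}

int main(void)
{
        printf("%d\n", user_range_ok(0x1000, 0x1000, 0x10000)); /* 1 */
        printf("%d\n", user_range_ok(0xf000, 0x2000, 0x10000)); /* 0 */
        return 0;
}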
190 diff --git a/crypto/cryptd.c b/crypto/cryptd.c
191 index 671d4d6..7bdd61b 100644
192 --- a/crypto/cryptd.c
193 +++ b/crypto/cryptd.c
194 @@ -137,13 +137,18 @@ static void cryptd_queue_worker(struct work_struct *work)
195 struct crypto_async_request *req, *backlog;
196
197 cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
198 - /* Only handle one request at a time to avoid hogging crypto
199 - * workqueue. preempt_disable/enable is used to prevent
200 - * being preempted by cryptd_enqueue_request() */
201 + /*
202 + * Only handle one request at a time to avoid hogging crypto workqueue.
203 + * preempt_disable/enable is used to prevent being preempted by
204 + * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
205 + * cryptd_enqueue_request() being accessed from software interrupts.
206 + */
207 + local_bh_disable();
208 preempt_disable();
209 backlog = crypto_get_backlog(&cpu_queue->queue);
210 req = crypto_dequeue_request(&cpu_queue->queue);
211 preempt_enable();
212 + local_bh_enable();
213
214 if (!req)
215 return;
216 diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
217 index 48b5a3c..62d9ee6 100644
218 --- a/drivers/acpi/video.c
219 +++ b/drivers/acpi/video.c
220 @@ -1345,12 +1345,15 @@ static int
221 acpi_video_bus_get_devices(struct acpi_video_bus *video,
222 struct acpi_device *device)
223 {
224 - int status;
225 + int status = 0;
226 struct acpi_device *dev;
227
228 - status = acpi_video_device_enumerate(video);
229 - if (status)
230 - return status;
231 + /*
232 + * There are systems where the video module is known to work fine
233 + * regardless of a broken _DOD, and ignoring the returned value here
234 + * doesn't cause any issues later.
235 + */
236 + acpi_video_device_enumerate(video);
237
238 list_for_each_entry(dev, &device->children, node) {
239
240 diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
241 index 013c7a5..7b33136 100644
242 --- a/drivers/block/rbd.c
243 +++ b/drivers/block/rbd.c
244 @@ -175,8 +175,7 @@ struct rbd_device {
245 /* protects updating the header */
246 struct rw_semaphore header_rwsem;
247 char snap_name[RBD_MAX_SNAP_NAME_LEN];
248 - u32 cur_snap; /* index+1 of current snapshot within snap context
249 - 0 - for the head */
250 + u64 snap_id; /* current snapshot id */
251 int read_only;
252
253 struct list_head node;
254 @@ -450,7 +449,9 @@ static void rbd_client_release(struct kref *kref)
255 struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
256
257 dout("rbd_release_client %p\n", rbdc);
258 + spin_lock(&rbd_client_list_lock);
259 list_del(&rbdc->node);
260 + spin_unlock(&rbd_client_list_lock);
261
262 ceph_destroy_client(rbdc->client);
263 kfree(rbdc->rbd_opts);
264 @@ -463,9 +464,7 @@ static void rbd_client_release(struct kref *kref)
265 */
266 static void rbd_put_client(struct rbd_device *rbd_dev)
267 {
268 - spin_lock(&rbd_client_list_lock);
269 kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
270 - spin_unlock(&rbd_client_list_lock);
271 rbd_dev->rbd_client = NULL;
272 }
273
274 @@ -498,7 +497,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
275
276 snap_count = le32_to_cpu(ondisk->snap_count);
277 header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
278 - snap_count * sizeof (*ondisk),
279 + snap_count * sizeof(u64),
280 gfp_flags);
281 if (!header->snapc)
282 return -ENOMEM;
283 @@ -552,21 +551,6 @@ err_snapc:
284 return -ENOMEM;
285 }
286
287 -static int snap_index(struct rbd_image_header *header, int snap_num)
288 -{
289 - return header->total_snaps - snap_num;
290 -}
291 -
292 -static u64 cur_snap_id(struct rbd_device *rbd_dev)
293 -{
294 - struct rbd_image_header *header = &rbd_dev->header;
295 -
296 - if (!rbd_dev->cur_snap)
297 - return 0;
298 -
299 - return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
300 -}
301 -
302 static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
303 u64 *seq, u64 *size)
304 {
305 @@ -605,7 +589,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
306 snapc->seq = header->snap_seq;
307 else
308 snapc->seq = 0;
309 - dev->cur_snap = 0;
310 + dev->snap_id = CEPH_NOSNAP;
311 dev->read_only = 0;
312 if (size)
313 *size = header->image_size;
314 @@ -613,8 +597,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
315 ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
316 if (ret < 0)
317 goto done;
318 -
319 - dev->cur_snap = header->total_snaps - ret;
320 + dev->snap_id = snapc->seq;
321 dev->read_only = 1;
322 }
323
324 @@ -1521,7 +1504,7 @@ static void rbd_rq_fn(struct request_queue *q)
325 coll, cur_seg);
326 else
327 rbd_req_read(rq, rbd_dev,
328 - cur_snap_id(rbd_dev),
329 + rbd_dev->snap_id,
330 ofs,
331 op_size, bio,
332 coll, cur_seg);
333 @@ -1656,7 +1639,7 @@ static int rbd_header_add_snap(struct rbd_device *dev,
334 struct ceph_mon_client *monc;
335
336 /* we should create a snapshot only if we're pointing at the head */
337 - if (dev->cur_snap)
338 + if (dev->snap_id != CEPH_NOSNAP)
339 return -EINVAL;
340
341 monc = &dev->rbd_client->client->monc;
342 @@ -1683,7 +1666,9 @@ static int rbd_header_add_snap(struct rbd_device *dev,
343 if (ret < 0)
344 return ret;
345
346 - dev->header.snapc->seq = new_snapid;
347 + down_write(&dev->header_rwsem);
348 + dev->header.snapc->seq = new_snapid;
349 + up_write(&dev->header_rwsem);
350
351 return 0;
352 bad:
353 diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
354 index 80b331c..5ba5e66 100644
355 --- a/drivers/gpu/drm/i915/intel_overlay.c
356 +++ b/drivers/gpu/drm/i915/intel_overlay.c
357 @@ -427,9 +427,17 @@ static int intel_overlay_off(struct intel_overlay *overlay)
358 OUT_RING(flip_addr);
359 OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
360 /* turn overlay off */
361 - OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
362 - OUT_RING(flip_addr);
363 - OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
364 + if (IS_I830(dev)) {
365 + /* Workaround: Don't disable the overlay fully, since otherwise
366 + * it dies on the next OVERLAY_ON cmd. */
367 + OUT_RING(MI_NOOP);
368 + OUT_RING(MI_NOOP);
369 + OUT_RING(MI_NOOP);
370 + } else {
371 + OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
372 + OUT_RING(flip_addr);
373 + OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
374 + }
375 ADVANCE_LP_RING();
376
377 return intel_overlay_do_wait_request(overlay, request,
378 diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
379 index 2d07fbf..f6176bc 100644
380 --- a/drivers/gpu/drm/radeon/atombios_encoders.c
381 +++ b/drivers/gpu/drm/radeon/atombios_encoders.c
382 @@ -1421,7 +1421,7 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode)
383 atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0);
384 atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
385 /* some early dce3.2 boards have a bug in their transmitter control table */
386 - if ((rdev->family != CHIP_RV710) || (rdev->family != CHIP_RV730))
387 + if ((rdev->family != CHIP_RV710) && (rdev->family != CHIP_RV730))
388 atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0);
389 }
390 if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) {
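The one-character radeon fix is the classic De Morgan slip: with ||, the test is true for every chip family, because any value differs from at least one of the two constants, so the RV710/RV730 workaround never kicked in. Demonstrated standalone (the enum values are stand-ins):

#include <stdio.h>

enum { CHIP_RV710 = 1, CHIP_RV730 = 2, CHIP_OTHER = 3 };

int main(void)
{
        int family;

        for (family = CHIP_RV710; family <= CHIP_OTHER; family++) {
                /* Buggy form: always 1, so the two chips were never skipped. */
                int buggy = (family != CHIP_RV710) || (family != CHIP_RV730);
                /* Fixed form: 1 only when the chip is neither of them. */
                int fixed = (family != CHIP_RV710) && (family != CHIP_RV730);

                printf("family=%d buggy=%d fixed=%d\n", family, buggy, fixed);
        }
        return 0;
}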
391 diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
392 index ebc6fac..578207e 100644
393 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
394 +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
395 @@ -749,7 +749,10 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
396 /* clear the pages coming from the pool if requested */
397 if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
398 list_for_each_entry(p, &plist, lru) {
399 - clear_page(page_address(p));
400 + if (PageHighMem(p))
401 + clear_highpage(p);
402 + else
403 + clear_page(page_address(p));
404 }
405 }
406
407 diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
408 index 589753f..2b78ddd 100644
409 --- a/drivers/net/ethernet/marvell/sky2.c
410 +++ b/drivers/net/ethernet/marvell/sky2.c
411 @@ -3079,8 +3079,10 @@ static irqreturn_t sky2_intr(int irq, void *dev_id)
412
413 /* Reading this mask interrupts as side effect */
414 status = sky2_read32(hw, B0_Y2_SP_ISRC2);
415 - if (status == 0 || status == ~0)
416 + if (status == 0 || status == ~0) {
417 + sky2_write32(hw, B0_Y2_SP_ICR, 2);
418 return IRQ_NONE;
419 + }
420
421 prefetch(&hw->st_le[hw->st_idx]);
422
423 diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
424 index 482dcd3..0dc70c2 100644
425 --- a/drivers/net/ethernet/realtek/r8169.c
426 +++ b/drivers/net/ethernet/realtek/r8169.c
427 @@ -73,7 +73,7 @@
428 static const int multicast_filter_limit = 32;
429
430 #define MAX_READ_REQUEST_SHIFT 12
431 -#define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */
432 +#define TX_DMA_BURST 7 /* Maximum PCI burst, '7' is unlimited */
433 #define SafeMtu 0x1c20 /* ... actually life sucks beyond ~7k */
434 #define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */
435
436 @@ -3488,6 +3488,8 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
437 void __iomem *ioaddr = tp->mmio_addr;
438
439 switch (tp->mac_version) {
440 + case RTL_GIGA_MAC_VER_25:
441 + case RTL_GIGA_MAC_VER_26:
442 case RTL_GIGA_MAC_VER_29:
443 case RTL_GIGA_MAC_VER_30:
444 case RTL_GIGA_MAC_VER_32:
445 @@ -4129,6 +4131,9 @@ static void rtl_set_rx_mode(struct net_device *dev)
446 mc_filter[1] = swab32(data);
447 }
448
449 + if (tp->mac_version == RTL_GIGA_MAC_VER_35)
450 + mc_filter[1] = mc_filter[0] = 0xffffffff;
451 +
452 RTL_W32(MAR0 + 4, mc_filter[1]);
453 RTL_W32(MAR0 + 0, mc_filter[0]);
454
455 diff --git a/drivers/staging/android/android_alarm.h b/drivers/staging/android/android_alarm.h
456 index 66b6e3d..6eecbde 100644
457 --- a/drivers/staging/android/android_alarm.h
458 +++ b/drivers/staging/android/android_alarm.h
459 @@ -110,12 +110,10 @@ enum android_alarm_return_flags {
460 #define ANDROID_ALARM_WAIT _IO('a', 1)
461
462 #define ALARM_IOW(c, type, size) _IOW('a', (c) | ((type) << 4), size)
463 -#define ALARM_IOR(c, type, size) _IOR('a', (c) | ((type) << 4), size)
464 -
465 /* Set alarm */
466 #define ANDROID_ALARM_SET(type) ALARM_IOW(2, type, struct timespec)
467 #define ANDROID_ALARM_SET_AND_WAIT(type) ALARM_IOW(3, type, struct timespec)
468 -#define ANDROID_ALARM_GET_TIME(type) ALARM_IOR(4, type, struct timespec)
469 +#define ANDROID_ALARM_GET_TIME(type) ALARM_IOW(4, type, struct timespec)
470 #define ANDROID_ALARM_SET_RTC _IOW('a', 5, struct timespec)
471 #define ANDROID_ALARM_BASE_CMD(cmd) (cmd & ~(_IOC(0, 0, 0xf0, 0)))
472 #define ANDROID_ALARM_IOCTL_TO_TYPE(cmd) (_IOC_NR(cmd) >> 4)
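Switching ANDROID_ALARM_GET_TIME from ALARM_IOR to ALARM_IOW changes only the direction bits packed into the command word; the type ('a'), number, and size fields stay as existing userspace binaries expect, which is presumably the point. A quick way to see what the macros encode, assuming a Linux host (mirroring the 'a' and 4 used above):

#include <stdio.h>
#include <time.h>
#include <linux/ioctl.h>

#define ALARM_IOW(c, type, size) _IOW('a', (c) | ((type) << 4), size)
#define ALARM_IOR(c, type, size) _IOR('a', (c) | ((type) << 4), size)

int main(void)
{
        unsigned int as_ior = ALARM_IOR(4, 0, struct timespec);
        unsigned int as_iow = ALARM_IOW(4, 0, struct timespec);

        /* Same nr and size; only _IOC_DIR differs between the two. */
        printf("IOR: dir=%u nr=%u size=%u -> 0x%08x\n",
               _IOC_DIR(as_ior), _IOC_NR(as_ior), _IOC_SIZE(as_ior), as_ior);
        printf("IOW: dir=%u nr=%u size=%u -> 0x%08x\n",
               _IOC_DIR(as_iow), _IOC_NR(as_iow), _IOC_SIZE(as_iow), as_iow);
        return 0;
}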
473 diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
474 index 6189923..d00b38e 100644
475 --- a/drivers/tty/serial/omap-serial.c
476 +++ b/drivers/tty/serial/omap-serial.c
477 @@ -649,19 +649,19 @@ serial_omap_configure_xonxoff
478
479 /*
480 * IXON Flag:
481 - * Flow control for OMAP.TX
482 - * OMAP.RX should listen for XON/XOFF
483 + * Enable XON/XOFF flow control on output.
484 + * Transmit XON1, XOFF1
485 */
486 if (termios->c_iflag & IXON)
487 - up->efr |= OMAP_UART_SW_RX;
488 + up->efr |= OMAP_UART_SW_TX;
489
490 /*
491 * IXOFF Flag:
492 - * Flow control for OMAP.RX
493 - * OMAP.TX should send XON/XOFF
494 + * Enable XON/XOFF flow control on input.
495 + * Receiver compares XON1, XOFF1.
496 */
497 if (termios->c_iflag & IXOFF)
498 - up->efr |= OMAP_UART_SW_TX;
499 + up->efr |= OMAP_UART_SW_RX;
500
501 serial_out(up, UART_EFR, up->efr | UART_EFR_ECB);
502 serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
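The driver hunk above corrects which EFR flag each termios bit drives: IXON enables flow control on output (the transmitter pauses when the far end sends XOFF), while IXOFF makes the receiver emit XON/XOFF to throttle input. The same semantics at the userspace termios level, runnable against any tty (stdin is used here just for convenience):

#include <stdio.h>
#include <termios.h>
#include <unistd.h>

int main(void)
{
        struct termios tio;

        if (tcgetattr(STDIN_FILENO, &tio) < 0) {
                perror("tcgetattr");
                return 1;
        }
        tio.c_iflag |= IXON;    /* obey XON/XOFF to pace our output */
        tio.c_iflag |= IXOFF;   /* emit XON/XOFF to pace our input */
        if (tcsetattr(STDIN_FILENO, TCSANOW, &tio) < 0) {
                perror("tcsetattr");
                return 1;
        }
        printf("software flow control enabled on stdin's tty\n");
        return 0;
}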
503 diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
504 index 17ec21e..43aa36b 100644
505 --- a/drivers/usb/serial/option.c
506 +++ b/drivers/usb/serial/option.c
507 @@ -157,6 +157,7 @@ static void option_instat_callback(struct urb *urb);
508 #define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED 0x8001
509 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED 0x9000
510 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001
511 +#define NOVATELWIRELESS_PRODUCT_E362 0x9010
512 #define NOVATELWIRELESS_PRODUCT_G1 0xA001
513 #define NOVATELWIRELESS_PRODUCT_G1_M 0xA002
514 #define NOVATELWIRELESS_PRODUCT_G2 0xA010
515 @@ -192,6 +193,9 @@ static void option_instat_callback(struct urb *urb);
516 #define DELL_PRODUCT_5730_MINICARD_TELUS 0x8181
517 #define DELL_PRODUCT_5730_MINICARD_VZW 0x8182
518
519 +#define DELL_PRODUCT_5800_MINICARD_VZW 0x8195 /* Novatel E362 */
520 +#define DELL_PRODUCT_5800_V2_MINICARD_VZW 0x8196 /* Novatel E362 */
521 +
522 #define KYOCERA_VENDOR_ID 0x0c88
523 #define KYOCERA_PRODUCT_KPC650 0x17da
524 #define KYOCERA_PRODUCT_KPC680 0x180a
525 @@ -282,6 +286,7 @@ static void option_instat_callback(struct urb *urb);
526 /* ALCATEL PRODUCTS */
527 #define ALCATEL_VENDOR_ID 0x1bbb
528 #define ALCATEL_PRODUCT_X060S_X200 0x0000
529 +#define ALCATEL_PRODUCT_X220_X500D 0x0017
530
531 #define PIRELLI_VENDOR_ID 0x1266
532 #define PIRELLI_PRODUCT_C100_1 0x1002
533 @@ -705,6 +710,7 @@ static const struct usb_device_id option_ids[] = {
534 { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G2) },
535 /* Novatel Ovation MC551 a.k.a. Verizon USB551L */
536 { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) },
537 + { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) },
538
539 { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) },
540 { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) },
541 @@ -727,6 +733,8 @@ static const struct usb_device_id option_ids[] = {
542 { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_SPRINT) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
543 { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_TELUS) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
544 { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_VZW) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
545 + { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_MINICARD_VZW, 0xff, 0xff, 0xff) },
546 + { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_V2_MINICARD_VZW, 0xff, 0xff, 0xff) },
547 { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */
548 { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) },
549 { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) },
550 @@ -1156,6 +1164,7 @@ static const struct usb_device_id option_ids[] = {
551 { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200),
552 .driver_info = (kernel_ulong_t)&alcatel_x200_blacklist
553 },
554 + { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X220_X500D) },
555 { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) },
556 { USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) },
557 { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14),
558 diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
559 index bcf2617..c627ba2 100644
560 --- a/drivers/usb/serial/usb-serial.c
561 +++ b/drivers/usb/serial/usb-serial.c
562 @@ -768,7 +768,7 @@ int usb_serial_probe(struct usb_interface *interface,
563
564 if (retval) {
565 dbg("sub driver rejected device");
566 - kfree(serial);
567 + usb_serial_put(serial);
568 module_put(type->driver.owner);
569 return retval;
570 }
571 @@ -840,7 +840,7 @@ int usb_serial_probe(struct usb_interface *interface,
572 */
573 if (num_bulk_in == 0 || num_bulk_out == 0) {
574 dev_info(&interface->dev, "PL-2303 hack: descriptors matched but endpoints did not\n");
575 - kfree(serial);
576 + usb_serial_put(serial);
577 module_put(type->driver.owner);
578 return -ENODEV;
579 }
580 @@ -854,7 +854,7 @@ int usb_serial_probe(struct usb_interface *interface,
581 if (num_ports == 0) {
582 dev_err(&interface->dev,
583 "Generic device with no bulk out, not allowed.\n");
584 - kfree(serial);
585 + usb_serial_put(serial);
586 module_put(type->driver.owner);
587 return -EIO;
588 }
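The usb-serial error paths stop calling kfree() on a structure that is reference-counted: usb_serial_put() routes destruction through the object's release function and respects any other holders. A generic refcount sketch of why direct freeing is unsafe once a second reference can exist (types and names invented for illustration):

#include <stdio.h>
#include <stdlib.h>

struct obj {
        int refs;
};

static struct obj *obj_get(struct obj *o) { o->refs++; return o; }

static void obj_put(struct obj *o)
{
        /* Free only on the final put -- the discipline the
         * kfree() -> usb_serial_put() conversion restores. */
        if (--o->refs == 0) {
                printf("freeing\n");
                free(o);
        }
}

int main(void)
{
        struct obj *o = calloc(1, sizeof(*o));

        o->refs = 1;    /* creation reference */
        obj_get(o);     /* a second holder appears */
        obj_put(o);     /* error path: must not free here... */
        obj_put(o);     /* ...only the last put frees */
        return 0;
}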
589 diff --git a/drivers/xen/events.c b/drivers/xen/events.c
590 index 6908e4c..26c47a4 100644
591 --- a/drivers/xen/events.c
592 +++ b/drivers/xen/events.c
593 @@ -1365,8 +1365,8 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
594 {
595 struct pt_regs *old_regs = set_irq_regs(regs);
596
597 - exit_idle();
598 irq_enter();
599 + exit_idle();
600
601 __xen_evtchn_do_upcall();
602
603 diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
604 index 173b1d2..32ee086 100644
605 --- a/fs/ceph/addr.c
606 +++ b/fs/ceph/addr.c
607 @@ -54,7 +54,12 @@
608 (CONGESTION_ON_THRESH(congestion_kb) - \
609 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
610
611 -
612 +static inline struct ceph_snap_context *page_snap_context(struct page *page)
613 +{
614 + if (PagePrivate(page))
615 + return (void *)page->private;
616 + return NULL;
617 +}
618
619 /*
620 * Dirty a page. Optimistically adjust accounting, on the assumption
621 @@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset)
622 {
623 struct inode *inode;
624 struct ceph_inode_info *ci;
625 - struct ceph_snap_context *snapc = (void *)page->private;
626 + struct ceph_snap_context *snapc = page_snap_context(page);
627
628 BUG_ON(!PageLocked(page));
629 - BUG_ON(!page->private);
630 BUG_ON(!PagePrivate(page));
631 BUG_ON(!page->mapping);
632
633 @@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g)
634 struct inode *inode = page->mapping ? page->mapping->host : NULL;
635 dout("%p releasepage %p idx %lu\n", inode, page, page->index);
636 WARN_ON(PageDirty(page));
637 - WARN_ON(page->private);
638 WARN_ON(PagePrivate(page));
639 return 0;
640 }
641 @@ -202,7 +205,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
642 dout("readpage inode %p file %p page %p index %lu\n",
643 inode, filp, page, page->index);
644 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
645 - page->index << PAGE_CACHE_SHIFT, &len,
646 + (u64) page_offset(page), &len,
647 ci->i_truncate_seq, ci->i_truncate_size,
648 &page, 1, 0);
649 if (err == -ENOENT)
650 @@ -283,7 +286,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
651 int nr_pages = 0;
652 int ret;
653
654 - off = page->index << PAGE_CACHE_SHIFT;
655 + off = (u64) page_offset(page);
656
657 /* count pages */
658 next_index = page->index;
659 @@ -423,7 +426,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
660 struct ceph_inode_info *ci;
661 struct ceph_fs_client *fsc;
662 struct ceph_osd_client *osdc;
663 - loff_t page_off = page->index << PAGE_CACHE_SHIFT;
664 + loff_t page_off = page_offset(page);
665 int len = PAGE_CACHE_SIZE;
666 loff_t i_size;
667 int err = 0;
668 @@ -443,7 +446,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
669 osdc = &fsc->client->osdc;
670
671 /* verify this is a writeable snap context */
672 - snapc = (void *)page->private;
673 + snapc = page_snap_context(page);
674 if (snapc == NULL) {
675 dout("writepage %p page %p not dirty?\n", inode, page);
676 goto out;
677 @@ -451,7 +454,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
678 oldest = get_oldest_context(inode, &snap_size);
679 if (snapc->seq > oldest->seq) {
680 dout("writepage %p page %p snapc %p not writeable - noop\n",
681 - inode, page, (void *)page->private);
682 + inode, page, snapc);
683 /* we should only noop if called by kswapd */
684 WARN_ON((current->flags & PF_MEMALLOC) == 0);
685 ceph_put_snap_context(oldest);
686 @@ -591,7 +594,7 @@ static void writepages_finish(struct ceph_osd_request *req,
687 clear_bdi_congested(&fsc->backing_dev_info,
688 BLK_RW_ASYNC);
689
690 - ceph_put_snap_context((void *)page->private);
691 + ceph_put_snap_context(page_snap_context(page));
692 page->private = 0;
693 ClearPagePrivate(page);
694 dout("unlocking %d %p\n", i, page);
695 @@ -795,7 +798,7 @@ get_more_pages:
696 }
697
698 /* only if matching snap context */
699 - pgsnapc = (void *)page->private;
700 + pgsnapc = page_snap_context(page);
701 if (pgsnapc->seq > snapc->seq) {
702 dout("page snapc %p %lld > oldest %p %lld\n",
703 pgsnapc, pgsnapc->seq, snapc, snapc->seq);
704 @@ -814,8 +817,7 @@ get_more_pages:
705 /* ok */
706 if (locked_pages == 0) {
707 /* prepare async write request */
708 - offset = (unsigned long long)page->index
709 - << PAGE_CACHE_SHIFT;
710 + offset = (u64) page_offset(page);
711 len = wsize;
712 req = ceph_osdc_new_request(&fsc->client->osdc,
713 &ci->i_layout,
714 @@ -984,7 +986,7 @@ retry_locked:
715 BUG_ON(!ci->i_snap_realm);
716 down_read(&mdsc->snap_rwsem);
717 BUG_ON(!ci->i_snap_realm->cached_context);
718 - snapc = (void *)page->private;
719 + snapc = page_snap_context(page);
720 if (snapc && snapc != ci->i_head_snapc) {
721 /*
722 * this page is already dirty in another (older) snap
723 @@ -1177,7 +1179,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
724 struct inode *inode = vma->vm_file->f_dentry->d_inode;
725 struct page *page = vmf->page;
726 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
727 - loff_t off = page->index << PAGE_CACHE_SHIFT;
728 + loff_t off = page_offset(page);
729 loff_t size, len;
730 int ret;
731
732 diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
733 index fb962ef..6d59006 100644
734 --- a/fs/ceph/debugfs.c
735 +++ b/fs/ceph/debugfs.c
736 @@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
737 int err = -ENOMEM;
738
739 dout("ceph_fs_debugfs_init\n");
740 + BUG_ON(!fsc->client->debugfs_dir);
741 fsc->debugfs_congestion_kb =
742 debugfs_create_file("writeback_congestion_kb",
743 0600,
744 diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
745 index 89971e1..7f1682d 100644
746 --- a/fs/ceph/mds_client.c
747 +++ b/fs/ceph/mds_client.c
748 @@ -334,10 +334,10 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
749 dout("mdsc put_session %p %d -> %d\n", s,
750 atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
751 if (atomic_dec_and_test(&s->s_ref)) {
752 - if (s->s_authorizer)
753 + if (s->s_auth.authorizer)
754 s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
755 s->s_mdsc->fsc->client->monc.auth,
756 - s->s_authorizer);
757 + s->s_auth.authorizer);
758 kfree(s);
759 }
760 }
761 @@ -394,11 +394,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
762 s->s_seq = 0;
763 mutex_init(&s->s_mutex);
764
765 - ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
766 - s->s_con.private = s;
767 - s->s_con.ops = &mds_con_ops;
768 - s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
769 - s->s_con.peer_name.num = cpu_to_le64(mds);
770 + ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
771
772 spin_lock_init(&s->s_gen_ttl_lock);
773 s->s_cap_gen = 0;
774 @@ -440,7 +436,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
775 mdsc->sessions[mds] = s;
776 atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */
777
778 - ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
779 + ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
780 + ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
781
782 return s;
783
784 @@ -2532,6 +2529,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
785 session->s_seq = 0;
786
787 ceph_con_open(&session->s_con,
788 + CEPH_ENTITY_TYPE_MDS, mds,
789 ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
790
791 /* replay unsafe requests */
792 @@ -2636,7 +2634,8 @@ static void check_new_map(struct ceph_mds_client *mdsc,
793 ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
794 session_state_name(s->s_state));
795
796 - if (memcmp(ceph_mdsmap_get_addr(oldmap, i),
797 + if (i >= newmap->m_max_mds ||
798 + memcmp(ceph_mdsmap_get_addr(oldmap, i),
799 ceph_mdsmap_get_addr(newmap, i),
800 sizeof(struct ceph_entity_addr))) {
801 if (s->s_state == CEPH_MDS_SESSION_OPENING) {
802 @@ -3395,39 +3394,33 @@ out:
803 /*
804 * authentication
805 */
806 -static int get_authorizer(struct ceph_connection *con,
807 - void **buf, int *len, int *proto,
808 - void **reply_buf, int *reply_len, int force_new)
809 +
810 +/*
811 + * Note: returned pointer is the address of a structure that's
812 + * managed separately. Caller must *not* attempt to free it.
813 + */
814 +static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
815 + int *proto, int force_new)
816 {
817 struct ceph_mds_session *s = con->private;
818 struct ceph_mds_client *mdsc = s->s_mdsc;
819 struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
820 - int ret = 0;
821 -
822 - if (force_new && s->s_authorizer) {
823 - ac->ops->destroy_authorizer(ac, s->s_authorizer);
824 - s->s_authorizer = NULL;
825 - }
826 - if (s->s_authorizer == NULL) {
827 - if (ac->ops->create_authorizer) {
828 - ret = ac->ops->create_authorizer(
829 - ac, CEPH_ENTITY_TYPE_MDS,
830 - &s->s_authorizer,
831 - &s->s_authorizer_buf,
832 - &s->s_authorizer_buf_len,
833 - &s->s_authorizer_reply_buf,
834 - &s->s_authorizer_reply_buf_len);
835 - if (ret)
836 - return ret;
837 - }
838 - }
839 + struct ceph_auth_handshake *auth = &s->s_auth;
840
841 + if (force_new && auth->authorizer) {
842 + if (ac->ops && ac->ops->destroy_authorizer)
843 + ac->ops->destroy_authorizer(ac, auth->authorizer);
844 + auth->authorizer = NULL;
845 + }
846 + if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
847 + int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
848 + auth);
849 + if (ret)
850 + return ERR_PTR(ret);
851 + }
852 *proto = ac->protocol;
853 - *buf = s->s_authorizer_buf;
854 - *len = s->s_authorizer_buf_len;
855 - *reply_buf = s->s_authorizer_reply_buf;
856 - *reply_len = s->s_authorizer_reply_buf_len;
857 - return 0;
858 +
859 + return auth;
860 }
861
862
863 @@ -3437,7 +3430,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
864 struct ceph_mds_client *mdsc = s->s_mdsc;
865 struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
866
867 - return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len);
868 + return ac->ops->verify_authorizer_reply(ac, s->s_auth.authorizer, len);
869 }
870
871 static int invalidate_authorizer(struct ceph_connection *con)
872 diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
873 index 8c7c04e..dd26846 100644
874 --- a/fs/ceph/mds_client.h
875 +++ b/fs/ceph/mds_client.h
876 @@ -11,6 +11,7 @@
877 #include <linux/ceph/types.h>
878 #include <linux/ceph/messenger.h>
879 #include <linux/ceph/mdsmap.h>
880 +#include <linux/ceph/auth.h>
881
882 /*
883 * Some lock dependencies:
884 @@ -113,9 +114,7 @@ struct ceph_mds_session {
885
886 struct ceph_connection s_con;
887
888 - struct ceph_authorizer *s_authorizer;
889 - void *s_authorizer_buf, *s_authorizer_reply_buf;
890 - size_t s_authorizer_buf_len, s_authorizer_reply_buf_len;
891 + struct ceph_auth_handshake s_auth;
892
893 /* protected by s_gen_ttl_lock */
894 spinlock_t s_gen_ttl_lock;
895 diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
896 index 3cc1b25..6ccf176 100644
897 --- a/fs/cifs/cifsacl.c
898 +++ b/fs/cifs/cifsacl.c
899 @@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr)
900 }
901
902 static void
903 +cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
904 +{
905 + memcpy(dst, src, sizeof(*dst));
906 + dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS);
907 +}
908 +
909 +static void
910 id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
911 struct cifs_sid_id **psidid, char *typestr)
912 {
913 @@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
914 }
915 }
916
917 - memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
918 + cifs_copy_sid(&(*psidid)->sid, sidptr);
919 (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
920 (*psidid)->refcount = 0;
921
922 @@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
923 * any fields of the node after a reference is put .
924 */
925 if (test_bit(SID_ID_MAPPED, &psidid->state)) {
926 - memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
927 + cifs_copy_sid(ssid, &psidid->sid);
928 psidid->time = jiffies; /* update ts for accessing */
929 goto id_sid_out;
930 }
931 @@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
932 if (IS_ERR(sidkey)) {
933 rc = -EINVAL;
934 cFYI(1, "%s: Can't map and id to a SID", __func__);
935 + } else if (sidkey->datalen < sizeof(struct cifs_sid)) {
936 + rc = -EIO;
937 + cFYI(1, "%s: Downcall contained malformed key "
938 + "(datalen=%hu)", __func__, sidkey->datalen);
939 } else {
940 lsid = (struct cifs_sid *)sidkey->payload.data;
941 - memcpy(&psidid->sid, lsid,
942 - sidkey->datalen < sizeof(struct cifs_sid) ?
943 - sidkey->datalen : sizeof(struct cifs_sid));
944 - memcpy(ssid, &psidid->sid,
945 - sidkey->datalen < sizeof(struct cifs_sid) ?
946 - sidkey->datalen : sizeof(struct cifs_sid));
947 + cifs_copy_sid(&psidid->sid, lsid);
948 + cifs_copy_sid(ssid, &psidid->sid);
949 set_bit(SID_ID_MAPPED, &psidid->state);
950 key_put(sidkey);
951 kfree(psidid->sidstr);
952 @@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
953 return rc;
954 }
955 if (test_bit(SID_ID_MAPPED, &psidid->state))
956 - memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
957 + cifs_copy_sid(ssid, &psidid->sid);
958 else
959 rc = -EINVAL;
960 }
961 @@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
962 static void copy_sec_desc(const struct cifs_ntsd *pntsd,
963 struct cifs_ntsd *pnntsd, __u32 sidsoffset)
964 {
965 - int i;
966 -
967 struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
968 struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
969
970 @@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
971 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
972 le32_to_cpu(pntsd->osidoffset));
973 nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
974 -
975 - nowner_sid_ptr->revision = owner_sid_ptr->revision;
976 - nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth;
977 - for (i = 0; i < 6; i++)
978 - nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i];
979 - for (i = 0; i < 5; i++)
980 - nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i];
981 + cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);
982
983 /* copy group sid */
984 group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
985 le32_to_cpu(pntsd->gsidoffset));
986 ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
987 sizeof(struct cifs_sid));
988 -
989 - ngroup_sid_ptr->revision = group_sid_ptr->revision;
990 - ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth;
991 - for (i = 0; i < 6; i++)
992 - ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
993 - for (i = 0; i < 5; i++)
994 - ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
995 + cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);
996
997 return;
998 }
999 @@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
1000 kfree(nowner_sid_ptr);
1001 return rc;
1002 }
1003 - memcpy(owner_sid_ptr, nowner_sid_ptr,
1004 - sizeof(struct cifs_sid));
1005 + cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
1006 kfree(nowner_sid_ptr);
1007 *aclflag = CIFS_ACL_OWNER;
1008 }
1009 @@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
1010 kfree(ngroup_sid_ptr);
1011 return rc;
1012 }
1013 - memcpy(group_sid_ptr, ngroup_sid_ptr,
1014 - sizeof(struct cifs_sid));
1015 + cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
1016 kfree(ngroup_sid_ptr);
1017 *aclflag = CIFS_ACL_GROUP;
1018 }
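Every SID copy in cifsacl.c now funnels through cifs_copy_sid(), which clamps num_subauth to the structure's capacity, so a short or malformed SID from a key downcall can no longer advertise a count that later loops would walk past the array. The defensive shape, reduced to a standalone sketch (the struct layout is simplified; the real cifs_sid carries more fields):

#include <stdio.h>
#include <string.h>

#define NUM_SUBAUTHS 5

struct sid {
        unsigned char num_subauth;
        unsigned int sub_auth[NUM_SUBAUTHS];
};

static void copy_sid(struct sid *dst, const struct sid *src)
{
        memcpy(dst, src, sizeof(*dst));
        /* Clamp the advertised count so iteration over sub_auth[]
         * stays in bounds no matter what the source claimed. */
        if (dst->num_subauth > NUM_SUBAUTHS)
                dst->num_subauth = NUM_SUBAUTHS;
}

int main(void)
{
        struct sid bad = { .num_subauth = 200 }, safe;

        copy_sid(&safe, &bad);
        printf("clamped to %u\n", safe.num_subauth);    /* 5 */
        return 0;
}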
1019 diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
1020 index 0f04d2e..240832e 100644
1021 --- a/fs/ecryptfs/main.c
1022 +++ b/fs/ecryptfs/main.c
1023 @@ -280,6 +280,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
1024 char *fnek_src;
1025 char *cipher_key_bytes_src;
1026 char *fn_cipher_key_bytes_src;
1027 + u8 cipher_code;
1028
1029 *check_ruid = 0;
1030
1031 @@ -421,6 +422,18 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
1032 && !fn_cipher_key_bytes_set)
1033 mount_crypt_stat->global_default_fn_cipher_key_bytes =
1034 mount_crypt_stat->global_default_cipher_key_size;
1035 +
1036 + cipher_code = ecryptfs_code_for_cipher_string(
1037 + mount_crypt_stat->global_default_cipher_name,
1038 + mount_crypt_stat->global_default_cipher_key_size);
1039 + if (!cipher_code) {
1040 + ecryptfs_printk(KERN_ERR,
1041 + "eCryptfs doesn't support cipher: %s",
1042 + mount_crypt_stat->global_default_cipher_name);
1043 + rc = -EINVAL;
1044 + goto out;
1045 + }
1046 +
1047 mutex_lock(&key_tfm_list_mutex);
1048 if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name,
1049 NULL)) {
1050 @@ -506,7 +519,6 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
1051 goto out;
1052 }
1053
1054 - s->s_flags = flags;
1055 rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
1056 if (rc)
1057 goto out1;
1058 @@ -542,6 +554,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
1059 }
1060
1061 ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
1062 +
1063 + /**
1064 + * Set the POSIX ACL flag based on whether they're enabled in the lower
1065 + * mount. Force a read-only eCryptfs mount if the lower mount is ro.
1066 + * Allow a ro eCryptfs mount even when the lower mount is rw.
1067 + */
1068 + s->s_flags = flags & ~MS_POSIXACL;
1069 + s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
1070 +
1071 s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
1072 s->s_blocksize = path.dentry->d_sb->s_blocksize;
1073 s->s_magic = ECRYPTFS_SUPER_MAGIC;
1074 diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
1075 index 5e80180..8955e36 100644
1076 --- a/fs/nfs/nfs4proc.c
1077 +++ b/fs/nfs/nfs4proc.c
1078 @@ -307,8 +307,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
1079 dprintk("%s ERROR: %d Reset session\n", __func__,
1080 errorcode);
1081 nfs4_schedule_session_recovery(clp->cl_session);
1082 - exception->retry = 1;
1083 - break;
1084 + goto wait_on_recovery;
1085 #endif /* defined(CONFIG_NFS_V4_1) */
1086 case -NFS4ERR_FILE_OPEN:
1087 if (exception->timeout > HZ) {
1088 diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
1089 index f35794b..a506360 100644
1090 --- a/fs/notify/fanotify/fanotify.c
1091 +++ b/fs/notify/fanotify/fanotify.c
1092 @@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
1093 if ((old->path.mnt == new->path.mnt) &&
1094 (old->path.dentry == new->path.dentry))
1095 return true;
1096 + break;
1097 case (FSNOTIFY_EVENT_NONE):
1098 return true;
1099 default:
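The fanotify fix is a missing break in a switch whose path case only returned on a successful match; without it, a path event that failed the comparison fell straight through into the FSNOTIFY_EVENT_NONE case and merged anyway. The trap in miniature:

#include <stdbool.h>
#include <stdio.h>

enum ev { EV_PATH, EV_NONE };

static bool should_merge(enum ev type, bool same_path)
{
        switch (type) {
        case EV_PATH:
                if (same_path)
                        return true;
                break;  /* without this, EV_PATH falls into EV_NONE */
        case EV_NONE:
                return true;
        default:
                break;
        }
        return false;
}

int main(void)
{
        printf("%d\n", should_merge(EV_PATH, false));   /* 0 with the break */
        return 0;
}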
1100 diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
1101 index f99c1b4..c11db51 100644
1102 --- a/fs/reiserfs/inode.c
1103 +++ b/fs/reiserfs/inode.c
1104 @@ -1788,8 +1788,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1105
1106 BUG_ON(!th->t_trans_id);
1107
1108 - dquot_initialize(inode);
1109 + reiserfs_write_unlock(inode->i_sb);
1110 err = dquot_alloc_inode(inode);
1111 + reiserfs_write_lock(inode->i_sb);
1112 if (err)
1113 goto out_end_trans;
1114 if (!dir->i_nlink) {
1115 @@ -1985,8 +1986,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1116
1117 out_end_trans:
1118 journal_end(th, th->t_super, th->t_blocks_allocated);
1119 + reiserfs_write_unlock(inode->i_sb);
1120 /* Drop can be outside and it needs more credits so it's better to have it outside */
1121 dquot_drop(inode);
1122 + reiserfs_write_lock(inode->i_sb);
1123 inode->i_flags |= S_NOQUOTA;
1124 make_bad_inode(inode);
1125
1126 @@ -3109,10 +3112,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
1127 /* must be turned off for recursive notify_change calls */
1128 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
1129
1130 - depth = reiserfs_write_lock_once(inode->i_sb);
1131 if (is_quota_modification(inode, attr))
1132 dquot_initialize(inode);
1133 -
1134 + depth = reiserfs_write_lock_once(inode->i_sb);
1135 if (attr->ia_valid & ATTR_SIZE) {
1136 /* version 2 items will be caught by the s_maxbytes check
1137 ** done for us in vmtruncate
1138 @@ -3176,7 +3178,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
1139 error = journal_begin(&th, inode->i_sb, jbegin_count);
1140 if (error)
1141 goto out;
1142 + reiserfs_write_unlock_once(inode->i_sb, depth);
1143 error = dquot_transfer(inode, attr);
1144 + depth = reiserfs_write_lock_once(inode->i_sb);
1145 if (error) {
1146 journal_end(&th, inode->i_sb, jbegin_count);
1147 goto out;
1148 diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
1149 index f8afa4b..2f40a4c 100644
1150 --- a/fs/reiserfs/stree.c
1151 +++ b/fs/reiserfs/stree.c
1152 @@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
1153 key2type(&(key->on_disk_key)));
1154 #endif
1155
1156 + reiserfs_write_unlock(inode->i_sb);
1157 retval = dquot_alloc_space_nodirty(inode, pasted_size);
1158 + reiserfs_write_lock(inode->i_sb);
1159 if (retval) {
1160 pathrelse(search_path);
1161 return retval;
1162 @@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
1163 "reiserquota insert_item(): allocating %u id=%u type=%c",
1164 quota_bytes, inode->i_uid, head2type(ih));
1165 #endif
1166 + reiserfs_write_unlock(inode->i_sb);
1167 /* We can't dirty inode here. It would be immediately written but
1168 * appropriate stat item isn't inserted yet... */
1169 retval = dquot_alloc_space_nodirty(inode, quota_bytes);
1170 + reiserfs_write_lock(inode->i_sb);
1171 if (retval) {
1172 pathrelse(path);
1173 return retval;
1174 diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
1175 index 8b7616e..8169be9 100644
1176 --- a/fs/reiserfs/super.c
1177 +++ b/fs/reiserfs/super.c
1178 @@ -256,7 +256,9 @@ static int finish_unfinished(struct super_block *s)
1179 retval = remove_save_link_only(s, &save_link_key, 0);
1180 continue;
1181 }
1182 + reiserfs_write_unlock(s);
1183 dquot_initialize(inode);
1184 + reiserfs_write_lock(s);
1185
1186 if (truncate && S_ISDIR(inode->i_mode)) {
1187 /* We got a truncate request for a dir which is impossible.
1188 @@ -1292,7 +1294,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1189 kfree(qf_names[i]);
1190 #endif
1191 err = -EINVAL;
1192 - goto out_err;
1193 + goto out_unlock;
1194 }
1195 #ifdef CONFIG_QUOTA
1196 handle_quota_files(s, qf_names, &qfmt);
1197 @@ -1336,7 +1338,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1198 if (blocks) {
1199 err = reiserfs_resize(s, blocks);
1200 if (err != 0)
1201 - goto out_err;
1202 + goto out_unlock;
1203 }
1204
1205 if (*mount_flags & MS_RDONLY) {
1206 @@ -1346,9 +1348,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1207 /* it is read-only already */
1208 goto out_ok;
1209
1210 + /*
1211 + * Drop write lock. Quota will retake it when needed and lock
1212 + * ordering requires calling dquot_suspend() without it.
1213 + */
1214 + reiserfs_write_unlock(s);
1215 err = dquot_suspend(s, -1);
1216 if (err < 0)
1217 goto out_err;
1218 + reiserfs_write_lock(s);
1219
1220 /* try to remount file system with read-only permissions */
1221 if (sb_umount_state(rs) == REISERFS_VALID_FS
1222 @@ -1358,7 +1366,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1223
1224 err = journal_begin(&th, s, 10);
1225 if (err)
1226 - goto out_err;
1227 + goto out_unlock;
1228
1229 /* Mounting a rw partition read-only. */
1230 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1231 @@ -1373,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1232
1233 if (reiserfs_is_journal_aborted(journal)) {
1234 err = journal->j_errno;
1235 - goto out_err;
1236 + goto out_unlock;
1237 }
1238
1239 handle_data_mode(s, mount_options);
1240 @@ -1382,7 +1390,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1241 s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */
1242 err = journal_begin(&th, s, 10);
1243 if (err)
1244 - goto out_err;
1245 + goto out_unlock;
1246
1247 /* Mount a partition which is read-only, read-write */
1248 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1249 @@ -1399,11 +1407,17 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1250 SB_JOURNAL(s)->j_must_wait = 1;
1251 err = journal_end(&th, s, 10);
1252 if (err)
1253 - goto out_err;
1254 + goto out_unlock;
1255 s->s_dirt = 0;
1256
1257 if (!(*mount_flags & MS_RDONLY)) {
1258 + /*
1259 + * Drop write lock. Quota will retake it when needed and lock
1260 + * ordering requires calling dquot_resume() without it.
1261 + */
1262 + reiserfs_write_unlock(s);
1263 dquot_resume(s, -1);
1264 + reiserfs_write_lock(s);
1265 finish_unfinished(s);
1266 reiserfs_xattr_init(s, *mount_flags);
1267 }
1268 @@ -1413,9 +1427,10 @@ out_ok:
1269 reiserfs_write_unlock(s);
1270 return 0;
1271
1272 +out_unlock:
1273 + reiserfs_write_unlock(s);
1274 out_err:
1275 kfree(new_opts);
1276 - reiserfs_write_unlock(s);
1277 return err;
1278 }
1279
1280 @@ -2049,13 +2064,15 @@ static int reiserfs_write_dquot(struct dquot *dquot)
1281 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1282 if (ret)
1283 goto out;
1284 + reiserfs_write_unlock(dquot->dq_sb);
1285 ret = dquot_commit(dquot);
1286 + reiserfs_write_lock(dquot->dq_sb);
1287 err =
1288 journal_end(&th, dquot->dq_sb,
1289 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1290 if (!ret && err)
1291 ret = err;
1292 - out:
1293 +out:
1294 reiserfs_write_unlock(dquot->dq_sb);
1295 return ret;
1296 }
1297 @@ -2071,13 +2088,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
1298 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1299 if (ret)
1300 goto out;
1301 + reiserfs_write_unlock(dquot->dq_sb);
1302 ret = dquot_acquire(dquot);
1303 + reiserfs_write_lock(dquot->dq_sb);
1304 err =
1305 journal_end(&th, dquot->dq_sb,
1306 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1307 if (!ret && err)
1308 ret = err;
1309 - out:
1310 +out:
1311 reiserfs_write_unlock(dquot->dq_sb);
1312 return ret;
1313 }
1314 @@ -2091,19 +2110,21 @@ static int reiserfs_release_dquot(struct dquot *dquot)
1315 ret =
1316 journal_begin(&th, dquot->dq_sb,
1317 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
1318 + reiserfs_write_unlock(dquot->dq_sb);
1319 if (ret) {
1320 /* Release dquot anyway to avoid endless cycle in dqput() */
1321 dquot_release(dquot);
1322 goto out;
1323 }
1324 ret = dquot_release(dquot);
1325 + reiserfs_write_lock(dquot->dq_sb);
1326 err =
1327 journal_end(&th, dquot->dq_sb,
1328 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
1329 if (!ret && err)
1330 ret = err;
1331 - out:
1332 reiserfs_write_unlock(dquot->dq_sb);
1333 +out:
1334 return ret;
1335 }
1336
1337 @@ -2128,11 +2149,13 @@ static int reiserfs_write_info(struct super_block *sb, int type)
1338 ret = journal_begin(&th, sb, 2);
1339 if (ret)
1340 goto out;
1341 + reiserfs_write_unlock(sb);
1342 ret = dquot_commit_info(sb, type);
1343 + reiserfs_write_lock(sb);
1344 err = journal_end(&th, sb, 2);
1345 if (!ret && err)
1346 ret = err;
1347 - out:
1348 +out:
1349 reiserfs_write_unlock(sb);
1350 return ret;
1351 }
1352 @@ -2157,8 +2180,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
1353 struct reiserfs_transaction_handle th;
1354 int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
1355
1356 - if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
1357 - return -EINVAL;
1358 + reiserfs_write_lock(sb);
1359 + if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) {
1360 + err = -EINVAL;
1361 + goto out;
1362 + }
1363
1364 /* Quotafile not on the same filesystem? */
1365 if (path->dentry->d_sb != sb) {
1366 @@ -2200,8 +2226,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
1367 if (err)
1368 goto out;
1369 }
1370 - err = dquot_quota_on(sb, type, format_id, path);
1371 + reiserfs_write_unlock(sb);
1372 + return dquot_quota_on(sb, type, format_id, path);
1373 out:
1374 + reiserfs_write_unlock(sb);
1375 return err;
1376 }
1377
1378 @@ -2275,7 +2303,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
1379 tocopy = sb->s_blocksize - offset < towrite ?
1380 sb->s_blocksize - offset : towrite;
1381 tmp_bh.b_state = 0;
1382 + reiserfs_write_lock(sb);
1383 err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
1384 + reiserfs_write_unlock(sb);
1385 if (err)
1386 goto out;
1387 if (offset || tocopy != sb->s_blocksize)
1388 @@ -2291,10 +2321,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
1389 flush_dcache_page(bh->b_page);
1390 set_buffer_uptodate(bh);
1391 unlock_buffer(bh);
1392 + reiserfs_write_lock(sb);
1393 reiserfs_prepare_for_journal(sb, bh, 1);
1394 journal_mark_dirty(current->journal_info, sb, bh);
1395 if (!journal_quota)
1396 reiserfs_add_ordered_list(inode, bh);
1397 + reiserfs_write_unlock(sb);
1398 brelse(bh);
1399 offset = 0;
1400 towrite -= tocopy;
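The reiserfs hunks above all apply one rule: drop the filesystem's write lock before calling into the quota layer (dquot_commit(), dquot_suspend(), and friends) and retake it afterwards, since quota code takes its own locks in the opposite order elsewhere -- a textbook AB/BA inversion. The discipline as a minimal pthread sketch (lock names invented):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t fs_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t quota_lock = PTHREAD_MUTEX_INITIALIZER;

static void quota_op(void)
{
        pthread_mutex_lock(&quota_lock);
        /* ... quota work that may itself want fs_lock elsewhere ... */
        pthread_mutex_unlock(&quota_lock);
}

static void fs_op(void)
{
        pthread_mutex_lock(&fs_lock);
        /* ... journalled work under the fs lock ... */

        pthread_mutex_unlock(&fs_lock); /* drop before crossing layers */
        quota_op();
        pthread_mutex_lock(&fs_lock);   /* retake for the remainder */

        /* ... finish and commit ... */
        pthread_mutex_unlock(&fs_lock);
}

int main(void)
{
        fs_op();
        printf("no lock-order inversion\n");
        return 0;
}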
1401 diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
1402 index 2559d17..5dc48ca 100644
1403 --- a/fs/ubifs/find.c
1404 +++ b/fs/ubifs/find.c
1405 @@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
1406 if (!lprops) {
1407 lprops = ubifs_fast_find_freeable(c);
1408 if (!lprops) {
1409 - ubifs_assert(c->freeable_cnt == 0);
1410 - if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
1411 + /*
1412 + * The first condition means the following: go scan the
1413 + * LPT if there are uncategorized lprops, which means
1414 + * there may be freeable LEBs there (UBIFS does not
1415 + * store the information about freeable LEBs in the
1416 + * master node).
1417 + */
1418 + if (c->in_a_category_cnt != c->main_lebs ||
1419 + c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
1420 + ubifs_assert(c->freeable_cnt == 0);
1421 lprops = scan_for_leb_for_idx(c);
1422 if (IS_ERR(lprops)) {
1423 err = PTR_ERR(lprops);
1424 diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
1425 index f8a181e..ea9d491 100644
1426 --- a/fs/ubifs/lprops.c
1427 +++ b/fs/ubifs/lprops.c
1428 @@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
1429 default:
1430 ubifs_assert(0);
1431 }
1432 +
1433 lprops->flags &= ~LPROPS_CAT_MASK;
1434 lprops->flags |= cat;
1435 + c->in_a_category_cnt += 1;
1436 + ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
1437 }
1438
1439 /**
1440 @@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
1441 default:
1442 ubifs_assert(0);
1443 }
1444 +
1445 + c->in_a_category_cnt -= 1;
1446 + ubifs_assert(c->in_a_category_cnt >= 0);
1447 }
1448
1449 /**
1450 diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
1451 index 93d59ac..4971cb2 100644
1452 --- a/fs/ubifs/ubifs.h
1453 +++ b/fs/ubifs/ubifs.h
1454 @@ -1184,6 +1184,8 @@ struct ubifs_debug_info;
1455 * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
1456 * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
1457 * @freeable_cnt: number of freeable LEBs in @freeable_list
1458 + * @in_a_category_cnt: count of lprops which are in a certain category, which
1459 + * basically means that they were loaded from the flash
1460 *
1461 * @ltab_lnum: LEB number of LPT's own lprops table
1462 * @ltab_offs: offset of LPT's own lprops table
1463 @@ -1413,6 +1415,7 @@ struct ubifs_info {
1464 struct list_head freeable_list;
1465 struct list_head frdi_idx_list;
1466 int freeable_cnt;
1467 + int in_a_category_cnt;
1468
1469 int ltab_lnum;
1470 int ltab_offs;
1471 diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
1472 index 6819b51..bb76128 100644
1473 --- a/fs/xfs/xfs_buf.c
1474 +++ b/fs/xfs/xfs_buf.c
1475 @@ -1165,9 +1165,14 @@ xfs_buf_bio_end_io(
1476 {
1477 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1478
1479 - xfs_buf_ioerror(bp, -error);
1480 + /*
1481 + * don't overwrite existing errors - otherwise we can lose errors on
1482 + * buffers that require multiple bios to complete.
1483 + */
1484 + if (!bp->b_error)
1485 + xfs_buf_ioerror(bp, -error);
1486
1487 - if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1488 + if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1489 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1490
1491 _xfs_buf_ioend(bp, 1);
1492 @@ -1243,6 +1248,11 @@ next_chunk:
1493 if (size)
1494 goto next_chunk;
1495 } else {
1496 + /*
1497 + * This is guaranteed not to be the last io reference count
1498 + * because the caller (xfs_buf_iorequest) holds a count itself.
1499 + */
1500 + atomic_dec(&bp->b_io_remaining);
1501 xfs_buf_ioerror(bp, EIO);
1502 bio_put(bio);
1503 }
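The xfs_buf.c change makes the first error sticky across the multiple bios that may complete one buffer, so a later bio finishing cleanly cannot wipe out an earlier failure. Modeled in user-space C; struct buf and the error value are simplified stand-ins:

#include <stdio.h>

struct buf { int b_error; };

/* "First error wins": only record an error if none is recorded yet. */
static void bio_end_io(struct buf *bp, int error)
{
    if (!bp->b_error)
        bp->b_error = error;
}

int main(void)
{
    struct buf bp = { 0 };
    bio_end_io(&bp, -5);      /* first bio fails                */
    bio_end_io(&bp, 0);       /* second bio completes cleanly   */
    printf("final b_error = %d\n", bp.b_error);  /* still -5 */
    return 0;
}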
1504 diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
1505 index aa13392..d4080f3 100644
1506 --- a/include/linux/ceph/auth.h
1507 +++ b/include/linux/ceph/auth.h
1508 @@ -14,6 +14,14 @@
1509 struct ceph_auth_client;
1510 struct ceph_authorizer;
1511
1512 +struct ceph_auth_handshake {
1513 + struct ceph_authorizer *authorizer;
1514 + void *authorizer_buf;
1515 + size_t authorizer_buf_len;
1516 + void *authorizer_reply_buf;
1517 + size_t authorizer_reply_buf_len;
1518 +};
1519 +
1520 struct ceph_auth_client_ops {
1521 const char *name;
1522
1523 @@ -43,9 +51,7 @@ struct ceph_auth_client_ops {
1524 * the response to authenticate the service.
1525 */
1526 int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
1527 - struct ceph_authorizer **a,
1528 - void **buf, size_t *len,
1529 - void **reply_buf, size_t *reply_len);
1530 + struct ceph_auth_handshake *auth);
1531 int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
1532 struct ceph_authorizer *a, size_t len);
1533 void (*destroy_authorizer)(struct ceph_auth_client *ac,
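The header change above replaces five out-parameters of create_authorizer() with one ceph_auth_handshake bundle the caller keeps. A self-contained sketch of how an implementation fills it; all types and buffers here are simplified stand-ins, not the real ceph structures:

#include <stddef.h>
#include <stdio.h>

struct ceph_authorizer;  /* opaque, pointer-only in this sketch */

struct ceph_auth_handshake {
    struct ceph_authorizer *authorizer;
    void *authorizer_buf;
    size_t authorizer_buf_len;
    void *authorizer_reply_buf;
    size_t authorizer_reply_buf_len;
};

static char auth_buf[16], reply_buf[16];

/* Fill the bundle in one place instead of five out-parameters. */
static int create_authorizer(struct ceph_auth_handshake *auth)
{
    auth->authorizer = NULL;  /* would point at the real authorizer */
    auth->authorizer_buf = auth_buf;
    auth->authorizer_buf_len = sizeof(auth_buf);
    auth->authorizer_reply_buf = reply_buf;
    auth->authorizer_reply_buf_len = sizeof(reply_buf);
    return 0;
}

int main(void)
{
    struct ceph_auth_handshake auth;
    create_authorizer(&auth);
    printf("buf %zu bytes, reply buf %zu bytes\n",
           auth.authorizer_buf_len, auth.authorizer_reply_buf_len);
    return 0;
}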
1534 diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
1535 index e71d683..98ec36a 100644
1536 --- a/include/linux/ceph/libceph.h
1537 +++ b/include/linux/ceph/libceph.h
1538 @@ -132,7 +132,7 @@ struct ceph_client {
1539 u32 supported_features;
1540 u32 required_features;
1541
1542 - struct ceph_messenger *msgr; /* messenger instance */
1543 + struct ceph_messenger msgr; /* messenger instance */
1544 struct ceph_mon_client monc;
1545 struct ceph_osd_client osdc;
1546
1547 diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
1548 index 3bff047..189ae06 100644
1549 --- a/include/linux/ceph/messenger.h
1550 +++ b/include/linux/ceph/messenger.h
1551 @@ -25,15 +25,12 @@ struct ceph_connection_operations {
1552 void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
1553
1554 /* authorize an outgoing connection */
1555 - int (*get_authorizer) (struct ceph_connection *con,
1556 - void **buf, int *len, int *proto,
1557 - void **reply_buf, int *reply_len, int force_new);
1558 + struct ceph_auth_handshake *(*get_authorizer) (
1559 + struct ceph_connection *con,
1560 + int *proto, int force_new);
1561 int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
1562 int (*invalidate_authorizer)(struct ceph_connection *con);
1563
1564 - /* protocol version mismatch */
1565 - void (*bad_proto) (struct ceph_connection *con);
1566 -
1567 /* there was some error on the socket (disconnect, whatever) */
1568 void (*fault) (struct ceph_connection *con);
1569
1570 @@ -53,6 +50,7 @@ struct ceph_messenger {
1571 struct ceph_entity_inst inst; /* my name+address */
1572 struct ceph_entity_addr my_enc_addr;
1573
1574 + atomic_t stopping;
1575 bool nocrc;
1576
1577 /*
1578 @@ -80,7 +78,10 @@ struct ceph_msg {
1579 unsigned nr_pages; /* size of page array */
1580 unsigned page_alignment; /* io offset in first page */
1581 struct ceph_pagelist *pagelist; /* instead of pages */
1582 +
1583 + struct ceph_connection *con;
1584 struct list_head list_head;
1585 +
1586 struct kref kref;
1587 struct bio *bio; /* instead of pages/pagelist */
1588 struct bio *bio_iter; /* bio iterator */
1589 @@ -106,23 +107,6 @@ struct ceph_msg_pos {
1590 #define MAX_DELAY_INTERVAL (5 * 60 * HZ)
1591
1592 /*
1593 - * ceph_connection state bit flags
1594 - */
1595 -#define LOSSYTX 0 /* we can close channel or drop messages on errors */
1596 -#define CONNECTING 1
1597 -#define NEGOTIATING 2
1598 -#define KEEPALIVE_PENDING 3
1599 -#define WRITE_PENDING 4 /* we have data ready to send */
1600 -#define STANDBY 8 /* no outgoing messages, socket closed. we keep
1601 - * the ceph_connection around to maintain shared
1602 - * state with the peer. */
1603 -#define CLOSED 10 /* we've closed the connection */
1604 -#define SOCK_CLOSED 11 /* socket state changed to closed */
1605 -#define OPENING 13 /* open connection w/ (possibly new) peer */
1606 -#define DEAD 14 /* dead, about to kfree */
1607 -#define BACKOFF 15
1608 -
1609 -/*
1610 * A single connection with another host.
1611 *
1612 * We maintain a queue of outgoing messages, and some session state to
1613 @@ -131,18 +115,22 @@ struct ceph_msg_pos {
1614 */
1615 struct ceph_connection {
1616 void *private;
1617 - atomic_t nref;
1618
1619 const struct ceph_connection_operations *ops;
1620
1621 struct ceph_messenger *msgr;
1622 +
1623 + atomic_t sock_state;
1624 struct socket *sock;
1625 - unsigned long state; /* connection state (see flags above) */
1626 + struct ceph_entity_addr peer_addr; /* peer address */
1627 + struct ceph_entity_addr peer_addr_for_me;
1628 +
1629 + unsigned long flags;
1630 + unsigned long state;
1631 const char *error_msg; /* error message, if any */
1632
1633 - struct ceph_entity_addr peer_addr; /* peer address */
1634 struct ceph_entity_name peer_name; /* peer name */
1635 - struct ceph_entity_addr peer_addr_for_me;
1636 +
1637 unsigned peer_features;
1638 u32 connect_seq; /* identify the most recent connection
1639 attempt for this connection, client */
1640 @@ -163,16 +151,8 @@ struct ceph_connection {
1641
1642 /* connection negotiation temps */
1643 char in_banner[CEPH_BANNER_MAX_LEN];
1644 - union {
1645 - struct { /* outgoing connection */
1646 - struct ceph_msg_connect out_connect;
1647 - struct ceph_msg_connect_reply in_reply;
1648 - };
1649 - struct { /* incoming */
1650 - struct ceph_msg_connect in_connect;
1651 - struct ceph_msg_connect_reply out_reply;
1652 - };
1653 - };
1654 + struct ceph_msg_connect out_connect;
1655 + struct ceph_msg_connect_reply in_reply;
1656 struct ceph_entity_addr actual_peer_addr;
1657
1658 /* message out temps */
1659 @@ -215,24 +195,26 @@ extern int ceph_msgr_init(void);
1660 extern void ceph_msgr_exit(void);
1661 extern void ceph_msgr_flush(void);
1662
1663 -extern struct ceph_messenger *ceph_messenger_create(
1664 - struct ceph_entity_addr *myaddr,
1665 - u32 features, u32 required);
1666 -extern void ceph_messenger_destroy(struct ceph_messenger *);
1667 +extern void ceph_messenger_init(struct ceph_messenger *msgr,
1668 + struct ceph_entity_addr *myaddr,
1669 + u32 supported_features,
1670 + u32 required_features,
1671 + bool nocrc);
1672
1673 -extern void ceph_con_init(struct ceph_messenger *msgr,
1674 - struct ceph_connection *con);
1675 +extern void ceph_con_init(struct ceph_connection *con, void *private,
1676 + const struct ceph_connection_operations *ops,
1677 + struct ceph_messenger *msgr);
1678 extern void ceph_con_open(struct ceph_connection *con,
1679 + __u8 entity_type, __u64 entity_num,
1680 struct ceph_entity_addr *addr);
1681 extern bool ceph_con_opened(struct ceph_connection *con);
1682 extern void ceph_con_close(struct ceph_connection *con);
1683 extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
1684 -extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
1685 -extern void ceph_con_revoke_message(struct ceph_connection *con,
1686 - struct ceph_msg *msg);
1687 +
1688 +extern void ceph_msg_revoke(struct ceph_msg *msg);
1689 +extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
1690 +
1691 extern void ceph_con_keepalive(struct ceph_connection *con);
1692 -extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
1693 -extern void ceph_con_put(struct ceph_connection *con);
1694
1695 extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
1696 bool can_fail);
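The prototype changes above move ops/private binding to init time and peer naming to open time, which is what lets the ceph_con_get()/ceph_con_put() pair disappear: connection lifetime now follows the embedding object. A user-space model of the new call order; all types here are simplified stand-ins:

#include <stdint.h>
#include <stdio.h>

struct conn;
struct conn_ops { void (*fault)(struct conn *); };
struct messenger { int stopping; };

struct conn {
    void *private_data;
    const struct conn_ops *ops;
    struct messenger *msgr;
    uint8_t peer_type;
    uint64_t peer_num;
};

/* Bind ops and private data once, at init time. */
static void con_init(struct conn *con, void *private_data,
                     const struct conn_ops *ops, struct messenger *msgr)
{
    con->private_data = private_data;
    con->ops = ops;
    con->msgr = msgr;
}

/* Name the peer at open time, as the new ceph_con_open() does. */
static void con_open(struct conn *con, uint8_t type, uint64_t num)
{
    con->peer_type = type;   /* e.g. an OSD entity type */
    con->peer_num = num;
}

static void osd_fault(struct conn *con) { (void)con; }
static const struct conn_ops osd_ops = { .fault = osd_fault };

int main(void)
{
    struct messenger msgr = { 0 };
    struct conn con;
    con_init(&con, NULL, &osd_ops, &msgr);
    con_open(&con, 4, 12);
    printf("peer type %u num %llu\n", (unsigned)con.peer_type,
           (unsigned long long)con.peer_num);
    return 0;
}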
1697 diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
1698 index 545f859..2113e38 100644
1699 --- a/include/linux/ceph/mon_client.h
1700 +++ b/include/linux/ceph/mon_client.h
1701 @@ -70,7 +70,7 @@ struct ceph_mon_client {
1702 bool hunting;
1703 int cur_mon; /* last monitor i contacted */
1704 unsigned long sub_sent, sub_renew_after;
1705 - struct ceph_connection *con;
1706 + struct ceph_connection con;
1707 bool have_fsid;
1708
1709 /* pending generic requests */
1710 diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
1711 index a362605..09fa96b 100644
1712 --- a/include/linux/ceph/msgpool.h
1713 +++ b/include/linux/ceph/msgpool.h
1714 @@ -11,10 +11,11 @@
1715 struct ceph_msgpool {
1716 const char *name;
1717 mempool_t *pool;
1718 + int type; /* preallocated message type */
1719 int front_len; /* preallocated payload size */
1720 };
1721
1722 -extern int ceph_msgpool_init(struct ceph_msgpool *pool,
1723 +extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
1724 int front_len, int size, bool blocking,
1725 const char *name);
1726 extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
1727 diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
1728 index 7c05ac2..d9b880e 100644
1729 --- a/include/linux/ceph/osd_client.h
1730 +++ b/include/linux/ceph/osd_client.h
1731 @@ -6,9 +6,10 @@
1732 #include <linux/mempool.h>
1733 #include <linux/rbtree.h>
1734
1735 -#include "types.h"
1736 -#include "osdmap.h"
1737 -#include "messenger.h"
1738 +#include <linux/ceph/types.h>
1739 +#include <linux/ceph/osdmap.h>
1740 +#include <linux/ceph/messenger.h>
1741 +#include <linux/ceph/auth.h>
1742
1743 /*
1744 * Maximum object name size
1745 @@ -40,9 +41,7 @@ struct ceph_osd {
1746 struct list_head o_requests;
1747 struct list_head o_linger_requests;
1748 struct list_head o_osd_lru;
1749 - struct ceph_authorizer *o_authorizer;
1750 - void *o_authorizer_buf, *o_authorizer_reply_buf;
1751 - size_t o_authorizer_buf_len, o_authorizer_reply_buf_len;
1752 + struct ceph_auth_handshake o_auth;
1753 unsigned long lru_ttl;
1754 int o_marked_for_keepalive;
1755 struct list_head o_keepalive_item;
1756 @@ -208,7 +207,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
1757 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
1758 struct ceph_msg *msg);
1759
1760 -extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
1761 +extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
1762 struct ceph_file_layout *layout,
1763 u64 snapid,
1764 u64 off, u64 *plen, u64 *bno,
1765 diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
1766 index ba4c205..11db454 100644
1767 --- a/include/linux/ceph/osdmap.h
1768 +++ b/include/linux/ceph/osdmap.h
1769 @@ -111,9 +111,9 @@ extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
1770 extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
1771
1772 /* calculate mapping of a file extent to an object */
1773 -extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
1774 - u64 off, u64 *plen,
1775 - u64 *bno, u64 *oxoff, u64 *oxlen);
1776 +extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
1777 + u64 off, u64 *plen,
1778 + u64 *bno, u64 *oxoff, u64 *oxlen);
1779
1780 /* calculate mapping of object to a placement group */
1781 extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
1782 diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
1783 index 97e435b..e7a8c90 100644
1784 --- a/include/linux/crush/crush.h
1785 +++ b/include/linux/crush/crush.h
1786 @@ -168,7 +168,7 @@ struct crush_map {
1787
1788
1789 /* crush.c */
1790 -extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos);
1791 +extern int crush_get_bucket_item_weight(const struct crush_bucket *b, int pos);
1792 extern void crush_calc_parents(struct crush_map *map);
1793 extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
1794 extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
1795 @@ -177,4 +177,9 @@ extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
1796 extern void crush_destroy_bucket(struct crush_bucket *b);
1797 extern void crush_destroy(struct crush_map *map);
1798
1799 +static inline int crush_calc_tree_node(int i)
1800 +{
1801 + return ((i+1) << 1)-1;
1802 +}
1803 +
1804 #endif
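crush_calc_tree_node() encodes the tree bucket's layout: item weights live at the odd node positions 1, 3, 5, ..., so node(i) = ((i+1) << 1) - 1. This is the lookup the crush.c hunk later substitutes for the old "p & 1" special-casing. A tiny standalone check of the mapping:

#include <stdio.h>

static int crush_calc_tree_node(int i)
{
    return ((i + 1) << 1) - 1;   /* items 0,1,2,... -> nodes 1,3,5,... */
}

int main(void)
{
    for (int i = 0; i < 4; i++)
        printf("item %d -> node %d\n", i, crush_calc_tree_node(i));
    return 0;
}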
1805 diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
1806 index c46b99c..9322ab8 100644
1807 --- a/include/linux/crush/mapper.h
1808 +++ b/include/linux/crush/mapper.h
1809 @@ -10,11 +10,11 @@
1810
1811 #include "crush.h"
1812
1813 -extern int crush_find_rule(struct crush_map *map, int pool, int type, int size);
1814 -extern int crush_do_rule(struct crush_map *map,
1815 +extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
1816 +extern int crush_do_rule(const struct crush_map *map,
1817 int ruleno,
1818 int x, int *result, int result_max,
1819 int forcefeed, /* -1 for none */
1820 - __u32 *weights);
1821 + const __u32 *weights);
1822
1823 #endif
1824 diff --git a/kernel/module.c b/kernel/module.c
1825 index 61ea75e..8597217 100644
1826 --- a/kernel/module.c
1827 +++ b/kernel/module.c
1828 @@ -2273,12 +2273,17 @@ static void layout_symtab(struct module *mod, struct load_info *info)
1829 src = (void *)info->hdr + symsect->sh_offset;
1830 nsrc = symsect->sh_size / sizeof(*src);
1831
1832 + /* strtab always starts with a nul, so offset 0 is the empty string. */
1833 + strtab_size = 1;
1834 +
1835 /* Compute total space required for the core symbols' strtab. */
1836 - for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src)
1837 - if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
1838 - strtab_size += strlen(&info->strtab[src->st_name]) + 1;
1839 + for (ndst = i = 0; i < nsrc; i++) {
1840 + if (i == 0 ||
1841 + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
1842 + strtab_size += strlen(&info->strtab[src[i].st_name])+1;
1843 ndst++;
1844 }
1845 + }
1846
1847 /* Append room for core symbols at end of core part. */
1848 info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
1849 @@ -2312,15 +2317,15 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
1850 mod->core_symtab = dst = mod->module_core + info->symoffs;
1851 mod->core_strtab = s = mod->module_core + info->stroffs;
1852 src = mod->symtab;
1853 - *dst = *src;
1854 *s++ = 0;
1855 - for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
1856 - if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
1857 - continue;
1858 -
1859 - dst[ndst] = *src;
1860 - dst[ndst++].st_name = s - mod->core_strtab;
1861 - s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1;
1862 + for (ndst = i = 0; i < mod->num_symtab; i++) {
1863 + if (i == 0 ||
1864 + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
1865 + dst[ndst] = src[i];
1866 + dst[ndst++].st_name = s - mod->core_strtab;
1867 + s += strlcpy(s, &mod->strtab[src[i].st_name],
1868 + KSYM_NAME_LEN) + 1;
1869 + }
1870 }
1871 mod->core_num_syms = ndst;
1872 }
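The layout_symtab()/add_kallsyms() rework keeps symbol 0 unconditionally and sizes the core string table as one leading NUL plus strlen(name)+1 per kept symbol, instead of the old off-by-one loop that skipped index 0. A user-space model of the sizing pass; is_core() is a hypothetical stand-in for is_core_symbol():

#include <stdio.h>
#include <string.h>

static int is_core(int i) { return i % 2 == 0; }  /* stand-in filter */

int main(void)
{
    const char *names[] = { "", "init_foo", "core_bar", "init_baz" };
    size_t strtab_size = 1;  /* leading NUL = empty string at offset 0 */
    int ndst = 0;

    for (int i = 0; i < 4; i++) {
        /* Symbol 0 is always kept, mirroring the "i == 0 ||" test. */
        if (i == 0 || is_core(i)) {
            strtab_size += strlen(names[i]) + 1;
            ndst++;
        }
    }
    printf("%d core symbols, strtab %zu bytes\n", ndst, strtab_size);
    return 0;
}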
1873 diff --git a/mm/memcontrol.c b/mm/memcontrol.c
1874 index 7685d4a..81c275b 100644
1875 --- a/mm/memcontrol.c
1876 +++ b/mm/memcontrol.c
1877 @@ -1489,17 +1489,26 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
1878 u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
1879 {
1880 u64 limit;
1881 - u64 memsw;
1882
1883 limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
1884 - limit += total_swap_pages << PAGE_SHIFT;
1885
1886 - memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
1887 /*
1888 - * If memsw is finite and limits the amount of swap space available
1889 - * to this memcg, return that limit.
1890 + * Do not consider swap space if we cannot swap due to swappiness
1891 */
1892 - return min(limit, memsw);
1893 + if (mem_cgroup_swappiness(memcg)) {
1894 + u64 memsw;
1895 +
1896 + limit += total_swap_pages << PAGE_SHIFT;
1897 + memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
1898 +
1899 + /*
1900 + * If memsw is finite and limits the amount of swap space
1901 + * available to this memcg, return that limit.
1902 + */
1903 + limit = min(limit, memsw);
1904 + }
1905 +
1906 + return limit;
1907 }
1908
1909 static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
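mem_cgroup_get_limit() now adds swap (and applies the memsw cap) only when swappiness permits swapping at all; with swappiness 0 the plain memory limit is returned. The arithmetic, modeled with plain integers (values are arbitrary byte counts):

#include <stdio.h>

static unsigned long long get_limit(unsigned long long res_limit,
                                    unsigned long long memsw_limit,
                                    unsigned long long swap_bytes,
                                    int swappiness)
{
    unsigned long long limit = res_limit;

    if (swappiness) {
        limit += swap_bytes;        /* swap is usable, count it      */
        if (memsw_limit < limit)    /* min(limit, memsw), as patched */
            limit = memsw_limit;
    }
    return limit;
}

int main(void)
{
    printf("swappiness 60: %llu\n", get_limit(100, 150, 80, 60)); /* 150 */
    printf("swappiness 0:  %llu\n", get_limit(100, 150, 80, 0));  /* 100 */
    return 0;
}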
1910 diff --git a/mm/shmem.c b/mm/shmem.c
1911 index 40383cd..a859b06 100644
1912 --- a/mm/shmem.c
1913 +++ b/mm/shmem.c
1914 @@ -595,7 +595,7 @@ static void shmem_evict_inode(struct inode *inode)
1915 kfree(xattr->name);
1916 kfree(xattr);
1917 }
1918 - BUG_ON(inode->i_blocks);
1919 + WARN_ON(inode->i_blocks);
1920 shmem_free_inode(inode->i_sb);
1921 end_writeback(inode);
1922 }
1923 diff --git a/mm/vmscan.c b/mm/vmscan.c
1924 index e989ee2..e6ca505 100644
1925 --- a/mm/vmscan.c
1926 +++ b/mm/vmscan.c
1927 @@ -3128,6 +3128,8 @@ static int kswapd(void *p)
1928 &balanced_classzone_idx);
1929 }
1930 }
1931 +
1932 + current->reclaim_state = NULL;
1933 return 0;
1934 }
1935
1936 diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
1937 index 214c2bb..925ca58 100644
1938 --- a/net/ceph/auth_none.c
1939 +++ b/net/ceph/auth_none.c
1940 @@ -59,9 +59,7 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
1941 */
1942 static int ceph_auth_none_create_authorizer(
1943 struct ceph_auth_client *ac, int peer_type,
1944 - struct ceph_authorizer **a,
1945 - void **buf, size_t *len,
1946 - void **reply_buf, size_t *reply_len)
1947 + struct ceph_auth_handshake *auth)
1948 {
1949 struct ceph_auth_none_info *ai = ac->private;
1950 struct ceph_none_authorizer *au = &ai->au;
1951 @@ -82,11 +80,12 @@ static int ceph_auth_none_create_authorizer(
1952 dout("built authorizer len %d\n", au->buf_len);
1953 }
1954
1955 - *a = (struct ceph_authorizer *)au;
1956 - *buf = au->buf;
1957 - *len = au->buf_len;
1958 - *reply_buf = au->reply_buf;
1959 - *reply_len = sizeof(au->reply_buf);
1960 + auth->authorizer = (struct ceph_authorizer *) au;
1961 + auth->authorizer_buf = au->buf;
1962 + auth->authorizer_buf_len = au->buf_len;
1963 + auth->authorizer_reply_buf = au->reply_buf;
1964 + auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
1965 +
1966 return 0;
1967
1968 bad2:
1969 diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
1970 index 1587dc6..a16bf14 100644
1971 --- a/net/ceph/auth_x.c
1972 +++ b/net/ceph/auth_x.c
1973 @@ -526,9 +526,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
1974
1975 static int ceph_x_create_authorizer(
1976 struct ceph_auth_client *ac, int peer_type,
1977 - struct ceph_authorizer **a,
1978 - void **buf, size_t *len,
1979 - void **reply_buf, size_t *reply_len)
1980 + struct ceph_auth_handshake *auth)
1981 {
1982 struct ceph_x_authorizer *au;
1983 struct ceph_x_ticket_handler *th;
1984 @@ -548,11 +546,12 @@ static int ceph_x_create_authorizer(
1985 return ret;
1986 }
1987
1988 - *a = (struct ceph_authorizer *)au;
1989 - *buf = au->buf->vec.iov_base;
1990 - *len = au->buf->vec.iov_len;
1991 - *reply_buf = au->reply_buf;
1992 - *reply_len = sizeof(au->reply_buf);
1993 + auth->authorizer = (struct ceph_authorizer *) au;
1994 + auth->authorizer_buf = au->buf->vec.iov_base;
1995 + auth->authorizer_buf_len = au->buf->vec.iov_len;
1996 + auth->authorizer_reply_buf = au->reply_buf;
1997 + auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
1998 +
1999 return 0;
2000 }
2001
2002 diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
2003 index cc91319..8e74e8c 100644
2004 --- a/net/ceph/ceph_common.c
2005 +++ b/net/ceph/ceph_common.c
2006 @@ -83,7 +83,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
2007 return -1;
2008 }
2009 } else {
2010 - pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
2011 memcpy(&client->fsid, fsid, sizeof(*fsid));
2012 }
2013 return 0;
2014 @@ -468,19 +467,15 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
2015 /* msgr */
2016 if (ceph_test_opt(client, MYIP))
2017 myaddr = &client->options->my_addr;
2018 - client->msgr = ceph_messenger_create(myaddr,
2019 - client->supported_features,
2020 - client->required_features);
2021 - if (IS_ERR(client->msgr)) {
2022 - err = PTR_ERR(client->msgr);
2023 - goto fail;
2024 - }
2025 - client->msgr->nocrc = ceph_test_opt(client, NOCRC);
2026 + ceph_messenger_init(&client->msgr, myaddr,
2027 + client->supported_features,
2028 + client->required_features,
2029 + ceph_test_opt(client, NOCRC));
2030
2031 /* subsystems */
2032 err = ceph_monc_init(&client->monc, client);
2033 if (err < 0)
2034 - goto fail_msgr;
2035 + goto fail;
2036 err = ceph_osdc_init(&client->osdc, client);
2037 if (err < 0)
2038 goto fail_monc;
2039 @@ -489,8 +484,6 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
2040
2041 fail_monc:
2042 ceph_monc_stop(&client->monc);
2043 -fail_msgr:
2044 - ceph_messenger_destroy(client->msgr);
2045 fail:
2046 kfree(client);
2047 return ERR_PTR(err);
2048 @@ -501,22 +494,15 @@ void ceph_destroy_client(struct ceph_client *client)
2049 {
2050 dout("destroy_client %p\n", client);
2051
2052 + atomic_set(&client->msgr.stopping, 1);
2053 +
2054 /* unmount */
2055 ceph_osdc_stop(&client->osdc);
2056
2057 - /*
2058 - * make sure osd connections close out before destroying the
2059 - * auth module, which is needed to free those connections'
2060 - * ceph_authorizers.
2061 - */
2062 - ceph_msgr_flush();
2063 -
2064 ceph_monc_stop(&client->monc);
2065
2066 ceph_debugfs_client_cleanup(client);
2067
2068 - ceph_messenger_destroy(client->msgr);
2069 -
2070 ceph_destroy_options(client->options);
2071
2072 kfree(client);
2073 diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
2074 index d6ebb13..fbda052 100644
2075 --- a/net/ceph/crush/crush.c
2076 +++ b/net/ceph/crush/crush.c
2077 @@ -26,9 +26,9 @@ const char *crush_bucket_alg_name(int alg)
2078 * @b: bucket pointer
2079 * @p: item index in bucket
2080 */
2081 -int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
2082 +int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
2083 {
2084 - if (p >= b->size)
2085 + if ((__u32)p >= b->size)
2086 return 0;
2087
2088 switch (b->alg) {
2089 @@ -37,9 +37,7 @@ int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
2090 case CRUSH_BUCKET_LIST:
2091 return ((struct crush_bucket_list *)b)->item_weights[p];
2092 case CRUSH_BUCKET_TREE:
2093 - if (p & 1)
2094 - return ((struct crush_bucket_tree *)b)->node_weights[p];
2095 - return 0;
2096 + return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
2097 case CRUSH_BUCKET_STRAW:
2098 return ((struct crush_bucket_straw *)b)->item_weights[p];
2099 }
2100 @@ -87,6 +85,8 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
2101
2102 void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
2103 {
2104 + kfree(b->h.perm);
2105 + kfree(b->h.items);
2106 kfree(b->node_weights);
2107 kfree(b);
2108 }
2109 @@ -124,10 +124,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
2110 */
2111 void crush_destroy(struct crush_map *map)
2112 {
2113 - int b;
2114 -
2115 /* buckets */
2116 if (map->buckets) {
2117 + __s32 b;
2118 for (b = 0; b < map->max_buckets; b++) {
2119 if (map->buckets[b] == NULL)
2120 continue;
2121 @@ -138,6 +137,7 @@ void crush_destroy(struct crush_map *map)
2122
2123 /* rules */
2124 if (map->rules) {
2125 + __u32 b;
2126 for (b = 0; b < map->max_rules; b++)
2127 kfree(map->rules[b]);
2128 kfree(map->rules);
2129 diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
2130 index b79747c..00baad5 100644
2131 --- a/net/ceph/crush/mapper.c
2132 +++ b/net/ceph/crush/mapper.c
2133 @@ -32,9 +32,9 @@
2134 * @type: storage ruleset type (user defined)
2135 * @size: output set size
2136 */
2137 -int crush_find_rule(struct crush_map *map, int ruleset, int type, int size)
2138 +int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
2139 {
2140 - int i;
2141 + __u32 i;
2142
2143 for (i = 0; i < map->max_rules; i++) {
2144 if (map->rules[i] &&
2145 @@ -72,7 +72,7 @@ static int bucket_perm_choose(struct crush_bucket *bucket,
2146 unsigned i, s;
2147
2148 /* start a new permutation if @x has changed */
2149 - if (bucket->perm_x != x || bucket->perm_n == 0) {
2150 + if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
2151 dprintk("bucket %d new x=%d\n", bucket->id, x);
2152 bucket->perm_x = x;
2153
2154 @@ -152,8 +152,8 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
2155 return bucket->h.items[i];
2156 }
2157
2158 - BUG_ON(1);
2159 - return 0;
2160 + dprintk("bad list sums for bucket %d\n", bucket->h.id);
2161 + return bucket->h.items[0];
2162 }
2163
2164
2165 @@ -219,7 +219,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket,
2166 static int bucket_straw_choose(struct crush_bucket_straw *bucket,
2167 int x, int r)
2168 {
2169 - int i;
2170 + __u32 i;
2171 int high = 0;
2172 __u64 high_draw = 0;
2173 __u64 draw;
2174 @@ -239,6 +239,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
2175 static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
2176 {
2177 dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
2178 + BUG_ON(in->size == 0);
2179 switch (in->alg) {
2180 case CRUSH_BUCKET_UNIFORM:
2181 return bucket_uniform_choose((struct crush_bucket_uniform *)in,
2182 @@ -253,7 +254,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
2183 return bucket_straw_choose((struct crush_bucket_straw *)in,
2184 x, r);
2185 default:
2186 - BUG_ON(1);
2187 + dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
2188 return in->items[0];
2189 }
2190 }
2191 @@ -262,7 +263,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
2192 * true if device is marked "out" (failed, fully offloaded)
2193 * of the cluster
2194 */
2195 -static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
2196 +static int is_out(const struct crush_map *map, const __u32 *weight, int item, int x)
2197 {
2198 if (weight[item] >= 0x10000)
2199 return 0;
2200 @@ -287,16 +288,16 @@ static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
2201 * @recurse_to_leaf: true if we want one device under each item of given type
2202 * @out2: second output vector for leaf items (if @recurse_to_leaf)
2203 */
2204 -static int crush_choose(struct crush_map *map,
2205 +static int crush_choose(const struct crush_map *map,
2206 struct crush_bucket *bucket,
2207 - __u32 *weight,
2208 + const __u32 *weight,
2209 int x, int numrep, int type,
2210 int *out, int outpos,
2211 int firstn, int recurse_to_leaf,
2212 int *out2)
2213 {
2214 int rep;
2215 - int ftotal, flocal;
2216 + unsigned int ftotal, flocal;
2217 int retry_descent, retry_bucket, skip_rep;
2218 struct crush_bucket *in = bucket;
2219 int r;
2220 @@ -304,7 +305,7 @@ static int crush_choose(struct crush_map *map,
2221 int item = 0;
2222 int itemtype;
2223 int collide, reject;
2224 - const int orig_tries = 5; /* attempts before we fall back to search */
2225 + const unsigned int orig_tries = 5; /* attempts before we fall back to search */
2226
2227 dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
2228 bucket->id, x, outpos, numrep);
2229 @@ -325,7 +326,7 @@ static int crush_choose(struct crush_map *map,
2230 r = rep;
2231 if (in->alg == CRUSH_BUCKET_UNIFORM) {
2232 /* be careful */
2233 - if (firstn || numrep >= in->size)
2234 + if (firstn || (__u32)numrep >= in->size)
2235 /* r' = r + f_total */
2236 r += ftotal;
2237 else if (in->size % numrep == 0)
2238 @@ -354,7 +355,11 @@ static int crush_choose(struct crush_map *map,
2239 item = bucket_perm_choose(in, x, r);
2240 else
2241 item = crush_bucket_choose(in, x, r);
2242 - BUG_ON(item >= map->max_devices);
2243 + if (item >= map->max_devices) {
2244 + dprintk(" bad item %d\n", item);
2245 + skip_rep = 1;
2246 + break;
2247 + }
2248
2249 /* desired type? */
2250 if (item < 0)
2251 @@ -365,8 +370,12 @@ static int crush_choose(struct crush_map *map,
2252
2253 /* keep going? */
2254 if (itemtype != type) {
2255 - BUG_ON(item >= 0 ||
2256 - (-1-item) >= map->max_buckets);
2257 + if (item >= 0 ||
2258 + (-1-item) >= map->max_buckets) {
2259 + dprintk(" bad item type %d\n", type);
2260 + skip_rep = 1;
2261 + break;
2262 + }
2263 in = map->buckets[-1-item];
2264 retry_bucket = 1;
2265 continue;
2266 @@ -415,7 +424,7 @@ reject:
2267 if (collide && flocal < 3)
2268 /* retry locally a few times */
2269 retry_bucket = 1;
2270 - else if (flocal < in->size + orig_tries)
2271 + else if (flocal <= in->size + orig_tries)
2272 /* exhaustive bucket search */
2273 retry_bucket = 1;
2274 else if (ftotal < 20)
2275 @@ -425,7 +434,7 @@ reject:
2276 /* else give up */
2277 skip_rep = 1;
2278 dprintk(" reject %d collide %d "
2279 - "ftotal %d flocal %d\n",
2280 + "ftotal %u flocal %u\n",
2281 reject, collide, ftotal,
2282 flocal);
2283 }
2284 @@ -456,9 +465,9 @@ reject:
2285 * @result_max: maximum result size
2286 * @force: force initial replica choice; -1 for none
2287 */
2288 -int crush_do_rule(struct crush_map *map,
2289 +int crush_do_rule(const struct crush_map *map,
2290 int ruleno, int x, int *result, int result_max,
2291 - int force, __u32 *weight)
2292 + int force, const __u32 *weight)
2293 {
2294 int result_len;
2295 int force_context[CRUSH_MAX_DEPTH];
2296 @@ -473,12 +482,15 @@ int crush_do_rule(struct crush_map *map,
2297 int osize;
2298 int *tmp;
2299 struct crush_rule *rule;
2300 - int step;
2301 + __u32 step;
2302 int i, j;
2303 int numrep;
2304 int firstn;
2305
2306 - BUG_ON(ruleno >= map->max_rules);
2307 + if ((__u32)ruleno >= map->max_rules) {
2308 + dprintk(" bad ruleno %d\n", ruleno);
2309 + return 0;
2310 + }
2311
2312 rule = map->rules[ruleno];
2313 result_len = 0;
2314 @@ -488,7 +500,8 @@ int crush_do_rule(struct crush_map *map,
2315 /*
2316 * determine hierarchical context of force, if any. note
2317 * that this may or may not correspond to the specific types
2318 - * referenced by the crush rule.
2319 + * referenced by the crush rule. it will also only affect
2320 + * the first descent (TAKE).
2321 */
2322 if (force >= 0 &&
2323 force < map->max_devices &&
2324 @@ -527,7 +540,8 @@ int crush_do_rule(struct crush_map *map,
2325 firstn = 1;
2326 case CRUSH_RULE_CHOOSE_LEAF_INDEP:
2327 case CRUSH_RULE_CHOOSE_INDEP:
2328 - BUG_ON(wsize == 0);
2329 + if (wsize == 0)
2330 + break;
2331
2332 recurse_to_leaf =
2333 rule->steps[step].op ==
2334 @@ -596,7 +610,9 @@ int crush_do_rule(struct crush_map *map,
2335 break;
2336
2337 default:
2338 - BUG_ON(1);
2339 + dprintk(" unknown op %d at step %d\n",
2340 + curstep->op, step);
2341 + break;
2342 }
2343 }
2344 return result_len;
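The mapper.c changes above share one theme: a CRUSH map arrives from the network, so malformed input must degrade gracefully (skip a replica, log via dprintk, return a safe item or an empty result) rather than BUG() and crash the client. A condensed user-space model of that defensive pattern; choose_item() is hypothetical, not a kernel function:

#include <stdio.h>

static int choose_item(const int *items, unsigned size,
                       int wanted, unsigned max_devices)
{
    if (size == 0)
        return -1;                        /* nothing to choose from */
    if ((unsigned)wanted >= size)
        wanted = 0;                       /* bad index: fall back   */
    if ((unsigned)items[wanted] >= max_devices) {
        printf("bad item %d\n", items[wanted]);
        return -1;                        /* skip this replica      */
    }
    return items[wanted];
}

int main(void)
{
    int items[] = { 3, 7, 42 };
    printf("ok:  %d\n", choose_item(items, 3, 1, 10));
    printf("bad: %d\n", choose_item(items, 3, 2, 10)); /* 42 >= 10 */
    return 0;
}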
2345 diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
2346 index b780cb7..9da7fdd 100644
2347 --- a/net/ceph/crypto.c
2348 +++ b/net/ceph/crypto.c
2349 @@ -466,6 +466,7 @@ void ceph_key_destroy(struct key *key) {
2350 struct ceph_crypto_key *ckey = key->payload.data;
2351
2352 ceph_crypto_key_destroy(ckey);
2353 + kfree(ckey);
2354 }
2355
2356 struct key_type key_type_ceph = {
2357 diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
2358 index 1919d15..3572dc5 100644
2359 --- a/net/ceph/crypto.h
2360 +++ b/net/ceph/crypto.h
2361 @@ -16,7 +16,8 @@ struct ceph_crypto_key {
2362
2363 static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
2364 {
2365 - kfree(key->key);
2366 + if (key)
2367 + kfree(key->key);
2368 }
2369
2370 extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
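The crypto.c/crypto.h pair fixes a leak: ceph_key_destroy() freed the key material but never the containing struct, and the destroy helper now tolerates a NULL pointer. The ownership rule, modeled in user-space C with simplified types:

#include <stdlib.h>

struct crypto_key { void *key; };

static void crypto_key_destroy(struct crypto_key *key)
{
    if (key)
        free(key->key);   /* inner key material */
}

static void key_destroy(struct crypto_key *ckey)
{
    crypto_key_destroy(ckey);
    free(ckey);           /* the struct itself: this was leaked before */
}

int main(void)
{
    struct crypto_key *ckey = calloc(1, sizeof(*ckey));
    ckey->key = malloc(16);
    key_destroy(ckey);
    return 0;
}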
2371 diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
2372 index 27d4ea3..680978d 100644
2373 --- a/net/ceph/debugfs.c
2374 +++ b/net/ceph/debugfs.c
2375 @@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client)
2376 snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
2377 client->monc.auth->global_id);
2378
2379 + dout("ceph_debugfs_client_init %p %s\n", client, name);
2380 +
2381 + BUG_ON(client->debugfs_dir);
2382 client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
2383 if (!client->debugfs_dir)
2384 goto out;
2385 @@ -234,6 +237,7 @@ out:
2386
2387 void ceph_debugfs_client_cleanup(struct ceph_client *client)
2388 {
2389 + dout("ceph_debugfs_client_cleanup %p\n", client);
2390 debugfs_remove(client->debugfs_osdmap);
2391 debugfs_remove(client->debugfs_monmap);
2392 debugfs_remove(client->osdc.debugfs_file);
2393 diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
2394 index f0993af..aa71a67 100644
2395 --- a/net/ceph/messenger.c
2396 +++ b/net/ceph/messenger.c
2397 @@ -29,6 +29,74 @@
2398 * the sender.
2399 */
2400
2401 +/*
2402 + * We track the state of the socket on a given connection using
2403 + * values defined below. The transition to a new socket state is
2404 + * handled by a function which verifies we aren't coming from an
2405 + * unexpected state.
2406 + *
2407 + * --------
2408 + * | NEW* | transient initial state
2409 + * --------
2410 + * | con_sock_state_init()
2411 + * v
2412 + * ----------
2413 + * | CLOSED | initialized, but no socket (and no
2414 + * ---------- TCP connection)
2415 + * ^ \
2416 + * | \ con_sock_state_connecting()
2417 + * | ----------------------
2418 + * | \
2419 + * + con_sock_state_closed() \
2420 + * |+--------------------------- \
2421 + * | \ \ \
2422 + * | ----------- \ \
2423 + * | | CLOSING | socket event; \ \
2424 + * | ----------- await close \ \
2425 + * | ^ \ |
2426 + * | | \ |
2427 + * | + con_sock_state_closing() \ |
2428 + * | / \ | |
2429 + * | / --------------- | |
2430 + * | / \ v v
2431 + * | / --------------
2432 + * | / -----------------| CONNECTING | socket created, TCP
2433 + * | | / -------------- connect initiated
2434 + * | | | con_sock_state_connected()
2435 + * | | v
2436 + * -------------
2437 + * | CONNECTED | TCP connection established
2438 + * -------------
2439 + *
2440 + * State values for ceph_connection->sock_state; NEW is assumed to be 0.
2441 + */
2442 +
2443 +#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
2444 +#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
2445 +#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */
2446 +#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */
2447 +#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
2448 +
2449 +/*
2450 + * connection states
2451 + */
2452 +#define CON_STATE_CLOSED 1 /* -> PREOPEN */
2453 +#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */
2454 +#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */
2455 +#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */
2456 +#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */
2457 +#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */
2458 +
2459 +/*
2460 + * ceph_connection flag bits
2461 + */
2462 +#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop
2463 + * messages on errors */
2464 +#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
2465 +#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */
2466 +#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */
2467 +#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */
2468 +
2469 /* static tag bytes (protocol control messages) */
2470 static char tag_msg = CEPH_MSGR_TAG_MSG;
2471 static char tag_ack = CEPH_MSGR_TAG_ACK;
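The sock_state values defined above are driven by the con_sock_state_*() helpers added later in this hunk, each of which atomically swaps in the new state and warns on an unexpected predecessor. A user-space model of one such checked transition; GCC's __atomic_exchange_n builtin stands in for the kernel's atomic_xchg(), and transitions are simplified to a single expected old state each:

#include <stdio.h>

enum { SS_NEW, SS_CLOSED, SS_CONNECTING, SS_CONNECTED, SS_CLOSING };

static int sock_state = SS_NEW;

static void transition(int new_state, int expected_old)
{
    int old = __atomic_exchange_n(&sock_state, new_state,
                                  __ATOMIC_SEQ_CST);
    if (old != expected_old)
        printf("unexpected old state %d\n", old);
    printf("sock %d -> %d\n", old, new_state);
}

int main(void)
{
    transition(SS_CLOSED, SS_NEW);           /* con_sock_state_init()    */
    transition(SS_CONNECTING, SS_CLOSED);    /* ..._connecting()         */
    transition(SS_CONNECTED, SS_CONNECTING); /* ..._connected()          */
    transition(SS_CLOSED, SS_CONNECTED);     /* ..._closed()             */
    return 0;
}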
2472 @@ -147,72 +215,130 @@ void ceph_msgr_flush(void)
2473 }
2474 EXPORT_SYMBOL(ceph_msgr_flush);
2475
2476 +/* Connection socket state transition functions */
2477 +
2478 +static void con_sock_state_init(struct ceph_connection *con)
2479 +{
2480 + int old_state;
2481 +
2482 + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
2483 + if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
2484 + printk("%s: unexpected old state %d\n", __func__, old_state);
2485 + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2486 + CON_SOCK_STATE_CLOSED);
2487 +}
2488 +
2489 +static void con_sock_state_connecting(struct ceph_connection *con)
2490 +{
2491 + int old_state;
2492 +
2493 + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
2494 + if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
2495 + printk("%s: unexpected old state %d\n", __func__, old_state);
2496 + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2497 + CON_SOCK_STATE_CONNECTING);
2498 +}
2499 +
2500 +static void con_sock_state_connected(struct ceph_connection *con)
2501 +{
2502 + int old_state;
2503 +
2504 + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
2505 + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
2506 + printk("%s: unexpected old state %d\n", __func__, old_state);
2507 + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2508 + CON_SOCK_STATE_CONNECTED);
2509 +}
2510 +
2511 +static void con_sock_state_closing(struct ceph_connection *con)
2512 +{
2513 + int old_state;
2514 +
2515 + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
2516 + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
2517 + old_state != CON_SOCK_STATE_CONNECTED &&
2518 + old_state != CON_SOCK_STATE_CLOSING))
2519 + printk("%s: unexpected old state %d\n", __func__, old_state);
2520 + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2521 + CON_SOCK_STATE_CLOSING);
2522 +}
2523 +
2524 +static void con_sock_state_closed(struct ceph_connection *con)
2525 +{
2526 + int old_state;
2527 +
2528 + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
2529 + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
2530 + old_state != CON_SOCK_STATE_CLOSING &&
2531 + old_state != CON_SOCK_STATE_CONNECTING &&
2532 + old_state != CON_SOCK_STATE_CLOSED))
2533 + printk("%s: unexpected old state %d\n", __func__, old_state);
2534 + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2535 + CON_SOCK_STATE_CLOSED);
2536 +}
2537
2538 /*
2539 * socket callback functions
2540 */
2541
2542 /* data available on socket, or listen socket received a connect */
2543 -static void ceph_data_ready(struct sock *sk, int count_unused)
2544 +static void ceph_sock_data_ready(struct sock *sk, int count_unused)
2545 {
2546 struct ceph_connection *con = sk->sk_user_data;
2547 + if (atomic_read(&con->msgr->stopping)) {
2548 + return;
2549 + }
2550
2551 if (sk->sk_state != TCP_CLOSE_WAIT) {
2552 - dout("ceph_data_ready on %p state = %lu, queueing work\n",
2553 + dout("%s on %p state = %lu, queueing work\n", __func__,
2554 con, con->state);
2555 queue_con(con);
2556 }
2557 }
2558
2559 /* socket has buffer space for writing */
2560 -static void ceph_write_space(struct sock *sk)
2561 +static void ceph_sock_write_space(struct sock *sk)
2562 {
2563 struct ceph_connection *con = sk->sk_user_data;
2564
2565 /* only queue to workqueue if there is data we want to write,
2566 * and there is sufficient space in the socket buffer to accept
2567 - * more data. clear SOCK_NOSPACE so that ceph_write_space()
2568 + * more data. clear SOCK_NOSPACE so that ceph_sock_write_space()
2569 * doesn't get called again until try_write() fills the socket
2570 * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
2571 * and net/core/stream.c:sk_stream_write_space().
2572 */
2573 - if (test_bit(WRITE_PENDING, &con->state)) {
2574 + if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) {
2575 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
2576 - dout("ceph_write_space %p queueing write work\n", con);
2577 + dout("%s %p queueing write work\n", __func__, con);
2578 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2579 queue_con(con);
2580 }
2581 } else {
2582 - dout("ceph_write_space %p nothing to write\n", con);
2583 + dout("%s %p nothing to write\n", __func__, con);
2584 }
2585 }
2586
2587 /* socket's state has changed */
2588 -static void ceph_state_change(struct sock *sk)
2589 +static void ceph_sock_state_change(struct sock *sk)
2590 {
2591 struct ceph_connection *con = sk->sk_user_data;
2592
2593 - dout("ceph_state_change %p state = %lu sk_state = %u\n",
2594 + dout("%s %p state = %lu sk_state = %u\n", __func__,
2595 con, con->state, sk->sk_state);
2596
2597 - if (test_bit(CLOSED, &con->state))
2598 - return;
2599 -
2600 switch (sk->sk_state) {
2601 case TCP_CLOSE:
2602 - dout("ceph_state_change TCP_CLOSE\n");
2603 + dout("%s TCP_CLOSE\n", __func__);
2604 case TCP_CLOSE_WAIT:
2605 - dout("ceph_state_change TCP_CLOSE_WAIT\n");
2606 - if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
2607 - if (test_bit(CONNECTING, &con->state))
2608 - con->error_msg = "connection failed";
2609 - else
2610 - con->error_msg = "socket closed";
2611 - queue_con(con);
2612 - }
2613 + dout("%s TCP_CLOSE_WAIT\n", __func__);
2614 + con_sock_state_closing(con);
2615 + set_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
2616 + queue_con(con);
2617 break;
2618 case TCP_ESTABLISHED:
2619 - dout("ceph_state_change TCP_ESTABLISHED\n");
2620 + dout("%s TCP_ESTABLISHED\n", __func__);
2621 + con_sock_state_connected(con);
2622 queue_con(con);
2623 break;
2624 default: /* Everything else is uninteresting */
2625 @@ -228,9 +354,9 @@ static void set_sock_callbacks(struct socket *sock,
2626 {
2627 struct sock *sk = sock->sk;
2628 sk->sk_user_data = con;
2629 - sk->sk_data_ready = ceph_data_ready;
2630 - sk->sk_write_space = ceph_write_space;
2631 - sk->sk_state_change = ceph_state_change;
2632 + sk->sk_data_ready = ceph_sock_data_ready;
2633 + sk->sk_write_space = ceph_sock_write_space;
2634 + sk->sk_state_change = ceph_sock_state_change;
2635 }
2636
2637
2638 @@ -262,6 +388,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
2639
2640 dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
2641
2642 + con_sock_state_connecting(con);
2643 ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
2644 O_NONBLOCK);
2645 if (ret == -EINPROGRESS) {
2646 @@ -277,7 +404,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
2647 return ret;
2648 }
2649 con->sock = sock;
2650 -
2651 return 0;
2652 }
2653
2654 @@ -333,16 +459,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
2655 */
2656 static int con_close_socket(struct ceph_connection *con)
2657 {
2658 - int rc;
2659 + int rc = 0;
2660
2661 dout("con_close_socket on %p sock %p\n", con, con->sock);
2662 - if (!con->sock)
2663 - return 0;
2664 - set_bit(SOCK_CLOSED, &con->state);
2665 - rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
2666 - sock_release(con->sock);
2667 - con->sock = NULL;
2668 - clear_bit(SOCK_CLOSED, &con->state);
2669 + if (con->sock) {
2670 + rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
2671 + sock_release(con->sock);
2672 + con->sock = NULL;
2673 + }
2674 +
2675 + /*
2676 + * Forcibly clear the SOCK_CLOSED flag. It gets set
2677 + * independent of the connection mutex, and we could have
2678 + * received a socket close event before we had the chance to
2679 + * shut the socket down.
2680 + */
2681 + clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
2682 +
2683 + con_sock_state_closed(con);
2684 return rc;
2685 }
2686
2687 @@ -353,6 +487,10 @@ static int con_close_socket(struct ceph_connection *con)
2688 static void ceph_msg_remove(struct ceph_msg *msg)
2689 {
2690 list_del_init(&msg->list_head);
2691 + BUG_ON(msg->con == NULL);
2692 + msg->con->ops->put(msg->con);
2693 + msg->con = NULL;
2694 +
2695 ceph_msg_put(msg);
2696 }
2697 static void ceph_msg_remove_list(struct list_head *head)
2698 @@ -372,8 +510,11 @@ static void reset_connection(struct ceph_connection *con)
2699 ceph_msg_remove_list(&con->out_sent);
2700
2701 if (con->in_msg) {
2702 + BUG_ON(con->in_msg->con != con);
2703 + con->in_msg->con = NULL;
2704 ceph_msg_put(con->in_msg);
2705 con->in_msg = NULL;
2706 + con->ops->put(con);
2707 }
2708
2709 con->connect_seq = 0;
2710 @@ -391,32 +532,44 @@ static void reset_connection(struct ceph_connection *con)
2711 */
2712 void ceph_con_close(struct ceph_connection *con)
2713 {
2714 + mutex_lock(&con->mutex);
2715 dout("con_close %p peer %s\n", con,
2716 ceph_pr_addr(&con->peer_addr.in_addr));
2717 - set_bit(CLOSED, &con->state); /* in case there's queued work */
2718 - clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
2719 - clear_bit(LOSSYTX, &con->state); /* so we retry next connect */
2720 - clear_bit(KEEPALIVE_PENDING, &con->state);
2721 - clear_bit(WRITE_PENDING, &con->state);
2722 - mutex_lock(&con->mutex);
2723 + con->state = CON_STATE_CLOSED;
2724 +
2725 + clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */
2726 + clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
2727 + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
2728 + clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
2729 + clear_bit(CON_FLAG_BACKOFF, &con->flags);
2730 +
2731 reset_connection(con);
2732 con->peer_global_seq = 0;
2733 cancel_delayed_work(&con->work);
2734 + con_close_socket(con);
2735 mutex_unlock(&con->mutex);
2736 - queue_con(con);
2737 }
2738 EXPORT_SYMBOL(ceph_con_close);
2739
2740 /*
2741 * Reopen a closed connection, with a new peer address.
2742 */
2743 -void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
2744 +void ceph_con_open(struct ceph_connection *con,
2745 + __u8 entity_type, __u64 entity_num,
2746 + struct ceph_entity_addr *addr)
2747 {
2748 + mutex_lock(&con->mutex);
2749 dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
2750 - set_bit(OPENING, &con->state);
2751 - clear_bit(CLOSED, &con->state);
2752 +
2753 + BUG_ON(con->state != CON_STATE_CLOSED);
2754 + con->state = CON_STATE_PREOPEN;
2755 +
2756 + con->peer_name.type = (__u8) entity_type;
2757 + con->peer_name.num = cpu_to_le64(entity_num);
2758 +
2759 memcpy(&con->peer_addr, addr, sizeof(*addr));
2760 con->delay = 0; /* reset backoff memory */
2761 + mutex_unlock(&con->mutex);
2762 queue_con(con);
2763 }
2764 EXPORT_SYMBOL(ceph_con_open);
2765 @@ -430,42 +583,26 @@ bool ceph_con_opened(struct ceph_connection *con)
2766 }
2767
2768 /*
2769 - * generic get/put
2770 - */
2771 -struct ceph_connection *ceph_con_get(struct ceph_connection *con)
2772 -{
2773 - int nref = __atomic_add_unless(&con->nref, 1, 0);
2774 -
2775 - dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1);
2776 -
2777 - return nref ? con : NULL;
2778 -}
2779 -
2780 -void ceph_con_put(struct ceph_connection *con)
2781 -{
2782 - int nref = atomic_dec_return(&con->nref);
2783 -
2784 - BUG_ON(nref < 0);
2785 - if (nref == 0) {
2786 - BUG_ON(con->sock);
2787 - kfree(con);
2788 - }
2789 - dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref);
2790 -}
2791 -
2792 -/*
2793 * initialize a new connection.
2794 */
2795 -void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
2796 +void ceph_con_init(struct ceph_connection *con, void *private,
2797 + const struct ceph_connection_operations *ops,
2798 + struct ceph_messenger *msgr)
2799 {
2800 dout("con_init %p\n", con);
2801 memset(con, 0, sizeof(*con));
2802 - atomic_set(&con->nref, 1);
2803 + con->private = private;
2804 + con->ops = ops;
2805 con->msgr = msgr;
2806 +
2807 + con_sock_state_init(con);
2808 +
2809 mutex_init(&con->mutex);
2810 INIT_LIST_HEAD(&con->out_queue);
2811 INIT_LIST_HEAD(&con->out_sent);
2812 INIT_DELAYED_WORK(&con->work, con_work);
2813 +
2814 + con->state = CON_STATE_CLOSED;
2815 }
2816 EXPORT_SYMBOL(ceph_con_init);
2817
2818 @@ -486,14 +623,14 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
2819 return ret;
2820 }
2821
2822 -static void ceph_con_out_kvec_reset(struct ceph_connection *con)
2823 +static void con_out_kvec_reset(struct ceph_connection *con)
2824 {
2825 con->out_kvec_left = 0;
2826 con->out_kvec_bytes = 0;
2827 con->out_kvec_cur = &con->out_kvec[0];
2828 }
2829
2830 -static void ceph_con_out_kvec_add(struct ceph_connection *con,
2831 +static void con_out_kvec_add(struct ceph_connection *con,
2832 size_t size, void *data)
2833 {
2834 int index;
2835 @@ -507,6 +644,53 @@ static void ceph_con_out_kvec_add(struct ceph_connection *con,
2836 con->out_kvec_bytes += size;
2837 }
2838
2839 +#ifdef CONFIG_BLOCK
2840 +static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
2841 +{
2842 + if (!bio) {
2843 + *iter = NULL;
2844 + *seg = 0;
2845 + return;
2846 + }
2847 + *iter = bio;
2848 + *seg = bio->bi_idx;
2849 +}
2850 +
2851 +static void iter_bio_next(struct bio **bio_iter, int *seg)
2852 +{
2853 + if (*bio_iter == NULL)
2854 + return;
2855 +
2856 + BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
2857 +
2858 + (*seg)++;
2859 + if (*seg == (*bio_iter)->bi_vcnt)
2860 + init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
2861 +}
2862 +#endif
2863 +
2864 +static void prepare_write_message_data(struct ceph_connection *con)
2865 +{
2866 + struct ceph_msg *msg = con->out_msg;
2867 +
2868 + BUG_ON(!msg);
2869 + BUG_ON(!msg->hdr.data_len);
2870 +
2871 + /* initialize page iterator */
2872 + con->out_msg_pos.page = 0;
2873 + if (msg->pages)
2874 + con->out_msg_pos.page_pos = msg->page_alignment;
2875 + else
2876 + con->out_msg_pos.page_pos = 0;
2877 +#ifdef CONFIG_BLOCK
2878 + if (msg->bio)
2879 + init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
2880 +#endif
2881 + con->out_msg_pos.data_pos = 0;
2882 + con->out_msg_pos.did_page_crc = false;
2883 + con->out_more = 1; /* data + footer will follow */
2884 +}
2885 +
2886 /*
2887 * Prepare footer for currently outgoing message, and finish things
2888 * off. Assumes out_kvec* are already valid.. we just add on to the end.
2889 @@ -516,6 +700,8 @@ static void prepare_write_message_footer(struct ceph_connection *con)
2890 struct ceph_msg *m = con->out_msg;
2891 int v = con->out_kvec_left;
2892
2893 + m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
2894 +
2895 dout("prepare_write_message_footer %p\n", con);
2896 con->out_kvec_is_msg = true;
2897 con->out_kvec[v].iov_base = &m->footer;
2898 @@ -534,7 +720,7 @@ static void prepare_write_message(struct ceph_connection *con)
2899 struct ceph_msg *m;
2900 u32 crc;
2901
2902 - ceph_con_out_kvec_reset(con);
2903 + con_out_kvec_reset(con);
2904 con->out_kvec_is_msg = true;
2905 con->out_msg_done = false;
2906
2907 @@ -542,14 +728,16 @@ static void prepare_write_message(struct ceph_connection *con)
2908 * TCP packet that's a good thing. */
2909 if (con->in_seq > con->in_seq_acked) {
2910 con->in_seq_acked = con->in_seq;
2911 - ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2912 + con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2913 con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
2914 - ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
2915 + con_out_kvec_add(con, sizeof (con->out_temp_ack),
2916 &con->out_temp_ack);
2917 }
2918
2919 + BUG_ON(list_empty(&con->out_queue));
2920 m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
2921 con->out_msg = m;
2922 + BUG_ON(m->con != con);
2923
2924 /* put message on sent list */
2925 ceph_msg_get(m);
2926 @@ -572,18 +760,18 @@ static void prepare_write_message(struct ceph_connection *con)
2927 BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
2928
2929 /* tag + hdr + front + middle */
2930 - ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
2931 - ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
2932 - ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
2933 + con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
2934 + con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
2935 + con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
2936
2937 if (m->middle)
2938 - ceph_con_out_kvec_add(con, m->middle->vec.iov_len,
2939 + con_out_kvec_add(con, m->middle->vec.iov_len,
2940 m->middle->vec.iov_base);
2941
2942 /* fill in crc (except data pages), footer */
2943 crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
2944 con->out_msg->hdr.crc = cpu_to_le32(crc);
2945 - con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE;
2946 + con->out_msg->footer.flags = 0;
2947
2948 crc = crc32c(0, m->front.iov_base, m->front.iov_len);
2949 con->out_msg->footer.front_crc = cpu_to_le32(crc);
2950 @@ -593,28 +781,19 @@ static void prepare_write_message(struct ceph_connection *con)
2951 con->out_msg->footer.middle_crc = cpu_to_le32(crc);
2952 } else
2953 con->out_msg->footer.middle_crc = 0;
2954 - con->out_msg->footer.data_crc = 0;
2955 - dout("prepare_write_message front_crc %u data_crc %u\n",
2956 + dout("%s front_crc %u middle_crc %u\n", __func__,
2957 le32_to_cpu(con->out_msg->footer.front_crc),
2958 le32_to_cpu(con->out_msg->footer.middle_crc));
2959
2960 /* is there a data payload? */
2961 - if (le32_to_cpu(m->hdr.data_len) > 0) {
2962 - /* initialize page iterator */
2963 - con->out_msg_pos.page = 0;
2964 - if (m->pages)
2965 - con->out_msg_pos.page_pos = m->page_alignment;
2966 - else
2967 - con->out_msg_pos.page_pos = 0;
2968 - con->out_msg_pos.data_pos = 0;
2969 - con->out_msg_pos.did_page_crc = false;
2970 - con->out_more = 1; /* data + footer will follow */
2971 - } else {
2972 + con->out_msg->footer.data_crc = 0;
2973 + if (m->hdr.data_len)
2974 + prepare_write_message_data(con);
2975 + else
2976 /* no, queue up footer too and be done */
2977 prepare_write_message_footer(con);
2978 - }
2979
2980 - set_bit(WRITE_PENDING, &con->state);
2981 + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
2982 }
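One subtle change in prepare_write_message() above: CEPH_MSG_FOOTER_COMPLETE is no longer set when the message is queued; prepare_write_message_footer() sets it only once the footer, after any data payload, is actually being written, so an aborted send can no longer carry a COMPLETE footer. Modeled minimally with simplified names:

#include <stdio.h>

#define FOOTER_COMPLETE 0x1u

struct msg { unsigned footer_flags; };

static void prepare_write_message(struct msg *m)
{
    m->footer_flags = 0;                 /* was: = FOOTER_COMPLETE */
}

static void prepare_write_message_footer(struct msg *m)
{
    m->footer_flags |= FOOTER_COMPLETE;  /* set only at footer time */
}

int main(void)
{
    struct msg m;
    prepare_write_message(&m);
    printf("after queue:  %u\n", m.footer_flags);
    prepare_write_message_footer(&m);
    printf("after footer: %u\n", m.footer_flags);
    return 0;
}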
2983
2984 /*
2985 @@ -626,16 +805,16 @@ static void prepare_write_ack(struct ceph_connection *con)
2986 con->in_seq_acked, con->in_seq);
2987 con->in_seq_acked = con->in_seq;
2988
2989 - ceph_con_out_kvec_reset(con);
2990 + con_out_kvec_reset(con);
2991
2992 - ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2993 + con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2994
2995 con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
2996 - ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
2997 + con_out_kvec_add(con, sizeof (con->out_temp_ack),
2998 &con->out_temp_ack);
2999
3000 con->out_more = 1; /* more will follow.. eventually.. */
3001 - set_bit(WRITE_PENDING, &con->state);
3002 + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3003 }
3004
3005 /*
3006 @@ -644,63 +823,60 @@ static void prepare_write_ack(struct ceph_connection *con)
3007 static void prepare_write_keepalive(struct ceph_connection *con)
3008 {
3009 dout("prepare_write_keepalive %p\n", con);
3010 - ceph_con_out_kvec_reset(con);
3011 - ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
3012 - set_bit(WRITE_PENDING, &con->state);
3013 + con_out_kvec_reset(con);
3014 + con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
3015 + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3016 }
3017
3018 /*
3019 * Connection negotiation.
3020 */
3021
3022 -static int prepare_connect_authorizer(struct ceph_connection *con)
3023 +static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
3024 + int *auth_proto)
3025 {
3026 - void *auth_buf;
3027 - int auth_len = 0;
3028 - int auth_protocol = 0;
3029 + struct ceph_auth_handshake *auth;
3030 +
3031 + if (!con->ops->get_authorizer) {
3032 + con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
3033 + con->out_connect.authorizer_len = 0;
3034 + return NULL;
3035 + }
3036
3037 + /* Can't hold the mutex while getting authorizer */
3038 mutex_unlock(&con->mutex);
3039 - if (con->ops->get_authorizer)
3040 - con->ops->get_authorizer(con, &auth_buf, &auth_len,
3041 - &auth_protocol, &con->auth_reply_buf,
3042 - &con->auth_reply_buf_len,
3043 - con->auth_retry);
3044 + auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
3045 mutex_lock(&con->mutex);
3046
3047 - if (test_bit(CLOSED, &con->state) ||
3048 - test_bit(OPENING, &con->state))
3049 - return -EAGAIN;
3050 + if (IS_ERR(auth))
3051 + return auth;
3052 + if (con->state != CON_STATE_NEGOTIATING)
3053 + return ERR_PTR(-EAGAIN);
3054
3055 - con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
3056 - con->out_connect.authorizer_len = cpu_to_le32(auth_len);
3057 -
3058 - if (auth_len)
3059 - ceph_con_out_kvec_add(con, auth_len, auth_buf);
3060 -
3061 - return 0;
3062 + con->auth_reply_buf = auth->authorizer_reply_buf;
3063 + con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
3064 + return auth;
3065 }
3066
3067 /*
3068 * We connected to a peer and are saying hello.
3069 */
3070 -static void prepare_write_banner(struct ceph_messenger *msgr,
3071 - struct ceph_connection *con)
3072 +static void prepare_write_banner(struct ceph_connection *con)
3073 {
3074 - ceph_con_out_kvec_reset(con);
3075 - ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
3076 - ceph_con_out_kvec_add(con, sizeof (msgr->my_enc_addr),
3077 - &msgr->my_enc_addr);
3078 + con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
3079 + con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
3080 + &con->msgr->my_enc_addr);
3081
3082 con->out_more = 0;
3083 - set_bit(WRITE_PENDING, &con->state);
3084 + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3085 }
3086
3087 -static int prepare_write_connect(struct ceph_messenger *msgr,
3088 - struct ceph_connection *con,
3089 - int include_banner)
3090 +static int prepare_write_connect(struct ceph_connection *con)
3091 {
3092 unsigned global_seq = get_global_seq(con->msgr, 0);
3093 int proto;
3094 + int auth_proto;
3095 + struct ceph_auth_handshake *auth;
3096
3097 switch (con->peer_name.type) {
3098 case CEPH_ENTITY_TYPE_MON:
3099 @@ -719,23 +895,32 @@ static int prepare_write_connect(struct ceph_messenger *msgr,
3100 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
3101 con->connect_seq, global_seq, proto);
3102
3103 - con->out_connect.features = cpu_to_le64(msgr->supported_features);
3104 + con->out_connect.features = cpu_to_le64(con->msgr->supported_features);
3105 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
3106 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
3107 con->out_connect.global_seq = cpu_to_le32(global_seq);
3108 con->out_connect.protocol_version = cpu_to_le32(proto);
3109 con->out_connect.flags = 0;
3110
3111 - if (include_banner)
3112 - prepare_write_banner(msgr, con);
3113 - else
3114 - ceph_con_out_kvec_reset(con);
3115 - ceph_con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect);
3116 + auth_proto = CEPH_AUTH_UNKNOWN;
3117 + auth = get_connect_authorizer(con, &auth_proto);
3118 + if (IS_ERR(auth))
3119 + return PTR_ERR(auth);
3120 +
3121 + con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
3122 + con->out_connect.authorizer_len = auth ?
3123 + cpu_to_le32(auth->authorizer_buf_len) : 0;
3124 +
3125 + con_out_kvec_add(con, sizeof (con->out_connect),
3126 + &con->out_connect);
3127 + if (auth && auth->authorizer_buf_len)
3128 + con_out_kvec_add(con, auth->authorizer_buf_len,
3129 + auth->authorizer_buf);
3130
3131 con->out_more = 0;
3132 - set_bit(WRITE_PENDING, &con->state);
3133 + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3134
3135 - return prepare_connect_authorizer(con);
3136 + return 0;
3137 }
3138
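The get_connect_authorizer() rework above collapses four output parameters into one ceph_auth_handshake pointer and adopts the kernel's ERR_PTR convention, so a single return value distinguishes "no authorizer configured" (NULL), success (a valid pointer), and failure (an encoded errno). A minimal sketch of that three-way convention with a hypothetical lookup helper; only ERR_PTR/IS_ERR/PTR_ERR are real kernel APIs here:

	#include <linux/err.h>

	struct thing;

	/* hypothetical helper illustrating the three-way return used by
	 * get_connect_authorizer() above */
	static struct thing *lookup_thing(struct thing *table[], int id)
	{
		if (id < 0)
			return ERR_PTR(-EINVAL);  /* failure: errno in pointer */
		return table[id];                 /* NULL here means "none" */
	}

	static int use_thing(struct thing *table[], int id)
	{
		struct thing *t = lookup_thing(table, id);

		if (IS_ERR(t))
			return PTR_ERR(t);  /* decode and propagate the errno */
		if (!t)
			return 0;           /* absence is a valid, quiet result */
		/* ... use t ... */
		return 0;
	}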
3139 /*
3140 @@ -781,30 +966,34 @@ out:
3141 return ret; /* done! */
3142 }
3143
3144 -#ifdef CONFIG_BLOCK
3145 -static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
3146 +static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
3147 + size_t len, size_t sent, bool in_trail)
3148 {
3149 - if (!bio) {
3150 - *iter = NULL;
3151 - *seg = 0;
3152 - return;
3153 - }
3154 - *iter = bio;
3155 - *seg = bio->bi_idx;
3156 -}
3157 + struct ceph_msg *msg = con->out_msg;
3158
3159 -static void iter_bio_next(struct bio **bio_iter, int *seg)
3160 -{
3161 - if (*bio_iter == NULL)
3162 - return;
3163 + BUG_ON(!msg);
3164 + BUG_ON(!sent);
3165
3166 - BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
3167 + con->out_msg_pos.data_pos += sent;
3168 + con->out_msg_pos.page_pos += sent;
3169 + if (sent < len)
3170 + return;
3171
3172 - (*seg)++;
3173 - if (*seg == (*bio_iter)->bi_vcnt)
3174 - init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
3175 -}
3176 + BUG_ON(sent != len);
3177 + con->out_msg_pos.page_pos = 0;
3178 + con->out_msg_pos.page++;
3179 + con->out_msg_pos.did_page_crc = false;
3180 + if (in_trail)
3181 + list_move_tail(&page->lru,
3182 + &msg->trail->head);
3183 + else if (msg->pagelist)
3184 + list_move_tail(&page->lru,
3185 + &msg->pagelist->head);
3186 +#ifdef CONFIG_BLOCK
3187 + else if (msg->bio)
3188 + iter_bio_next(&msg->bio_iter, &msg->bio_seg);
3189 #endif
3190 +}
3191
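out_msg_pos_next() factors out the end-of-chunk bookkeeping that was previously duplicated inline in write_partial_msg_pages() (see the later hunk). The invariant is small enough to state as a sketch: positions always advance by what the socket actually accepted, but page-level state only rolls over on a complete chunk. The struct and function names below are illustrative, not the kernel's:

	struct out_pos {
		size_t data_pos;      /* offset into the whole payload */
		size_t page_pos;      /* offset into the current page */
		int page;             /* index of the current page */
		bool did_page_crc;    /* CRC already folded in for this page? */
	};

	static void pos_advance(struct out_pos *pos, size_t len, size_t sent)
	{
		pos->data_pos += sent;
		pos->page_pos += sent;
		if (sent < len)
			return;       /* short write: stay on this page */

		/* sent == len: this page's chunk went out in full */
		pos->page_pos = 0;
		pos->page++;
		pos->did_page_crc = false;
	}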
3192 /*
3193 * Write as much message data payload as we can. If we finish, queue
3194 @@ -821,41 +1010,36 @@ static int write_partial_msg_pages(struct ceph_connection *con)
3195 bool do_datacrc = !con->msgr->nocrc;
3196 int ret;
3197 int total_max_write;
3198 - int in_trail = 0;
3199 - size_t trail_len = (msg->trail ? msg->trail->length : 0);
3200 + bool in_trail = false;
3201 + const size_t trail_len = (msg->trail ? msg->trail->length : 0);
3202 + const size_t trail_off = data_len - trail_len;
3203
3204 dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
3205 - con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
3206 + con, msg, con->out_msg_pos.page, msg->nr_pages,
3207 con->out_msg_pos.page_pos);
3208
3209 -#ifdef CONFIG_BLOCK
3210 - if (msg->bio && !msg->bio_iter)
3211 - init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
3212 -#endif
3213 -
3214 + /*
3215 + * Iterate through each page that contains data to be
3216 + * written, and send as much as possible for each.
3217 + *
3218 + * If we are calculating the data crc (the default), we will
3219 + * need to map the page. If we have no pages, they have
3220 + * been revoked, so use the zero page.
3221 + */
3222 while (data_len > con->out_msg_pos.data_pos) {
3223 struct page *page = NULL;
3224 int max_write = PAGE_SIZE;
3225 int bio_offset = 0;
3226
3227 - total_max_write = data_len - trail_len -
3228 - con->out_msg_pos.data_pos;
3229 -
3230 - /*
3231 - * if we are calculating the data crc (the default), we need
3232 - * to map the page. if our pages[] has been revoked, use the
3233 - * zero page.
3234 - */
3235 -
3236 - /* have we reached the trail part of the data? */
3237 - if (con->out_msg_pos.data_pos >= data_len - trail_len) {
3238 - in_trail = 1;
3239 + in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off;
3240 + if (!in_trail)
3241 + total_max_write = trail_off - con->out_msg_pos.data_pos;
3242
3243 + if (in_trail) {
3244 total_max_write = data_len - con->out_msg_pos.data_pos;
3245
3246 page = list_first_entry(&msg->trail->head,
3247 struct page, lru);
3248 - max_write = PAGE_SIZE;
3249 } else if (msg->pages) {
3250 page = msg->pages[con->out_msg_pos.page];
3251 } else if (msg->pagelist) {
3252 @@ -878,52 +1062,32 @@ static int write_partial_msg_pages(struct ceph_connection *con)
3253
3254 if (do_datacrc && !con->out_msg_pos.did_page_crc) {
3255 void *base;
3256 - u32 crc;
3257 - u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);
3258 + u32 crc = le32_to_cpu(msg->footer.data_crc);
3259 char *kaddr;
3260
3261 kaddr = kmap(page);
3262 BUG_ON(kaddr == NULL);
3263 base = kaddr + con->out_msg_pos.page_pos + bio_offset;
3264 - crc = crc32c(tmpcrc, base, len);
3265 - con->out_msg->footer.data_crc = cpu_to_le32(crc);
3266 + crc = crc32c(crc, base, len);
3267 + kunmap(page);
3268 + msg->footer.data_crc = cpu_to_le32(crc);
3269 con->out_msg_pos.did_page_crc = true;
3270 }
3271 ret = ceph_tcp_sendpage(con->sock, page,
3272 con->out_msg_pos.page_pos + bio_offset,
3273 len, 1);
3274 -
3275 - if (do_datacrc)
3276 - kunmap(page);
3277 -
3278 if (ret <= 0)
3279 goto out;
3280
3281 - con->out_msg_pos.data_pos += ret;
3282 - con->out_msg_pos.page_pos += ret;
3283 - if (ret == len) {
3284 - con->out_msg_pos.page_pos = 0;
3285 - con->out_msg_pos.page++;
3286 - con->out_msg_pos.did_page_crc = false;
3287 - if (in_trail)
3288 - list_move_tail(&page->lru,
3289 - &msg->trail->head);
3290 - else if (msg->pagelist)
3291 - list_move_tail(&page->lru,
3292 - &msg->pagelist->head);
3293 -#ifdef CONFIG_BLOCK
3294 - else if (msg->bio)
3295 - iter_bio_next(&msg->bio_iter, &msg->bio_seg);
3296 -#endif
3297 - }
3298 + out_msg_pos_next(con, page, len, (size_t) ret, in_trail);
3299 }
3300
3301 dout("write_partial_msg_pages %p msg %p done\n", con, msg);
3302
3303 /* prepare and queue up footer, too */
3304 if (!do_datacrc)
3305 - con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
3306 - ceph_con_out_kvec_reset(con);
3307 + msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
3308 + con_out_kvec_reset(con);
3309 prepare_write_message_footer(con);
3310 ret = 1;
3311 out:
3312 @@ -992,11 +1156,10 @@ static int prepare_read_message(struct ceph_connection *con)
3313
3314
3315 static int read_partial(struct ceph_connection *con,
3316 - int *to, int size, void *object)
3317 + int end, int size, void *object)
3318 {
3319 - *to += size;
3320 - while (con->in_base_pos < *to) {
3321 - int left = *to - con->in_base_pos;
3322 + while (con->in_base_pos < end) {
3323 + int left = end - con->in_base_pos;
3324 int have = size - left;
3325 int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
3326 if (ret <= 0)
3327 @@ -1012,37 +1175,52 @@ static int read_partial(struct ceph_connection *con,
3328 */
3329 static int read_partial_banner(struct ceph_connection *con)
3330 {
3331 - int ret, to = 0;
3332 + int size;
3333 + int end;
3334 + int ret;
3335
3336 dout("read_partial_banner %p at %d\n", con, con->in_base_pos);
3337
3338 /* peer's banner */
3339 - ret = read_partial(con, &to, strlen(CEPH_BANNER), con->in_banner);
3340 + size = strlen(CEPH_BANNER);
3341 + end = size;
3342 + ret = read_partial(con, end, size, con->in_banner);
3343 if (ret <= 0)
3344 goto out;
3345 - ret = read_partial(con, &to, sizeof(con->actual_peer_addr),
3346 - &con->actual_peer_addr);
3347 +
3348 + size = sizeof (con->actual_peer_addr);
3349 + end += size;
3350 + ret = read_partial(con, end, size, &con->actual_peer_addr);
3351 if (ret <= 0)
3352 goto out;
3353 - ret = read_partial(con, &to, sizeof(con->peer_addr_for_me),
3354 - &con->peer_addr_for_me);
3355 +
3356 + size = sizeof (con->peer_addr_for_me);
3357 + end += size;
3358 + ret = read_partial(con, end, size, &con->peer_addr_for_me);
3359 if (ret <= 0)
3360 goto out;
3361 +
3362 out:
3363 return ret;
3364 }
3365
3366 static int read_partial_connect(struct ceph_connection *con)
3367 {
3368 - int ret, to = 0;
3369 + int size;
3370 + int end;
3371 + int ret;
3372
3373 dout("read_partial_connect %p at %d\n", con, con->in_base_pos);
3374
3375 - ret = read_partial(con, &to, sizeof(con->in_reply), &con->in_reply);
3376 + size = sizeof (con->in_reply);
3377 + end = size;
3378 + ret = read_partial(con, end, size, &con->in_reply);
3379 if (ret <= 0)
3380 goto out;
3381 - ret = read_partial(con, &to, le32_to_cpu(con->in_reply.authorizer_len),
3382 - con->auth_reply_buf);
3383 +
3384 + size = le32_to_cpu(con->in_reply.authorizer_len);
3385 + end += size;
3386 + ret = read_partial(con, end, size, con->auth_reply_buf);
3387 if (ret <= 0)
3388 goto out;
3389
3390 @@ -1321,20 +1499,14 @@ static int process_banner(struct ceph_connection *con)
3391 ceph_pr_addr(&con->msgr->inst.addr.in_addr));
3392 }
3393
3394 - set_bit(NEGOTIATING, &con->state);
3395 - prepare_read_connect(con);
3396 return 0;
3397 }
3398
3399 static void fail_protocol(struct ceph_connection *con)
3400 {
3401 reset_connection(con);
3402 - set_bit(CLOSED, &con->state); /* in case there's queued work */
3403 -
3404 - mutex_unlock(&con->mutex);
3405 - if (con->ops->bad_proto)
3406 - con->ops->bad_proto(con);
3407 - mutex_lock(&con->mutex);
3408 + BUG_ON(con->state != CON_STATE_NEGOTIATING);
3409 + con->state = CON_STATE_CLOSED;
3410 }
3411
3412 static int process_connect(struct ceph_connection *con)
3413 @@ -1377,7 +1549,8 @@ static int process_connect(struct ceph_connection *con)
3414 return -1;
3415 }
3416 con->auth_retry = 1;
3417 - ret = prepare_write_connect(con->msgr, con, 0);
3418 + con_out_kvec_reset(con);
3419 + ret = prepare_write_connect(con);
3420 if (ret < 0)
3421 return ret;
3422 prepare_read_connect(con);
3423 @@ -1392,12 +1565,15 @@ static int process_connect(struct ceph_connection *con)
3424 * dropped messages.
3425 */
3426 dout("process_connect got RESET peer seq %u\n",
3427 - le32_to_cpu(con->in_connect.connect_seq));
3428 + le32_to_cpu(con->in_reply.connect_seq));
3429 pr_err("%s%lld %s connection reset\n",
3430 ENTITY_NAME(con->peer_name),
3431 ceph_pr_addr(&con->peer_addr.in_addr));
3432 reset_connection(con);
3433 - prepare_write_connect(con->msgr, con, 0);
3434 + con_out_kvec_reset(con);
3435 + ret = prepare_write_connect(con);
3436 + if (ret < 0)
3437 + return ret;
3438 prepare_read_connect(con);
3439
3440 /* Tell ceph about it. */
3441 @@ -1406,8 +1582,7 @@ static int process_connect(struct ceph_connection *con)
3442 if (con->ops->peer_reset)
3443 con->ops->peer_reset(con);
3444 mutex_lock(&con->mutex);
3445 - if (test_bit(CLOSED, &con->state) ||
3446 - test_bit(OPENING, &con->state))
3447 + if (con->state != CON_STATE_NEGOTIATING)
3448 return -EAGAIN;
3449 break;
3450
3451 @@ -1416,11 +1591,14 @@ static int process_connect(struct ceph_connection *con)
3452 * If we sent a smaller connect_seq than the peer has, try
3453 * again with a larger value.
3454 */
3455 - dout("process_connect got RETRY my seq = %u, peer_seq = %u\n",
3456 + dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
3457 le32_to_cpu(con->out_connect.connect_seq),
3458 - le32_to_cpu(con->in_connect.connect_seq));
3459 - con->connect_seq = le32_to_cpu(con->in_connect.connect_seq);
3460 - prepare_write_connect(con->msgr, con, 0);
3461 + le32_to_cpu(con->in_reply.connect_seq));
3462 + con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
3463 + con_out_kvec_reset(con);
3464 + ret = prepare_write_connect(con);
3465 + if (ret < 0)
3466 + return ret;
3467 prepare_read_connect(con);
3468 break;
3469
3470 @@ -1431,10 +1609,13 @@ static int process_connect(struct ceph_connection *con)
3471 */
3472 dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
3473 con->peer_global_seq,
3474 - le32_to_cpu(con->in_connect.global_seq));
3475 + le32_to_cpu(con->in_reply.global_seq));
3476 get_global_seq(con->msgr,
3477 - le32_to_cpu(con->in_connect.global_seq));
3478 - prepare_write_connect(con->msgr, con, 0);
3479 + le32_to_cpu(con->in_reply.global_seq));
3480 + con_out_kvec_reset(con);
3481 + ret = prepare_write_connect(con);
3482 + if (ret < 0)
3483 + return ret;
3484 prepare_read_connect(con);
3485 break;
3486
3487 @@ -1449,7 +1630,10 @@ static int process_connect(struct ceph_connection *con)
3488 fail_protocol(con);
3489 return -1;
3490 }
3491 - clear_bit(CONNECTING, &con->state);
3492 +
3493 + BUG_ON(con->state != CON_STATE_NEGOTIATING);
3494 + con->state = CON_STATE_OPEN;
3495 +
3496 con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
3497 con->connect_seq++;
3498 con->peer_features = server_feat;
3499 @@ -1461,7 +1645,9 @@ static int process_connect(struct ceph_connection *con)
3500 le32_to_cpu(con->in_reply.connect_seq));
3501
3502 if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
3503 - set_bit(LOSSYTX, &con->state);
3504 + set_bit(CON_FLAG_LOSSYTX, &con->flags);
3505 +
3506 + con->delay = 0; /* reset backoff memory */
3507
3508 prepare_read_tag(con);
3509 break;
3510 @@ -1491,10 +1677,10 @@ static int process_connect(struct ceph_connection *con)
3511 */
3512 static int read_partial_ack(struct ceph_connection *con)
3513 {
3514 - int to = 0;
3515 + int size = sizeof (con->in_temp_ack);
3516 + int end = size;
3517
3518 - return read_partial(con, &to, sizeof(con->in_temp_ack),
3519 - &con->in_temp_ack);
3520 + return read_partial(con, end, size, &con->in_temp_ack);
3521 }
3522
3523
3524 @@ -1547,10 +1733,7 @@ static int read_partial_message_section(struct ceph_connection *con,
3525 return 1;
3526 }
3527
3528 -static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
3529 - struct ceph_msg_header *hdr,
3530 - int *skip);
3531 -
3532 +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
3533
3534 static int read_partial_message_pages(struct ceph_connection *con,
3535 struct page **pages,
3536 @@ -1593,9 +1776,6 @@ static int read_partial_message_bio(struct ceph_connection *con,
3537 void *p;
3538 int ret, left;
3539
3540 - if (IS_ERR(bv))
3541 - return PTR_ERR(bv);
3542 -
3543 left = min((int)(data_len - con->in_msg_pos.data_pos),
3544 (int)(bv->bv_len - con->in_msg_pos.page_pos));
3545
3546 @@ -1627,26 +1807,22 @@ static int read_partial_message_bio(struct ceph_connection *con,
3547 static int read_partial_message(struct ceph_connection *con)
3548 {
3549 struct ceph_msg *m = con->in_msg;
3550 + int size;
3551 + int end;
3552 int ret;
3553 - int to, left;
3554 unsigned front_len, middle_len, data_len;
3555 bool do_datacrc = !con->msgr->nocrc;
3556 - int skip;
3557 u64 seq;
3558 u32 crc;
3559
3560 dout("read_partial_message con %p msg %p\n", con, m);
3561
3562 /* header */
3563 - while (con->in_base_pos < sizeof(con->in_hdr)) {
3564 - left = sizeof(con->in_hdr) - con->in_base_pos;
3565 - ret = ceph_tcp_recvmsg(con->sock,
3566 - (char *)&con->in_hdr + con->in_base_pos,
3567 - left);
3568 - if (ret <= 0)
3569 - return ret;
3570 - con->in_base_pos += ret;
3571 - }
3572 + size = sizeof (con->in_hdr);
3573 + end = size;
3574 + ret = read_partial(con, end, size, &con->in_hdr);
3575 + if (ret <= 0)
3576 + return ret;
3577
3578 crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
3579 if (cpu_to_le32(crc) != con->in_hdr.crc) {
3580 @@ -1686,10 +1862,13 @@ static int read_partial_message(struct ceph_connection *con)
3581
3582 /* allocate message? */
3583 if (!con->in_msg) {
3584 + int skip = 0;
3585 +
3586 dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
3587 con->in_hdr.front_len, con->in_hdr.data_len);
3588 - skip = 0;
3589 - con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
3590 + ret = ceph_con_in_msg_alloc(con, &skip);
3591 + if (ret < 0)
3592 + return ret;
3593 if (skip) {
3594 /* skip this message */
3595 dout("alloc_msg said skip message\n");
3596 @@ -1700,11 +1879,9 @@ static int read_partial_message(struct ceph_connection *con)
3597 con->in_seq++;
3598 return 0;
3599 }
3600 - if (!con->in_msg) {
3601 - con->error_msg =
3602 - "error allocating memory for incoming message";
3603 - return -ENOMEM;
3604 - }
3605 +
3606 + BUG_ON(!con->in_msg);
3607 + BUG_ON(con->in_msg->con != con);
3608 m = con->in_msg;
3609 m->front.iov_len = 0; /* haven't read it yet */
3610 if (m->middle)
3611 @@ -1716,6 +1893,11 @@ static int read_partial_message(struct ceph_connection *con)
3612 else
3613 con->in_msg_pos.page_pos = 0;
3614 con->in_msg_pos.data_pos = 0;
3615 +
3616 +#ifdef CONFIG_BLOCK
3617 + if (m->bio)
3618 + init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
3619 +#endif
3620 }
3621
3622 /* front */
3623 @@ -1732,10 +1914,6 @@ static int read_partial_message(struct ceph_connection *con)
3624 if (ret <= 0)
3625 return ret;
3626 }
3627 -#ifdef CONFIG_BLOCK
3628 - if (m->bio && !m->bio_iter)
3629 - init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
3630 -#endif
3631
3632 /* (page) data */
3633 while (con->in_msg_pos.data_pos < data_len) {
3634 @@ -1746,7 +1924,7 @@ static int read_partial_message(struct ceph_connection *con)
3635 return ret;
3636 #ifdef CONFIG_BLOCK
3637 } else if (m->bio) {
3638 -
3639 + BUG_ON(!m->bio_iter);
3640 ret = read_partial_message_bio(con,
3641 &m->bio_iter, &m->bio_seg,
3642 data_len, do_datacrc);
3643 @@ -1759,16 +1937,12 @@ static int read_partial_message(struct ceph_connection *con)
3644 }
3645
3646 /* footer */
3647 - to = sizeof(m->hdr) + sizeof(m->footer);
3648 - while (con->in_base_pos < to) {
3649 - left = to - con->in_base_pos;
3650 - ret = ceph_tcp_recvmsg(con->sock, (char *)&m->footer +
3651 - (con->in_base_pos - sizeof(m->hdr)),
3652 - left);
3653 - if (ret <= 0)
3654 - return ret;
3655 - con->in_base_pos += ret;
3656 - }
3657 + size = sizeof (m->footer);
3658 + end += size;
3659 + ret = read_partial(con, end, size, &m->footer);
3660 + if (ret <= 0)
3661 + return ret;
3662 +
3663 dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
3664 m, front_len, m->footer.front_crc, middle_len,
3665 m->footer.middle_crc, data_len, m->footer.data_crc);
3666 @@ -1804,8 +1978,11 @@ static void process_message(struct ceph_connection *con)
3667 {
3668 struct ceph_msg *msg;
3669
3670 + BUG_ON(con->in_msg->con != con);
3671 + con->in_msg->con = NULL;
3672 msg = con->in_msg;
3673 con->in_msg = NULL;
3674 + con->ops->put(con);
3675
3676 /* if first message, set peer_name */
3677 if (con->peer_name.type == 0)
3678 @@ -1825,7 +2002,6 @@ static void process_message(struct ceph_connection *con)
3679 con->ops->dispatch(con, msg);
3680
3681 mutex_lock(&con->mutex);
3682 - prepare_read_tag(con);
3683 }
3684
3685
3686 @@ -1835,21 +2011,21 @@ static void process_message(struct ceph_connection *con)
3687 */
3688 static int try_write(struct ceph_connection *con)
3689 {
3690 - struct ceph_messenger *msgr = con->msgr;
3691 int ret = 1;
3692
3693 - dout("try_write start %p state %lu nref %d\n", con, con->state,
3694 - atomic_read(&con->nref));
3695 + dout("try_write start %p state %lu\n", con, con->state);
3696
3697 more:
3698 dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
3699
3700 /* open the socket first? */
3701 - if (con->sock == NULL) {
3702 - prepare_write_connect(msgr, con, 1);
3703 + if (con->state == CON_STATE_PREOPEN) {
3704 + BUG_ON(con->sock);
3705 + con->state = CON_STATE_CONNECTING;
3706 +
3707 + con_out_kvec_reset(con);
3708 + prepare_write_banner(con);
3709 prepare_read_banner(con);
3710 - set_bit(CONNECTING, &con->state);
3711 - clear_bit(NEGOTIATING, &con->state);
3712
3713 BUG_ON(con->in_msg);
3714 con->in_tag = CEPH_MSGR_TAG_READY;
3715 @@ -1896,7 +2072,7 @@ more_kvec:
3716 }
3717
3718 do_next:
3719 - if (!test_bit(CONNECTING, &con->state)) {
3720 + if (con->state == CON_STATE_OPEN) {
3721 /* is anything else pending? */
3722 if (!list_empty(&con->out_queue)) {
3723 prepare_write_message(con);
3724 @@ -1906,14 +2082,15 @@ do_next:
3725 prepare_write_ack(con);
3726 goto more;
3727 }
3728 - if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) {
3729 + if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING,
3730 + &con->flags)) {
3731 prepare_write_keepalive(con);
3732 goto more;
3733 }
3734 }
3735
3736 /* Nothing to do! */
3737 - clear_bit(WRITE_PENDING, &con->state);
3738 + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3739 dout("try_write nothing else to write.\n");
3740 ret = 0;
3741 out:
3742 @@ -1930,38 +2107,46 @@ static int try_read(struct ceph_connection *con)
3743 {
3744 int ret = -1;
3745
3746 - if (!con->sock)
3747 - return 0;
3748 -
3749 - if (test_bit(STANDBY, &con->state))
3750 +more:
3751 + dout("try_read start on %p state %lu\n", con, con->state);
3752 + if (con->state != CON_STATE_CONNECTING &&
3753 + con->state != CON_STATE_NEGOTIATING &&
3754 + con->state != CON_STATE_OPEN)
3755 return 0;
3756
3757 - dout("try_read start on %p\n", con);
3758 + BUG_ON(!con->sock);
3759
3760 -more:
3761 dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
3762 con->in_base_pos);
3763
3764 - /*
3765 - * process_connect and process_message drop and re-take
3766 - * con->mutex. make sure we handle a racing close or reopen.
3767 - */
3768 - if (test_bit(CLOSED, &con->state) ||
3769 - test_bit(OPENING, &con->state)) {
3770 - ret = -EAGAIN;
3771 + if (con->state == CON_STATE_CONNECTING) {
3772 + dout("try_read connecting\n");
3773 + ret = read_partial_banner(con);
3774 + if (ret <= 0)
3775 + goto out;
3776 + ret = process_banner(con);
3777 + if (ret < 0)
3778 + goto out;
3779 +
3780 + BUG_ON(con->state != CON_STATE_CONNECTING);
3781 + con->state = CON_STATE_NEGOTIATING;
3782 +
3783 + /*
3784 + * Received banner is good, exchange connection info.
3785 + * Do not reset out_kvec, as sending our banner raced
3786 + * with receiving peer banner after connect completed.
3787 + */
3788 + ret = prepare_write_connect(con);
3789 + if (ret < 0)
3790 + goto out;
3791 + prepare_read_connect(con);
3792 +
3793 + /* Send connection info before awaiting response */
3794 goto out;
3795 }
3796
3797 - if (test_bit(CONNECTING, &con->state)) {
3798 - if (!test_bit(NEGOTIATING, &con->state)) {
3799 - dout("try_read connecting\n");
3800 - ret = read_partial_banner(con);
3801 - if (ret <= 0)
3802 - goto out;
3803 - ret = process_banner(con);
3804 - if (ret < 0)
3805 - goto out;
3806 - }
3807 + if (con->state == CON_STATE_NEGOTIATING) {
3808 + dout("try_read negotiating\n");
3809 ret = read_partial_connect(con);
3810 if (ret <= 0)
3811 goto out;
3812 @@ -1971,6 +2156,8 @@ more:
3813 goto more;
3814 }
3815
3816 + BUG_ON(con->state != CON_STATE_OPEN);
3817 +
3818 if (con->in_base_pos < 0) {
3819 /*
3820 * skipping + discarding content.
3821 @@ -2004,7 +2191,8 @@ more:
3822 prepare_read_ack(con);
3823 break;
3824 case CEPH_MSGR_TAG_CLOSE:
3825 - set_bit(CLOSED, &con->state); /* fixme */
3826 + con_close_socket(con);
3827 + con->state = CON_STATE_CLOSED;
3828 goto out;
3829 default:
3830 goto bad_tag;
3831 @@ -2027,6 +2215,8 @@ more:
3832 if (con->in_tag == CEPH_MSGR_TAG_READY)
3833 goto more;
3834 process_message(con);
3835 + if (con->state == CON_STATE_OPEN)
3836 + prepare_read_tag(con);
3837 goto more;
3838 }
3839 if (con->in_tag == CEPH_MSGR_TAG_ACK) {
3840 @@ -2055,12 +2245,6 @@ bad_tag:
3841 */
3842 static void queue_con(struct ceph_connection *con)
3843 {
3844 - if (test_bit(DEAD, &con->state)) {
3845 - dout("queue_con %p ignoring: DEAD\n",
3846 - con);
3847 - return;
3848 - }
3849 -
3850 if (!con->ops->get(con)) {
3851 dout("queue_con %p ref count 0\n", con);
3852 return;
3853 @@ -2085,7 +2269,26 @@ static void con_work(struct work_struct *work)
3854
3855 mutex_lock(&con->mutex);
3856 restart:
3857 - if (test_and_clear_bit(BACKOFF, &con->state)) {
3858 + if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) {
3859 + switch (con->state) {
3860 + case CON_STATE_CONNECTING:
3861 + con->error_msg = "connection failed";
3862 + break;
3863 + case CON_STATE_NEGOTIATING:
3864 + con->error_msg = "negotiation failed";
3865 + break;
3866 + case CON_STATE_OPEN:
3867 + con->error_msg = "socket closed";
3868 + break;
3869 + default:
3870 + dout("unrecognized con state %d\n", (int)con->state);
3871 + con->error_msg = "unrecognized con state";
3872 + BUG();
3873 + }
3874 + goto fault;
3875 + }
3876 +
3877 + if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) {
3878 dout("con_work %p backing off\n", con);
3879 if (queue_delayed_work(ceph_msgr_wq, &con->work,
3880 round_jiffies_relative(con->delay))) {
3881 @@ -2093,41 +2296,42 @@ restart:
3882 mutex_unlock(&con->mutex);
3883 return;
3884 } else {
3885 - con->ops->put(con);
3886 dout("con_work %p FAILED to back off %lu\n", con,
3887 con->delay);
3888 + set_bit(CON_FLAG_BACKOFF, &con->flags);
3889 }
3890 + goto done;
3891 }
3892
3893 - if (test_bit(STANDBY, &con->state)) {
3894 + if (con->state == CON_STATE_STANDBY) {
3895 dout("con_work %p STANDBY\n", con);
3896 goto done;
3897 }
3898 - if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
3899 - dout("con_work CLOSED\n");
3900 - con_close_socket(con);
3901 + if (con->state == CON_STATE_CLOSED) {
3902 + dout("con_work %p CLOSED\n", con);
3903 + BUG_ON(con->sock);
3904 goto done;
3905 }
3906 - if (test_and_clear_bit(OPENING, &con->state)) {
3907 - /* reopen w/ new peer */
3908 + if (con->state == CON_STATE_PREOPEN) {
3909 dout("con_work OPENING\n");
3910 - con_close_socket(con);
3911 + BUG_ON(con->sock);
3912 }
3913
3914 - if (test_and_clear_bit(SOCK_CLOSED, &con->state))
3915 - goto fault;
3916 -
3917 ret = try_read(con);
3918 if (ret == -EAGAIN)
3919 goto restart;
3920 - if (ret < 0)
3921 + if (ret < 0) {
3922 + con->error_msg = "socket error on read";
3923 goto fault;
3924 + }
3925
3926 ret = try_write(con);
3927 if (ret == -EAGAIN)
3928 goto restart;
3929 - if (ret < 0)
3930 + if (ret < 0) {
3931 + con->error_msg = "socket error on write";
3932 goto fault;
3933 + }
3934
3935 done:
3936 mutex_unlock(&con->mutex);
3937 @@ -2136,7 +2340,6 @@ done_unlocked:
3938 return;
3939
3940 fault:
3941 - mutex_unlock(&con->mutex);
3942 ceph_fault(con); /* error/fault path */
3943 goto done_unlocked;
3944 }
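A recurring theme in these con_work()/ceph_fault() hunks: the old code kept everything in one con->state bitmap, while the new code splits it into a single-valued state machine (con->state, compared with ==, changed only under con->mutex) and an independent con->flags bitmap for orthogonal conditions (atomic bitops, safe to touch from other contexts such as socket callbacks). A sketch of the resulting discipline; the helper is hypothetical, the CON_* names are those used in the hunks:

	static bool con_ready_for_io(struct ceph_connection *con)
	{
		/* states are exclusive and mutex-protected */
		lockdep_assert_held(&con->mutex);
		if (con->state != CON_STATE_OPEN)
			return false;

		/* flags are independent bits; test_bit/set_bit are atomic
		 * and may race benignly with, e.g., the socket callback
		 * setting CON_FLAG_SOCK_CLOSED */
		return !test_bit(CON_FLAG_BACKOFF, &con->flags) &&
		       !test_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
	}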
3945 @@ -2147,26 +2350,31 @@ fault:
3946 * exponential backoff
3947 */
3948 static void ceph_fault(struct ceph_connection *con)
3949 + __releases(con->mutex)
3950 {
3951 pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
3952 ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
3953 dout("fault %p state %lu to peer %s\n",
3954 con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
3955
3956 - if (test_bit(LOSSYTX, &con->state)) {
3957 - dout("fault on LOSSYTX channel\n");
3958 - goto out;
3959 - }
3960 -
3961 - mutex_lock(&con->mutex);
3962 - if (test_bit(CLOSED, &con->state))
3963 - goto out_unlock;
3964 + BUG_ON(con->state != CON_STATE_CONNECTING &&
3965 + con->state != CON_STATE_NEGOTIATING &&
3966 + con->state != CON_STATE_OPEN);
3967
3968 con_close_socket(con);
3969
3970 + if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) {
3971 + dout("fault on LOSSYTX channel, marking CLOSED\n");
3972 + con->state = CON_STATE_CLOSED;
3973 + goto out_unlock;
3974 + }
3975 +
3976 if (con->in_msg) {
3977 + BUG_ON(con->in_msg->con != con);
3978 + con->in_msg->con = NULL;
3979 ceph_msg_put(con->in_msg);
3980 con->in_msg = NULL;
3981 + con->ops->put(con);
3982 }
3983
3984 /* Requeue anything that hasn't been acked */
3985 @@ -2175,12 +2383,13 @@ static void ceph_fault(struct ceph_connection *con)
3986 /* If there are no messages queued or keepalive pending, place
3987 * the connection in a STANDBY state */
3988 if (list_empty(&con->out_queue) &&
3989 - !test_bit(KEEPALIVE_PENDING, &con->state)) {
3990 + !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) {
3991 dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
3992 - clear_bit(WRITE_PENDING, &con->state);
3993 - set_bit(STANDBY, &con->state);
3994 + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3995 + con->state = CON_STATE_STANDBY;
3996 } else {
3997 /* retry after a delay. */
3998 + con->state = CON_STATE_PREOPEN;
3999 if (con->delay == 0)
4000 con->delay = BASE_DELAY_INTERVAL;
4001 else if (con->delay < MAX_DELAY_INTERVAL)
4002 @@ -2201,13 +2410,12 @@ static void ceph_fault(struct ceph_connection *con)
4003 * that when con_work restarts we schedule the
4004 * delay then.
4005 */
4006 - set_bit(BACKOFF, &con->state);
4007 + set_bit(CON_FLAG_BACKOFF, &con->flags);
4008 }
4009 }
4010
4011 out_unlock:
4012 mutex_unlock(&con->mutex);
4013 -out:
4014 /*
4015 * in case we faulted due to authentication, invalidate our
4016 * current tickets so that we can get new ones.
4017 @@ -2224,18 +2432,14 @@ out:
4018
4019
4020 /*
4021 - * create a new messenger instance
4022 + * initialize a new messenger instance
4023 */
4024 -struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
4025 - u32 supported_features,
4026 - u32 required_features)
4027 +void ceph_messenger_init(struct ceph_messenger *msgr,
4028 + struct ceph_entity_addr *myaddr,
4029 + u32 supported_features,
4030 + u32 required_features,
4031 + bool nocrc)
4032 {
4033 - struct ceph_messenger *msgr;
4034 -
4035 - msgr = kzalloc(sizeof(*msgr), GFP_KERNEL);
4036 - if (msgr == NULL)
4037 - return ERR_PTR(-ENOMEM);
4038 -
4039 msgr->supported_features = supported_features;
4040 msgr->required_features = required_features;
4041
4042 @@ -2248,30 +2452,23 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
4043 msgr->inst.addr.type = 0;
4044 get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
4045 encode_my_addr(msgr);
4046 + msgr->nocrc = nocrc;
4047
4048 - dout("messenger_create %p\n", msgr);
4049 - return msgr;
4050 -}
4051 -EXPORT_SYMBOL(ceph_messenger_create);
4052 + atomic_set(&msgr->stopping, 0);
4053
4054 -void ceph_messenger_destroy(struct ceph_messenger *msgr)
4055 -{
4056 - dout("destroy %p\n", msgr);
4057 - kfree(msgr);
4058 - dout("destroyed messenger %p\n", msgr);
4059 + dout("%s %p\n", __func__, msgr);
4060 }
4061 -EXPORT_SYMBOL(ceph_messenger_destroy);
4062 +EXPORT_SYMBOL(ceph_messenger_init);
4063
4064 static void clear_standby(struct ceph_connection *con)
4065 {
4066 /* come back from STANDBY? */
4067 - if (test_and_clear_bit(STANDBY, &con->state)) {
4068 - mutex_lock(&con->mutex);
4069 + if (con->state == CON_STATE_STANDBY) {
4070 dout("clear_standby %p and ++connect_seq\n", con);
4071 + con->state = CON_STATE_PREOPEN;
4072 con->connect_seq++;
4073 - WARN_ON(test_bit(WRITE_PENDING, &con->state));
4074 - WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
4075 - mutex_unlock(&con->mutex);
4076 + WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags));
4077 + WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags));
4078 }
4079 }
4080
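The ceph_messenger_create()/ceph_messenger_destroy() pair above (a kzalloc/kfree) becomes ceph_messenger_init() on caller-owned storage; the same patch embeds struct ceph_messenger in the client and struct ceph_connection in struct ceph_mon_client (see the mon_client.c hunks below). A sketch of the usage shift, with a hypothetical client type and the feature masks left as parameters:

	struct my_client {
		struct ceph_messenger msgr;  /* embedded, not a pointer */
	};

	static void my_client_setup(struct my_client *cl,
				    struct ceph_entity_addr *myaddr,
				    u32 supported, u32 required)
	{
		/* no allocation, hence no error path and no destroy
		 * pairing; the messenger's lifetime is the client's */
		ceph_messenger_init(&cl->msgr, myaddr, supported, required,
				    false /* nocrc: keep data CRCs on */);
	}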
4081 @@ -2280,21 +2477,24 @@ static void clear_standby(struct ceph_connection *con)
4082 */
4083 void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
4084 {
4085 - if (test_bit(CLOSED, &con->state)) {
4086 - dout("con_send %p closed, dropping %p\n", con, msg);
4087 - ceph_msg_put(msg);
4088 - return;
4089 - }
4090 -
4091 /* set src+dst */
4092 msg->hdr.src = con->msgr->inst.name;
4093 -
4094 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
4095 -
4096 msg->needs_out_seq = true;
4097
4098 - /* queue */
4099 mutex_lock(&con->mutex);
4100 +
4101 + if (con->state == CON_STATE_CLOSED) {
4102 + dout("con_send %p closed, dropping %p\n", con, msg);
4103 + ceph_msg_put(msg);
4104 + mutex_unlock(&con->mutex);
4105 + return;
4106 + }
4107 +
4108 + BUG_ON(msg->con != NULL);
4109 + msg->con = con->ops->get(con);
4110 + BUG_ON(msg->con == NULL);
4111 +
4112 BUG_ON(!list_empty(&msg->list_head));
4113 list_add_tail(&msg->list_head, &con->out_queue);
4114 dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
4115 @@ -2303,12 +2503,13 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
4116 le32_to_cpu(msg->hdr.front_len),
4117 le32_to_cpu(msg->hdr.middle_len),
4118 le32_to_cpu(msg->hdr.data_len));
4119 +
4120 + clear_standby(con);
4121 mutex_unlock(&con->mutex);
4122
4123 /* if there wasn't anything waiting to send before, queue
4124 * new work */
4125 - clear_standby(con);
4126 - if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
4127 + if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
4128 queue_con(con);
4129 }
4130 EXPORT_SYMBOL(ceph_con_send);
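ceph_con_send() now records the connection in msg->con, taking a reference via con->ops->get(); the matching put happens wherever the message is detached again (ceph_msg_revoke() below, process_message(), ceph_fault()). A sketch of that pairing as a hypothetical helper:

	/* hypothetical helper: drop a message's pin on its connection;
	 * pairs with the ops->get() taken in ceph_con_send() above */
	static void msg_detach_con(struct ceph_msg *msg)
	{
		struct ceph_connection *con = msg->con;

		BUG_ON(con == NULL);
		msg->con = NULL;
		con->ops->put(con);
	}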
4131 @@ -2316,24 +2517,34 @@ EXPORT_SYMBOL(ceph_con_send);
4132 /*
4133 * Revoke a message that was previously queued for send
4134 */
4135 -void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
4136 +void ceph_msg_revoke(struct ceph_msg *msg)
4137 {
4138 + struct ceph_connection *con = msg->con;
4139 +
4140 + if (!con)
4141 + return; /* Message not in our possession */
4142 +
4143 mutex_lock(&con->mutex);
4144 if (!list_empty(&msg->list_head)) {
4145 - dout("con_revoke %p msg %p - was on queue\n", con, msg);
4146 + dout("%s %p msg %p - was on queue\n", __func__, con, msg);
4147 list_del_init(&msg->list_head);
4148 - ceph_msg_put(msg);
4149 + BUG_ON(msg->con == NULL);
4150 + msg->con->ops->put(msg->con);
4151 + msg->con = NULL;
4152 msg->hdr.seq = 0;
4153 +
4154 + ceph_msg_put(msg);
4155 }
4156 if (con->out_msg == msg) {
4157 - dout("con_revoke %p msg %p - was sending\n", con, msg);
4158 + dout("%s %p msg %p - was sending\n", __func__, con, msg);
4159 con->out_msg = NULL;
4160 if (con->out_kvec_is_msg) {
4161 con->out_skip = con->out_kvec_bytes;
4162 con->out_kvec_is_msg = false;
4163 }
4164 - ceph_msg_put(msg);
4165 msg->hdr.seq = 0;
4166 +
4167 + ceph_msg_put(msg);
4168 }
4169 mutex_unlock(&con->mutex);
4170 }
4171 @@ -2341,17 +2552,27 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
4172 /*
4173 * Revoke a message that we may be reading data into
4174 */
4175 -void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
4176 +void ceph_msg_revoke_incoming(struct ceph_msg *msg)
4177 {
4178 + struct ceph_connection *con;
4179 +
4180 + BUG_ON(msg == NULL);
4181 + if (!msg->con) {
4182 + dout("%s msg %p null con\n", __func__, msg);
4183 +
4184 + return; /* Message not in our possession */
4185 + }
4186 +
4187 + con = msg->con;
4188 mutex_lock(&con->mutex);
4189 - if (con->in_msg && con->in_msg == msg) {
4190 + if (con->in_msg == msg) {
4191 unsigned front_len = le32_to_cpu(con->in_hdr.front_len);
4192 unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len);
4193 unsigned data_len = le32_to_cpu(con->in_hdr.data_len);
4194
4195 /* skip rest of message */
4196 - dout("con_revoke_pages %p msg %p revoked\n", con, msg);
4197 - con->in_base_pos = con->in_base_pos -
4198 + dout("%s %p msg %p revoked\n", __func__, con, msg);
4199 + con->in_base_pos = con->in_base_pos -
4200 sizeof(struct ceph_msg_header) -
4201 front_len -
4202 middle_len -
4203 @@ -2362,8 +2583,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
4204 con->in_tag = CEPH_MSGR_TAG_READY;
4205 con->in_seq++;
4206 } else {
4207 - dout("con_revoke_pages %p msg %p pages %p no-op\n",
4208 - con, con->in_msg, msg);
4209 + dout("%s %p in_msg %p msg %p no-op\n",
4210 + __func__, con, con->in_msg, msg);
4211 }
4212 mutex_unlock(&con->mutex);
4213 }
4214 @@ -2374,9 +2595,11 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
4215 void ceph_con_keepalive(struct ceph_connection *con)
4216 {
4217 dout("con_keepalive %p\n", con);
4218 + mutex_lock(&con->mutex);
4219 clear_standby(con);
4220 - if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
4221 - test_and_set_bit(WRITE_PENDING, &con->state) == 0)
4222 + mutex_unlock(&con->mutex);
4223 + if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 &&
4224 + test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
4225 queue_con(con);
4226 }
4227 EXPORT_SYMBOL(ceph_con_keepalive);
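ceph_con_keepalive() (like ceph_con_send() above) relies on a small queueing idiom: only the caller whose test_and_set_bit() flips CON_FLAG_WRITE_PENDING from 0 to 1 queues the work item, so any number of concurrent requests collapse into one scheduled con_work(). A simplified sketch; the real keepalive additionally short-circuits when CON_FLAG_KEEPALIVE_PENDING was already set:

	static void request_write(struct ceph_connection *con)
	{
		/* first setter wins: exactly one caller observes the
		 * 0 -> 1 transition and takes responsibility for queueing */
		if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
			queue_con(con);
	}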
4228 @@ -2395,6 +2618,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
4229 if (m == NULL)
4230 goto out;
4231 kref_init(&m->kref);
4232 +
4233 + m->con = NULL;
4234 INIT_LIST_HEAD(&m->list_head);
4235
4236 m->hdr.tid = 0;
4237 @@ -2490,46 +2715,78 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
4238 }
4239
4240 /*
4241 - * Generic message allocator, for incoming messages.
4242 + * Allocate a message for receiving an incoming message on a
4243 + * connection, and save the result in con->in_msg. Uses the
4244 + * connection's private alloc_msg op if available.
4245 + *
4246 + * Returns 0 on success, or a negative error code.
4247 + *
4248 + * On success, if we set *skip = 1:
4249 + * - the next message should be skipped and ignored.
4250 + * - con->in_msg == NULL
4251 + * or if we set *skip = 0:
4252 + * - con->in_msg is non-null.
4253 + * On error (ENOMEM, EAGAIN, ...),
4254 + * - con->in_msg == NULL
4255 */
4256 -static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
4257 - struct ceph_msg_header *hdr,
4258 - int *skip)
4259 +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
4260 {
4261 + struct ceph_msg_header *hdr = &con->in_hdr;
4262 int type = le16_to_cpu(hdr->type);
4263 int front_len = le32_to_cpu(hdr->front_len);
4264 int middle_len = le32_to_cpu(hdr->middle_len);
4265 - struct ceph_msg *msg = NULL;
4266 - int ret;
4267 + int ret = 0;
4268 +
4269 + BUG_ON(con->in_msg != NULL);
4270
4271 if (con->ops->alloc_msg) {
4272 + struct ceph_msg *msg;
4273 +
4274 mutex_unlock(&con->mutex);
4275 msg = con->ops->alloc_msg(con, hdr, skip);
4276 mutex_lock(&con->mutex);
4277 - if (!msg || *skip)
4278 - return NULL;
4279 + if (con->state != CON_STATE_OPEN) {
4280 + if (msg)
4281 + ceph_msg_put(msg);
4282 + return -EAGAIN;
4283 + }
4284 + con->in_msg = msg;
4285 + if (con->in_msg) {
4286 + con->in_msg->con = con->ops->get(con);
4287 + BUG_ON(con->in_msg->con == NULL);
4288 + }
4289 + if (*skip) {
4290 + con->in_msg = NULL;
4291 + return 0;
4292 + }
4293 + if (!con->in_msg) {
4294 + con->error_msg =
4295 + "error allocating memory for incoming message";
4296 + return -ENOMEM;
4297 + }
4298 }
4299 - if (!msg) {
4300 - *skip = 0;
4301 - msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
4302 - if (!msg) {
4303 + if (!con->in_msg) {
4304 + con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
4305 + if (!con->in_msg) {
4306 pr_err("unable to allocate msg type %d len %d\n",
4307 type, front_len);
4308 - return NULL;
4309 + return -ENOMEM;
4310 }
4311 - msg->page_alignment = le16_to_cpu(hdr->data_off);
4312 + con->in_msg->con = con->ops->get(con);
4313 + BUG_ON(con->in_msg->con == NULL);
4314 + con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
4315 }
4316 - memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
4317 + memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
4318
4319 - if (middle_len && !msg->middle) {
4320 - ret = ceph_alloc_middle(con, msg);
4321 + if (middle_len && !con->in_msg->middle) {
4322 + ret = ceph_alloc_middle(con, con->in_msg);
4323 if (ret < 0) {
4324 - ceph_msg_put(msg);
4325 - return NULL;
4326 + ceph_msg_put(con->in_msg);
4327 + con->in_msg = NULL;
4328 }
4329 }
4330
4331 - return msg;
4332 + return ret;
4333 }
4334
4335
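The comment block above spells out ceph_con_in_msg_alloc()'s contract; for reference, a condensed sketch of how read_partial_message() (earlier in this patch) consumes it, wrapped in a hypothetical helper:

	static int alloc_incoming(struct ceph_connection *con)
	{
		int skip = 0;
		int ret;

		ret = ceph_con_in_msg_alloc(con, &skip);
		if (ret < 0)
			return ret;  /* error: con->in_msg is NULL */
		if (skip)
			return 1;    /* drop this message: con->in_msg is
				      * NULL, caller advances past it and
				      * bumps in_seq */

		BUG_ON(!con->in_msg);  /* success: allocated and pinned */
		return 0;
	}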
4336 diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
4337 index 1845cde..89a6409 100644
4338 --- a/net/ceph/mon_client.c
4339 +++ b/net/ceph/mon_client.c
4340 @@ -106,9 +106,9 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
4341 monc->pending_auth = 1;
4342 monc->m_auth->front.iov_len = len;
4343 monc->m_auth->hdr.front_len = cpu_to_le32(len);
4344 - ceph_con_revoke(monc->con, monc->m_auth);
4345 + ceph_msg_revoke(monc->m_auth);
4346 ceph_msg_get(monc->m_auth); /* keep our ref */
4347 - ceph_con_send(monc->con, monc->m_auth);
4348 + ceph_con_send(&monc->con, monc->m_auth);
4349 }
4350
4351 /*
4352 @@ -117,8 +117,11 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
4353 static void __close_session(struct ceph_mon_client *monc)
4354 {
4355 dout("__close_session closing mon%d\n", monc->cur_mon);
4356 - ceph_con_revoke(monc->con, monc->m_auth);
4357 - ceph_con_close(monc->con);
4358 + ceph_msg_revoke(monc->m_auth);
4359 + ceph_msg_revoke_incoming(monc->m_auth_reply);
4360 + ceph_msg_revoke(monc->m_subscribe);
4361 + ceph_msg_revoke_incoming(monc->m_subscribe_ack);
4362 + ceph_con_close(&monc->con);
4363 monc->cur_mon = -1;
4364 monc->pending_auth = 0;
4365 ceph_auth_reset(monc->auth);
4366 @@ -142,9 +145,8 @@ static int __open_session(struct ceph_mon_client *monc)
4367 monc->want_next_osdmap = !!monc->want_next_osdmap;
4368
4369 dout("open_session mon%d opening\n", monc->cur_mon);
4370 - monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON;
4371 - monc->con->peer_name.num = cpu_to_le64(monc->cur_mon);
4372 - ceph_con_open(monc->con,
4373 + ceph_con_open(&monc->con,
4374 + CEPH_ENTITY_TYPE_MON, monc->cur_mon,
4375 &monc->monmap->mon_inst[monc->cur_mon].addr);
4376
4377 /* initiate authentication handshake */
4378 @@ -226,8 +228,8 @@ static void __send_subscribe(struct ceph_mon_client *monc)
4379
4380 msg->front.iov_len = p - msg->front.iov_base;
4381 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
4382 - ceph_con_revoke(monc->con, msg);
4383 - ceph_con_send(monc->con, ceph_msg_get(msg));
4384 + ceph_msg_revoke(msg);
4385 + ceph_con_send(&monc->con, ceph_msg_get(msg));
4386
4387 monc->sub_sent = jiffies | 1; /* never 0 */
4388 }
4389 @@ -247,7 +249,7 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
4390 if (monc->hunting) {
4391 pr_info("mon%d %s session established\n",
4392 monc->cur_mon,
4393 - ceph_pr_addr(&monc->con->peer_addr.in_addr));
4394 + ceph_pr_addr(&monc->con.peer_addr.in_addr));
4395 monc->hunting = false;
4396 }
4397 dout("handle_subscribe_ack after %d seconds\n", seconds);
4398 @@ -309,6 +311,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
4399 EXPORT_SYMBOL(ceph_monc_open_session);
4400
4401 /*
4402 + * We require the fsid and global_id in order to initialize our
4403 + * debugfs dir.
4404 + */
4405 +static bool have_debugfs_info(struct ceph_mon_client *monc)
4406 +{
4407 + dout("have_debugfs_info fsid %d globalid %lld\n",
4408 + (int)monc->client->have_fsid, monc->auth->global_id);
4409 + return monc->client->have_fsid && monc->auth->global_id > 0;
4410 +}
4411 +
4412 +/*
4413 * The monitor responds with a mount ack to indicate mount success. The
4414 * included client ticket allows the client to talk to MDSs and OSDs.
4415 */
4416 @@ -318,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
4417 struct ceph_client *client = monc->client;
4418 struct ceph_monmap *monmap = NULL, *old = monc->monmap;
4419 void *p, *end;
4420 + int had_debugfs_info, init_debugfs = 0;
4421
4422 mutex_lock(&monc->mutex);
4423
4424 + had_debugfs_info = have_debugfs_info(monc);
4425 +
4426 dout("handle_monmap\n");
4427 p = msg->front.iov_base;
4428 end = p + msg->front.iov_len;
4429 @@ -342,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
4430
4431 if (!client->have_fsid) {
4432 client->have_fsid = true;
4433 + if (!had_debugfs_info && have_debugfs_info(monc)) {
4434 + pr_info("client%lld fsid %pU\n",
4435 + ceph_client_id(monc->client),
4436 + &monc->client->fsid);
4437 + init_debugfs = 1;
4438 + }
4439 mutex_unlock(&monc->mutex);
4440 - /*
4441 - * do debugfs initialization without mutex to avoid
4442 - * creating a locking dependency
4443 - */
4444 - ceph_debugfs_client_init(client);
4445 +
4446 + if (init_debugfs) {
4447 + /*
4448 + * do debugfs initialization without mutex to avoid
4449 + * creating a locking dependency
4450 + */
4451 + ceph_debugfs_client_init(monc->client);
4452 + }
4453 +
4454 goto out_unlocked;
4455 }
4456 out:
4457 @@ -439,6 +465,7 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
4458 m = NULL;
4459 } else {
4460 dout("get_generic_reply %lld got %p\n", tid, req->reply);
4461 + *skip = 0;
4462 m = ceph_msg_get(req->reply);
4463 /*
4464 * we don't need to track the connection reading into
4465 @@ -461,7 +488,7 @@ static int do_generic_request(struct ceph_mon_client *monc,
4466 req->request->hdr.tid = cpu_to_le64(req->tid);
4467 __insert_generic_request(monc, req);
4468 monc->num_generic_requests++;
4469 - ceph_con_send(monc->con, ceph_msg_get(req->request));
4470 + ceph_con_send(&monc->con, ceph_msg_get(req->request));
4471 mutex_unlock(&monc->mutex);
4472
4473 err = wait_for_completion_interruptible(&req->completion);
4474 @@ -684,8 +711,9 @@ static void __resend_generic_request(struct ceph_mon_client *monc)
4475
4476 for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
4477 req = rb_entry(p, struct ceph_mon_generic_request, node);
4478 - ceph_con_revoke(monc->con, req->request);
4479 - ceph_con_send(monc->con, ceph_msg_get(req->request));
4480 + ceph_msg_revoke(req->request);
4481 + ceph_msg_revoke_incoming(req->reply);
4482 + ceph_con_send(&monc->con, ceph_msg_get(req->request));
4483 }
4484 }
4485
4486 @@ -705,7 +733,7 @@ static void delayed_work(struct work_struct *work)
4487 __close_session(monc);
4488 __open_session(monc); /* continue hunting */
4489 } else {
4490 - ceph_con_keepalive(monc->con);
4491 + ceph_con_keepalive(&monc->con);
4492
4493 __validate_auth(monc);
4494
4495 @@ -760,19 +788,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
4496 goto out;
4497
4498 /* connection */
4499 - monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
4500 - if (!monc->con)
4501 - goto out_monmap;
4502 - ceph_con_init(monc->client->msgr, monc->con);
4503 - monc->con->private = monc;
4504 - monc->con->ops = &mon_con_ops;
4505 -
4506 /* authentication */
4507 monc->auth = ceph_auth_init(cl->options->name,
4508 cl->options->key);
4509 if (IS_ERR(monc->auth)) {
4510 err = PTR_ERR(monc->auth);
4511 - goto out_con;
4512 + goto out_monmap;
4513 }
4514 monc->auth->want_keys =
4515 CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
4516 @@ -801,6 +822,9 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
4517 if (!monc->m_auth)
4518 goto out_auth_reply;
4519
4520 + ceph_con_init(&monc->con, monc, &mon_con_ops,
4521 + &monc->client->msgr);
4522 +
4523 monc->cur_mon = -1;
4524 monc->hunting = true;
4525 monc->sub_renew_after = jiffies;
4526 @@ -824,8 +848,6 @@ out_subscribe_ack:
4527 ceph_msg_put(monc->m_subscribe_ack);
4528 out_auth:
4529 ceph_auth_destroy(monc->auth);
4530 -out_con:
4531 - monc->con->ops->put(monc->con);
4532 out_monmap:
4533 kfree(monc->monmap);
4534 out:
4535 @@ -841,12 +863,16 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
4536 mutex_lock(&monc->mutex);
4537 __close_session(monc);
4538
4539 - monc->con->private = NULL;
4540 - monc->con->ops->put(monc->con);
4541 - monc->con = NULL;
4542 -
4543 mutex_unlock(&monc->mutex);
4544
4545 + /*
4546 + * flush msgr queue before we destroy ourselves to ensure that:
4547 + * - any work that references our embedded con is finished.
4548 + * - any osd_client or other work that may reference an authorizer
4549 + * finishes before we shut down the auth subsystem.
4550 + */
4551 + ceph_msgr_flush();
4552 +
4553 ceph_auth_destroy(monc->auth);
4554
4555 ceph_msg_put(monc->m_auth);
4556 @@ -863,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
4557 {
4558 int ret;
4559 int was_auth = 0;
4560 + int had_debugfs_info, init_debugfs = 0;
4561
4562 mutex_lock(&monc->mutex);
4563 + had_debugfs_info = have_debugfs_info(monc);
4564 if (monc->auth->ops)
4565 was_auth = monc->auth->ops->is_authenticated(monc->auth);
4566 monc->pending_auth = 0;
4567 @@ -880,14 +908,29 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
4568 } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
4569 dout("authenticated, starting session\n");
4570
4571 - monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
4572 - monc->client->msgr->inst.name.num =
4573 + monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
4574 + monc->client->msgr.inst.name.num =
4575 cpu_to_le64(monc->auth->global_id);
4576
4577 __send_subscribe(monc);
4578 __resend_generic_request(monc);
4579 }
4580 +
4581 + if (!had_debugfs_info && have_debugfs_info(monc)) {
4582 + pr_info("client%lld fsid %pU\n",
4583 + ceph_client_id(monc->client),
4584 + &monc->client->fsid);
4585 + init_debugfs = 1;
4586 + }
4587 mutex_unlock(&monc->mutex);
4588 +
4589 + if (init_debugfs) {
4590 + /*
4591 + * do debugfs initialization without mutex to avoid
4592 + * creating a locking dependency
4593 + */
4594 + ceph_debugfs_client_init(monc->client);
4595 + }
4596 }
4597
4598 static int __validate_auth(struct ceph_mon_client *monc)
4599 @@ -992,6 +1035,8 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
4600 case CEPH_MSG_MDS_MAP:
4601 case CEPH_MSG_OSD_MAP:
4602 m = ceph_msg_new(type, front_len, GFP_NOFS, false);
4603 + if (!m)
4604 + return NULL; /* ENOMEM--return skip == 0 */
4605 break;
4606 }
4607
4608 @@ -1021,7 +1066,7 @@ static void mon_fault(struct ceph_connection *con)
4609 if (!monc->hunting)
4610 pr_info("mon%d %s session lost, "
4611 "hunting for new mon\n", monc->cur_mon,
4612 - ceph_pr_addr(&monc->con->peer_addr.in_addr));
4613 + ceph_pr_addr(&monc->con.peer_addr.in_addr));
4614
4615 __close_session(monc);
4616 if (!monc->hunting) {
4617 @@ -1036,9 +1081,23 @@ out:
4618 mutex_unlock(&monc->mutex);
4619 }
4620
4621 +/*
4622 + * We can ignore refcounting on the connection struct, as all references
4623 + * will come from the messenger workqueue, which is drained prior to
4624 + * mon_client destruction.
4625 + */
4626 +static struct ceph_connection *con_get(struct ceph_connection *con)
4627 +{
4628 + return con;
4629 +}
4630 +
4631 +static void con_put(struct ceph_connection *con)
4632 +{
4633 +}
4634 +
4635 static const struct ceph_connection_operations mon_con_ops = {
4636 - .get = ceph_con_get,
4637 - .put = ceph_con_put,
4638 + .get = con_get,
4639 + .put = con_put,
4640 .dispatch = dispatch,
4641 .fault = mon_fault,
4642 .alloc_msg = mon_alloc_msg,
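The no-op con_get()/con_put() above are safe only because of the ordering ceph_monc_stop() now enforces (see its hunk earlier): close the session, then ceph_msgr_flush() to drain every queued con_work() before the auth subsystem and the mon_client itself go away. In sketch form (body condensed from that hunk):

	mutex_lock(&monc->mutex);
	__close_session(monc);          /* revoke in-flight messages */
	mutex_unlock(&monc->mutex);

	ceph_msgr_flush();              /* no con_work() still running or
					 * queued can touch &monc->con */

	ceph_auth_destroy(monc->auth);  /* safe: no authorizer users left */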
4643 diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
4644 index 11d5f41..ddec1c1 100644
4645 --- a/net/ceph/msgpool.c
4646 +++ b/net/ceph/msgpool.c
4647 @@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
4648 struct ceph_msgpool *pool = arg;
4649 struct ceph_msg *msg;
4650
4651 - msg = ceph_msg_new(0, pool->front_len, gfp_mask, true);
4652 + msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
4653 if (!msg) {
4654 dout("msgpool_alloc %s failed\n", pool->name);
4655 } else {
4656 @@ -32,10 +32,11 @@ static void msgpool_free(void *element, void *arg)
4657 ceph_msg_put(msg);
4658 }
4659
4660 -int ceph_msgpool_init(struct ceph_msgpool *pool,
4661 +int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
4662 int front_len, int size, bool blocking, const char *name)
4663 {
4664 dout("msgpool %s init\n", name);
4665 + pool->type = type;
4666 pool->front_len = front_len;
4667 pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
4668 if (!pool->pool)
4669 @@ -61,7 +62,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
4670 WARN_ON(1);
4671
4672 /* try to alloc a fresh message */
4673 - return ceph_msg_new(0, front_len, GFP_NOFS, false);
4674 + return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
4675 }
4676
4677 msg = mempool_alloc(pool->pool, GFP_NOFS);
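The msgpool fix matters because ceph_msgpool_get() has two allocation paths: the mempool hit and a ceph_msg_new() fallback when the pool is exhausted. Recording the type at init time means both paths now stamp the correct hdr.type instead of 0. A sketch of pool setup against the new signature; the pool name and sizes are illustrative:

	static int my_pool_setup(struct ceph_msgpool *pool, int front_len)
	{
		/* the type given here is reused by both the preallocated
		 * elements and the ceph_msg_new() fallback in
		 * ceph_msgpool_get() */
		return ceph_msgpool_init(pool, CEPH_MSG_OSD_OPREPLY,
					 front_len, 16 /* prealloc */, true,
					 "example pool");
	}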
4678 diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
4679 index 5e25405..a79dbae 100644
4680 --- a/net/ceph/osd_client.c
4681 +++ b/net/ceph/osd_client.c
4682 @@ -52,7 +52,7 @@ static int op_has_extent(int op)
4683 op == CEPH_OSD_OP_WRITE);
4684 }
4685
4686 -void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4687 +int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4688 struct ceph_file_layout *layout,
4689 u64 snapid,
4690 u64 off, u64 *plen, u64 *bno,
4691 @@ -62,12 +62,15 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4692 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
4693 u64 orig_len = *plen;
4694 u64 objoff, objlen; /* extent in object */
4695 + int r;
4696
4697 reqhead->snapid = cpu_to_le64(snapid);
4698
4699 /* object extent? */
4700 - ceph_calc_file_object_mapping(layout, off, plen, bno,
4701 - &objoff, &objlen);
4702 + r = ceph_calc_file_object_mapping(layout, off, plen, bno,
4703 + &objoff, &objlen);
4704 + if (r < 0)
4705 + return r;
4706 if (*plen < orig_len)
4707 dout(" skipping last %llu, final file extent %llu~%llu\n",
4708 orig_len - *plen, off, *plen);
4709 @@ -83,7 +86,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4710
4711 dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
4712 *bno, objoff, objlen, req->r_num_pages);
4713 -
4714 + return 0;
4715 }
4716 EXPORT_SYMBOL(ceph_calc_raw_layout);
4717
4718 @@ -112,20 +115,25 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
4719 *
4720 * fill osd op in request message.
4721 */
4722 -static void calc_layout(struct ceph_osd_client *osdc,
4723 - struct ceph_vino vino,
4724 - struct ceph_file_layout *layout,
4725 - u64 off, u64 *plen,
4726 - struct ceph_osd_request *req,
4727 - struct ceph_osd_req_op *op)
4728 +static int calc_layout(struct ceph_osd_client *osdc,
4729 + struct ceph_vino vino,
4730 + struct ceph_file_layout *layout,
4731 + u64 off, u64 *plen,
4732 + struct ceph_osd_request *req,
4733 + struct ceph_osd_req_op *op)
4734 {
4735 u64 bno;
4736 + int r;
4737
4738 - ceph_calc_raw_layout(osdc, layout, vino.snap, off,
4739 - plen, &bno, req, op);
4740 + r = ceph_calc_raw_layout(osdc, layout, vino.snap, off,
4741 + plen, &bno, req, op);
4742 + if (r < 0)
4743 + return r;
4744
4745 snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
4746 req->r_oid_len = strlen(req->r_oid);
4747 +
4748 + return r;
4749 }
4750
4751 /*
4752 @@ -139,15 +147,14 @@ void ceph_osdc_release_request(struct kref *kref)
4753
4754 if (req->r_request)
4755 ceph_msg_put(req->r_request);
4756 - if (req->r_reply)
4757 - ceph_msg_put(req->r_reply);
4758 if (req->r_con_filling_msg) {
4759 - dout("release_request revoking pages %p from con %p\n",
4760 + dout("%s revoking pages %p from con %p\n", __func__,
4761 req->r_pages, req->r_con_filling_msg);
4762 - ceph_con_revoke_message(req->r_con_filling_msg,
4763 - req->r_reply);
4764 - ceph_con_put(req->r_con_filling_msg);
4765 + ceph_msg_revoke_incoming(req->r_reply);
4766 + req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
4767 }
4768 + if (req->r_reply)
4769 + ceph_msg_put(req->r_reply);
4770 if (req->r_own_pages)
4771 ceph_release_page_vector(req->r_pages,
4772 req->r_num_pages);
4773 @@ -243,6 +250,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
4774 }
4775 ceph_pagelist_init(req->r_trail);
4776 }
4777 +
4778 /* create request message; allow space for oid */
4779 msg_size += MAX_OBJ_NAME_SIZE;
4780 if (snapc)
4781 @@ -256,7 +264,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
4782 return NULL;
4783 }
4784
4785 - msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP);
4786 memset(msg->front.iov_base, 0, msg->front.iov_len);
4787
4788 req->r_request = msg;
4789 @@ -278,7 +285,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
4790 {
4791 dst->op = cpu_to_le16(src->op);
4792
4793 - switch (dst->op) {
4794 + switch (src->op) {
4795 case CEPH_OSD_OP_READ:
4796 case CEPH_OSD_OP_WRITE:
4797 dst->extent.offset =
4798 @@ -624,7 +631,7 @@ static void osd_reset(struct ceph_connection *con)
4799 /*
4800 * Track open sessions with osds.
4801 */
4802 -static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
4803 +static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
4804 {
4805 struct ceph_osd *osd;
4806
4807 @@ -634,15 +641,13 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
4808
4809 atomic_set(&osd->o_ref, 1);
4810 osd->o_osdc = osdc;
4811 + osd->o_osd = onum;
4812 INIT_LIST_HEAD(&osd->o_requests);
4813 INIT_LIST_HEAD(&osd->o_linger_requests);
4814 INIT_LIST_HEAD(&osd->o_osd_lru);
4815 osd->o_incarnation = 1;
4816
4817 - ceph_con_init(osdc->client->msgr, &osd->o_con);
4818 - osd->o_con.private = osd;
4819 - osd->o_con.ops = &osd_con_ops;
4820 - osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
4821 + ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
4822
4823 INIT_LIST_HEAD(&osd->o_keepalive_item);
4824 return osd;
4825 @@ -664,11 +669,11 @@ static void put_osd(struct ceph_osd *osd)
4826 {
4827 dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
4828 atomic_read(&osd->o_ref) - 1);
4829 - if (atomic_dec_and_test(&osd->o_ref)) {
4830 + if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) {
4831 struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
4832
4833 - if (osd->o_authorizer)
4834 - ac->ops->destroy_authorizer(ac, osd->o_authorizer);
4835 + if (ac->ops && ac->ops->destroy_authorizer)
4836 + ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer);
4837 kfree(osd);
4838 }
4839 }
4840 @@ -752,7 +757,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
4841 ret = -EAGAIN;
4842 } else {
4843 ceph_con_close(&osd->o_con);
4844 - ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
4845 + ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
4846 + &osdc->osdmap->osd_addr[osd->o_osd]);
4847 osd->o_incarnation++;
4848 }
4849 return ret;
4850 @@ -841,13 +847,19 @@ static void register_request(struct ceph_osd_client *osdc,
4851 static void __unregister_request(struct ceph_osd_client *osdc,
4852 struct ceph_osd_request *req)
4853 {
4854 + if (RB_EMPTY_NODE(&req->r_node)) {
4855 + dout("__unregister_request %p tid %lld not registered\n",
4856 + req, req->r_tid);
4857 + return;
4858 + }
4859 +
4860 dout("__unregister_request %p tid %lld\n", req, req->r_tid);
4861 rb_erase(&req->r_node, &osdc->requests);
4862 osdc->num_requests--;
4863
4864 if (req->r_osd) {
4865 /* make sure the original request isn't in flight. */
4866 - ceph_con_revoke(&req->r_osd->o_con, req->r_request);
4867 + ceph_msg_revoke(req->r_request);
4868
4869 list_del_init(&req->r_osd_item);
4870 if (list_empty(&req->r_osd->o_requests) &&
4871 @@ -874,7 +886,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
4872 static void __cancel_request(struct ceph_osd_request *req)
4873 {
4874 if (req->r_sent && req->r_osd) {
4875 - ceph_con_revoke(&req->r_osd->o_con, req->r_request);
4876 + ceph_msg_revoke(req->r_request);
4877 req->r_sent = 0;
4878 }
4879 }
4880 @@ -884,7 +896,9 @@ static void __register_linger_request(struct ceph_osd_client *osdc,
4881 {
4882 dout("__register_linger_request %p\n", req);
4883 list_add_tail(&req->r_linger_item, &osdc->req_linger);
4884 - list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
4885 + if (req->r_osd)
4886 + list_add_tail(&req->r_linger_osd,
4887 + &req->r_osd->o_linger_requests);
4888 }
4889
4890 static void __unregister_linger_request(struct ceph_osd_client *osdc,
4891 @@ -992,18 +1006,18 @@ static int __map_request(struct ceph_osd_client *osdc,
4892 req->r_osd = __lookup_osd(osdc, o);
4893 if (!req->r_osd && o >= 0) {
4894 err = -ENOMEM;
4895 - req->r_osd = create_osd(osdc);
4896 + req->r_osd = create_osd(osdc, o);
4897 if (!req->r_osd) {
4898 list_move(&req->r_req_lru_item, &osdc->req_notarget);
4899 goto out;
4900 }
4901
4902 dout("map_request osd %p is osd%d\n", req->r_osd, o);
4903 - req->r_osd->o_osd = o;
4904 - req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
4905 __insert_osd(osdc, req->r_osd);
4906
4907 - ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
4908 + ceph_con_open(&req->r_osd->o_con,
4909 + CEPH_ENTITY_TYPE_OSD, o,
4910 + &osdc->osdmap->osd_addr[o]);
4911 }
4912
4913 if (req->r_osd) {
4914 @@ -1210,7 +1224,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
4915 if (req->r_con_filling_msg == con && req->r_reply == msg) {
4916 dout(" dropping con_filling_msg ref %p\n", con);
4917 req->r_con_filling_msg = NULL;
4918 - ceph_con_put(con);
4919 + con->ops->put(con);
4920 }
4921
4922 if (!req->r_got_reply) {
4923 @@ -1298,8 +1312,9 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
4924
4925 dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
4926 mutex_lock(&osdc->request_mutex);
4927 - for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
4928 + for (p = rb_first(&osdc->requests); p; ) {
4929 req = rb_entry(p, struct ceph_osd_request, r_node);
4930 + p = rb_next(p);
4931 err = __map_request(osdc, req, force_resend);
4932 if (err < 0)
4933 continue; /* error */
4934 @@ -1307,10 +1322,23 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
4935 dout("%p tid %llu maps to no osd\n", req, req->r_tid);
4936 needmap++; /* request a newer map */
4937 } else if (err > 0) {
4938 - dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
4939 - req->r_osd ? req->r_osd->o_osd : -1);
4940 - if (!req->r_linger)
4941 + if (!req->r_linger) {
4942 + dout("%p tid %llu requeued on osd%d\n", req,
4943 + req->r_tid,
4944 + req->r_osd ? req->r_osd->o_osd : -1);
4945 req->r_flags |= CEPH_OSD_FLAG_RETRY;
4946 + }
4947 + }
4948 + if (req->r_linger && list_empty(&req->r_linger_item)) {
4949 + /*
4950 + * register as a linger so that we will
4951 + * re-submit below and get a new tid
4952 + */
4953 + dout("%p tid %llu restart on osd%d\n",
4954 + req, req->r_tid,
4955 + req->r_osd ? req->r_osd->o_osd : -1);
4956 + __register_linger_request(osdc, req);
4957 + __unregister_request(osdc, req);
4958 }
4959 }
4960
4961 @@ -1385,7 +1413,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
4962 epoch, maplen);
4963 newmap = osdmap_apply_incremental(&p, next,
4964 osdc->osdmap,
4965 - osdc->client->msgr);
4966 + &osdc->client->msgr);
4967 if (IS_ERR(newmap)) {
4968 err = PTR_ERR(newmap);
4969 goto bad;
4970 @@ -1833,11 +1861,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
4971 if (!osdc->req_mempool)
4972 goto out;
4973
4974 - err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true,
4975 + err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
4976 + OSD_OP_FRONT_LEN, 10, true,
4977 "osd_op");
4978 if (err < 0)
4979 goto out_mempool;
4980 - err = ceph_msgpool_init(&osdc->msgpool_op_reply,
4981 + err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
4982 OSD_OPREPLY_FRONT_LEN, 10, true,
4983 "osd_op_reply");
4984 if (err < 0)
4985 @@ -2019,10 +2048,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
4986 }
4987
4988 if (req->r_con_filling_msg) {
4989 - dout("get_reply revoking msg %p from old con %p\n",
4990 + dout("%s revoking msg %p from old con %p\n", __func__,
4991 req->r_reply, req->r_con_filling_msg);
4992 - ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
4993 - ceph_con_put(req->r_con_filling_msg);
4994 + ceph_msg_revoke_incoming(req->r_reply);
4995 + req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
4996 req->r_con_filling_msg = NULL;
4997 }
4998
4999 @@ -2057,7 +2086,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
5000 #endif
5001 }
5002 *skip = 0;
5003 - req->r_con_filling_msg = ceph_con_get(con);
5004 + req->r_con_filling_msg = con->ops->get(con);
5005 dout("get_reply tid %lld %p\n", tid, m);
5006
5007 out:
5008 @@ -2074,6 +2103,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
5009 int type = le16_to_cpu(hdr->type);
5010 int front = le32_to_cpu(hdr->front_len);
5011
5012 + *skip = 0;
5013 switch (type) {
5014 case CEPH_MSG_OSD_MAP:
5015 case CEPH_MSG_WATCH_NOTIFY:
5016 @@ -2108,37 +2138,32 @@ static void put_osd_con(struct ceph_connection *con)
5017 /*
5018 * authentication
5019 */
5020 -static int get_authorizer(struct ceph_connection *con,
5021 - void **buf, int *len, int *proto,
5022 - void **reply_buf, int *reply_len, int force_new)
5023 +/*
5024 + * Note: returned pointer is the address of a structure that's
5025 + * managed separately. Caller must *not* attempt to free it.
5026 + */
5027 +static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
5028 + int *proto, int force_new)
5029 {
5030 struct ceph_osd *o = con->private;
5031 struct ceph_osd_client *osdc = o->o_osdc;
5032 struct ceph_auth_client *ac = osdc->client->monc.auth;
5033 - int ret = 0;
5034 + struct ceph_auth_handshake *auth = &o->o_auth;
5035
5036 - if (force_new && o->o_authorizer) {
5037 - ac->ops->destroy_authorizer(ac, o->o_authorizer);
5038 - o->o_authorizer = NULL;
5039 - }
5040 - if (o->o_authorizer == NULL) {
5041 - ret = ac->ops->create_authorizer(
5042 - ac, CEPH_ENTITY_TYPE_OSD,
5043 - &o->o_authorizer,
5044 - &o->o_authorizer_buf,
5045 - &o->o_authorizer_buf_len,
5046 - &o->o_authorizer_reply_buf,
5047 - &o->o_authorizer_reply_buf_len);
5048 + if (force_new && auth->authorizer) {
5049 + if (ac->ops && ac->ops->destroy_authorizer)
5050 + ac->ops->destroy_authorizer(ac, auth->authorizer);
5051 + auth->authorizer = NULL;
5052 + }
5053 + if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
5054 + int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
5055 + auth);
5056 if (ret)
5057 - return ret;
5058 + return ERR_PTR(ret);
5059 }
5060 -
5061 *proto = ac->protocol;
5062 - *buf = o->o_authorizer_buf;
5063 - *len = o->o_authorizer_buf_len;
5064 - *reply_buf = o->o_authorizer_reply_buf;
5065 - *reply_len = o->o_authorizer_reply_buf_len;
5066 - return 0;
5067 +
5068 + return auth;
5069 }
5070
5071
5072 @@ -2148,7 +2173,11 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
5073 struct ceph_osd_client *osdc = o->o_osdc;
5074 struct ceph_auth_client *ac = osdc->client->monc.auth;
5075
5076 - return ac->ops->verify_authorizer_reply(ac, o->o_authorizer, len);
5077 + /*
5078 + * XXX If ac->ops or ac->ops->verify_authorizer_reply is null,
5079 + * XXX which do we do: succeed or fail?
5080 + */
5081 + return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len);
5082 }
5083
5084 static int invalidate_authorizer(struct ceph_connection *con)
5085 @@ -2157,7 +2186,7 @@ static int invalidate_authorizer(struct ceph_connection *con)
5086 struct ceph_osd_client *osdc = o->o_osdc;
5087 struct ceph_auth_client *ac = osdc->client->monc.auth;
5088
5089 - if (ac->ops->invalidate_authorizer)
5090 + if (ac->ops && ac->ops->invalidate_authorizer)
5091 ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
5092
5093 return ceph_monc_validate_auth(&osdc->client->monc);
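
The kick_requests() hunk above stops advancing the iterator in the for-statement and instead caches rb_next(p) before handling the request, because the new linger handling can call __unregister_request(), which rb_erase()s the current node. A minimal sketch of the same save-the-successor-first pattern, shown on a plain singly linked list rather than the kernel rbtree API (standalone C, hypothetical names):

    #include <stdlib.h>
    #include <stdio.h>

    struct req {
        int tid;
        struct req *next;
    };

    static struct req *requests;

    /* May unlink and free r, like __unregister_request() erasing r_node. */
    static void handle(struct req *r)
    {
        if (r->tid % 2) {
            struct req **link = &requests;
            while (*link != r)
                link = &(*link)->next;
            *link = r->next;
            free(r);
        }
    }

    int main(void)
    {
        for (int i = 5; i >= 1; i--) {
            struct req *r = malloc(sizeof(*r));
            if (!r)
                return 1;
            r->tid = i;
            r->next = requests;
            requests = r;
        }

        /* Advance first: 'next' stays valid even if handle() frees r. */
        for (struct req *r = requests, *next; r; r = next) {
            next = r->next;
            handle(r);
        }

        for (struct req *r = requests; r; r = r->next)
            printf("tid %d survived\n", r->tid);
        return 0;
    }
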
5094 diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
5095 index 29ad46e..430076e 100644
5096 --- a/net/ceph/osdmap.c
5097 +++ b/net/ceph/osdmap.c
5098 @@ -495,15 +495,16 @@ static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
5099 ceph_decode_32_safe(p, end, pool, bad);
5100 ceph_decode_32_safe(p, end, len, bad);
5101 dout(" pool %d len %d\n", pool, len);
5102 + ceph_decode_need(p, end, len, bad);
5103 pi = __lookup_pg_pool(&map->pg_pools, pool);
5104 if (pi) {
5105 + char *name = kstrndup(*p, len, GFP_NOFS);
5106 +
5107 + if (!name)
5108 + return -ENOMEM;
5109 kfree(pi->name);
5110 - pi->name = kmalloc(len + 1, GFP_NOFS);
5111 - if (pi->name) {
5112 - memcpy(pi->name, *p, len);
5113 - pi->name[len] = '\0';
5114 - dout(" name is %s\n", pi->name);
5115 - }
5116 + pi->name = name;
5117 + dout(" name is %s\n", pi->name);
5118 }
5119 *p += len;
5120 }
5121 @@ -673,6 +674,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
5122 ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
5123 ceph_decode_copy(p, &pgid, sizeof(pgid));
5124 n = ceph_decode_32(p);
5125 + err = -EINVAL;
5126 + if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
5127 + goto bad;
5128 ceph_decode_need(p, end, n * sizeof(u32), bad);
5129 err = -ENOMEM;
5130 pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
5131 @@ -890,8 +894,16 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
5132 pglen = ceph_decode_32(p);
5133
5134 if (pglen) {
5135 - /* insert */
5136 ceph_decode_need(p, end, pglen*sizeof(u32), bad);
5137 +
5138 + /* removing existing (if any) */
5139 + (void) __remove_pg_mapping(&map->pg_temp, pgid);
5140 +
5141 + /* insert */
5142 + if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) {
5143 + err = -EINVAL;
5144 + goto bad;
5145 + }
5146 pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
5147 if (!pg) {
5148 err = -ENOMEM;
5149 @@ -940,7 +952,7 @@ bad:
5150 * for now, we write only a single su, until we can
5151 * pass a stride back to the caller.
5152 */
5153 -void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5154 +int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5155 u64 off, u64 *plen,
5156 u64 *ono,
5157 u64 *oxoff, u64 *oxlen)
5158 @@ -954,11 +966,17 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5159
5160 dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen,
5161 osize, su);
5162 + if (su == 0 || sc == 0)
5163 + goto invalid;
5164 su_per_object = osize / su;
5165 + if (su_per_object == 0)
5166 + goto invalid;
5167 dout("osize %u / su %u = su_per_object %u\n", osize, su,
5168 su_per_object);
5169
5170 - BUG_ON((su & ~PAGE_MASK) != 0);
5171 + if ((su & ~PAGE_MASK) != 0)
5172 + goto invalid;
5173 +
5174 /* bl = *off / su; */
5175 t = off;
5176 do_div(t, su);
5177 @@ -986,6 +1004,14 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5178 *plen = *oxlen;
5179
5180 dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
5181 + return 0;
5182 +
5183 +invalid:
5184 + dout(" invalid layout\n");
5185 + *ono = 0;
5186 + *oxoff = 0;
5187 + *oxlen = 0;
5188 + return -EINVAL;
5189 }
5190 EXPORT_SYMBOL(ceph_calc_file_object_mapping);
5191
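
Both osdmap hunks above add the same bound before kmalloc(sizeof(*pg) + n*sizeof(u32)): n comes off the wire, and an attacker-chosen value can wrap the size computation, producing a short allocation that the decode loop then overruns. A standalone sketch of the guard (hypothetical struct; the kernel open-codes the identical UINT_MAX bound):

    #include <limits.h>
    #include <stdint.h>
    #include <stdlib.h>

    struct pg_mapping {
        uint64_t pgid;
        uint32_t len;
        uint32_t osds[];    /* n trailing entries decoded from the wire */
    };

    /* Refuse any n for which sizeof(*pg) + n * sizeof(u32) would wrap. */
    static struct pg_mapping *alloc_pg_mapping(uint32_t n)
    {
        if (n > (UINT_MAX - sizeof(struct pg_mapping)) / sizeof(uint32_t))
            return NULL;    /* -EINVAL in the kernel version */
        return malloc(sizeof(struct pg_mapping) + (size_t)n * sizeof(uint32_t));
    }

    int main(void)
    {
        struct pg_mapping *pg = alloc_pg_mapping(UINT32_MAX);
        return pg != NULL;  /* exits 0: the oversized request was refused */
    }
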
5192 diff --git a/net/core/dev.c b/net/core/dev.c
5193 index 24a21f3..eb858dc 100644
5194 --- a/net/core/dev.c
5195 +++ b/net/core/dev.c
5196 @@ -2763,8 +2763,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
5197 if (unlikely(tcpu != next_cpu) &&
5198 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
5199 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
5200 - rflow->last_qtail)) >= 0))
5201 + rflow->last_qtail)) >= 0)) {
5202 + tcpu = next_cpu;
5203 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
5204 + }
5205
5206 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
5207 *rflowp = rflow;
5208 diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
5209 index 626698f..76f6d0b 100644
5210 --- a/net/core/dev_addr_lists.c
5211 +++ b/net/core/dev_addr_lists.c
5212 @@ -308,7 +308,8 @@ int dev_addr_del(struct net_device *dev, unsigned char *addr,
5213 */
5214 ha = list_first_entry(&dev->dev_addrs.list,
5215 struct netdev_hw_addr, list);
5216 - if (ha->addr == dev->dev_addr && ha->refcount == 1)
5217 + if (!memcmp(ha->addr, addr, dev->addr_len) &&
5218 + ha->type == addr_type && ha->refcount == 1)
5219 return -ENOENT;
5220
5221 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
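
The dev_addr_del() hunk above replaces a pointer identity test (ha->addr == dev->dev_addr) with memcmp() over addr_len plus an address-type check: the pointer form asked whether the entry was the primary-address slot, not whether its bytes matched the address being deleted. The distinction in miniature (standalone C, hypothetical helper):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define ETH_ALEN 6

    /* Pointer equality asks "same buffer?"; memcmp asks "same bytes?". */
    static bool same_hw_addr(const unsigned char *a, const unsigned char *b,
                             size_t len)
    {
        return memcmp(a, b, len) == 0;
    }

    int main(void)
    {
        unsigned char stored[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
        unsigned char query[ETH_ALEN]  = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

        printf("pointer test: %d\n", stored == query);                 /* 0 */
        printf("byte test:    %d\n", same_hw_addr(stored, query, ETH_ALEN)); /* 1 */
        return 0;
    }
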
5222 diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
5223 index 2fd0fba..59ef40a 100644
5224 --- a/net/ipv4/ip_sockglue.c
5225 +++ b/net/ipv4/ip_sockglue.c
5226 @@ -456,19 +456,28 @@ static int do_ip_setsockopt(struct sock *sk, int level,
5227 struct inet_sock *inet = inet_sk(sk);
5228 int val = 0, err;
5229
5230 - if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
5231 - (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
5232 - (1<<IP_RETOPTS) | (1<<IP_TOS) |
5233 - (1<<IP_TTL) | (1<<IP_HDRINCL) |
5234 - (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
5235 - (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
5236 - (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
5237 - (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
5238 - optname == IP_UNICAST_IF ||
5239 - optname == IP_MULTICAST_TTL ||
5240 - optname == IP_MULTICAST_ALL ||
5241 - optname == IP_MULTICAST_LOOP ||
5242 - optname == IP_RECVORIGDSTADDR) {
5243 + switch (optname) {
5244 + case IP_PKTINFO:
5245 + case IP_RECVTTL:
5246 + case IP_RECVOPTS:
5247 + case IP_RECVTOS:
5248 + case IP_RETOPTS:
5249 + case IP_TOS:
5250 + case IP_TTL:
5251 + case IP_HDRINCL:
5252 + case IP_MTU_DISCOVER:
5253 + case IP_RECVERR:
5254 + case IP_ROUTER_ALERT:
5255 + case IP_FREEBIND:
5256 + case IP_PASSSEC:
5257 + case IP_TRANSPARENT:
5258 + case IP_MINTTL:
5259 + case IP_NODEFRAG:
5260 + case IP_UNICAST_IF:
5261 + case IP_MULTICAST_TTL:
5262 + case IP_MULTICAST_ALL:
5263 + case IP_MULTICAST_LOOP:
5264 + case IP_RECVORIGDSTADDR:
5265 if (optlen >= sizeof(int)) {
5266 if (get_user(val, (int __user *) optval))
5267 return -EFAULT;
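
The do_ip_setsockopt() hunk above converts a (1<<optname) bitmask test into a switch. Beyond readability, the mask form is only well-defined while every option value stays below the width of the shifted type: 1<<optname is undefined behaviour once optname reaches 32, and option values at or above 32 cannot be expressed in a 32-bit mask at all, which is presumably why several options had to be chained on with ||. A sketch of the contrast (standalone C, made-up option values):

    #include <stdbool.h>
    #include <stdio.h>

    enum { OPT_TOS = 1, OPT_TTL = 2, OPT_RECVERR = 11, OPT_UNICAST_IF = 50 };

    /* Bitmask membership: only sound for opt in [0, 31] with a 32-bit mask. */
    static bool in_mask(int opt)
    {
        const unsigned int mask = (1u << OPT_TOS) | (1u << OPT_TTL) |
                                  (1u << OPT_RECVERR);
        return opt >= 0 && opt < 32 && (mask & (1u << opt));
    }

    /* Switch membership: no width limit, no shift hazards. */
    static bool in_switch(int opt)
    {
        switch (opt) {
        case OPT_TOS:
        case OPT_TTL:
        case OPT_RECVERR:
        case OPT_UNICAST_IF:   /* value 50: impossible to express in the mask */
            return true;
        default:
            return false;
        }
    }

    int main(void)
    {
        printf("%d %d\n", in_mask(OPT_UNICAST_IF), in_switch(OPT_UNICAST_IF));
        return 0;
    }
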
5268 diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
5269 index 3828a42..da4098f 100644
5270 --- a/net/ipv4/netfilter/nf_nat_standalone.c
5271 +++ b/net/ipv4/netfilter/nf_nat_standalone.c
5272 @@ -194,7 +194,8 @@ nf_nat_out(unsigned int hooknum,
5273
5274 if ((ct->tuplehash[dir].tuple.src.u3.ip !=
5275 ct->tuplehash[!dir].tuple.dst.u3.ip) ||
5276 - (ct->tuplehash[dir].tuple.src.u.all !=
5277 + (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
5278 + ct->tuplehash[dir].tuple.src.u.all !=
5279 ct->tuplehash[!dir].tuple.dst.u.all)
5280 )
5281 return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
5282 @@ -230,7 +231,8 @@ nf_nat_local_fn(unsigned int hooknum,
5283 ret = NF_DROP;
5284 }
5285 #ifdef CONFIG_XFRM
5286 - else if (ct->tuplehash[dir].tuple.dst.u.all !=
5287 + else if (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
5288 + ct->tuplehash[dir].tuple.dst.u.all !=
5289 ct->tuplehash[!dir].tuple.src.u.all)
5290 if (ip_xfrm_me_harder(skb))
5291 ret = NF_DROP;
5292 diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
5293 index 63dd1f8..34c1109 100644
5294 --- a/net/ipv6/ipv6_sockglue.c
5295 +++ b/net/ipv6/ipv6_sockglue.c
5296 @@ -828,6 +828,7 @@ pref_skip_coa:
5297 if (val < 0 || val > 255)
5298 goto e_inval;
5299 np->min_hopcount = val;
5300 + retv = 0;
5301 break;
5302 case IPV6_DONTFRAG:
5303 np->dontfrag = valbool;
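
The ipv6_sockglue.c hunk above adds retv = 0 for the IPV6_MINHOPCOUNT case: the setsockopt handler starts retv at an error code and expects every successful case to clear it, so a case that only stored the value fell through and returned the stale error. The shape of the pattern (standalone C, hypothetical names and values):

    #include <errno.h>

    struct opts { int min_hopcount; };

    static int do_setsockopt(struct opts *o, int optname, int val)
    {
        int retv = -ENOPROTOOPT;    /* pessimistic default */

        switch (optname) {
        case 73:                    /* MIN_HOPCOUNT-style option */
            if (val < 0 || val > 255)
                return -EINVAL;
            o->min_hopcount = val;
            retv = 0;               /* without this, the stale error escapes */
            break;
        }
        return retv;
    }

    int main(void)
    {
        struct opts o = { 0 };
        return do_setsockopt(&o, 73, 64);   /* exits 0 only if retv was cleared */
    }
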
5304 diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
5305 index db8fae5..498e87b 100644
5306 --- a/net/mac80211/ieee80211_i.h
5307 +++ b/net/mac80211/ieee80211_i.h
5308 @@ -1297,6 +1297,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
5309 struct net_device *dev);
5310 netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
5311 struct net_device *dev);
5312 +void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
5313 + struct sk_buff_head *skbs);
5314
5315 /* HT */
5316 bool ieee80111_cfg_override_disables_ht40(struct ieee80211_sub_if_data *sdata);
5317 diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
5318 index d93d39b..6d25d77 100644
5319 --- a/net/mac80211/sta_info.c
5320 +++ b/net/mac80211/sta_info.c
5321 @@ -738,8 +738,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
5322
5323 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
5324 local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]);
5325 - __skb_queue_purge(&sta->ps_tx_buf[ac]);
5326 - __skb_queue_purge(&sta->tx_filtered[ac]);
5327 + ieee80211_purge_tx_queue(&local->hw, &sta->ps_tx_buf[ac]);
5328 + ieee80211_purge_tx_queue(&local->hw, &sta->tx_filtered[ac]);
5329 }
5330
5331 #ifdef CONFIG_MAC80211_MESH
5332 @@ -774,7 +774,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
5333 tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]);
5334 if (!tid_tx)
5335 continue;
5336 - __skb_queue_purge(&tid_tx->pending);
5337 + ieee80211_purge_tx_queue(&local->hw, &tid_tx->pending);
5338 kfree(tid_tx);
5339 }
5340
5341 @@ -959,6 +959,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
5342 struct ieee80211_local *local = sdata->local;
5343 struct sk_buff_head pending;
5344 int filtered = 0, buffered = 0, ac;
5345 + unsigned long flags;
5346
5347 clear_sta_flag(sta, WLAN_STA_SP);
5348
5349 @@ -974,12 +975,16 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
5350 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
5351 int count = skb_queue_len(&pending), tmp;
5352
5353 + spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags);
5354 skb_queue_splice_tail_init(&sta->tx_filtered[ac], &pending);
5355 + spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags);
5356 tmp = skb_queue_len(&pending);
5357 filtered += tmp - count;
5358 count = tmp;
5359
5360 + spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags);
5361 skb_queue_splice_tail_init(&sta->ps_tx_buf[ac], &pending);
5362 + spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags);
5363 tmp = skb_queue_len(&pending);
5364 buffered += tmp - count;
5365 }
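
The sta_info.c hunk above wraps each skb_queue_splice_tail_init() call in the source queue's own lock: the splice helper leaves synchronisation to the caller, and these queues are still reachable from other contexts at that point. A sketch of the idea with a mutex-guarded intrusive list (standalone C with pthreads, hypothetical names, not the skb API):

    #include <pthread.h>
    #include <stddef.h>

    struct pkt { struct pkt *next; };

    struct locked_queue {
        pthread_mutex_t lock;
        struct pkt *head;
    };

    /* Detach the whole queue in one critical section; walking the
     * returned batch afterwards needs no lock, since it is private. */
    static struct pkt *splice_all(struct locked_queue *q)
    {
        struct pkt *batch;

        pthread_mutex_lock(&q->lock);
        batch = q->head;
        q->head = NULL;
        pthread_mutex_unlock(&q->lock);

        return batch;
    }

    int main(void)
    {
        struct locked_queue q = { PTHREAD_MUTEX_INITIALIZER, NULL };
        return splice_all(&q) != NULL;   /* empty queue: exits 0 */
    }
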
5366 diff --git a/net/mac80211/status.c b/net/mac80211/status.c
5367 index 5f8f89e..47b117f 100644
5368 --- a/net/mac80211/status.c
5369 +++ b/net/mac80211/status.c
5370 @@ -660,3 +660,12 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb)
5371 dev_kfree_skb_any(skb);
5372 }
5373 EXPORT_SYMBOL(ieee80211_free_txskb);
5374 +
5375 +void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
5376 + struct sk_buff_head *skbs)
5377 +{
5378 + struct sk_buff *skb;
5379 +
5380 + while ((skb = __skb_dequeue(skbs)))
5381 + ieee80211_free_txskb(hw, skb);
5382 +}
5383 diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
5384 index e76facc..eace766 100644
5385 --- a/net/mac80211/tx.c
5386 +++ b/net/mac80211/tx.c
5387 @@ -1357,7 +1357,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
5388 if (tx->skb)
5389 dev_kfree_skb(tx->skb);
5390 else
5391 - __skb_queue_purge(&tx->skbs);
5392 + ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs);
5393 return -1;
5394 } else if (unlikely(res == TX_QUEUED)) {
5395 I802_DEBUG_INC(tx->local->tx_handlers_queued);
5396 @@ -2126,10 +2126,13 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
5397 */
5398 void ieee80211_clear_tx_pending(struct ieee80211_local *local)
5399 {
5400 + struct sk_buff *skb;
5401 int i;
5402
5403 - for (i = 0; i < local->hw.queues; i++)
5404 - skb_queue_purge(&local->pending[i]);
5405 + for (i = 0; i < local->hw.queues; i++) {
5406 + while ((skb = skb_dequeue(&local->pending[i])) != NULL)
5407 + ieee80211_free_txskb(&local->hw, skb);
5408 + }
5409 }
5410
5411 /*
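
The tx.c hunk above drains local->pending[i] one frame at a time through ieee80211_free_txskb() instead of skb_queue_purge(), so each frame passes through the driver-facing free path (added in the status.c hunk) rather than being discarded behind its back. The generic shape, draining a queue through a per-item hook before release (standalone C, hypothetical names):

    #include <stdio.h>
    #include <stdlib.h>

    struct pkt { struct pkt *next; };

    struct queue { struct pkt *head; };

    /* Bulk-purge loses per-packet bookkeeping; draining through a hook
     * lets each packet be reported/unaccounted before it is released. */
    static void purge(struct queue *q, void (*fini)(struct pkt *))
    {
        struct pkt *p;

        while ((p = q->head) != NULL) {
            q->head = p->next;
            fini(p);        /* e.g. report tx status */
            free(p);
        }
    }

    static void report(struct pkt *p)
    {
        (void)p;
        puts("tx status reported");
    }

    int main(void)
    {
        struct queue q = { NULL };

        for (int i = 0; i < 3; i++) {
            struct pkt *p = malloc(sizeof(*p));
            if (!p)
                return 1;
            p->next = q.head;
            q.head = p;
        }
        purge(&q, report);
        return 0;
    }
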
5412 diff --git a/net/mac80211/util.c b/net/mac80211/util.c
5413 index 266d092..73ef163 100644
5414 --- a/net/mac80211/util.c
5415 +++ b/net/mac80211/util.c
5416 @@ -1341,6 +1341,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
5417 list_for_each_entry(sdata, &local->interfaces, list) {
5418 if (sdata->vif.type != NL80211_IFTYPE_STATION)
5419 continue;
5420 + if (!sdata->u.mgd.associated)
5421 + continue;
5422
5423 ieee80211_send_nullfunc(local, sdata, 0);
5424 }
5425 diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
5426 index 0d07a1d..e022123 100644
5427 --- a/net/netfilter/nf_conntrack_proto_tcp.c
5428 +++ b/net/netfilter/nf_conntrack_proto_tcp.c
5429 @@ -158,21 +158,18 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
5430 * sCL -> sSS
5431 */
5432 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
5433 -/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
5434 +/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
5435 /*
5436 * sNO -> sIV Too late and no reason to do anything
5437 * sSS -> sIV Client can't send SYN and then SYN/ACK
5438 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
5439 - * sSR -> sIG
5440 - * sES -> sIG Error: SYNs in window outside the SYN_SENT state
5441 - * are errors. Receiver will reply with RST
5442 - * and close the connection.
5443 - * Or we are not in sync and hold a dead connection.
5444 - * sFW -> sIG
5445 - * sCW -> sIG
5446 - * sLA -> sIG
5447 - * sTW -> sIG
5448 - * sCL -> sIG
5449 + * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
5450 + * sES -> sIV Invalid SYN/ACK packets sent by the client
5451 + * sFW -> sIV
5452 + * sCW -> sIV
5453 + * sLA -> sIV
5454 + * sTW -> sIV
5455 + * sCL -> sIV
5456 */
5457 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
5458 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
5459 @@ -627,15 +624,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
5460 ack = sack = receiver->td_end;
5461 }
5462
5463 - if (seq == end
5464 - && (!tcph->rst
5465 - || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
5466 + if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
5467 /*
5468 - * Packets contains no data: we assume it is valid
5469 - * and check the ack value only.
5470 - * However RST segments are always validated by their
5471 - * SEQ number, except when seq == 0 (reset sent answering
5472 - * SYN.
5473 + * RST sent answering SYN.
5474 */
5475 seq = end = sender->td_end;
5476
5477 diff --git a/net/wireless/reg.c b/net/wireless/reg.c
5478 index b01449f..4dc8347 100644
5479 --- a/net/wireless/reg.c
5480 +++ b/net/wireless/reg.c
5481 @@ -134,9 +134,8 @@ static const struct ieee80211_regdomain world_regdom = {
5482 .reg_rules = {
5483 /* IEEE 802.11b/g, channels 1..11 */
5484 REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
5485 - /* IEEE 802.11b/g, channels 12..13. No HT40
5486 - * channel fits here. */
5487 - REG_RULE(2467-10, 2472+10, 20, 6, 20,
5488 + /* IEEE 802.11b/g, channels 12..13. */
5489 + REG_RULE(2467-10, 2472+10, 40, 6, 20,
5490 NL80211_RRF_PASSIVE_SCAN |
5491 NL80211_RRF_NO_IBSS),
5492 /* IEEE 802.11 channel 14 - Only JP enables
5493 diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
5494 index 8636585..04aa5c8 100644
5495 --- a/security/selinux/netnode.c
5496 +++ b/security/selinux/netnode.c
5497 @@ -174,7 +174,8 @@ static void sel_netnode_insert(struct sel_netnode *node)
5498 if (sel_netnode_hash[idx].size == SEL_NETNODE_HASH_BKT_LIMIT) {
5499 struct sel_netnode *tail;
5500 tail = list_entry(
5501 - rcu_dereference(sel_netnode_hash[idx].list.prev),
5502 + rcu_dereference_protected(sel_netnode_hash[idx].list.prev,
5503 + lockdep_is_held(&sel_netnode_lock)),
5504 struct sel_netnode, list);
5505 list_del_rcu(&tail->list);
5506 kfree_rcu(tail, rcu);
5507 diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
5508 index 7143393..e23ad3f 100644
5509 --- a/sound/pci/hda/patch_analog.c
5510 +++ b/sound/pci/hda/patch_analog.c
5511 @@ -544,6 +544,7 @@ static int ad198x_build_pcms(struct hda_codec *codec)
5512 if (spec->multiout.dig_out_nid) {
5513 info++;
5514 codec->num_pcms++;
5515 + codec->spdif_status_reset = 1;
5516 info->name = "AD198x Digital";
5517 info->pcm_type = HDA_PCM_TYPE_SPDIF;
5518 info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ad198x_pcm_digital_playback;
5519 diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
5520 index 2bc6c51..057f95a2 100644
5521 --- a/sound/pci/hda/patch_cirrus.c
5522 +++ b/sound/pci/hda/patch_cirrus.c
5523 @@ -95,8 +95,8 @@ enum {
5524 #define CS420X_VENDOR_NID 0x11
5525 #define CS_DIG_OUT1_PIN_NID 0x10
5526 #define CS_DIG_OUT2_PIN_NID 0x15
5527 -#define CS_DMIC1_PIN_NID 0x12
5528 -#define CS_DMIC2_PIN_NID 0x0e
5529 +#define CS_DMIC1_PIN_NID 0x0e
5530 +#define CS_DMIC2_PIN_NID 0x12
5531
5532 /* coef indices */
5533 #define IDX_SPDIF_STAT 0x0000
5534 @@ -1084,14 +1084,18 @@ static void init_input(struct hda_codec *codec)
5535 cs_automic(codec);
5536
5537 coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */
5538 + cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
5539 +
5540 + coef = cs_vendor_coef_get(codec, IDX_BEEP_CFG);
5541 if (is_active_pin(codec, CS_DMIC2_PIN_NID))
5542 - coef |= 0x0500; /* DMIC2 2 chan on, GPIO1 off */
5543 + coef |= 1 << 4; /* DMIC2 2 chan on, GPIO1 off */
5544 if (is_active_pin(codec, CS_DMIC1_PIN_NID))
5545 - coef |= 0x1800; /* DMIC1 2 chan on, GPIO0 off
5546 + coef |= 1 << 3; /* DMIC1 2 chan on, GPIO0 off
5547 * No effect if SPDIF_OUT2 is
5548 * selected in IDX_SPDIF_CTL.
5549 */
5550 - cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
5551 +
5552 + cs_vendor_coef_set(codec, IDX_BEEP_CFG, coef);
5553 } else {
5554 if (spec->mic_detect)
5555 cs_automic(codec);
5556 @@ -1112,7 +1116,7 @@ static const struct hda_verb cs_coef_init_verbs[] = {
5557 | 0x0400 /* Disable Coefficient Auto increment */
5558 )},
5559 /* Beep */
5560 - {0x11, AC_VERB_SET_COEF_INDEX, IDX_DAC_CFG},
5561 + {0x11, AC_VERB_SET_COEF_INDEX, IDX_BEEP_CFG},
5562 {0x11, AC_VERB_SET_PROC_COEF, 0x0007}, /* Enable Beep thru DAC1/2/3 */
5563
5564 {} /* terminator */
5565 diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
5566 index 6ecf1d4..257fe87 100644
5567 --- a/sound/pci/hda/patch_realtek.c
5568 +++ b/sound/pci/hda/patch_realtek.c
5569 @@ -5458,6 +5458,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
5570 SND_PCI_QUIRK(0x106b, 0x4000, "MacbookPro 5,1", ALC889_FIXUP_IMAC91_VREF),
5571 SND_PCI_QUIRK(0x106b, 0x4100, "Macmini 3,1", ALC889_FIXUP_IMAC91_VREF),
5572 SND_PCI_QUIRK(0x106b, 0x4200, "Mac Pro 5,1", ALC885_FIXUP_MACPRO_GPIO),
5573 + SND_PCI_QUIRK(0x106b, 0x4300, "iMac 9,1", ALC889_FIXUP_IMAC91_VREF),
5574 SND_PCI_QUIRK(0x106b, 0x4600, "MacbookPro 5,2", ALC889_FIXUP_IMAC91_VREF),
5575 SND_PCI_QUIRK(0x106b, 0x4900, "iMac 9,1 Aluminum", ALC889_FIXUP_IMAC91_VREF),
5576 SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_IMAC91_VREF),
5577 @@ -7047,6 +7048,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = {
5578 .patch = patch_alc662 },
5579 { .id = 0x10ec0663, .name = "ALC663", .patch = patch_alc662 },
5580 { .id = 0x10ec0665, .name = "ALC665", .patch = patch_alc662 },
5581 + { .id = 0x10ec0668, .name = "ALC668", .patch = patch_alc662 },
5582 { .id = 0x10ec0670, .name = "ALC670", .patch = patch_alc662 },
5583 { .id = 0x10ec0680, .name = "ALC680", .patch = patch_alc680 },
5584 { .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 },
5585 @@ -7064,6 +7066,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = {
5586 { .id = 0x10ec0889, .name = "ALC889", .patch = patch_alc882 },
5587 { .id = 0x10ec0892, .name = "ALC892", .patch = patch_alc662 },
5588 { .id = 0x10ec0899, .name = "ALC898", .patch = patch_alc882 },
5589 + { .id = 0x10ec0900, .name = "ALC1150", .patch = patch_alc882 },
5590 {} /* terminator */
5591 };
5592
5593 diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
5594 index 3998d09b..9dafacd 100644
5595 --- a/sound/pci/hda/patch_via.c
5596 +++ b/sound/pci/hda/patch_via.c
5597 @@ -1868,11 +1868,11 @@ static int via_auto_fill_dac_nids(struct hda_codec *codec)
5598 {
5599 struct via_spec *spec = codec->spec;
5600 const struct auto_pin_cfg *cfg = &spec->autocfg;
5601 - int i, dac_num;
5602 + int i;
5603 hda_nid_t nid;
5604
5605 + spec->multiout.num_dacs = 0;
5606 spec->multiout.dac_nids = spec->private_dac_nids;
5607 - dac_num = 0;
5608 for (i = 0; i < cfg->line_outs; i++) {
5609 hda_nid_t dac = 0;
5610 nid = cfg->line_out_pins[i];
5611 @@ -1883,16 +1883,13 @@ static int via_auto_fill_dac_nids(struct hda_codec *codec)
5612 if (!i && parse_output_path(codec, nid, dac, 1,
5613 &spec->out_mix_path))
5614 dac = spec->out_mix_path.path[0];
5615 - if (dac) {
5616 - spec->private_dac_nids[i] = dac;
5617 - dac_num++;
5618 - }
5619 + if (dac)
5620 + spec->private_dac_nids[spec->multiout.num_dacs++] = dac;
5621 }
5622 if (!spec->out_path[0].depth && spec->out_mix_path.depth) {
5623 spec->out_path[0] = spec->out_mix_path;
5624 spec->out_mix_path.depth = 0;
5625 }
5626 - spec->multiout.num_dacs = dac_num;
5627 return 0;
5628 }
5629
5630 @@ -3668,6 +3665,18 @@ static void set_widgets_power_state_vt2002P(struct hda_codec *codec)
5631 update_power_state(codec, 0x21, AC_PWRST_D3);
5632 }
5633
5634 +/* NIDs 0x24 and 0x33 on VT1802 have connections to non-existing NID 0x3e
5635 + * Replace this with mixer NID 0x1c
5636 + */
5637 +static void fix_vt1802_connections(struct hda_codec *codec)
5638 +{
5639 + static hda_nid_t conn_24[] = { 0x14, 0x1c };
5640 + static hda_nid_t conn_33[] = { 0x1c };
5641 +
5642 + snd_hda_override_conn_list(codec, 0x24, ARRAY_SIZE(conn_24), conn_24);
5643 + snd_hda_override_conn_list(codec, 0x33, ARRAY_SIZE(conn_33), conn_33);
5644 +}
5645 +
5646 /* patch for vt2002P */
5647 static int patch_vt2002P(struct hda_codec *codec)
5648 {
5649 @@ -3682,6 +3691,8 @@ static int patch_vt2002P(struct hda_codec *codec)
5650 spec->aa_mix_nid = 0x21;
5651 override_mic_boost(codec, 0x2b, 0, 3, 40);
5652 override_mic_boost(codec, 0x29, 0, 3, 40);
5653 + if (spec->codec_type == VT1802)
5654 + fix_vt1802_connections(codec);
5655 add_secret_dac_path(codec);
5656
5657 /* automatic parse from the BIOS config */
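
The via_auto_fill_dac_nids() hunk above writes each discovered DAC to slot num_dacs++ instead of slot i, so pins whose lookup fails no longer leave zero-filled holes in the array, and the count always equals the filled prefix. The same compaction pattern in isolation (standalone C, made-up data):

    #include <stdio.h>

    /* Keep only nonzero ids, writing to a separate out index so the
     * result is dense and 'n' always equals the number of valid slots. */
    static int compact(const int *in, int in_len, int *out)
    {
        int n = 0;

        for (int i = 0; i < in_len; i++)
            if (in[i])
                out[n++] = in[i];   /* out[n++], not out[i] */
        return n;
    }

    int main(void)
    {
        int pins[] = { 0x24, 0, 0x25, 0, 0x26 };
        int dacs[5];
        int n = compact(pins, 5, dacs);

        for (int i = 0; i < n; i++)
            printf("0x%x ", dacs[i]);
        printf("(%d dacs)\n", n);
        return 0;
    }
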
5658 diff --git a/sound/soc/codecs/wm8978.c b/sound/soc/codecs/wm8978.c
5659 index 72d5fdc..6c37c7c 100644
5660 --- a/sound/soc/codecs/wm8978.c
5661 +++ b/sound/soc/codecs/wm8978.c
5662 @@ -783,7 +783,7 @@ static int wm8978_hw_params(struct snd_pcm_substream *substream,
5663 wm8978->mclk_idx = -1;
5664 f_sel = wm8978->f_mclk;
5665 } else {
5666 - if (!wm8978->f_pllout) {
5667 + if (!wm8978->f_opclk) {
5668 /* We only enter here, if OPCLK is not used */
5669 int ret = wm8978_configure_pll(codec);
5670 if (ret < 0)
5671 diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
5672 index c41efe0..9ae82a4 100644
5673 --- a/sound/soc/soc-dapm.c
5674 +++ b/sound/soc/soc-dapm.c
5675 @@ -3253,7 +3253,7 @@ void snd_soc_dapm_shutdown(struct snd_soc_card *card)
5676 {
5677 struct snd_soc_codec *codec;
5678
5679 - list_for_each_entry(codec, &card->codec_dev_list, list) {
5680 + list_for_each_entry(codec, &card->codec_dev_list, card_list) {
5681 soc_dapm_shutdown_codec(&codec->dapm);
5682 if (codec->dapm.bias_level == SND_SOC_BIAS_STANDBY)
5683 snd_soc_dapm_set_bias_level(&codec->dapm,
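
The soc-dapm.c hunk above fixes a list_for_each_entry() that named the wrong member: card->codec_dev_list links codecs through card_list, and recovering the containing codec via 'list' makes container_of() compute a pointer offset by the gap between the two members. A sketch of why the member name matters (standalone C, simplified list node):

    #include <stddef.h>
    #include <stdio.h>

    struct list_head { struct list_head *next; };

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct codec {
        int id;
        struct list_head list;       /* linkage for one list */
        struct list_head card_list;  /* linkage for another */
    };

    int main(void)
    {
        struct codec c = { .id = 42 };

        /* A node taken off the card list must be mapped back with the
         * 'card_list' member; naming 'list' lands short of the object. */
        struct codec *ok  = container_of(&c.card_list, struct codec, card_list);
        struct codec *bad = container_of(&c.card_list, struct codec, list);

        printf("right member: id=%d (%p)\n", ok->id, (void *)ok);
        printf("wrong member: %p\n", (void *)bad);
        return 0;
    }
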