Magellan Linux

Annotation of /trunk/kernel-alx/patches-3.4/0119-3.4.20-all-fixes.patch



Revision 1973
Fri Nov 30 10:38:25 2012 UTC by niro
File size: 186405 bytes
-linux 3.4.20
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 9b1067a..68c5411 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -466,6 +466,10 @@ Note:
 5.3 swappiness

 Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
+Please note that unlike the global swappiness, memcg knob set to 0
+really prevents from any swapping even if there is a swap storage
+available. This might lead to memcg OOM killer if there are no file
+pages to reclaim.

 Following cgroups' swappiness can't be changed.
 - root cgroup (uses /proc/sys/vm/swappiness).
diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h
index c369c9d..9ff4444 100644
--- a/arch/arm/plat-omap/include/plat/omap-serial.h
+++ b/arch/arm/plat-omap/include/plat/omap-serial.h
@@ -42,10 +42,10 @@
 #define OMAP_UART_WER_MOD_WKUP 0X7F

 /* Enable XON/XOFF flow control on output */
-#define OMAP_UART_SW_TX 0x8
+#define OMAP_UART_SW_TX 0x04

 /* Enable XON/XOFF flow control on input */
-#define OMAP_UART_SW_RX 0x2
+#define OMAP_UART_SW_RX 0x04

 #define OMAP_UART_SYSC_RESET 0X07
 #define OMAP_UART_TCR_TRIG 0X0F
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 60e8866..93fe83e 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -156,7 +156,7 @@ typedef struct sigaltstack {
 static inline void sigaddset(sigset_t *set, int _sig)
 {
 asm ("bfset %0{%1,#1}"
- : "+od" (*set)
+ : "+o" (*set)
 : "id" ((_sig - 1) ^ 31)
 : "cc");
 }
@@ -164,7 +164,7 @@ static inline void sigaddset(sigset_t *set, int _sig)
 static inline void sigdelset(sigset_t *set, int _sig)
 {
 asm ("bfclr %0{%1,#1}"
- : "+od" (*set)
+ : "+o" (*set)
 : "id" ((_sig - 1) ^ 31)
 : "cc");
 }
@@ -180,7 +180,7 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
 int ret;
 asm ("bfextu %1{%2,#1},%0"
 : "=d" (ret)
- : "od" (*set), "id" ((_sig-1) ^ 31)
+ : "o" (*set), "id" ((_sig-1) ^ 31)
 : "cc");
 return ret;
 }
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 234f1d8..2e0a15b 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -20,7 +20,7 @@
 #define PSW32_MASK_CC 0x00003000UL
 #define PSW32_MASK_PM 0x00000f00UL

-#define PSW32_MASK_USER 0x00003F00UL
+#define PSW32_MASK_USER 0x0000FF00UL

 #define PSW32_ADDR_AMODE 0x80000000UL
 #define PSW32_ADDR_INSN 0x7FFFFFFFUL
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index aeb77f0..d3750e7 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -240,7 +240,7 @@ typedef struct
 #define PSW_MASK_EA 0x00000000UL
 #define PSW_MASK_BA 0x00000000UL

-#define PSW_MASK_USER 0x00003F00UL
+#define PSW_MASK_USER 0x0000FF00UL

 #define PSW_ADDR_AMODE 0x80000000UL
 #define PSW_ADDR_INSN 0x7FFFFFFFUL
@@ -269,7 +269,7 @@ typedef struct
 #define PSW_MASK_EA 0x0000000100000000UL
 #define PSW_MASK_BA 0x0000000080000000UL

-#define PSW_MASK_USER 0x00003F0180000000UL
+#define PSW_MASK_USER 0x0000FF0180000000UL

 #define PSW_ADDR_AMODE 0x0000000000000000UL
 #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 28040fd..0bdca3a 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -313,6 +313,10 @@ static int restore_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
 regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
 (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 |
 (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE);
+ /* Check for invalid user address space control. */
+ if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
+ regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
 regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN);
 for (i = 0; i < NUM_GPRS; i++)
 regs->gprs[i] = (__u64) regs32.gprs[i];
@@ -494,7 +498,10 @@ static int setup_frame32(int sig, struct k_sigaction *ka,

 /* Set up registers for signal handler */
 regs->gprs[15] = (__force __u64) frame;
- regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */
+ /* Force 31 bit amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_BA |
+ (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
 regs->psw.addr = (__force __u64) ka->sa.sa_handler;

 regs->gprs[2] = map_signal(sig);
@@ -562,7 +569,10 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,

 /* Set up registers for signal handler */
 regs->gprs[15] = (__force __u64) frame;
- regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */
+ /* Force 31 bit amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_BA |
+ (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
 regs->psw.addr = (__u64) ka->sa.sa_handler;

 regs->gprs[2] = map_signal(sig);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index f7582b2..74f58e2 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -148,6 +148,10 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */
 regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
 (user_sregs.regs.psw.mask & PSW_MASK_USER);
+ /* Check for invalid user address space control. */
+ if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
+ regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
 /* Check for invalid amode */
 if (regs->psw.mask & PSW_MASK_EA)
 regs->psw.mask |= PSW_MASK_BA;
@@ -294,7 +298,10 @@ static int setup_frame(int sig, struct k_sigaction *ka,

 /* Set up registers for signal handler */
 regs->gprs[15] = (unsigned long) frame;
- regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */
+ /* Force default amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+ (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
 regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;

 regs->gprs[2] = map_signal(sig);
@@ -367,7 +374,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,

 /* Set up registers for signal handler */
 regs->gprs[15] = (unsigned long) frame;
- regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */
+ /* Force default amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+ (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
 regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;

 regs->gprs[2] = map_signal(sig);
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 65cb06e..4ccf9f5 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -183,7 +183,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 addr = start;
 len = (unsigned long) nr_pages << PAGE_SHIFT;
 end = start + len;
- if (end < start)
+ if ((end < start) || (end > TASK_SIZE))
 goto slow_irqon;

 /*
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 671d4d6..7bdd61b 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -137,13 +137,18 @@ static void cryptd_queue_worker(struct work_struct *work)
 struct crypto_async_request *req, *backlog;

 cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
- /* Only handle one request at a time to avoid hogging crypto
- * workqueue. preempt_disable/enable is used to prevent
- * being preempted by cryptd_enqueue_request() */
+ /*
+ * Only handle one request at a time to avoid hogging crypto workqueue.
+ * preempt_disable/enable is used to prevent being preempted by
+ * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
+ * cryptd_enqueue_request() being accessed from software interrupts.
+ */
+ local_bh_disable();
 preempt_disable();
 backlog = crypto_get_backlog(&cpu_queue->queue);
 req = crypto_dequeue_request(&cpu_queue->queue);
 preempt_enable();
+ local_bh_enable();

 if (!req)
 return;
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 48b5a3c..62d9ee6 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -1345,12 +1345,15 @@ static int
 acpi_video_bus_get_devices(struct acpi_video_bus *video,
 struct acpi_device *device)
 {
- int status;
+ int status = 0;
 struct acpi_device *dev;

- status = acpi_video_device_enumerate(video);
- if (status)
- return status;
+ /*
+ * There are systems where video module known to work fine regardless
+ * of broken _DOD and ignoring returned value here doesn't cause
+ * any issues later.
+ */
+ acpi_video_device_enumerate(video);

 list_for_each_entry(dev, &device->children, node) {

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 013c7a5..7b33136 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -175,8 +175,7 @@ struct rbd_device {
 /* protects updating the header */
 struct rw_semaphore header_rwsem;
 char snap_name[RBD_MAX_SNAP_NAME_LEN];
- u32 cur_snap; /* index+1 of current snapshot within snap context
- 0 - for the head */
+ u64 snap_id; /* current snapshot id */
 int read_only;

 struct list_head node;
@@ -450,7 +449,9 @@ static void rbd_client_release(struct kref *kref)
 struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);

 dout("rbd_release_client %p\n", rbdc);
+ spin_lock(&rbd_client_list_lock);
 list_del(&rbdc->node);
+ spin_unlock(&rbd_client_list_lock);

 ceph_destroy_client(rbdc->client);
 kfree(rbdc->rbd_opts);
@@ -463,9 +464,7 @@ static void rbd_client_release(struct kref *kref)
 */
 static void rbd_put_client(struct rbd_device *rbd_dev)
 {
- spin_lock(&rbd_client_list_lock);
 kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
- spin_unlock(&rbd_client_list_lock);
 rbd_dev->rbd_client = NULL;
 }

@@ -498,7 +497,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header,

 snap_count = le32_to_cpu(ondisk->snap_count);
 header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
- snap_count * sizeof (*ondisk),
+ snap_count * sizeof(u64),
 gfp_flags);
 if (!header->snapc)
 return -ENOMEM;
@@ -552,21 +551,6 @@ err_snapc:
 return -ENOMEM;
 }

-static int snap_index(struct rbd_image_header *header, int snap_num)
-{
- return header->total_snaps - snap_num;
-}
-
-static u64 cur_snap_id(struct rbd_device *rbd_dev)
-{
- struct rbd_image_header *header = &rbd_dev->header;
-
- if (!rbd_dev->cur_snap)
- return 0;
-
- return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
-}
-
 static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
 u64 *seq, u64 *size)
 {
@@ -605,7 +589,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
 snapc->seq = header->snap_seq;
 else
 snapc->seq = 0;
- dev->cur_snap = 0;
+ dev->snap_id = CEPH_NOSNAP;
 dev->read_only = 0;
 if (size)
 *size = header->image_size;
@@ -613,8 +597,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
 ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
 if (ret < 0)
 goto done;
-
- dev->cur_snap = header->total_snaps - ret;
+ dev->snap_id = snapc->seq;
 dev->read_only = 1;
 }

@@ -1521,7 +1504,7 @@ static void rbd_rq_fn(struct request_queue *q)
 coll, cur_seg);
 else
 rbd_req_read(rq, rbd_dev,
- cur_snap_id(rbd_dev),
+ rbd_dev->snap_id,
 ofs,
 op_size, bio,
 coll, cur_seg);
@@ -1656,7 +1639,7 @@ static int rbd_header_add_snap(struct rbd_device *dev,
 struct ceph_mon_client *monc;

 /* we should create a snapshot only if we're pointing at the head */
- if (dev->cur_snap)
+ if (dev->snap_id != CEPH_NOSNAP)
 return -EINVAL;

 monc = &dev->rbd_client->client->monc;
@@ -1683,7 +1666,9 @@ static int rbd_header_add_snap(struct rbd_device *dev,
 if (ret < 0)
 return ret;

- dev->header.snapc->seq = new_snapid;
+ down_write(&dev->header_rwsem);
+ dev->header.snapc->seq = new_snapid;
+ up_write(&dev->header_rwsem);

 return 0;
 bad:
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 80b331c..5ba5e66 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -427,9 +427,17 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 OUT_RING(flip_addr);
 OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 /* turn overlay off */
- OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
- OUT_RING(flip_addr);
- OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
+ if (IS_I830(dev)) {
+ /* Workaround: Don't disable the overlay fully, since otherwise
+ * it dies on the next OVERLAY_ON cmd. */
+ OUT_RING(MI_NOOP);
+ OUT_RING(MI_NOOP);
+ OUT_RING(MI_NOOP);
+ } else {
+ OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
+ OUT_RING(flip_addr);
+ OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
+ }
 ADVANCE_LP_RING();

 return intel_overlay_do_wait_request(overlay, request,
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 2d07fbf..f6176bc 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -1421,7 +1421,7 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode)
 atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0);
 atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
 /* some early dce3.2 boards have a bug in their transmitter control table */
- if ((rdev->family != CHIP_RV710) || (rdev->family != CHIP_RV730))
+ if ((rdev->family != CHIP_RV710) && (rdev->family != CHIP_RV730))
 atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0);
 }
 if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) {
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index ebc6fac..578207e 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -749,7 +749,10 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
 /* clear the pages coming from the pool if requested */
 if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
 list_for_each_entry(p, &plist, lru) {
- clear_page(page_address(p));
+ if (PageHighMem(p))
+ clear_highpage(p);
+ else
+ clear_page(page_address(p));
 }
 }

diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 589753f..2b78ddd 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -3079,8 +3079,10 @@ static irqreturn_t sky2_intr(int irq, void *dev_id)

 /* Reading this mask interrupts as side effect */
 status = sky2_read32(hw, B0_Y2_SP_ISRC2);
- if (status == 0 || status == ~0)
+ if (status == 0 || status == ~0) {
+ sky2_write32(hw, B0_Y2_SP_ICR, 2);
 return IRQ_NONE;
+ }

 prefetch(&hw->st_le[hw->st_idx]);

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 482dcd3..0dc70c2 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -73,7 +73,7 @@
 static const int multicast_filter_limit = 32;

 #define MAX_READ_REQUEST_SHIFT 12
-#define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */
+#define TX_DMA_BURST 7 /* Maximum PCI burst, '7' is unlimited */
 #define SafeMtu 0x1c20 /* ... actually life sucks beyond ~7k */
 #define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */

@@ -3488,6 +3488,8 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
 void __iomem *ioaddr = tp->mmio_addr;

 switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_25:
+ case RTL_GIGA_MAC_VER_26:
 case RTL_GIGA_MAC_VER_29:
 case RTL_GIGA_MAC_VER_30:
 case RTL_GIGA_MAC_VER_32:
@@ -4129,6 +4131,9 @@ static void rtl_set_rx_mode(struct net_device *dev)
 mc_filter[1] = swab32(data);
 }

+ if (tp->mac_version == RTL_GIGA_MAC_VER_35)
+ mc_filter[1] = mc_filter[0] = 0xffffffff;
+
 RTL_W32(MAR0 + 4, mc_filter[1]);
 RTL_W32(MAR0 + 0, mc_filter[0]);

diff --git a/drivers/staging/android/android_alarm.h b/drivers/staging/android/android_alarm.h
index 66b6e3d..6eecbde 100644
--- a/drivers/staging/android/android_alarm.h
+++ b/drivers/staging/android/android_alarm.h
@@ -110,12 +110,10 @@ enum android_alarm_return_flags {
 #define ANDROID_ALARM_WAIT _IO('a', 1)

 #define ALARM_IOW(c, type, size) _IOW('a', (c) | ((type) << 4), size)
-#define ALARM_IOR(c, type, size) _IOR('a', (c) | ((type) << 4), size)
-
 /* Set alarm */
 #define ANDROID_ALARM_SET(type) ALARM_IOW(2, type, struct timespec)
 #define ANDROID_ALARM_SET_AND_WAIT(type) ALARM_IOW(3, type, struct timespec)
-#define ANDROID_ALARM_GET_TIME(type) ALARM_IOR(4, type, struct timespec)
+#define ANDROID_ALARM_GET_TIME(type) ALARM_IOW(4, type, struct timespec)
 #define ANDROID_ALARM_SET_RTC _IOW('a', 5, struct timespec)
 #define ANDROID_ALARM_BASE_CMD(cmd) (cmd & ~(_IOC(0, 0, 0xf0, 0)))
 #define ANDROID_ALARM_IOCTL_TO_TYPE(cmd) (_IOC_NR(cmd) >> 4)
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 6189923..d00b38e 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -649,19 +649,19 @@ serial_omap_configure_xonxoff

 /*
 * IXON Flag:
- * Flow control for OMAP.TX
- * OMAP.RX should listen for XON/XOFF
+ * Enable XON/XOFF flow control on output.
+ * Transmit XON1, XOFF1
 */
 if (termios->c_iflag & IXON)
- up->efr |= OMAP_UART_SW_RX;
+ up->efr |= OMAP_UART_SW_TX;

 /*
 * IXOFF Flag:
- * Flow control for OMAP.RX
- * OMAP.TX should send XON/XOFF
+ * Enable XON/XOFF flow control on input.
+ * Receiver compares XON1, XOFF1.
 */
 if (termios->c_iflag & IXOFF)
- up->efr |= OMAP_UART_SW_TX;
+ up->efr |= OMAP_UART_SW_RX;

 serial_out(up, UART_EFR, up->efr | UART_EFR_ECB);
 serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 17ec21e..43aa36b 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -157,6 +157,7 @@ static void option_instat_callback(struct urb *urb);
 #define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED 0x8001
 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED 0x9000
 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001
+#define NOVATELWIRELESS_PRODUCT_E362 0x9010
 #define NOVATELWIRELESS_PRODUCT_G1 0xA001
 #define NOVATELWIRELESS_PRODUCT_G1_M 0xA002
 #define NOVATELWIRELESS_PRODUCT_G2 0xA010
@@ -192,6 +193,9 @@ static void option_instat_callback(struct urb *urb);
 #define DELL_PRODUCT_5730_MINICARD_TELUS 0x8181
 #define DELL_PRODUCT_5730_MINICARD_VZW 0x8182

+#define DELL_PRODUCT_5800_MINICARD_VZW 0x8195 /* Novatel E362 */
+#define DELL_PRODUCT_5800_V2_MINICARD_VZW 0x8196 /* Novatel E362 */
+
 #define KYOCERA_VENDOR_ID 0x0c88
 #define KYOCERA_PRODUCT_KPC650 0x17da
 #define KYOCERA_PRODUCT_KPC680 0x180a
@@ -282,6 +286,7 @@ static void option_instat_callback(struct urb *urb);
 /* ALCATEL PRODUCTS */
 #define ALCATEL_VENDOR_ID 0x1bbb
 #define ALCATEL_PRODUCT_X060S_X200 0x0000
+#define ALCATEL_PRODUCT_X220_X500D 0x0017

 #define PIRELLI_VENDOR_ID 0x1266
 #define PIRELLI_PRODUCT_C100_1 0x1002
@@ -705,6 +710,7 @@ static const struct usb_device_id option_ids[] = {
 { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G2) },
 /* Novatel Ovation MC551 a.k.a. Verizon USB551L */
 { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) },

 { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) },
 { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) },
@@ -727,6 +733,8 @@ static const struct usb_device_id option_ids[] = {
 { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_SPRINT) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
 { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_TELUS) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
 { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_VZW) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
+ { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_MINICARD_VZW, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_V2_MINICARD_VZW, 0xff, 0xff, 0xff) },
 { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */
 { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) },
 { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) },
@@ -1156,6 +1164,7 @@ static const struct usb_device_id option_ids[] = {
 { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200),
 .driver_info = (kernel_ulong_t)&alcatel_x200_blacklist
 },
+ { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X220_X500D) },
 { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) },
 { USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) },
 { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14),
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index bcf2617..c627ba2 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -768,7 +768,7 @@ int usb_serial_probe(struct usb_interface *interface,

 if (retval) {
 dbg("sub driver rejected device");
- kfree(serial);
+ usb_serial_put(serial);
 module_put(type->driver.owner);
 return retval;
 }
@@ -840,7 +840,7 @@ int usb_serial_probe(struct usb_interface *interface,
 */
 if (num_bulk_in == 0 || num_bulk_out == 0) {
 dev_info(&interface->dev, "PL-2303 hack: descriptors matched but endpoints did not\n");
- kfree(serial);
+ usb_serial_put(serial);
 module_put(type->driver.owner);
 return -ENODEV;
 }
@@ -854,7 +854,7 @@ int usb_serial_probe(struct usb_interface *interface,
 if (num_ports == 0) {
 dev_err(&interface->dev,
 "Generic device with no bulk out, not allowed.\n");
- kfree(serial);
+ usb_serial_put(serial);
 module_put(type->driver.owner);
 return -EIO;
 }
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 6908e4c..26c47a4 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1365,8 +1365,8 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
 struct pt_regs *old_regs = set_irq_regs(regs);

- exit_idle();
 irq_enter();
+ exit_idle();

 __xen_evtchn_do_upcall();

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 173b1d2..32ee086 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -54,7 +54,12 @@
 (CONGESTION_ON_THRESH(congestion_kb) - \
 (CONGESTION_ON_THRESH(congestion_kb) >> 2))

-
+static inline struct ceph_snap_context *page_snap_context(struct page *page)
+{
+ if (PagePrivate(page))
+ return (void *)page->private;
+ return NULL;
+}

 /*
 * Dirty a page. Optimistically adjust accounting, on the assumption
@@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset)
 {
 struct inode *inode;
 struct ceph_inode_info *ci;
- struct ceph_snap_context *snapc = (void *)page->private;
+ struct ceph_snap_context *snapc = page_snap_context(page);

 BUG_ON(!PageLocked(page));
- BUG_ON(!page->private);
 BUG_ON(!PagePrivate(page));
 BUG_ON(!page->mapping);

@@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g)
 struct inode *inode = page->mapping ? page->mapping->host : NULL;
 dout("%p releasepage %p idx %lu\n", inode, page, page->index);
 WARN_ON(PageDirty(page));
- WARN_ON(page->private);
 WARN_ON(PagePrivate(page));
 return 0;
 }
@@ -202,7 +205,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
 dout("readpage inode %p file %p page %p index %lu\n",
 inode, filp, page, page->index);
 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
- page->index << PAGE_CACHE_SHIFT, &len,
+ (u64) page_offset(page), &len,
 ci->i_truncate_seq, ci->i_truncate_size,
 &page, 1, 0);
 if (err == -ENOENT)
@@ -283,7 +286,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
 int nr_pages = 0;
 int ret;

- off = page->index << PAGE_CACHE_SHIFT;
+ off = (u64) page_offset(page);

 /* count pages */
 next_index = page->index;
@@ -423,7 +426,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 struct ceph_inode_info *ci;
 struct ceph_fs_client *fsc;
 struct ceph_osd_client *osdc;
- loff_t page_off = page->index << PAGE_CACHE_SHIFT;
+ loff_t page_off = page_offset(page);
 int len = PAGE_CACHE_SIZE;
 loff_t i_size;
 int err = 0;
@@ -443,7 +446,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 osdc = &fsc->client->osdc;

 /* verify this is a writeable snap context */
- snapc = (void *)page->private;
+ snapc = page_snap_context(page);
 if (snapc == NULL) {
 dout("writepage %p page %p not dirty?\n", inode, page);
 goto out;
@@ -451,7 +454,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 oldest = get_oldest_context(inode, &snap_size);
 if (snapc->seq > oldest->seq) {
 dout("writepage %p page %p snapc %p not writeable - noop\n",
- inode, page, (void *)page->private);
+ inode, page, snapc);
 /* we should only noop if called by kswapd */
 WARN_ON((current->flags & PF_MEMALLOC) == 0);
 ceph_put_snap_context(oldest);
@@ -591,7 +594,7 @@ static void writepages_finish(struct ceph_osd_request *req,
 clear_bdi_congested(&fsc->backing_dev_info,
 BLK_RW_ASYNC);

- ceph_put_snap_context((void *)page->private);
+ ceph_put_snap_context(page_snap_context(page));
 page->private = 0;
 ClearPagePrivate(page);
 dout("unlocking %d %p\n", i, page);
@@ -795,7 +798,7 @@ get_more_pages:
 }

 /* only if matching snap context */
- pgsnapc = (void *)page->private;
+ pgsnapc = page_snap_context(page);
 if (pgsnapc->seq > snapc->seq) {
 dout("page snapc %p %lld > oldest %p %lld\n",
 pgsnapc, pgsnapc->seq, snapc, snapc->seq);
@@ -814,8 +817,7 @@ get_more_pages:
 /* ok */
 if (locked_pages == 0) {
 /* prepare async write request */
- offset = (unsigned long long)page->index
- << PAGE_CACHE_SHIFT;
+ offset = (u64) page_offset(page);
 len = wsize;
 req = ceph_osdc_new_request(&fsc->client->osdc,
 &ci->i_layout,
@@ -984,7 +986,7 @@ retry_locked:
 BUG_ON(!ci->i_snap_realm);
 down_read(&mdsc->snap_rwsem);
 BUG_ON(!ci->i_snap_realm->cached_context);
- snapc = (void *)page->private;
+ snapc = page_snap_context(page);
 if (snapc && snapc != ci->i_head_snapc) {
 /*
 * this page is already dirty in another (older) snap
@@ -1177,7 +1179,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 struct inode *inode = vma->vm_file->f_dentry->d_inode;
 struct page *page = vmf->page;
 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
- loff_t off = page->index << PAGE_CACHE_SHIFT;
+ loff_t off = page_offset(page);
 loff_t size, len;
 int ret;

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fb962ef..6d59006 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 int err = -ENOMEM;

 dout("ceph_fs_debugfs_init\n");
+ BUG_ON(!fsc->client->debugfs_dir);
 fsc->debugfs_congestion_kb =
 debugfs_create_file("writeback_congestion_kb",
 0600,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 89971e1..7f1682d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -334,10 +334,10 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
 dout("mdsc put_session %p %d -> %d\n", s,
 atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
 if (atomic_dec_and_test(&s->s_ref)) {
- if (s->s_authorizer)
+ if (s->s_auth.authorizer)
 s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
 s->s_mdsc->fsc->client->monc.auth,
- s->s_authorizer);
+ s->s_auth.authorizer);
 kfree(s);
 }
 }
@@ -394,11 +394,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 s->s_seq = 0;
 mutex_init(&s->s_mutex);

- ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
- s->s_con.private = s;
- s->s_con.ops = &mds_con_ops;
- s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
- s->s_con.peer_name.num = cpu_to_le64(mds);
+ ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);

 spin_lock_init(&s->s_gen_ttl_lock);
 s->s_cap_gen = 0;
@@ -440,7 +436,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 mdsc->sessions[mds] = s;
 atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */

- ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
+ ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
+ ceph_mdsmap_get_addr(mdsc->mdsmap, mds));

 return s;

@@ -2532,6 +2529,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 session->s_seq = 0;

 ceph_con_open(&session->s_con,
+ CEPH_ENTITY_TYPE_MDS, mds,
 ceph_mdsmap_get_addr(mdsc->mdsmap, mds));

 /* replay unsafe requests */
@@ -2636,7 +2634,8 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
 session_state_name(s->s_state));

- if (memcmp(ceph_mdsmap_get_addr(oldmap, i),
+ if (i >= newmap->m_max_mds ||
+ memcmp(ceph_mdsmap_get_addr(oldmap, i),
 ceph_mdsmap_get_addr(newmap, i),
 sizeof(struct ceph_entity_addr))) {
 if (s->s_state == CEPH_MDS_SESSION_OPENING) {
@@ -3395,39 +3394,33 @@ out:
 /*
 * authentication
 */
-static int get_authorizer(struct ceph_connection *con,
- void **buf, int *len, int *proto,
- void **reply_buf, int *reply_len, int force_new)
+
+/*
+ * Note: returned pointer is the address of a structure that's
+ * managed separately. Caller must *not* attempt to free it.
+ */
+static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
+ int *proto, int force_new)
 {
 struct ceph_mds_session *s = con->private;
 struct ceph_mds_client *mdsc = s->s_mdsc;
 struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
- int ret = 0;
-
- if (force_new && s->s_authorizer) {
- ac->ops->destroy_authorizer(ac, s->s_authorizer);
- s->s_authorizer = NULL;
- }
- if (s->s_authorizer == NULL) {
- if (ac->ops->create_authorizer) {
- ret = ac->ops->create_authorizer(
- ac, CEPH_ENTITY_TYPE_MDS,
- &s->s_authorizer,
- &s->s_authorizer_buf,
- &s->s_authorizer_buf_len,
- &s->s_authorizer_reply_buf,
- &s->s_authorizer_reply_buf_len);
- if (ret)
- return ret;
- }
- }
+ struct ceph_auth_handshake *auth = &s->s_auth;

+ if (force_new && auth->authorizer) {
+ if (ac->ops && ac->ops->destroy_authorizer)
+ ac->ops->destroy_authorizer(ac, auth->authorizer);
+ auth->authorizer = NULL;
+ }
+ if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
+ int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
+ auth);
+ if (ret)
+ return ERR_PTR(ret);
+ }
 *proto = ac->protocol;
- *buf = s->s_authorizer_buf;
- *len = s->s_authorizer_buf_len;
- *reply_buf = s->s_authorizer_reply_buf;
- *reply_len = s->s_authorizer_reply_buf_len;
- return 0;
+
+ return auth;
 }


@@ -3437,7 +3430,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
 struct ceph_mds_client *mdsc = s->s_mdsc;
 struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;

- return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len);
+ return ac->ops->verify_authorizer_reply(ac, s->s_auth.authorizer, len);
 }

 static int invalidate_authorizer(struct ceph_connection *con)
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 8c7c04e..dd26846 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -11,6 +11,7 @@
 #include <linux/ceph/types.h>
 #include <linux/ceph/messenger.h>
 #include <linux/ceph/mdsmap.h>
+#include <linux/ceph/auth.h>

 /*
 * Some lock dependencies:
@@ -113,9 +114,7 @@ struct ceph_mds_session {

 struct ceph_connection s_con;

- struct ceph_authorizer *s_authorizer;
- void *s_authorizer_buf, *s_authorizer_reply_buf;
- size_t s_authorizer_buf_len, s_authorizer_reply_buf_len;
+ struct ceph_auth_handshake s_auth;

 /* protected by s_gen_ttl_lock */
 spinlock_t s_gen_ttl_lock;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 3cc1b25..6ccf176 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr)
 }

 static void
+cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
+{
+ memcpy(dst, src, sizeof(*dst));
+ dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS);
+}
+
+static void
 id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
 struct cifs_sid_id **psidid, char *typestr)
 {
@@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
 }
 }

- memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
+ cifs_copy_sid(&(*psidid)->sid, sidptr);
 (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
 (*psidid)->refcount = 0;

@@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
 * any fields of the node after a reference is put .
 */
 if (test_bit(SID_ID_MAPPED, &psidid->state)) {
- memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+ cifs_copy_sid(ssid, &psidid->sid);
 psidid->time = jiffies; /* update ts for accessing */
 goto id_sid_out;
 }
@@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
 if (IS_ERR(sidkey)) {
 rc = -EINVAL;
 cFYI(1, "%s: Can't map and id to a SID", __func__);
+ } else if (sidkey->datalen < sizeof(struct cifs_sid)) {
+ rc = -EIO;
+ cFYI(1, "%s: Downcall contained malformed key "
+ "(datalen=%hu)", __func__, sidkey->datalen);
 } else {
 lsid = (struct cifs_sid *)sidkey->payload.data;
- memcpy(&psidid->sid, lsid,
- sidkey->datalen < sizeof(struct cifs_sid) ?
- sidkey->datalen : sizeof(struct cifs_sid));
- memcpy(ssid, &psidid->sid,
- sidkey->datalen < sizeof(struct cifs_sid) ?
- sidkey->datalen : sizeof(struct cifs_sid));
+ cifs_copy_sid(&psidid->sid, lsid);
+ cifs_copy_sid(ssid, &psidid->sid);
 set_bit(SID_ID_MAPPED, &psidid->state);
 key_put(sidkey);
 kfree(psidid->sidstr);
@@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
 return rc;
 }
 if (test_bit(SID_ID_MAPPED, &psidid->state))
- memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+ cifs_copy_sid(ssid, &psidid->sid);
 else
 rc = -EINVAL;
 }
@@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
 static void copy_sec_desc(const struct cifs_ntsd *pntsd,
 struct cifs_ntsd *pnntsd, __u32 sidsoffset)
 {
- int i;
-
 struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
 struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;

@@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
 le32_to_cpu(pntsd->osidoffset));
 nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
-
- nowner_sid_ptr->revision = owner_sid_ptr->revision;
- nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth;
- for (i = 0; i < 6; i++)
- nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i];
- for (i = 0; i < 5; i++)
- nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i];
+ cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);

 /* copy group sid */
 group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
 le32_to_cpu(pntsd->gsidoffset));
 ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
 sizeof(struct cifs_sid));
-
- ngroup_sid_ptr->revision = group_sid_ptr->revision;
- ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth;
- for (i = 0; i < 6; i++)
- ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
- for (i = 0; i < 5; i++)
- ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
+ cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);

 return;
 }
@@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
 kfree(nowner_sid_ptr);
 return rc;
 }
- memcpy(owner_sid_ptr, nowner_sid_ptr,
- sizeof(struct cifs_sid));
+ cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
 kfree(nowner_sid_ptr);
 *aclflag = CIFS_ACL_OWNER;
 }
@@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
 kfree(ngroup_sid_ptr);
 return rc;
 }
- memcpy(group_sid_ptr, ngroup_sid_ptr,
- sizeof(struct cifs_sid));
+ cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
 kfree(ngroup_sid_ptr);
 *aclflag = CIFS_ACL_GROUP;
 }
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 0f04d2e..240832e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -280,6 +280,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
 char *fnek_src;
 char *cipher_key_bytes_src;
 char *fn_cipher_key_bytes_src;
+ u8 cipher_code;

 *check_ruid = 0;

@@ -421,6 +422,18 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
 && !fn_cipher_key_bytes_set)
 mount_crypt_stat->global_default_fn_cipher_key_bytes =
 mount_crypt_stat->global_default_cipher_key_size;
+
+ cipher_code = ecryptfs_code_for_cipher_string(
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_key_size);
+ if (!cipher_code) {
+ ecryptfs_printk(KERN_ERR,
+ "eCryptfs doesn't support cipher: %s",
+ mount_crypt_stat->global_default_cipher_name);
+ rc = -EINVAL;
+ goto out;
+ }
+
 mutex_lock(&key_tfm_list_mutex);
 if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name,
 NULL)) {
@@ -506,7 +519,6 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 goto out;
 }

- s->s_flags = flags;
 rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
 if (rc)
 goto out1;
@@ -542,6 +554,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 }

 ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
+
+ /**
+ * Set the POSIX ACL flag based on whether they're enabled in the lower
+ * mount. Force a read-only eCryptfs mount if the lower mount is ro.
+ * Allow a ro eCryptfs mount even when the lower mount is rw.
+ */
+ s->s_flags = flags & ~MS_POSIXACL;
+ s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+
 s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
 s->s_blocksize = path.dentry->d_sb->s_blocksize;
 s->s_magic = ECRYPTFS_SUPER_MAGIC;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5e80180..8955e36 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -307,8 +307,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
 dprintk("%s ERROR: %d Reset session\n", __func__,
 errorcode);
 nfs4_schedule_session_recovery(clp->cl_session);
- exception->retry = 1;
- break;
+ goto wait_on_recovery;
 #endif /* defined(CONFIG_NFS_V4_1) */
 case -NFS4ERR_FILE_OPEN:
 if (exception->timeout > HZ) {
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f35794b..a506360 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 if ((old->path.mnt == new->path.mnt) &&
 (old->path.dentry == new->path.dentry))
 return true;
+ break;
 case (FSNOTIFY_EVENT_NONE):
 return true;
 default:
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f99c1b4..c11db51 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1788,8 +1788,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,

 BUG_ON(!th->t_trans_id);

- dquot_initialize(inode);
+ reiserfs_write_unlock(inode->i_sb);
 err = dquot_alloc_inode(inode);
+ reiserfs_write_lock(inode->i_sb);
 if (err)
 goto out_end_trans;
 if (!dir->i_nlink) {
@@ -1985,8 +1986,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,

 out_end_trans:
 journal_end(th, th->t_super, th->t_blocks_allocated);
+ reiserfs_write_unlock(inode->i_sb);
 /* Drop can be outside and it needs more credits so it's better to have it outside */
 dquot_drop(inode);
+ reiserfs_write_lock(inode->i_sb);
 inode->i_flags |= S_NOQUOTA;
 make_bad_inode(inode);

@@ -3109,10 +3112,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 /* must be turned off for recursive notify_change calls */
 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);

- depth = reiserfs_write_lock_once(inode->i_sb);
 if (is_quota_modification(inode, attr))
 dquot_initialize(inode);
-
+ depth = reiserfs_write_lock_once(inode->i_sb);
 if (attr->ia_valid & ATTR_SIZE) {
 /* version 2 items will be caught by the s_maxbytes check
 ** done for us in vmtruncate
@@ -3176,7 +3178,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 error = journal_begin(&th, inode->i_sb, jbegin_count);
 if (error)
 goto out;
+ reiserfs_write_unlock_once(inode->i_sb, depth);
 error = dquot_transfer(inode, attr);
+ depth = reiserfs_write_lock_once(inode->i_sb);
 if (error) {
 journal_end(&th, inode->i_sb, jbegin_count);
 goto out;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index f8afa4b..2f40a4c 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
 key2type(&(key->on_disk_key)));
 #endif

+ reiserfs_write_unlock(inode->i_sb);
 retval = dquot_alloc_space_nodirty(inode, pasted_size);
+ reiserfs_write_lock(inode->i_sb);
 if (retval) {
 pathrelse(search_path);
 return retval;
@@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
 "reiserquota insert_item(): allocating %u id=%u type=%c",
 quota_bytes, inode->i_uid, head2type(ih));
 #endif
+ reiserfs_write_unlock(inode->i_sb);
 /* We can't dirty inode here. It would be immediately written but
 * appropriate stat item isn't inserted yet... */
 retval = dquot_alloc_space_nodirty(inode, quota_bytes);
+ reiserfs_write_lock(inode->i_sb);
 if (retval) {
 pathrelse(path);
 return retval;
1174     diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
1175     index 8b7616e..8169be9 100644
1176     --- a/fs/reiserfs/super.c
1177     +++ b/fs/reiserfs/super.c
1178     @@ -256,7 +256,9 @@ static int finish_unfinished(struct super_block *s)
1179     retval = remove_save_link_only(s, &save_link_key, 0);
1180     continue;
1181     }
1182     + reiserfs_write_unlock(s);
1183     dquot_initialize(inode);
1184     + reiserfs_write_lock(s);
1185    
1186     if (truncate && S_ISDIR(inode->i_mode)) {
1187     /* We got a truncate request for a dir which is impossible.
1188     @@ -1292,7 +1294,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1189     kfree(qf_names[i]);
1190     #endif
1191     err = -EINVAL;
1192     - goto out_err;
1193     + goto out_unlock;
1194     }
1195     #ifdef CONFIG_QUOTA
1196     handle_quota_files(s, qf_names, &qfmt);
1197     @@ -1336,7 +1338,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1198     if (blocks) {
1199     err = reiserfs_resize(s, blocks);
1200     if (err != 0)
1201     - goto out_err;
1202     + goto out_unlock;
1203     }
1204    
1205     if (*mount_flags & MS_RDONLY) {
1206     @@ -1346,9 +1348,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1207     /* it is read-only already */
1208     goto out_ok;
1209    
1210     + /*
1211     + * Drop write lock. Quota will retake it when needed and lock
1212     + * ordering requires calling dquot_suspend() without it.
1213     + */
1214     + reiserfs_write_unlock(s);
1215     err = dquot_suspend(s, -1);
1216     if (err < 0)
1217     goto out_err;
1218     + reiserfs_write_lock(s);
1219    
1220     /* try to remount file system with read-only permissions */
1221     if (sb_umount_state(rs) == REISERFS_VALID_FS
1222     @@ -1358,7 +1366,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1223    
1224     err = journal_begin(&th, s, 10);
1225     if (err)
1226     - goto out_err;
1227     + goto out_unlock;
1228    
1229     /* Mounting a rw partition read-only. */
1230     reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1231     @@ -1373,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1232    
1233     if (reiserfs_is_journal_aborted(journal)) {
1234     err = journal->j_errno;
1235     - goto out_err;
1236     + goto out_unlock;
1237     }
1238    
1239     handle_data_mode(s, mount_options);
1240     @@ -1382,7 +1390,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1241     s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */
1242     err = journal_begin(&th, s, 10);
1243     if (err)
1244     - goto out_err;
1245     + goto out_unlock;
1246    
1247     /* Mount a partition which is read-only, read-write */
1248     reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1249     @@ -1399,11 +1407,17 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1250     SB_JOURNAL(s)->j_must_wait = 1;
1251     err = journal_end(&th, s, 10);
1252     if (err)
1253     - goto out_err;
1254     + goto out_unlock;
1255     s->s_dirt = 0;
1256    
1257     if (!(*mount_flags & MS_RDONLY)) {
1258     + /*
1259     + * Drop write lock. Quota will retake it when needed and lock
1260     + * ordering requires calling dquot_resume() without it.
1261     + */
1262     + reiserfs_write_unlock(s);
1263     dquot_resume(s, -1);
1264     + reiserfs_write_lock(s);
1265     finish_unfinished(s);
1266     reiserfs_xattr_init(s, *mount_flags);
1267     }
1268     @@ -1413,9 +1427,10 @@ out_ok:
1269     reiserfs_write_unlock(s);
1270     return 0;
1271    
1272     +out_unlock:
1273     + reiserfs_write_unlock(s);
1274     out_err:
1275     kfree(new_opts);
1276     - reiserfs_write_unlock(s);
1277     return err;
1278     }
1279    
1280     @@ -2049,13 +2064,15 @@ static int reiserfs_write_dquot(struct dquot *dquot)
1281     REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1282     if (ret)
1283     goto out;
1284     + reiserfs_write_unlock(dquot->dq_sb);
1285     ret = dquot_commit(dquot);
1286     + reiserfs_write_lock(dquot->dq_sb);
1287     err =
1288     journal_end(&th, dquot->dq_sb,
1289     REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1290     if (!ret && err)
1291     ret = err;
1292     - out:
1293     +out:
1294     reiserfs_write_unlock(dquot->dq_sb);
1295     return ret;
1296     }
1297     @@ -2071,13 +2088,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
1298     REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1299     if (ret)
1300     goto out;
1301     + reiserfs_write_unlock(dquot->dq_sb);
1302     ret = dquot_acquire(dquot);
1303     + reiserfs_write_lock(dquot->dq_sb);
1304     err =
1305     journal_end(&th, dquot->dq_sb,
1306     REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1307     if (!ret && err)
1308     ret = err;
1309     - out:
1310     +out:
1311     reiserfs_write_unlock(dquot->dq_sb);
1312     return ret;
1313     }
1314     @@ -2091,19 +2110,21 @@ static int reiserfs_release_dquot(struct dquot *dquot)
1315     ret =
1316     journal_begin(&th, dquot->dq_sb,
1317     REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
1318     + reiserfs_write_unlock(dquot->dq_sb);
1319     if (ret) {
1320     /* Release dquot anyway to avoid endless cycle in dqput() */
1321     dquot_release(dquot);
1322     goto out;
1323     }
1324     ret = dquot_release(dquot);
1325     + reiserfs_write_lock(dquot->dq_sb);
1326     err =
1327     journal_end(&th, dquot->dq_sb,
1328     REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
1329     if (!ret && err)
1330     ret = err;
1331     - out:
1332     reiserfs_write_unlock(dquot->dq_sb);
1333     +out:
1334     return ret;
1335     }
1336    
1337     @@ -2128,11 +2149,13 @@ static int reiserfs_write_info(struct super_block *sb, int type)
1338     ret = journal_begin(&th, sb, 2);
1339     if (ret)
1340     goto out;
1341     + reiserfs_write_unlock(sb);
1342     ret = dquot_commit_info(sb, type);
1343     + reiserfs_write_lock(sb);
1344     err = journal_end(&th, sb, 2);
1345     if (!ret && err)
1346     ret = err;
1347     - out:
1348     +out:
1349     reiserfs_write_unlock(sb);
1350     return ret;
1351     }
1352     @@ -2157,8 +2180,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
1353     struct reiserfs_transaction_handle th;
1354     int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
1355    
1356     - if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
1357     - return -EINVAL;
1358     + reiserfs_write_lock(sb);
1359     + if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) {
1360     + err = -EINVAL;
1361     + goto out;
1362     + }
1363    
1364     /* Quotafile not on the same filesystem? */
1365     if (path->dentry->d_sb != sb) {
1366     @@ -2200,8 +2226,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
1367     if (err)
1368     goto out;
1369     }
1370     - err = dquot_quota_on(sb, type, format_id, path);
1371     + reiserfs_write_unlock(sb);
1372     + return dquot_quota_on(sb, type, format_id, path);
1373     out:
1374     + reiserfs_write_unlock(sb);
1375     return err;
1376     }
1377    
1378     @@ -2275,7 +2303,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
1379     tocopy = sb->s_blocksize - offset < towrite ?
1380     sb->s_blocksize - offset : towrite;
1381     tmp_bh.b_state = 0;
1382     + reiserfs_write_lock(sb);
1383     err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
1384     + reiserfs_write_unlock(sb);
1385     if (err)
1386     goto out;
1387     if (offset || tocopy != sb->s_blocksize)
1388     @@ -2291,10 +2321,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
1389     flush_dcache_page(bh->b_page);
1390     set_buffer_uptodate(bh);
1391     unlock_buffer(bh);
1392     + reiserfs_write_lock(sb);
1393     reiserfs_prepare_for_journal(sb, bh, 1);
1394     journal_mark_dirty(current->journal_info, sb, bh);
1395     if (!journal_quota)
1396     reiserfs_add_ordered_list(inode, bh);
1397     + reiserfs_write_unlock(sb);
1398     brelse(bh);
1399     offset = 0;
1400     towrite -= tocopy;
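
The reiserfs hunks above all apply one locking pattern: the per-superblock write lock ranks below the quota code's own locks, so it is dropped around every dquot_*() call and retaken before the journal transaction is closed. A condensed sketch of the pattern, using the function names from the hunks (simplified, not a drop-in replacement):

    static int reiserfs_write_dquot_sketch(struct dquot *dquot)
    {
        struct reiserfs_transaction_handle th;
        int ret, err;

        reiserfs_write_lock(dquot->dq_sb);
        ret = journal_begin(&th, dquot->dq_sb,
                            REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
        if (ret)
            goto out;

        /* quota code takes its own locks; drop ours to keep ordering */
        reiserfs_write_unlock(dquot->dq_sb);
        ret = dquot_commit(dquot);
        reiserfs_write_lock(dquot->dq_sb);  /* retake for journal_end() */

        err = journal_end(&th, dquot->dq_sb,
                          REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
        if (!ret && err)
            ret = err;
    out:
        reiserfs_write_unlock(dquot->dq_sb);
        return ret;
    }
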
1401     diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
1402     index 2559d17..5dc48ca 100644
1403     --- a/fs/ubifs/find.c
1404     +++ b/fs/ubifs/find.c
1405     @@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
1406     if (!lprops) {
1407     lprops = ubifs_fast_find_freeable(c);
1408     if (!lprops) {
1409     - ubifs_assert(c->freeable_cnt == 0);
1410     - if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
1411     + /*
1412     + * The first condition means the following: go scan the
1413     + * LPT if there are uncategorized lprops, which means
1414     + * there may be freeable LEBs there (UBIFS does not
1415     + * store the information about freeable LEBs in the
1416     + * master node).
1417     + */
1418     + if (c->in_a_category_cnt != c->main_lebs ||
1419     + c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
1420     + ubifs_assert(c->freeable_cnt == 0);
1421     lprops = scan_for_leb_for_idx(c);
1422     if (IS_ERR(lprops)) {
1423     err = PTR_ERR(lprops);
1424     diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
1425     index f8a181e..ea9d491 100644
1426     --- a/fs/ubifs/lprops.c
1427     +++ b/fs/ubifs/lprops.c
1428     @@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
1429     default:
1430     ubifs_assert(0);
1431     }
1432     +
1433     lprops->flags &= ~LPROPS_CAT_MASK;
1434     lprops->flags |= cat;
1435     + c->in_a_category_cnt += 1;
1436     + ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
1437     }
1438    
1439     /**
1440     @@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
1441     default:
1442     ubifs_assert(0);
1443     }
1444     +
1445     + c->in_a_category_cnt -= 1;
1446     + ubifs_assert(c->in_a_category_cnt >= 0);
1447     }
1448    
1449     /**
1450     diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
1451     index 93d59ac..4971cb2 100644
1452     --- a/fs/ubifs/ubifs.h
1453     +++ b/fs/ubifs/ubifs.h
1454     @@ -1184,6 +1184,8 @@ struct ubifs_debug_info;
1455     * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
1456     * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
1457     * @freeable_cnt: number of freeable LEBs in @freeable_list
1458     + * @in_a_category_cnt: count of lprops which are in a certain category, which
1459     + * basically means that they were loaded from the flash
1460     *
1461     * @ltab_lnum: LEB number of LPT's own lprops table
1462     * @ltab_offs: offset of LPT's own lprops table
1463     @@ -1413,6 +1415,7 @@ struct ubifs_info {
1464     struct list_head freeable_list;
1465     struct list_head frdi_idx_list;
1466     int freeable_cnt;
1467     + int in_a_category_cnt;
1468    
1469     int ltab_lnum;
1470     int ltab_offs;
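
Taken together, the three UBIFS hunks keep one invariant: c->in_a_category_cnt counts exactly the lprops currently sitting in a category list or heap, so once it equals c->main_lebs every LEB's properties are already known and rescanning the LPT cannot turn up new freeable LEBs. A condensed sketch of the producer and the consumer sides (names from the hunks above, illustrative only):

    /* producers: ubifs_add_to_cat()/ubifs_remove_from_cat() keep the count */
    c->in_a_category_cnt += 1;                  /* on categorize */
    ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
    c->in_a_category_cnt -= 1;                  /* on removal    */
    ubifs_assert(c->in_a_category_cnt >= 0);

    /* consumer: scan the LPT only while it may hold uncategorized lprops */
    if (c->in_a_category_cnt != c->main_lebs ||
        c->lst.empty_lebs - c->lst.taken_empty_lebs > 0)
        lprops = scan_for_leb_for_idx(c);
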
1471     diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
1472     index 6819b51..bb76128 100644
1473     --- a/fs/xfs/xfs_buf.c
1474     +++ b/fs/xfs/xfs_buf.c
1475     @@ -1165,9 +1165,14 @@ xfs_buf_bio_end_io(
1476     {
1477     xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1478    
1479     - xfs_buf_ioerror(bp, -error);
1480     + /*
1481     + * don't overwrite existing errors - otherwise we can lose errors on
1482     + * buffers that require multiple bios to complete.
1483     + */
1484     + if (!bp->b_error)
1485     + xfs_buf_ioerror(bp, -error);
1486    
1487     - if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1488     + if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1489     invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1490    
1491     _xfs_buf_ioend(bp, 1);
1492     @@ -1243,6 +1248,11 @@ next_chunk:
1493     if (size)
1494     goto next_chunk;
1495     } else {
1496     + /*
1497     + * This is guaranteed not to be the last io reference count
1498     + * because the caller (xfs_buf_iorequest) holds a count itself.
1499     + */
1500     + atomic_dec(&bp->b_io_remaining);
1501     xfs_buf_ioerror(bp, EIO);
1502     bio_put(bio);
1503     }
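
The two xfs_buf.c hunks implement a "first error wins" rule for buffers that need several bios to complete: a later bio finishing with 0 must not clear an error recorded by an earlier one, and the vmap invalidation must trust the accumulated bp->b_error rather than the current bio's status. A minimal sketch (function name hypothetical, fields as in the hunk):

    static void buf_bio_end_io_sketch(struct xfs_buf *bp, int error)
    {
        if (!bp->b_error)               /* keep the earliest failure */
            xfs_buf_ioerror(bp, -error);

        if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
            invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

        _xfs_buf_ioend(bp, 1);
    }
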
1504     diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
1505     index aa13392..d4080f3 100644
1506     --- a/include/linux/ceph/auth.h
1507     +++ b/include/linux/ceph/auth.h
1508     @@ -14,6 +14,14 @@
1509     struct ceph_auth_client;
1510     struct ceph_authorizer;
1511    
1512     +struct ceph_auth_handshake {
1513     + struct ceph_authorizer *authorizer;
1514     + void *authorizer_buf;
1515     + size_t authorizer_buf_len;
1516     + void *authorizer_reply_buf;
1517     + size_t authorizer_reply_buf_len;
1518     +};
1519     +
1520     struct ceph_auth_client_ops {
1521     const char *name;
1522    
1523     @@ -43,9 +51,7 @@ struct ceph_auth_client_ops {
1524     * the response to authenticate the service.
1525     */
1526     int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
1527     - struct ceph_authorizer **a,
1528     - void **buf, size_t *len,
1529     - void **reply_buf, size_t *reply_len);
1530     + struct ceph_auth_handshake *auth);
1531     int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
1532     struct ceph_authorizer *a, size_t len);
1533     void (*destroy_authorizer)(struct ceph_auth_client *ac,
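
The new struct ceph_auth_handshake bundles the five output parameters that create_authorizer() used to fill through separate pointers, so the authorizer and both buffers travel as one object. A hypothetical caller under the new signature (send_authorizer() is a placeholder, not a real function):

    struct ceph_auth_handshake auth;
    int ret;

    ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD, &auth);
    if (ret == 0)
        send_authorizer(auth.authorizer_buf, auth.authorizer_buf_len);
    /* auth.authorizer_reply_buf{,_len} receive the peer's reply later */
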
1534     diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
1535     index e71d683..98ec36a 100644
1536     --- a/include/linux/ceph/libceph.h
1537     +++ b/include/linux/ceph/libceph.h
1538     @@ -132,7 +132,7 @@ struct ceph_client {
1539     u32 supported_features;
1540     u32 required_features;
1541    
1542     - struct ceph_messenger *msgr; /* messenger instance */
1543     + struct ceph_messenger msgr; /* messenger instance */
1544     struct ceph_mon_client monc;
1545     struct ceph_osd_client osdc;
1546    
1547     diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
1548     index 3bff047..189ae06 100644
1549     --- a/include/linux/ceph/messenger.h
1550     +++ b/include/linux/ceph/messenger.h
1551     @@ -25,15 +25,12 @@ struct ceph_connection_operations {
1552     void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
1553    
1554     /* authorize an outgoing connection */
1555     - int (*get_authorizer) (struct ceph_connection *con,
1556     - void **buf, int *len, int *proto,
1557     - void **reply_buf, int *reply_len, int force_new);
1558     + struct ceph_auth_handshake *(*get_authorizer) (
1559     + struct ceph_connection *con,
1560     + int *proto, int force_new);
1561     int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
1562     int (*invalidate_authorizer)(struct ceph_connection *con);
1563    
1564     - /* protocol version mismatch */
1565     - void (*bad_proto) (struct ceph_connection *con);
1566     -
1567     /* there was some error on the socket (disconnect, whatever) */
1568     void (*fault) (struct ceph_connection *con);
1569    
1570     @@ -53,6 +50,7 @@ struct ceph_messenger {
1571     struct ceph_entity_inst inst; /* my name+address */
1572     struct ceph_entity_addr my_enc_addr;
1573    
1574     + atomic_t stopping;
1575     bool nocrc;
1576    
1577     /*
1578     @@ -80,7 +78,10 @@ struct ceph_msg {
1579     unsigned nr_pages; /* size of page array */
1580     unsigned page_alignment; /* io offset in first page */
1581     struct ceph_pagelist *pagelist; /* instead of pages */
1582     +
1583     + struct ceph_connection *con;
1584     struct list_head list_head;
1585     +
1586     struct kref kref;
1587     struct bio *bio; /* instead of pages/pagelist */
1588     struct bio *bio_iter; /* bio iterator */
1589     @@ -106,23 +107,6 @@ struct ceph_msg_pos {
1590     #define MAX_DELAY_INTERVAL (5 * 60 * HZ)
1591    
1592     /*
1593     - * ceph_connection state bit flags
1594     - */
1595     -#define LOSSYTX 0 /* we can close channel or drop messages on errors */
1596     -#define CONNECTING 1
1597     -#define NEGOTIATING 2
1598     -#define KEEPALIVE_PENDING 3
1599     -#define WRITE_PENDING 4 /* we have data ready to send */
1600     -#define STANDBY 8 /* no outgoing messages, socket closed. we keep
1601     - * the ceph_connection around to maintain shared
1602     - * state with the peer. */
1603     -#define CLOSED 10 /* we've closed the connection */
1604     -#define SOCK_CLOSED 11 /* socket state changed to closed */
1605     -#define OPENING 13 /* open connection w/ (possibly new) peer */
1606     -#define DEAD 14 /* dead, about to kfree */
1607     -#define BACKOFF 15
1608     -
1609     -/*
1610     * A single connection with another host.
1611     *
1612     * We maintain a queue of outgoing messages, and some session state to
1613     @@ -131,18 +115,22 @@ struct ceph_msg_pos {
1614     */
1615     struct ceph_connection {
1616     void *private;
1617     - atomic_t nref;
1618    
1619     const struct ceph_connection_operations *ops;
1620    
1621     struct ceph_messenger *msgr;
1622     +
1623     + atomic_t sock_state;
1624     struct socket *sock;
1625     - unsigned long state; /* connection state (see flags above) */
1626     + struct ceph_entity_addr peer_addr; /* peer address */
1627     + struct ceph_entity_addr peer_addr_for_me;
1628     +
1629     + unsigned long flags;
1630     + unsigned long state;
1631     const char *error_msg; /* error message, if any */
1632    
1633     - struct ceph_entity_addr peer_addr; /* peer address */
1634     struct ceph_entity_name peer_name; /* peer name */
1635     - struct ceph_entity_addr peer_addr_for_me;
1636     +
1637     unsigned peer_features;
1638     u32 connect_seq; /* identify the most recent connection
1639     attempt for this connection, client */
1640     @@ -163,16 +151,8 @@ struct ceph_connection {
1641    
1642     /* connection negotiation temps */
1643     char in_banner[CEPH_BANNER_MAX_LEN];
1644     - union {
1645     - struct { /* outgoing connection */
1646     - struct ceph_msg_connect out_connect;
1647     - struct ceph_msg_connect_reply in_reply;
1648     - };
1649     - struct { /* incoming */
1650     - struct ceph_msg_connect in_connect;
1651     - struct ceph_msg_connect_reply out_reply;
1652     - };
1653     - };
1654     + struct ceph_msg_connect out_connect;
1655     + struct ceph_msg_connect_reply in_reply;
1656     struct ceph_entity_addr actual_peer_addr;
1657    
1658     /* message out temps */
1659     @@ -215,24 +195,26 @@ extern int ceph_msgr_init(void);
1660     extern void ceph_msgr_exit(void);
1661     extern void ceph_msgr_flush(void);
1662    
1663     -extern struct ceph_messenger *ceph_messenger_create(
1664     - struct ceph_entity_addr *myaddr,
1665     - u32 features, u32 required);
1666     -extern void ceph_messenger_destroy(struct ceph_messenger *);
1667     +extern void ceph_messenger_init(struct ceph_messenger *msgr,
1668     + struct ceph_entity_addr *myaddr,
1669     + u32 supported_features,
1670     + u32 required_features,
1671     + bool nocrc);
1672    
1673     -extern void ceph_con_init(struct ceph_messenger *msgr,
1674     - struct ceph_connection *con);
1675     +extern void ceph_con_init(struct ceph_connection *con, void *private,
1676     + const struct ceph_connection_operations *ops,
1677     + struct ceph_messenger *msgr);
1678     extern void ceph_con_open(struct ceph_connection *con,
1679     + __u8 entity_type, __u64 entity_num,
1680     struct ceph_entity_addr *addr);
1681     extern bool ceph_con_opened(struct ceph_connection *con);
1682     extern void ceph_con_close(struct ceph_connection *con);
1683     extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
1684     -extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
1685     -extern void ceph_con_revoke_message(struct ceph_connection *con,
1686     - struct ceph_msg *msg);
1687     +
1688     +extern void ceph_msg_revoke(struct ceph_msg *msg);
1689     +extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
1690     +
1691     extern void ceph_con_keepalive(struct ceph_connection *con);
1692     -extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
1693     -extern void ceph_con_put(struct ceph_connection *con);
1694    
1695     extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
1696     bool can_fail);
1697     diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
1698     index 545f859..2113e38 100644
1699     --- a/include/linux/ceph/mon_client.h
1700     +++ b/include/linux/ceph/mon_client.h
1701     @@ -70,7 +70,7 @@ struct ceph_mon_client {
1702     bool hunting;
1703     int cur_mon; /* last monitor i contacted */
1704     unsigned long sub_sent, sub_renew_after;
1705     - struct ceph_connection *con;
1706     + struct ceph_connection con;
1707     bool have_fsid;
1708    
1709     /* pending generic requests */
1710     diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
1711     index a362605..09fa96b 100644
1712     --- a/include/linux/ceph/msgpool.h
1713     +++ b/include/linux/ceph/msgpool.h
1714     @@ -11,10 +11,11 @@
1715     struct ceph_msgpool {
1716     const char *name;
1717     mempool_t *pool;
1718     + int type; /* preallocated message type */
1719     int front_len; /* preallocated payload size */
1720     };
1721    
1722     -extern int ceph_msgpool_init(struct ceph_msgpool *pool,
1723     +extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
1724     int front_len, int size, bool blocking,
1725     const char *name);
1726     extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
1727     diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
1728     index 7c05ac2..d9b880e 100644
1729     --- a/include/linux/ceph/osd_client.h
1730     +++ b/include/linux/ceph/osd_client.h
1731     @@ -6,9 +6,10 @@
1732     #include <linux/mempool.h>
1733     #include <linux/rbtree.h>
1734    
1735     -#include "types.h"
1736     -#include "osdmap.h"
1737     -#include "messenger.h"
1738     +#include <linux/ceph/types.h>
1739     +#include <linux/ceph/osdmap.h>
1740     +#include <linux/ceph/messenger.h>
1741     +#include <linux/ceph/auth.h>
1742    
1743     /*
1744     * Maximum object name size
1745     @@ -40,9 +41,7 @@ struct ceph_osd {
1746     struct list_head o_requests;
1747     struct list_head o_linger_requests;
1748     struct list_head o_osd_lru;
1749     - struct ceph_authorizer *o_authorizer;
1750     - void *o_authorizer_buf, *o_authorizer_reply_buf;
1751     - size_t o_authorizer_buf_len, o_authorizer_reply_buf_len;
1752     + struct ceph_auth_handshake o_auth;
1753     unsigned long lru_ttl;
1754     int o_marked_for_keepalive;
1755     struct list_head o_keepalive_item;
1756     @@ -208,7 +207,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
1757     extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
1758     struct ceph_msg *msg);
1759    
1760     -extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
1761     +extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
1762     struct ceph_file_layout *layout,
1763     u64 snapid,
1764     u64 off, u64 *plen, u64 *bno,
1765     diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
1766     index ba4c205..11db454 100644
1767     --- a/include/linux/ceph/osdmap.h
1768     +++ b/include/linux/ceph/osdmap.h
1769     @@ -111,9 +111,9 @@ extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
1770     extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
1771    
1772     /* calculate mapping of a file extent to an object */
1773     -extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
1774     - u64 off, u64 *plen,
1775     - u64 *bno, u64 *oxoff, u64 *oxlen);
1776     +extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
1777     + u64 off, u64 *plen,
1778     + u64 *bno, u64 *oxoff, u64 *oxlen);
1779    
1780     /* calculate mapping of object to a placement group */
1781     extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
1782     diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
1783     index 97e435b..e7a8c90 100644
1784     --- a/include/linux/crush/crush.h
1785     +++ b/include/linux/crush/crush.h
1786     @@ -168,7 +168,7 @@ struct crush_map {
1787    
1788    
1789     /* crush.c */
1790     -extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos);
1791     +extern int crush_get_bucket_item_weight(const struct crush_bucket *b, int pos);
1792     extern void crush_calc_parents(struct crush_map *map);
1793     extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
1794     extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
1795     @@ -177,4 +177,9 @@ extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
1796     extern void crush_destroy_bucket(struct crush_bucket *b);
1797     extern void crush_destroy(struct crush_map *map);
1798    
1799     +static inline int crush_calc_tree_node(int i)
1800     +{
1801     + return ((i+1) << 1)-1;
1802     +}
1803     +
1804     #endif
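
crush_calc_tree_node(i) is just 2*i + 1 (for example, crush_calc_tree_node(2) == ((2+1) << 1) - 1 == 5): item slot i of a tree bucket lives at the odd-numbered leaf of the implicit binary tree, so positions 0, 1, 2 index node_weights[1], node_weights[3], node_weights[5]. That is why the crush.c hunk further down can replace the old "if (p & 1)" special case with a direct lookup:

    return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
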
1805     diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
1806     index c46b99c..9322ab8 100644
1807     --- a/include/linux/crush/mapper.h
1808     +++ b/include/linux/crush/mapper.h
1809     @@ -10,11 +10,11 @@
1810    
1811     #include "crush.h"
1812    
1813     -extern int crush_find_rule(struct crush_map *map, int pool, int type, int size);
1814     -extern int crush_do_rule(struct crush_map *map,
1815     +extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
1816     +extern int crush_do_rule(const struct crush_map *map,
1817     int ruleno,
1818     int x, int *result, int result_max,
1819     int forcefeed, /* -1 for none */
1820     - __u32 *weights);
1821     + const __u32 *weights);
1822    
1823     #endif
1824     diff --git a/kernel/module.c b/kernel/module.c
1825     index 61ea75e..8597217 100644
1826     --- a/kernel/module.c
1827     +++ b/kernel/module.c
1828     @@ -2273,12 +2273,17 @@ static void layout_symtab(struct module *mod, struct load_info *info)
1829     src = (void *)info->hdr + symsect->sh_offset;
1830     nsrc = symsect->sh_size / sizeof(*src);
1831    
1832     + /* strtab always starts with a nul, so offset 0 is the empty string. */
1833     + strtab_size = 1;
1834     +
1835     /* Compute total space required for the core symbols' strtab. */
1836     - for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src)
1837     - if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
1838     - strtab_size += strlen(&info->strtab[src->st_name]) + 1;
1839     + for (ndst = i = 0; i < nsrc; i++) {
1840     + if (i == 0 ||
1841     + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
1842     + strtab_size += strlen(&info->strtab[src[i].st_name])+1;
1843     ndst++;
1844     }
1845     + }
1846    
1847     /* Append room for core symbols at end of core part. */
1848     info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
1849     @@ -2312,15 +2317,15 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
1850     mod->core_symtab = dst = mod->module_core + info->symoffs;
1851     mod->core_strtab = s = mod->module_core + info->stroffs;
1852     src = mod->symtab;
1853     - *dst = *src;
1854     *s++ = 0;
1855     - for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
1856     - if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
1857     - continue;
1858     -
1859     - dst[ndst] = *src;
1860     - dst[ndst++].st_name = s - mod->core_strtab;
1861     - s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1;
1862     + for (ndst = i = 0; i < mod->num_symtab; i++) {
1863     + if (i == 0 ||
1864     + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
1865     + dst[ndst] = src[i];
1866     + dst[ndst++].st_name = s - mod->core_strtab;
1867     + s += strlcpy(s, &mod->strtab[src[i].st_name],
1868     + KSYM_NAME_LEN) + 1;
1869     + }
1870     }
1871     mod->core_num_syms = ndst;
1872     }
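
Both module.c hunks rely on the same two ELF conventions: a string table starts with a NUL byte, so st_name == 0 always resolves to the empty string, and symbol index 0 is the mandatory null symbol, which the rewritten loops now copy via the "i == 0 ||" clause instead of special-casing it outside the loop. A condensed sketch of the sizing pass (names from the hunk):

    strtab_size = 1;                    /* the leading NUL at offset 0 */
    for (ndst = i = 0; i < nsrc; i++) {
        if (i == 0 ||
            is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum)) {
            strtab_size += strlen(&info->strtab[src[i].st_name]) + 1;
            ndst++;
        }
    }
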
1873     diff --git a/mm/memcontrol.c b/mm/memcontrol.c
1874     index 7685d4a..81c275b 100644
1875     --- a/mm/memcontrol.c
1876     +++ b/mm/memcontrol.c
1877     @@ -1489,17 +1489,26 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
1878     u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
1879     {
1880     u64 limit;
1881     - u64 memsw;
1882    
1883     limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
1884     - limit += total_swap_pages << PAGE_SHIFT;
1885    
1886     - memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
1887     /*
1888     - * If memsw is finite and limits the amount of swap space available
1889     - * to this memcg, return that limit.
1890     + * Do not consider swap space if we cannot swap due to swappiness
1891     */
1892     - return min(limit, memsw);
1893     + if (mem_cgroup_swappiness(memcg)) {
1894     + u64 memsw;
1895     +
1896     + limit += total_swap_pages << PAGE_SHIFT;
1897     + memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
1898     +
1899     + /*
1900     + * If memsw is finite and limits the amount of swap space
1901     + * available to this memcg, return that limit.
1902     + */
1903     + limit = min(limit, memsw);
1904     + }
1905     +
1906     + return limit;
1907     }
1908    
1909     static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
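
The memcontrol.c hunk makes the OOM limit estimate honor swappiness: swap only raises the effective limit when the memcg is actually allowed to swap (swappiness != 0). Condensed, the new logic is:

    limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
    if (mem_cgroup_swappiness(memcg))       /* 0 means "never swap" */
        limit = min(limit + (total_swap_pages << PAGE_SHIFT),
                    res_counter_read_u64(&memcg->memsw, RES_LIMIT));
    return limit;
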
1910     diff --git a/mm/shmem.c b/mm/shmem.c
1911     index 40383cd..a859b06 100644
1912     --- a/mm/shmem.c
1913     +++ b/mm/shmem.c
1914     @@ -595,7 +595,7 @@ static void shmem_evict_inode(struct inode *inode)
1915     kfree(xattr->name);
1916     kfree(xattr);
1917     }
1918     - BUG_ON(inode->i_blocks);
1919     + WARN_ON(inode->i_blocks);
1920     shmem_free_inode(inode->i_sb);
1921     end_writeback(inode);
1922     }
1923     diff --git a/mm/vmscan.c b/mm/vmscan.c
1924     index e989ee2..e6ca505 100644
1925     --- a/mm/vmscan.c
1926     +++ b/mm/vmscan.c
1927     @@ -3128,6 +3128,8 @@ static int kswapd(void *p)
1928     &balanced_classzone_idx);
1929     }
1930     }
1931     +
1932     + current->reclaim_state = NULL;
1933     return 0;
1934     }
1935    
1936     diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
1937     index 214c2bb..925ca58 100644
1938     --- a/net/ceph/auth_none.c
1939     +++ b/net/ceph/auth_none.c
1940     @@ -59,9 +59,7 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
1941     */
1942     static int ceph_auth_none_create_authorizer(
1943     struct ceph_auth_client *ac, int peer_type,
1944     - struct ceph_authorizer **a,
1945     - void **buf, size_t *len,
1946     - void **reply_buf, size_t *reply_len)
1947     + struct ceph_auth_handshake *auth)
1948     {
1949     struct ceph_auth_none_info *ai = ac->private;
1950     struct ceph_none_authorizer *au = &ai->au;
1951     @@ -82,11 +80,12 @@ static int ceph_auth_none_create_authorizer(
1952     dout("built authorizer len %d\n", au->buf_len);
1953     }
1954    
1955     - *a = (struct ceph_authorizer *)au;
1956     - *buf = au->buf;
1957     - *len = au->buf_len;
1958     - *reply_buf = au->reply_buf;
1959     - *reply_len = sizeof(au->reply_buf);
1960     + auth->authorizer = (struct ceph_authorizer *) au;
1961     + auth->authorizer_buf = au->buf;
1962     + auth->authorizer_buf_len = au->buf_len;
1963     + auth->authorizer_reply_buf = au->reply_buf;
1964     + auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
1965     +
1966     return 0;
1967    
1968     bad2:
1969     diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
1970     index 1587dc6..a16bf14 100644
1971     --- a/net/ceph/auth_x.c
1972     +++ b/net/ceph/auth_x.c
1973     @@ -526,9 +526,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
1974    
1975     static int ceph_x_create_authorizer(
1976     struct ceph_auth_client *ac, int peer_type,
1977     - struct ceph_authorizer **a,
1978     - void **buf, size_t *len,
1979     - void **reply_buf, size_t *reply_len)
1980     + struct ceph_auth_handshake *auth)
1981     {
1982     struct ceph_x_authorizer *au;
1983     struct ceph_x_ticket_handler *th;
1984     @@ -548,11 +546,12 @@ static int ceph_x_create_authorizer(
1985     return ret;
1986     }
1987    
1988     - *a = (struct ceph_authorizer *)au;
1989     - *buf = au->buf->vec.iov_base;
1990     - *len = au->buf->vec.iov_len;
1991     - *reply_buf = au->reply_buf;
1992     - *reply_len = sizeof(au->reply_buf);
1993     + auth->authorizer = (struct ceph_authorizer *) au;
1994     + auth->authorizer_buf = au->buf->vec.iov_base;
1995     + auth->authorizer_buf_len = au->buf->vec.iov_len;
1996     + auth->authorizer_reply_buf = au->reply_buf;
1997     + auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
1998     +
1999     return 0;
2000     }
2001    
2002     diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
2003     index cc91319..8e74e8c 100644
2004     --- a/net/ceph/ceph_common.c
2005     +++ b/net/ceph/ceph_common.c
2006     @@ -83,7 +83,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
2007     return -1;
2008     }
2009     } else {
2010     - pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
2011     memcpy(&client->fsid, fsid, sizeof(*fsid));
2012     }
2013     return 0;
2014     @@ -468,19 +467,15 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
2015     /* msgr */
2016     if (ceph_test_opt(client, MYIP))
2017     myaddr = &client->options->my_addr;
2018     - client->msgr = ceph_messenger_create(myaddr,
2019     - client->supported_features,
2020     - client->required_features);
2021     - if (IS_ERR(client->msgr)) {
2022     - err = PTR_ERR(client->msgr);
2023     - goto fail;
2024     - }
2025     - client->msgr->nocrc = ceph_test_opt(client, NOCRC);
2026     + ceph_messenger_init(&client->msgr, myaddr,
2027     + client->supported_features,
2028     + client->required_features,
2029     + ceph_test_opt(client, NOCRC));
2030    
2031     /* subsystems */
2032     err = ceph_monc_init(&client->monc, client);
2033     if (err < 0)
2034     - goto fail_msgr;
2035     + goto fail;
2036     err = ceph_osdc_init(&client->osdc, client);
2037     if (err < 0)
2038     goto fail_monc;
2039     @@ -489,8 +484,6 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
2040    
2041     fail_monc:
2042     ceph_monc_stop(&client->monc);
2043     -fail_msgr:
2044     - ceph_messenger_destroy(client->msgr);
2045     fail:
2046     kfree(client);
2047     return ERR_PTR(err);
2048     @@ -501,22 +494,15 @@ void ceph_destroy_client(struct ceph_client *client)
2049     {
2050     dout("destroy_client %p\n", client);
2051    
2052     + atomic_set(&client->msgr.stopping, 1);
2053     +
2054     /* unmount */
2055     ceph_osdc_stop(&client->osdc);
2056    
2057     - /*
2058     - * make sure osd connections close out before destroying the
2059     - * auth module, which is needed to free those connections'
2060     - * ceph_authorizers.
2061     - */
2062     - ceph_msgr_flush();
2063     -
2064     ceph_monc_stop(&client->monc);
2065    
2066     ceph_debugfs_client_cleanup(client);
2067    
2068     - ceph_messenger_destroy(client->msgr);
2069     -
2070     ceph_destroy_options(client->options);
2071    
2072     kfree(client);
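
With the messenger embedded in struct ceph_client, its lifetime equals the client's, so ceph_destroy_client() no longer needs ceph_msgr_flush() plus a separate destroy; it marks the messenger as stopping first, which lets socket callbacks bail out early, then tears the subsystems down. Sketch of the new order (as in the hunk above):

    atomic_set(&client->msgr.stopping, 1);  /* callbacks bail out early */
    ceph_osdc_stop(&client->osdc);
    ceph_monc_stop(&client->monc);
    ceph_debugfs_client_cleanup(client);
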
2073     diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
2074     index d6ebb13..fbda052 100644
2075     --- a/net/ceph/crush/crush.c
2076     +++ b/net/ceph/crush/crush.c
2077     @@ -26,9 +26,9 @@ const char *crush_bucket_alg_name(int alg)
2078     * @b: bucket pointer
2079     * @p: item index in bucket
2080     */
2081     -int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
2082     +int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
2083     {
2084     - if (p >= b->size)
2085     + if ((__u32)p >= b->size)
2086     return 0;
2087    
2088     switch (b->alg) {
2089     @@ -37,9 +37,7 @@ int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
2090     case CRUSH_BUCKET_LIST:
2091     return ((struct crush_bucket_list *)b)->item_weights[p];
2092     case CRUSH_BUCKET_TREE:
2093     - if (p & 1)
2094     - return ((struct crush_bucket_tree *)b)->node_weights[p];
2095     - return 0;
2096     + return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
2097     case CRUSH_BUCKET_STRAW:
2098     return ((struct crush_bucket_straw *)b)->item_weights[p];
2099     }
2100     @@ -87,6 +85,8 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
2101    
2102     void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
2103     {
2104     + kfree(b->h.perm);
2105     + kfree(b->h.items);
2106     kfree(b->node_weights);
2107     kfree(b);
2108     }
2109     @@ -124,10 +124,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
2110     */
2111     void crush_destroy(struct crush_map *map)
2112     {
2113     - int b;
2114     -
2115     /* buckets */
2116     if (map->buckets) {
2117     + __s32 b;
2118     for (b = 0; b < map->max_buckets; b++) {
2119     if (map->buckets[b] == NULL)
2120     continue;
2121     @@ -138,6 +137,7 @@ void crush_destroy(struct crush_map *map)
2122    
2123     /* rules */
2124     if (map->rules) {
2125     + __u32 b;
2126     for (b = 0; b < map->max_rules; b++)
2127     kfree(map->rules[b]);
2128     kfree(map->rules);
2129     diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
2130     index b79747c..00baad5 100644
2131     --- a/net/ceph/crush/mapper.c
2132     +++ b/net/ceph/crush/mapper.c
2133     @@ -32,9 +32,9 @@
2134     * @type: storage ruleset type (user defined)
2135     * @size: output set size
2136     */
2137     -int crush_find_rule(struct crush_map *map, int ruleset, int type, int size)
2138     +int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
2139     {
2140     - int i;
2141     + __u32 i;
2142    
2143     for (i = 0; i < map->max_rules; i++) {
2144     if (map->rules[i] &&
2145     @@ -72,7 +72,7 @@ static int bucket_perm_choose(struct crush_bucket *bucket,
2146     unsigned i, s;
2147    
2148     /* start a new permutation if @x has changed */
2149     - if (bucket->perm_x != x || bucket->perm_n == 0) {
2150     + if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
2151     dprintk("bucket %d new x=%d\n", bucket->id, x);
2152     bucket->perm_x = x;
2153    
2154     @@ -152,8 +152,8 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
2155     return bucket->h.items[i];
2156     }
2157    
2158     - BUG_ON(1);
2159     - return 0;
2160     + dprintk("bad list sums for bucket %d\n", bucket->h.id);
2161     + return bucket->h.items[0];
2162     }
2163    
2164    
2165     @@ -219,7 +219,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket,
2166     static int bucket_straw_choose(struct crush_bucket_straw *bucket,
2167     int x, int r)
2168     {
2169     - int i;
2170     + __u32 i;
2171     int high = 0;
2172     __u64 high_draw = 0;
2173     __u64 draw;
2174     @@ -239,6 +239,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
2175     static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
2176     {
2177     dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
2178     + BUG_ON(in->size == 0);
2179     switch (in->alg) {
2180     case CRUSH_BUCKET_UNIFORM:
2181     return bucket_uniform_choose((struct crush_bucket_uniform *)in,
2182     @@ -253,7 +254,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
2183     return bucket_straw_choose((struct crush_bucket_straw *)in,
2184     x, r);
2185     default:
2186     - BUG_ON(1);
2187     + dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
2188     return in->items[0];
2189     }
2190     }
2191     @@ -262,7 +263,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
2192     * true if device is marked "out" (failed, fully offloaded)
2193     * of the cluster
2194     */
2195     -static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
2196     +static int is_out(const struct crush_map *map, const __u32 *weight, int item, int x)
2197     {
2198     if (weight[item] >= 0x10000)
2199     return 0;
2200     @@ -287,16 +288,16 @@ static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
2201     * @recurse_to_leaf: true if we want one device under each item of given type
2202     * @out2: second output vector for leaf items (if @recurse_to_leaf)
2203     */
2204     -static int crush_choose(struct crush_map *map,
2205     +static int crush_choose(const struct crush_map *map,
2206     struct crush_bucket *bucket,
2207     - __u32 *weight,
2208     + const __u32 *weight,
2209     int x, int numrep, int type,
2210     int *out, int outpos,
2211     int firstn, int recurse_to_leaf,
2212     int *out2)
2213     {
2214     int rep;
2215     - int ftotal, flocal;
2216     + unsigned int ftotal, flocal;
2217     int retry_descent, retry_bucket, skip_rep;
2218     struct crush_bucket *in = bucket;
2219     int r;
2220     @@ -304,7 +305,7 @@ static int crush_choose(struct crush_map *map,
2221     int item = 0;
2222     int itemtype;
2223     int collide, reject;
2224     - const int orig_tries = 5; /* attempts before we fall back to search */
2225     + const unsigned int orig_tries = 5; /* attempts before we fall back to search */
2226    
2227     dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
2228     bucket->id, x, outpos, numrep);
2229     @@ -325,7 +326,7 @@ static int crush_choose(struct crush_map *map,
2230     r = rep;
2231     if (in->alg == CRUSH_BUCKET_UNIFORM) {
2232     /* be careful */
2233     - if (firstn || numrep >= in->size)
2234     + if (firstn || (__u32)numrep >= in->size)
2235     /* r' = r + f_total */
2236     r += ftotal;
2237     else if (in->size % numrep == 0)
2238     @@ -354,7 +355,11 @@ static int crush_choose(struct crush_map *map,
2239     item = bucket_perm_choose(in, x, r);
2240     else
2241     item = crush_bucket_choose(in, x, r);
2242     - BUG_ON(item >= map->max_devices);
2243     + if (item >= map->max_devices) {
2244     + dprintk(" bad item %d\n", item);
2245     + skip_rep = 1;
2246     + break;
2247     + }
2248    
2249     /* desired type? */
2250     if (item < 0)
2251     @@ -365,8 +370,12 @@ static int crush_choose(struct crush_map *map,
2252    
2253     /* keep going? */
2254     if (itemtype != type) {
2255     - BUG_ON(item >= 0 ||
2256     - (-1-item) >= map->max_buckets);
2257     + if (item >= 0 ||
2258     + (-1-item) >= map->max_buckets) {
2259     + dprintk(" bad item type %d\n", type);
2260     + skip_rep = 1;
2261     + break;
2262     + }
2263     in = map->buckets[-1-item];
2264     retry_bucket = 1;
2265     continue;
2266     @@ -415,7 +424,7 @@ reject:
2267     if (collide && flocal < 3)
2268     /* retry locally a few times */
2269     retry_bucket = 1;
2270     - else if (flocal < in->size + orig_tries)
2271     + else if (flocal <= in->size + orig_tries)
2272     /* exhaustive bucket search */
2273     retry_bucket = 1;
2274     else if (ftotal < 20)
2275     @@ -425,7 +434,7 @@ reject:
2276     /* else give up */
2277     skip_rep = 1;
2278     dprintk(" reject %d collide %d "
2279     - "ftotal %d flocal %d\n",
2280     + "ftotal %u flocal %u\n",
2281     reject, collide, ftotal,
2282     flocal);
2283     }
2284     @@ -456,9 +465,9 @@ reject:
2285     * @result_max: maximum result size
2286     * @force: force initial replica choice; -1 for none
2287     */
2288     -int crush_do_rule(struct crush_map *map,
2289     +int crush_do_rule(const struct crush_map *map,
2290     int ruleno, int x, int *result, int result_max,
2291     - int force, __u32 *weight)
2292     + int force, const __u32 *weight)
2293     {
2294     int result_len;
2295     int force_context[CRUSH_MAX_DEPTH];
2296     @@ -473,12 +482,15 @@ int crush_do_rule(struct crush_map *map,
2297     int osize;
2298     int *tmp;
2299     struct crush_rule *rule;
2300     - int step;
2301     + __u32 step;
2302     int i, j;
2303     int numrep;
2304     int firstn;
2305    
2306     - BUG_ON(ruleno >= map->max_rules);
2307     + if ((__u32)ruleno >= map->max_rules) {
2308     + dprintk(" bad ruleno %d\n", ruleno);
2309     + return 0;
2310     + }
2311    
2312     rule = map->rules[ruleno];
2313     result_len = 0;
2314     @@ -488,7 +500,8 @@ int crush_do_rule(struct crush_map *map,
2315     /*
2316     * determine hierarchical context of force, if any. note
2317     * that this may or may not correspond to the specific types
2318     - * referenced by the crush rule.
2319     + * referenced by the crush rule. it will also only affect
2320     + * the first descent (TAKE).
2321     */
2322     if (force >= 0 &&
2323     force < map->max_devices &&
2324     @@ -527,7 +540,8 @@ int crush_do_rule(struct crush_map *map,
2325     firstn = 1;
2326     case CRUSH_RULE_CHOOSE_LEAF_INDEP:
2327     case CRUSH_RULE_CHOOSE_INDEP:
2328     - BUG_ON(wsize == 0);
2329     + if (wsize == 0)
2330     + break;
2331    
2332     recurse_to_leaf =
2333     rule->steps[step].op ==
2334     @@ -596,7 +610,9 @@ int crush_do_rule(struct crush_map *map,
2335     break;
2336    
2337     default:
2338     - BUG_ON(1);
2339     + dprintk(" unknown op %d at step %d\n",
2340     + curstep->op, step);
2341     + break;
2342     }
2343     }
2344     return result_len;
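
The mapper.c hunks all trade BUG_ON() for graceful degradation: a corrupt or inconsistent CRUSH map now logs a dprintk() and falls back to something safe (skip the replica, return the first item, or return an empty result) instead of panicking the kernel. The ruleno check is representative:

    if ((__u32)ruleno >= map->max_rules) {
        dprintk(" bad ruleno %d\n", ruleno);
        return 0;                   /* empty result; caller copes */
    }
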
2345     diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
2346     index b780cb7..9da7fdd 100644
2347     --- a/net/ceph/crypto.c
2348     +++ b/net/ceph/crypto.c
2349     @@ -466,6 +466,7 @@ void ceph_key_destroy(struct key *key) {
2350     struct ceph_crypto_key *ckey = key->payload.data;
2351    
2352     ceph_crypto_key_destroy(ckey);
2353     + kfree(ckey);
2354     }
2355    
2356     struct key_type key_type_ceph = {
2357     diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
2358     index 1919d15..3572dc5 100644
2359     --- a/net/ceph/crypto.h
2360     +++ b/net/ceph/crypto.h
2361     @@ -16,7 +16,8 @@ struct ceph_crypto_key {
2362    
2363     static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
2364     {
2365     - kfree(key->key);
2366     + if (key)
2367     + kfree(key->key);
2368     }
2369    
2370     extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
2371     diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
2372     index 27d4ea3..680978d 100644
2373     --- a/net/ceph/debugfs.c
2374     +++ b/net/ceph/debugfs.c
2375     @@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client)
2376     snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
2377     client->monc.auth->global_id);
2378    
2379     + dout("ceph_debugfs_client_init %p %s\n", client, name);
2380     +
2381     + BUG_ON(client->debugfs_dir);
2382     client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
2383     if (!client->debugfs_dir)
2384     goto out;
2385     @@ -234,6 +237,7 @@ out:
2386    
2387     void ceph_debugfs_client_cleanup(struct ceph_client *client)
2388     {
2389     + dout("ceph_debugfs_client_cleanup %p\n", client);
2390     debugfs_remove(client->debugfs_osdmap);
2391     debugfs_remove(client->debugfs_monmap);
2392     debugfs_remove(client->osdc.debugfs_file);
2393     diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
2394     index f0993af..aa71a67 100644
2395     --- a/net/ceph/messenger.c
2396     +++ b/net/ceph/messenger.c
2397     @@ -29,6 +29,74 @@
2398     * the sender.
2399     */
2400    
2401     +/*
2402     + * We track the state of the socket on a given connection using
2403     + * values defined below. The transition to a new socket state is
2404     + * handled by a function which verifies we aren't coming from an
2405     + * unexpected state.
2406     + *
2407     + * --------
2408     + * | NEW* | transient initial state
2409     + * --------
2410     + * | con_sock_state_init()
2411     + * v
2412     + * ----------
2413     + * | CLOSED | initialized, but no socket (and no
2414     + * ---------- TCP connection)
2415     + * ^ \
2416     + * | \ con_sock_state_connecting()
2417     + * | ----------------------
2418     + * | \
2419     + * + con_sock_state_closed() \
2420     + * |+--------------------------- \
2421     + * | \ \ \
2422     + * | ----------- \ \
2423     + * | | CLOSING | socket event; \ \
2424     + * | ----------- await close \ \
2425     + * | ^ \ |
2426     + * | | \ |
2427     + * | + con_sock_state_closing() \ |
2428     + * | / \ | |
2429     + * | / --------------- | |
2430     + * | / \ v v
2431     + * | / --------------
2432     + * | / -----------------| CONNECTING | socket created, TCP
2433     + * | | / -------------- connect initiated
2434     + * | | | con_sock_state_connected()
2435     + * | | v
2436     + * -------------
2437     + * | CONNECTED | TCP connection established
2438     + * -------------
2439     + *
2440     + * State values for ceph_connection->sock_state; NEW is assumed to be 0.
2441     + */
2442     +
2443     +#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
2444     +#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
2445     +#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */
2446     +#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */
2447     +#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
2448     +
2449     +/*
2450     + * connection states
2451     + */
2452     +#define CON_STATE_CLOSED 1 /* -> PREOPEN */
2453     +#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */
2454     +#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */
2455     +#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */
2456     +#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */
2457     +#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */
2458     +
2459     +/*
2460     + * ceph_connection flag bits
2461     + */
2462     +#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop
2463     + * messages on errors */
2464     +#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
2465     +#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */
2466     +#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */
2467     +#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */
2468     +
2469     /* static tag bytes (protocol control messages) */
2470     static char tag_msg = CEPH_MSGR_TAG_MSG;
2471     static char tag_ack = CEPH_MSGR_TAG_ACK;
2472     @@ -147,72 +215,130 @@ void ceph_msgr_flush(void)
2473     }
2474     EXPORT_SYMBOL(ceph_msgr_flush);
2475    
2476     +/* Connection socket state transition functions */
2477     +
2478     +static void con_sock_state_init(struct ceph_connection *con)
2479     +{
2480     + int old_state;
2481     +
2482     + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
2483     + if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
2484     + printk("%s: unexpected old state %d\n", __func__, old_state);
2485     + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2486     + CON_SOCK_STATE_CLOSED);
2487     +}
2488     +
2489     +static void con_sock_state_connecting(struct ceph_connection *con)
2490     +{
2491     + int old_state;
2492     +
2493     + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
2494     + if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
2495     + printk("%s: unexpected old state %d\n", __func__, old_state);
2496     + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2497     + CON_SOCK_STATE_CONNECTING);
2498     +}
2499     +
2500     +static void con_sock_state_connected(struct ceph_connection *con)
2501     +{
2502     + int old_state;
2503     +
2504     + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
2505     + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
2506     + printk("%s: unexpected old state %d\n", __func__, old_state);
2507     + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2508     + CON_SOCK_STATE_CONNECTED);
2509     +}
2510     +
2511     +static void con_sock_state_closing(struct ceph_connection *con)
2512     +{
2513     + int old_state;
2514     +
2515     + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
2516     + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
2517     + old_state != CON_SOCK_STATE_CONNECTED &&
2518     + old_state != CON_SOCK_STATE_CLOSING))
2519     + printk("%s: unexpected old state %d\n", __func__, old_state);
2520     + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2521     + CON_SOCK_STATE_CLOSING);
2522     +}
2523     +
2524     +static void con_sock_state_closed(struct ceph_connection *con)
2525     +{
2526     + int old_state;
2527     +
2528     + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
2529     + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
2530     + old_state != CON_SOCK_STATE_CLOSING &&
2531     + old_state != CON_SOCK_STATE_CONNECTING &&
2532     + old_state != CON_SOCK_STATE_CLOSED))
2533     + printk("%s: unexpected old state %d\n", __func__, old_state);
2534     + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2535     + CON_SOCK_STATE_CLOSED);
2536     +}
2537    
2538     /*
2539     * socket callback functions
2540     */
2541    
2542     /* data available on socket, or listen socket received a connect */
2543     -static void ceph_data_ready(struct sock *sk, int count_unused)
2544     +static void ceph_sock_data_ready(struct sock *sk, int count_unused)
2545     {
2546     struct ceph_connection *con = sk->sk_user_data;
2547     + if (atomic_read(&con->msgr->stopping)) {
2548     + return;
2549     + }
2550    
2551     if (sk->sk_state != TCP_CLOSE_WAIT) {
2552     - dout("ceph_data_ready on %p state = %lu, queueing work\n",
2553     + dout("%s on %p state = %lu, queueing work\n", __func__,
2554     con, con->state);
2555     queue_con(con);
2556     }
2557     }
2558    
2559     /* socket has buffer space for writing */
2560     -static void ceph_write_space(struct sock *sk)
2561     +static void ceph_sock_write_space(struct sock *sk)
2562     {
2563     struct ceph_connection *con = sk->sk_user_data;
2564    
2565     /* only queue to workqueue if there is data we want to write,
2566     * and there is sufficient space in the socket buffer to accept
2567     - * more data. clear SOCK_NOSPACE so that ceph_write_space()
2568     + * more data. clear SOCK_NOSPACE so that ceph_sock_write_space()
2569     * doesn't get called again until try_write() fills the socket
2570     * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
2571     * and net/core/stream.c:sk_stream_write_space().
2572     */
2573     - if (test_bit(WRITE_PENDING, &con->state)) {
2574     + if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) {
2575     if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
2576     - dout("ceph_write_space %p queueing write work\n", con);
2577     + dout("%s %p queueing write work\n", __func__, con);
2578     clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2579     queue_con(con);
2580     }
2581     } else {
2582     - dout("ceph_write_space %p nothing to write\n", con);
2583     + dout("%s %p nothing to write\n", __func__, con);
2584     }
2585     }
2586    
2587     /* socket's state has changed */
2588     -static void ceph_state_change(struct sock *sk)
2589     +static void ceph_sock_state_change(struct sock *sk)
2590     {
2591     struct ceph_connection *con = sk->sk_user_data;
2592    
2593     - dout("ceph_state_change %p state = %lu sk_state = %u\n",
2594     + dout("%s %p state = %lu sk_state = %u\n", __func__,
2595     con, con->state, sk->sk_state);
2596    
2597     - if (test_bit(CLOSED, &con->state))
2598     - return;
2599     -
2600     switch (sk->sk_state) {
2601     case TCP_CLOSE:
2602     - dout("ceph_state_change TCP_CLOSE\n");
2603     + dout("%s TCP_CLOSE\n", __func__);
2604     case TCP_CLOSE_WAIT:
2605     - dout("ceph_state_change TCP_CLOSE_WAIT\n");
2606     - if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
2607     - if (test_bit(CONNECTING, &con->state))
2608     - con->error_msg = "connection failed";
2609     - else
2610     - con->error_msg = "socket closed";
2611     - queue_con(con);
2612     - }
2613     + dout("%s TCP_CLOSE_WAIT\n", __func__);
2614     + con_sock_state_closing(con);
2615     + set_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
2616     + queue_con(con);
2617     break;
2618     case TCP_ESTABLISHED:
2619     - dout("ceph_state_change TCP_ESTABLISHED\n");
2620     + dout("%s TCP_ESTABLISHED\n", __func__);
2621     + con_sock_state_connected(con);
2622     queue_con(con);
2623     break;
2624     default: /* Everything else is uninteresting */
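
All five con_sock_state_*() helpers above share one shape: swap in the new state atomically, then validate the previous value, so an illegal transition is loudly reported but never blocks progress. A hypothetical generalization of that pattern, not part of the patch itself:

    static void con_sock_state_set(struct ceph_connection *con,
                                   int new_state, int expected_old)
    {
        int old_state = atomic_xchg(&con->sock_state, new_state);

        if (WARN_ON(old_state != expected_old))
            printk("%s: unexpected old state %d\n", __func__, old_state);
        dout("%s con %p sock %d -> %d\n", __func__, con,
             old_state, new_state);
    }
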
2625     @@ -228,9 +354,9 @@ static void set_sock_callbacks(struct socket *sock,
2626     {
2627     struct sock *sk = sock->sk;
2628     sk->sk_user_data = con;
2629     - sk->sk_data_ready = ceph_data_ready;
2630     - sk->sk_write_space = ceph_write_space;
2631     - sk->sk_state_change = ceph_state_change;
2632     + sk->sk_data_ready = ceph_sock_data_ready;
2633     + sk->sk_write_space = ceph_sock_write_space;
2634     + sk->sk_state_change = ceph_sock_state_change;
2635     }
2636    
2637    
2638     @@ -262,6 +388,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
2639    
2640     dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
2641    
2642     + con_sock_state_connecting(con);
2643     ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
2644     O_NONBLOCK);
2645     if (ret == -EINPROGRESS) {
2646     @@ -277,7 +404,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
2647     return ret;
2648     }
2649     con->sock = sock;
2650     -
2651     return 0;
2652     }
2653    
2654     @@ -333,16 +459,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
2655     */
2656     static int con_close_socket(struct ceph_connection *con)
2657     {
2658     - int rc;
2659     + int rc = 0;
2660    
2661     dout("con_close_socket on %p sock %p\n", con, con->sock);
2662     - if (!con->sock)
2663     - return 0;
2664     - set_bit(SOCK_CLOSED, &con->state);
2665     - rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
2666     - sock_release(con->sock);
2667     - con->sock = NULL;
2668     - clear_bit(SOCK_CLOSED, &con->state);
2669     + if (con->sock) {
2670     + rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
2671     + sock_release(con->sock);
2672     + con->sock = NULL;
2673     + }
2674     +
2675     + /*
2676     + * Forcibly clear the SOCK_CLOSED flag. It gets set
2677     + * independent of the connection mutex, and we could have
2678     + * received a socket close event before we had the chance to
2679     + * shut the socket down.
2680     + */
2681     + clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
2682     +
2683     + con_sock_state_closed(con);
2684     return rc;
2685     }
2686    
2687     @@ -353,6 +487,10 @@ static int con_close_socket(struct ceph_connection *con)
2688     static void ceph_msg_remove(struct ceph_msg *msg)
2689     {
2690     list_del_init(&msg->list_head);
2691     + BUG_ON(msg->con == NULL);
2692     + msg->con->ops->put(msg->con);
2693     + msg->con = NULL;
2694     +
2695     ceph_msg_put(msg);
2696     }
2697     static void ceph_msg_remove_list(struct list_head *head)
2698     @@ -372,8 +510,11 @@ static void reset_connection(struct ceph_connection *con)
2699     ceph_msg_remove_list(&con->out_sent);
2700    
2701     if (con->in_msg) {
2702     + BUG_ON(con->in_msg->con != con);
2703     + con->in_msg->con = NULL;
2704     ceph_msg_put(con->in_msg);
2705     con->in_msg = NULL;
2706     + con->ops->put(con);
2707     }
2708    
2709     con->connect_seq = 0;
2710     @@ -391,32 +532,44 @@ static void reset_connection(struct ceph_connection *con)
2711     */
2712     void ceph_con_close(struct ceph_connection *con)
2713     {
2714     + mutex_lock(&con->mutex);
2715     dout("con_close %p peer %s\n", con,
2716     ceph_pr_addr(&con->peer_addr.in_addr));
2717     - set_bit(CLOSED, &con->state); /* in case there's queued work */
2718     - clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
2719     - clear_bit(LOSSYTX, &con->state); /* so we retry next connect */
2720     - clear_bit(KEEPALIVE_PENDING, &con->state);
2721     - clear_bit(WRITE_PENDING, &con->state);
2722     - mutex_lock(&con->mutex);
2723     + con->state = CON_STATE_CLOSED;
2724     +
2725     + clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */
2726     + clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
2727     + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
2728     + clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
2729     + clear_bit(CON_FLAG_BACKOFF, &con->flags);
2730     +
2731     reset_connection(con);
2732     con->peer_global_seq = 0;
2733     cancel_delayed_work(&con->work);
2734     + con_close_socket(con);
2735     mutex_unlock(&con->mutex);
2736     - queue_con(con);
2737     }
2738     EXPORT_SYMBOL(ceph_con_close);
2739    
2740     /*
2741     * Reopen a closed connection, with a new peer address.
2742     */
2743     -void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
2744     +void ceph_con_open(struct ceph_connection *con,
2745     + __u8 entity_type, __u64 entity_num,
2746     + struct ceph_entity_addr *addr)
2747     {
2748     + mutex_lock(&con->mutex);
2749     dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
2750     - set_bit(OPENING, &con->state);
2751     - clear_bit(CLOSED, &con->state);
2752     +
2753     + BUG_ON(con->state != CON_STATE_CLOSED);
2754     + con->state = CON_STATE_PREOPEN;
2755     +
2756     + con->peer_name.type = (__u8) entity_type;
2757     + con->peer_name.num = cpu_to_le64(entity_num);
2758     +
2759     memcpy(&con->peer_addr, addr, sizeof(*addr));
2760     con->delay = 0; /* reset backoff memory */
2761     + mutex_unlock(&con->mutex);
2762     queue_con(con);
2763     }
2764     EXPORT_SYMBOL(ceph_con_open);
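
ceph_con_open() now receives the peer's entity type and number, so the identity is recorded under con->mutex instead of being poked into con->peer_name by each subsystem. A representative caller under the new signature (OSD-client shape; the exact argument expressions are assumed from context):

    ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
                  &osdc->osdmap->osd_addr[osd->o_osd]);
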
2765     @@ -430,42 +583,26 @@ bool ceph_con_opened(struct ceph_connection *con)
2766     }
2767    
2768     /*
2769     - * generic get/put
2770     - */
2771     -struct ceph_connection *ceph_con_get(struct ceph_connection *con)
2772     -{
2773     - int nref = __atomic_add_unless(&con->nref, 1, 0);
2774     -
2775     - dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1);
2776     -
2777     - return nref ? con : NULL;
2778     -}
2779     -
2780     -void ceph_con_put(struct ceph_connection *con)
2781     -{
2782     - int nref = atomic_dec_return(&con->nref);
2783     -
2784     - BUG_ON(nref < 0);
2785     - if (nref == 0) {
2786     - BUG_ON(con->sock);
2787     - kfree(con);
2788     - }
2789     - dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref);
2790     -}
2791     -
2792     -/*
2793     * initialize a new connection.
2794     */
2795     -void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
2796     +void ceph_con_init(struct ceph_connection *con, void *private,
2797     + const struct ceph_connection_operations *ops,
2798     + struct ceph_messenger *msgr)
2799     {
2800     dout("con_init %p\n", con);
2801     memset(con, 0, sizeof(*con));
2802     - atomic_set(&con->nref, 1);
2803     + con->private = private;
2804     + con->ops = ops;
2805     con->msgr = msgr;
2806     +
2807     + con_sock_state_init(con);
2808     +
2809     mutex_init(&con->mutex);
2810     INIT_LIST_HEAD(&con->out_queue);
2811     INIT_LIST_HEAD(&con->out_sent);
2812     INIT_DELAYED_WORK(&con->work, con_work);
2813     +
2814     + con->state = CON_STATE_CLOSED;
2815     }
2816     EXPORT_SYMBOL(ceph_con_init);
2817    
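
With ceph_con_get()/ceph_con_put() and the nref counter deleted, a ceph_connection no longer owns its lifetime: it is embedded in a longer-lived object, and con->ops->get()/put() forward to that container's reference count, which is why ceph_con_init() now records private data and an ops table instead of initializing a refcount. A sketch of the embedding; the container name and layout here are hypothetical:

    #include <stddef.h>
    #include <stdlib.h>

    struct conn;                              /* stand-in for ceph_connection */
    struct conn_ops {
        struct conn *(*get)(struct conn *);
        void (*put)(struct conn *);
    };
    struct conn { const struct conn_ops *ops; };

    /* Hypothetical container; in the kernel this role is played by e.g.
     * an OSD session or the monitor client, which embeds its connection. */
    struct session {
        int refcount;
        struct conn con;                      /* embedded, never freed itself */
    };

    #define to_session(c) \
        ((struct session *)((char *)(c) - offsetof(struct session, con)))

    static struct conn *session_con_get(struct conn *c)
    {
        to_session(c)->refcount++;            /* kernel code uses kref/atomics */
        return c;
    }

    static void session_con_put(struct conn *c)
    {
        struct session *s = to_session(c);
        if (--s->refcount == 0)
            free(s);                          /* the container dies, not the con */
    }
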
2818     @@ -486,14 +623,14 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
2819     return ret;
2820     }
2821    
2822     -static void ceph_con_out_kvec_reset(struct ceph_connection *con)
2823     +static void con_out_kvec_reset(struct ceph_connection *con)
2824     {
2825     con->out_kvec_left = 0;
2826     con->out_kvec_bytes = 0;
2827     con->out_kvec_cur = &con->out_kvec[0];
2828     }
2829    
2830     -static void ceph_con_out_kvec_add(struct ceph_connection *con,
2831     +static void con_out_kvec_add(struct ceph_connection *con,
2832     size_t size, void *data)
2833     {
2834     int index;
2835     @@ -507,6 +644,53 @@ static void ceph_con_out_kvec_add(struct ceph_connection *con,
2836     con->out_kvec_bytes += size;
2837     }
2838    
2839     +#ifdef CONFIG_BLOCK
2840     +static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
2841     +{
2842     + if (!bio) {
2843     + *iter = NULL;
2844     + *seg = 0;
2845     + return;
2846     + }
2847     + *iter = bio;
2848     + *seg = bio->bi_idx;
2849     +}
2850     +
2851     +static void iter_bio_next(struct bio **bio_iter, int *seg)
2852     +{
2853     + if (*bio_iter == NULL)
2854     + return;
2855     +
2856     + BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
2857     +
2858     + (*seg)++;
2859     + if (*seg == (*bio_iter)->bi_vcnt)
2860     + init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
2861     +}
2862     +#endif
2863     +
2864     +static void prepare_write_message_data(struct ceph_connection *con)
2865     +{
2866     + struct ceph_msg *msg = con->out_msg;
2867     +
2868     + BUG_ON(!msg);
2869     + BUG_ON(!msg->hdr.data_len);
2870     +
2871     + /* initialize page iterator */
2872     + con->out_msg_pos.page = 0;
2873     + if (msg->pages)
2874     + con->out_msg_pos.page_pos = msg->page_alignment;
2875     + else
2876     + con->out_msg_pos.page_pos = 0;
2877     +#ifdef CONFIG_BLOCK
2878     + if (msg->bio)
2879     + init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
2880     +#endif
2881     + con->out_msg_pos.data_pos = 0;
2882     + con->out_msg_pos.did_page_crc = false;
2883     + con->out_more = 1; /* data + footer will follow */
2884     +}
2885     +
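
init_bio_iter() and iter_bio_next() are moved up unchanged so that prepare_write_message_data() can (re)initialize the outgoing bio cursor every time message data is prepared, rather than lazily in the write loop. They walk a chain of struct bio segment by segment using the 3.4-era fields bi_idx, bi_vcnt and bi_next. A self-contained model of the same walk, with a local type standing in for struct bio:

    #include <stdio.h>

    struct bio_lite {
        int bi_idx;                  /* first valid segment in this bio */
        int bi_vcnt;                 /* number of segments */
        struct bio_lite *bi_next;    /* next bio in the chain */
    };

    static void init_iter(struct bio_lite *bio, struct bio_lite **iter, int *seg)
    {
        if (!bio) { *iter = NULL; *seg = 0; return; }
        *iter = bio;
        *seg = bio->bi_idx;
    }

    static void iter_next(struct bio_lite **iter, int *seg)
    {
        if (!*iter)
            return;
        if (++*seg == (*iter)->bi_vcnt)      /* end of this bio: hop to the next */
            init_iter((*iter)->bi_next, iter, seg);
    }

    int main(void)
    {
        struct bio_lite b2 = { 0, 2, NULL };
        struct bio_lite b1 = { 1, 3, &b2 };
        struct bio_lite *it;
        int seg;

        for (init_iter(&b1, &it, &seg); it; iter_next(&it, &seg))
            printf("bio %p seg %d\n", (void *)it, seg);
        return 0;
    }
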
2886     /*
2887     * Prepare footer for currently outgoing message, and finish things
2888     * off. Assumes out_kvec* are already valid.. we just add on to the end.
2889     @@ -516,6 +700,8 @@ static void prepare_write_message_footer(struct ceph_connection *con)
2890     struct ceph_msg *m = con->out_msg;
2891     int v = con->out_kvec_left;
2892    
2893     + m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
2894     +
2895     dout("prepare_write_message_footer %p\n", con);
2896     con->out_kvec_is_msg = true;
2897     con->out_kvec[v].iov_base = &m->footer;
2898     @@ -534,7 +720,7 @@ static void prepare_write_message(struct ceph_connection *con)
2899     struct ceph_msg *m;
2900     u32 crc;
2901    
2902     - ceph_con_out_kvec_reset(con);
2903     + con_out_kvec_reset(con);
2904     con->out_kvec_is_msg = true;
2905     con->out_msg_done = false;
2906    
2907     @@ -542,14 +728,16 @@ static void prepare_write_message(struct ceph_connection *con)
2908     * TCP packet that's a good thing. */
2909     if (con->in_seq > con->in_seq_acked) {
2910     con->in_seq_acked = con->in_seq;
2911     - ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2912     + con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2913     con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
2914     - ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
2915     + con_out_kvec_add(con, sizeof (con->out_temp_ack),
2916     &con->out_temp_ack);
2917     }
2918    
2919     + BUG_ON(list_empty(&con->out_queue));
2920     m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
2921     con->out_msg = m;
2922     + BUG_ON(m->con != con);
2923    
2924     /* put message on sent list */
2925     ceph_msg_get(m);
2926     @@ -572,18 +760,18 @@ static void prepare_write_message(struct ceph_connection *con)
2927     BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
2928    
2929     /* tag + hdr + front + middle */
2930     - ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
2931     - ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
2932     - ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
2933     + con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
2934     + con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
2935     + con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
2936    
2937     if (m->middle)
2938     - ceph_con_out_kvec_add(con, m->middle->vec.iov_len,
2939     + con_out_kvec_add(con, m->middle->vec.iov_len,
2940     m->middle->vec.iov_base);
2941    
2942     /* fill in crc (except data pages), footer */
2943     crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
2944     con->out_msg->hdr.crc = cpu_to_le32(crc);
2945     - con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE;
2946     + con->out_msg->footer.flags = 0;
2947    
2948     crc = crc32c(0, m->front.iov_base, m->front.iov_len);
2949     con->out_msg->footer.front_crc = cpu_to_le32(crc);
2950     @@ -593,28 +781,19 @@ static void prepare_write_message(struct ceph_connection *con)
2951     con->out_msg->footer.middle_crc = cpu_to_le32(crc);
2952     } else
2953     con->out_msg->footer.middle_crc = 0;
2954     - con->out_msg->footer.data_crc = 0;
2955     - dout("prepare_write_message front_crc %u data_crc %u\n",
2956     + dout("%s front_crc %u middle_crc %u\n", __func__,
2957     le32_to_cpu(con->out_msg->footer.front_crc),
2958     le32_to_cpu(con->out_msg->footer.middle_crc));
2959    
2960     /* is there a data payload? */
2961     - if (le32_to_cpu(m->hdr.data_len) > 0) {
2962     - /* initialize page iterator */
2963     - con->out_msg_pos.page = 0;
2964     - if (m->pages)
2965     - con->out_msg_pos.page_pos = m->page_alignment;
2966     - else
2967     - con->out_msg_pos.page_pos = 0;
2968     - con->out_msg_pos.data_pos = 0;
2969     - con->out_msg_pos.did_page_crc = false;
2970     - con->out_more = 1; /* data + footer will follow */
2971     - } else {
2972     + con->out_msg->footer.data_crc = 0;
2973     + if (m->hdr.data_len)
2974     + prepare_write_message_data(con);
2975     + else
2976     /* no, queue up footer too and be done */
2977     prepare_write_message_footer(con);
2978     - }
2979    
2980     - set_bit(WRITE_PENDING, &con->state);
2981     + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
2982     }
2983    
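
prepare_write_message() stages an outgoing frame as kvecs in a fixed order: an optional ack, the tag byte, the header, the front, the middle if present, then either the data pages or the footer directly; front and middle CRCs are computed up front, while footer.data_crc now starts at zero and accumulates page by page, and FOOTER_COMPLETE is only set once the footer is really prepared. A rough userspace picture of the staging, using writev(2) iovecs in place of the kernel's kvec helpers:

    #include <stddef.h>
    #include <sys/uio.h>

    #define MAX_VECS 8

    struct out_kvec {
        struct iovec vec[MAX_VECS];
        int left;                 /* entries staged so far */
        size_t bytes;             /* total bytes staged */
    };

    static void kvec_reset(struct out_kvec *k) { k->left = 0; k->bytes = 0; }

    static void kvec_add(struct out_kvec *k, void *data, size_t size)
    {
        k->vec[k->left].iov_base = data;
        k->vec[k->left].iov_len = size;
        k->left++;
        k->bytes += size;
    }

    /* Order in which prepare_write_message() stages a frame. */
    static void stage_frame(struct out_kvec *k, char *tag,
                            void *hdr, size_t hdr_len,
                            void *front, size_t front_len)
    {
        kvec_reset(k);
        kvec_add(k, tag, 1);            /* CEPH_MSGR_TAG_MSG (ack may precede) */
        kvec_add(k, hdr, hdr_len);      /* header; hdr.crc already filled in */
        kvec_add(k, front, front_len);  /* front (and middle, when present) */
        /* data pages are streamed separately; the footer is staged last,
         * which is why FOOTER_COMPLETE is now set only in the footer path */
    }
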
2984     /*
2985     @@ -626,16 +805,16 @@ static void prepare_write_ack(struct ceph_connection *con)
2986     con->in_seq_acked, con->in_seq);
2987     con->in_seq_acked = con->in_seq;
2988    
2989     - ceph_con_out_kvec_reset(con);
2990     + con_out_kvec_reset(con);
2991    
2992     - ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2993     + con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2994    
2995     con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
2996     - ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
2997     + con_out_kvec_add(con, sizeof (con->out_temp_ack),
2998     &con->out_temp_ack);
2999    
3000     con->out_more = 1; /* more will follow.. eventually.. */
3001     - set_bit(WRITE_PENDING, &con->state);
3002     + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3003     }
3004    
3005     /*
3006     @@ -644,63 +823,60 @@ static void prepare_write_ack(struct ceph_connection *con)
3007     static void prepare_write_keepalive(struct ceph_connection *con)
3008     {
3009     dout("prepare_write_keepalive %p\n", con);
3010     - ceph_con_out_kvec_reset(con);
3011     - ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
3012     - set_bit(WRITE_PENDING, &con->state);
3013     + con_out_kvec_reset(con);
3014     + con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
3015     + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3016     }
3017    
3018     /*
3019     * Connection negotiation.
3020     */
3021    
3022     -static int prepare_connect_authorizer(struct ceph_connection *con)
3023     +static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
3024     + int *auth_proto)
3025     {
3026     - void *auth_buf;
3027     - int auth_len = 0;
3028     - int auth_protocol = 0;
3029     + struct ceph_auth_handshake *auth;
3030     +
3031     + if (!con->ops->get_authorizer) {
3032     + con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
3033     + con->out_connect.authorizer_len = 0;
3034     + return NULL;
3035     + }
3036    
3037     + /* Can't hold the mutex while getting authorizer */
3038     mutex_unlock(&con->mutex);
3039     - if (con->ops->get_authorizer)
3040     - con->ops->get_authorizer(con, &auth_buf, &auth_len,
3041     - &auth_protocol, &con->auth_reply_buf,
3042     - &con->auth_reply_buf_len,
3043     - con->auth_retry);
3044     + auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
3045     mutex_lock(&con->mutex);
3046    
3047     - if (test_bit(CLOSED, &con->state) ||
3048     - test_bit(OPENING, &con->state))
3049     - return -EAGAIN;
3050     + if (IS_ERR(auth))
3051     + return auth;
3052     + if (con->state != CON_STATE_NEGOTIATING)
3053     + return ERR_PTR(-EAGAIN);
3054    
3055     - con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
3056     - con->out_connect.authorizer_len = cpu_to_le32(auth_len);
3057     -
3058     - if (auth_len)
3059     - ceph_con_out_kvec_add(con, auth_len, auth_buf);
3060     -
3061     - return 0;
3062     + con->auth_reply_buf = auth->authorizer_reply_buf;
3063     + con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
3064     + return auth;
3065     }
3066    
3067     /*
3068     * We connected to a peer and are saying hello.
3069     */
3070     -static void prepare_write_banner(struct ceph_messenger *msgr,
3071     - struct ceph_connection *con)
3072     +static void prepare_write_banner(struct ceph_connection *con)
3073     {
3074     - ceph_con_out_kvec_reset(con);
3075     - ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
3076     - ceph_con_out_kvec_add(con, sizeof (msgr->my_enc_addr),
3077     - &msgr->my_enc_addr);
3078     + con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
3079     + con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
3080     + &con->msgr->my_enc_addr);
3081    
3082     con->out_more = 0;
3083     - set_bit(WRITE_PENDING, &con->state);
3084     + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3085     }
3086    
3087     -static int prepare_write_connect(struct ceph_messenger *msgr,
3088     - struct ceph_connection *con,
3089     - int include_banner)
3090     +static int prepare_write_connect(struct ceph_connection *con)
3091     {
3092     unsigned global_seq = get_global_seq(con->msgr, 0);
3093     int proto;
3094     + int auth_proto;
3095     + struct ceph_auth_handshake *auth;
3096    
3097     switch (con->peer_name.type) {
3098     case CEPH_ENTITY_TYPE_MON:
3099     @@ -719,23 +895,32 @@ static int prepare_write_connect(struct ceph_messenger *msgr,
3100     dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
3101     con->connect_seq, global_seq, proto);
3102    
3103     - con->out_connect.features = cpu_to_le64(msgr->supported_features);
3104     + con->out_connect.features = cpu_to_le64(con->msgr->supported_features);
3105     con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
3106     con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
3107     con->out_connect.global_seq = cpu_to_le32(global_seq);
3108     con->out_connect.protocol_version = cpu_to_le32(proto);
3109     con->out_connect.flags = 0;
3110    
3111     - if (include_banner)
3112     - prepare_write_banner(msgr, con);
3113     - else
3114     - ceph_con_out_kvec_reset(con);
3115     - ceph_con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect);
3116     + auth_proto = CEPH_AUTH_UNKNOWN;
3117     + auth = get_connect_authorizer(con, &auth_proto);
3118     + if (IS_ERR(auth))
3119     + return PTR_ERR(auth);
3120     +
3121     + con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
3122     + con->out_connect.authorizer_len = auth ?
3123     + cpu_to_le32(auth->authorizer_buf_len) : 0;
3124     +
3125     + con_out_kvec_add(con, sizeof (con->out_connect),
3126     + &con->out_connect);
3127     + if (auth && auth->authorizer_buf_len)
3128     + con_out_kvec_add(con, auth->authorizer_buf_len,
3129     + auth->authorizer_buf);
3130    
3131     con->out_more = 0;
3132     - set_bit(WRITE_PENDING, &con->state);
3133     + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3134    
3135     - return prepare_connect_authorizer(con);
3136     + return 0;
3137     }
3138    
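
get_connect_authorizer() replaces five out-parameters with a single handshake object returned by ops->get_authorizer(); the mutex is still dropped around the callback, and the state is rechecked afterwards (-EAGAIN if the connection stopped negotiating meanwhile). The shape of that object, as consumed by prepare_write_connect() below, reduced to a sketch:

    #include <stddef.h>

    /* Sketch of the object returned by ops->get_authorizer(); field names
     * follow their uses above, not the full ceph_auth_handshake definition. */
    struct auth_handshake {
        void  *authorizer_buf;            /* appended after ceph_msg_connect */
        size_t authorizer_buf_len;
        void  *authorizer_reply_buf;      /* where the peer's reply lands */
        size_t authorizer_reply_buf_len;
    };
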
3139     /*
3140     @@ -781,30 +966,34 @@ out:
3141     return ret; /* done! */
3142     }
3143    
3144     -#ifdef CONFIG_BLOCK
3145     -static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
3146     +static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
3147     + size_t len, size_t sent, bool in_trail)
3148     {
3149     - if (!bio) {
3150     - *iter = NULL;
3151     - *seg = 0;
3152     - return;
3153     - }
3154     - *iter = bio;
3155     - *seg = bio->bi_idx;
3156     -}
3157     + struct ceph_msg *msg = con->out_msg;
3158    
3159     -static void iter_bio_next(struct bio **bio_iter, int *seg)
3160     -{
3161     - if (*bio_iter == NULL)
3162     - return;
3163     + BUG_ON(!msg);
3164     + BUG_ON(!sent);
3165    
3166     - BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
3167     + con->out_msg_pos.data_pos += sent;
3168     + con->out_msg_pos.page_pos += sent;
3169     + if (sent < len)
3170     + return;
3171    
3172     - (*seg)++;
3173     - if (*seg == (*bio_iter)->bi_vcnt)
3174     - init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
3175     -}
3176     + BUG_ON(sent != len);
3177     + con->out_msg_pos.page_pos = 0;
3178     + con->out_msg_pos.page++;
3179     + con->out_msg_pos.did_page_crc = false;
3180     + if (in_trail)
3181     + list_move_tail(&page->lru,
3182     + &msg->trail->head);
3183     + else if (msg->pagelist)
3184     + list_move_tail(&page->lru,
3185     + &msg->pagelist->head);
3186     +#ifdef CONFIG_BLOCK
3187     + else if (msg->bio)
3188     + iter_bio_next(&msg->bio_iter, &msg->bio_seg);
3189     #endif
3190     +}
3191    
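
out_msg_pos_next() centralizes cursor bookkeeping that used to be inlined in write_partial_msg_pages(): data_pos and page_pos always advance by the bytes actually sent, but the page index is only stepped, and the per-page CRC flag reset, once the current chunk completes. Simplified to the page case only:

    #include <stdbool.h>
    #include <stddef.h>

    struct msg_pos {
        int    page;          /* index of the current page */
        size_t page_pos;      /* offset within that page */
        size_t data_pos;      /* offset within the whole payload */
        bool   did_page_crc;  /* crc already folded in for this chunk? */
    };

    static void msg_pos_next(struct msg_pos *pos, size_t len, size_t sent)
    {
        pos->data_pos += sent;
        pos->page_pos += sent;
        if (sent < len)            /* short write: same page next time */
            return;

        pos->page_pos = 0;         /* chunk done: step to the next page */
        pos->page++;
        pos->did_page_crc = false;
    }
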
3192     /*
3193     * Write as much message data payload as we can. If we finish, queue
3194     @@ -821,41 +1010,36 @@ static int write_partial_msg_pages(struct ceph_connection *con)
3195     bool do_datacrc = !con->msgr->nocrc;
3196     int ret;
3197     int total_max_write;
3198     - int in_trail = 0;
3199     - size_t trail_len = (msg->trail ? msg->trail->length : 0);
3200     + bool in_trail = false;
3201     + const size_t trail_len = (msg->trail ? msg->trail->length : 0);
3202     + const size_t trail_off = data_len - trail_len;
3203    
3204     dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
3205     - con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
3206     + con, msg, con->out_msg_pos.page, msg->nr_pages,
3207     con->out_msg_pos.page_pos);
3208    
3209     -#ifdef CONFIG_BLOCK
3210     - if (msg->bio && !msg->bio_iter)
3211     - init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
3212     -#endif
3213     -
3214     + /*
3215     + * Iterate through each page that contains data to be
3216     + * written, and send as much as possible for each.
3217     + *
3218     + * If we are calculating the data crc (the default), we will
3219     + * need to map the page. If we have no pages, they have
3220     + * been revoked, so use the zero page.
3221     + */
3222     while (data_len > con->out_msg_pos.data_pos) {
3223     struct page *page = NULL;
3224     int max_write = PAGE_SIZE;
3225     int bio_offset = 0;
3226    
3227     - total_max_write = data_len - trail_len -
3228     - con->out_msg_pos.data_pos;
3229     -
3230     - /*
3231     - * if we are calculating the data crc (the default), we need
3232     - * to map the page. if our pages[] has been revoked, use the
3233     - * zero page.
3234     - */
3235     -
3236     - /* have we reached the trail part of the data? */
3237     - if (con->out_msg_pos.data_pos >= data_len - trail_len) {
3238     - in_trail = 1;
3239     + in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off;
3240     + if (!in_trail)
3241     + total_max_write = trail_off - con->out_msg_pos.data_pos;
3242    
3243     + if (in_trail) {
3244     total_max_write = data_len - con->out_msg_pos.data_pos;
3245    
3246     page = list_first_entry(&msg->trail->head,
3247     struct page, lru);
3248     - max_write = PAGE_SIZE;
3249     } else if (msg->pages) {
3250     page = msg->pages[con->out_msg_pos.page];
3251     } else if (msg->pagelist) {
3252     @@ -878,52 +1062,32 @@ static int write_partial_msg_pages(struct ceph_connection *con)
3253    
3254     if (do_datacrc && !con->out_msg_pos.did_page_crc) {
3255     void *base;
3256     - u32 crc;
3257     - u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);
3258     + u32 crc = le32_to_cpu(msg->footer.data_crc);
3259     char *kaddr;
3260    
3261     kaddr = kmap(page);
3262     BUG_ON(kaddr == NULL);
3263     base = kaddr + con->out_msg_pos.page_pos + bio_offset;
3264     - crc = crc32c(tmpcrc, base, len);
3265     - con->out_msg->footer.data_crc = cpu_to_le32(crc);
3266     + crc = crc32c(crc, base, len);
3267     + kunmap(page);
3268     + msg->footer.data_crc = cpu_to_le32(crc);
3269     con->out_msg_pos.did_page_crc = true;
3270     }
3271     ret = ceph_tcp_sendpage(con->sock, page,
3272     con->out_msg_pos.page_pos + bio_offset,
3273     len, 1);
3274     -
3275     - if (do_datacrc)
3276     - kunmap(page);
3277     -
3278     if (ret <= 0)
3279     goto out;
3280    
3281     - con->out_msg_pos.data_pos += ret;
3282     - con->out_msg_pos.page_pos += ret;
3283     - if (ret == len) {
3284     - con->out_msg_pos.page_pos = 0;
3285     - con->out_msg_pos.page++;
3286     - con->out_msg_pos.did_page_crc = false;
3287     - if (in_trail)
3288     - list_move_tail(&page->lru,
3289     - &msg->trail->head);
3290     - else if (msg->pagelist)
3291     - list_move_tail(&page->lru,
3292     - &msg->pagelist->head);
3293     -#ifdef CONFIG_BLOCK
3294     - else if (msg->bio)
3295     - iter_bio_next(&msg->bio_iter, &msg->bio_seg);
3296     -#endif
3297     - }
3298     + out_msg_pos_next(con, page, len, (size_t) ret, in_trail);
3299     }
3300    
3301     dout("write_partial_msg_pages %p msg %p done\n", con, msg);
3302    
3303     /* prepare and queue up footer, too */
3304     if (!do_datacrc)
3305     - con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
3306     - ceph_con_out_kvec_reset(con);
3307     + msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
3308     + con_out_kvec_reset(con);
3309     prepare_write_message_footer(con);
3310     ret = 1;
3311     out:
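
Two fixes hide in the hunk above: the running data CRC is seeded directly from footer.data_crc instead of a temporary, and the page is kunmap()ed as soon as the checksum is folded in, before ceph_tcp_sendpage() can block, where previously the mapping was held across the send. The resulting shape, with extern declarations standing in for kmap()/kunmap(), crc32c() and the send primitive:

    #include <stddef.h>
    #include <stdint.h>

    /* Extern stand-ins, not the kernel functions. */
    extern void    *page_map(void *page);
    extern void     page_unmap(void *page);
    extern uint32_t crc32c_update(uint32_t crc, const void *buf, size_t len);
    extern int      tcp_sendpage(void *sock, void *page, size_t off, size_t len);

    static int send_page_chunk(void *sock, void *page, size_t off, size_t len,
                               uint32_t *data_crc, int *did_crc)
    {
        if (!*did_crc) {
            char *kaddr = page_map(page);

            *data_crc = crc32c_update(*data_crc, kaddr + off, len);
            page_unmap(page);     /* unmap before the send, which may block */
            *did_crc = 1;
        }
        return tcp_sendpage(sock, page, off, len);
    }
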
3312     @@ -992,11 +1156,10 @@ static int prepare_read_message(struct ceph_connection *con)
3313    
3314    
3315     static int read_partial(struct ceph_connection *con,
3316     - int *to, int size, void *object)
3317     + int end, int size, void *object)
3318     {
3319     - *to += size;
3320     - while (con->in_base_pos < *to) {
3321     - int left = *to - con->in_base_pos;
3322     + while (con->in_base_pos < end) {
3323     + int left = end - con->in_base_pos;
3324     int have = size - left;
3325     int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
3326     if (ret <= 0)
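
read_partial() now takes an absolute end offset instead of mutating a *to cursor, so callers accumulate end += size per object and the helper compares it against con->in_base_pos; have is how much of the current object has already arrived. A runnable model of the convention (one byte per simulated recv):

    #include <stdio.h>
    #include <string.h>

    struct stream { const char *data; int pos; };   /* pos plays in_base_pos */

    static int read_partial_lite(struct stream *s, int end, int size, void *obj)
    {
        while (s->pos < end) {
            int left = end - s->pos;
            int have = size - left;             /* bytes of obj already read */

            memcpy((char *)obj + have, s->data + s->pos, 1);  /* 1-byte "recv" */
            s->pos++;
        }
        return 1;                               /* > 0: object complete */
    }

    int main(void)
    {
        struct stream s = { "abcdef", 0 };
        char a[3], b[3];
        int size, end;

        size = 3; end = size;                   /* first object at offset 0 */
        read_partial_lite(&s, end, size, a);
        size = 3; end += size;                  /* next object is appended */
        read_partial_lite(&s, end, size, b);
        printf("%.3s %.3s\n", a, b);            /* prints: abc def */
        return 0;
    }
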
3327     @@ -1012,37 +1175,52 @@ static int read_partial(struct ceph_connection *con,
3328     */
3329     static int read_partial_banner(struct ceph_connection *con)
3330     {
3331     - int ret, to = 0;
3332     + int size;
3333     + int end;
3334     + int ret;
3335    
3336     dout("read_partial_banner %p at %d\n", con, con->in_base_pos);
3337    
3338     /* peer's banner */
3339     - ret = read_partial(con, &to, strlen(CEPH_BANNER), con->in_banner);
3340     + size = strlen(CEPH_BANNER);
3341     + end = size;
3342     + ret = read_partial(con, end, size, con->in_banner);
3343     if (ret <= 0)
3344     goto out;
3345     - ret = read_partial(con, &to, sizeof(con->actual_peer_addr),
3346     - &con->actual_peer_addr);
3347     +
3348     + size = sizeof (con->actual_peer_addr);
3349     + end += size;
3350     + ret = read_partial(con, end, size, &con->actual_peer_addr);
3351     if (ret <= 0)
3352     goto out;
3353     - ret = read_partial(con, &to, sizeof(con->peer_addr_for_me),
3354     - &con->peer_addr_for_me);
3355     +
3356     + size = sizeof (con->peer_addr_for_me);
3357     + end += size;
3358     + ret = read_partial(con, end, size, &con->peer_addr_for_me);
3359     if (ret <= 0)
3360     goto out;
3361     +
3362     out:
3363     return ret;
3364     }
3365    
3366     static int read_partial_connect(struct ceph_connection *con)
3367     {
3368     - int ret, to = 0;
3369     + int size;
3370     + int end;
3371     + int ret;
3372    
3373     dout("read_partial_connect %p at %d\n", con, con->in_base_pos);
3374    
3375     - ret = read_partial(con, &to, sizeof(con->in_reply), &con->in_reply);
3376     + size = sizeof (con->in_reply);
3377     + end = size;
3378     + ret = read_partial(con, end, size, &con->in_reply);
3379     if (ret <= 0)
3380     goto out;
3381     - ret = read_partial(con, &to, le32_to_cpu(con->in_reply.authorizer_len),
3382     - con->auth_reply_buf);
3383     +
3384     + size = le32_to_cpu(con->in_reply.authorizer_len);
3385     + end += size;
3386     + ret = read_partial(con, end, size, con->auth_reply_buf);
3387     if (ret <= 0)
3388     goto out;
3389    
3390     @@ -1321,20 +1499,14 @@ static int process_banner(struct ceph_connection *con)
3391     ceph_pr_addr(&con->msgr->inst.addr.in_addr));
3392     }
3393    
3394     - set_bit(NEGOTIATING, &con->state);
3395     - prepare_read_connect(con);
3396     return 0;
3397     }
3398    
3399     static void fail_protocol(struct ceph_connection *con)
3400     {
3401     reset_connection(con);
3402     - set_bit(CLOSED, &con->state); /* in case there's queued work */
3403     -
3404     - mutex_unlock(&con->mutex);
3405     - if (con->ops->bad_proto)
3406     - con->ops->bad_proto(con);
3407     - mutex_lock(&con->mutex);
3408     + BUG_ON(con->state != CON_STATE_NEGOTIATING);
3409     + con->state = CON_STATE_CLOSED;
3410     }
3411    
3412     static int process_connect(struct ceph_connection *con)
3413     @@ -1377,7 +1549,8 @@ static int process_connect(struct ceph_connection *con)
3414     return -1;
3415     }
3416     con->auth_retry = 1;
3417     - ret = prepare_write_connect(con->msgr, con, 0);
3418     + con_out_kvec_reset(con);
3419     + ret = prepare_write_connect(con);
3420     if (ret < 0)
3421     return ret;
3422     prepare_read_connect(con);
3423     @@ -1392,12 +1565,15 @@ static int process_connect(struct ceph_connection *con)
3424     * dropped messages.
3425     */
3426     dout("process_connect got RESET peer seq %u\n",
3427     - le32_to_cpu(con->in_connect.connect_seq));
3428     + le32_to_cpu(con->in_reply.connect_seq));
3429     pr_err("%s%lld %s connection reset\n",
3430     ENTITY_NAME(con->peer_name),
3431     ceph_pr_addr(&con->peer_addr.in_addr));
3432     reset_connection(con);
3433     - prepare_write_connect(con->msgr, con, 0);
3434     + con_out_kvec_reset(con);
3435     + ret = prepare_write_connect(con);
3436     + if (ret < 0)
3437     + return ret;
3438     prepare_read_connect(con);
3439    
3440     /* Tell ceph about it. */
3441     @@ -1406,8 +1582,7 @@ static int process_connect(struct ceph_connection *con)
3442     if (con->ops->peer_reset)
3443     con->ops->peer_reset(con);
3444     mutex_lock(&con->mutex);
3445     - if (test_bit(CLOSED, &con->state) ||
3446     - test_bit(OPENING, &con->state))
3447     + if (con->state != CON_STATE_NEGOTIATING)
3448     return -EAGAIN;
3449     break;
3450    
3451     @@ -1416,11 +1591,14 @@ static int process_connect(struct ceph_connection *con)
3452     * If we sent a smaller connect_seq than the peer has, try
3453     * again with a larger value.
3454     */
3455     - dout("process_connect got RETRY my seq = %u, peer_seq = %u\n",
3456     + dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
3457     le32_to_cpu(con->out_connect.connect_seq),
3458     - le32_to_cpu(con->in_connect.connect_seq));
3459     - con->connect_seq = le32_to_cpu(con->in_connect.connect_seq);
3460     - prepare_write_connect(con->msgr, con, 0);
3461     + le32_to_cpu(con->in_reply.connect_seq));
3462     + con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
3463     + con_out_kvec_reset(con);
3464     + ret = prepare_write_connect(con);
3465     + if (ret < 0)
3466     + return ret;
3467     prepare_read_connect(con);
3468     break;
3469    
3470     @@ -1431,10 +1609,13 @@ static int process_connect(struct ceph_connection *con)
3471     */
3472     dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
3473     con->peer_global_seq,
3474     - le32_to_cpu(con->in_connect.global_seq));
3475     + le32_to_cpu(con->in_reply.global_seq));
3476     get_global_seq(con->msgr,
3477     - le32_to_cpu(con->in_connect.global_seq));
3478     - prepare_write_connect(con->msgr, con, 0);
3479     + le32_to_cpu(con->in_reply.global_seq));
3480     + con_out_kvec_reset(con);
3481     + ret = prepare_write_connect(con);
3482     + if (ret < 0)
3483     + return ret;
3484     prepare_read_connect(con);
3485     break;
3486    
3487     @@ -1449,7 +1630,10 @@ static int process_connect(struct ceph_connection *con)
3488     fail_protocol(con);
3489     return -1;
3490     }
3491     - clear_bit(CONNECTING, &con->state);
3492     +
3493     + BUG_ON(con->state != CON_STATE_NEGOTIATING);
3494     + con->state = CON_STATE_OPEN;
3495     +
3496     con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
3497     con->connect_seq++;
3498     con->peer_features = server_feat;
3499     @@ -1461,7 +1645,9 @@ static int process_connect(struct ceph_connection *con)
3500     le32_to_cpu(con->in_reply.connect_seq));
3501    
3502     if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
3503     - set_bit(LOSSYTX, &con->state);
3504     + set_bit(CON_FLAG_LOSSYTX, &con->flags);
3505     +
3506     + con->delay = 0; /* reset backoff memory */
3507    
3508     prepare_read_tag(con);
3509     break;
3510     @@ -1491,10 +1677,10 @@ static int process_connect(struct ceph_connection *con)
3511     */
3512     static int read_partial_ack(struct ceph_connection *con)
3513     {
3514     - int to = 0;
3515     + int size = sizeof (con->in_temp_ack);
3516     + int end = size;
3517    
3518     - return read_partial(con, &to, sizeof(con->in_temp_ack),
3519     - &con->in_temp_ack);
3520     + return read_partial(con, end, size, &con->in_temp_ack);
3521     }
3522    
3523    
3524     @@ -1547,10 +1733,7 @@ static int read_partial_message_section(struct ceph_connection *con,
3525     return 1;
3526     }
3527    
3528     -static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
3529     - struct ceph_msg_header *hdr,
3530     - int *skip);
3531     -
3532     +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
3533    
3534     static int read_partial_message_pages(struct ceph_connection *con,
3535     struct page **pages,
3536     @@ -1593,9 +1776,6 @@ static int read_partial_message_bio(struct ceph_connection *con,
3537     void *p;
3538     int ret, left;
3539    
3540     - if (IS_ERR(bv))
3541     - return PTR_ERR(bv);
3542     -
3543     left = min((int)(data_len - con->in_msg_pos.data_pos),
3544     (int)(bv->bv_len - con->in_msg_pos.page_pos));
3545    
3546     @@ -1627,26 +1807,22 @@ static int read_partial_message_bio(struct ceph_connection *con,
3547     static int read_partial_message(struct ceph_connection *con)
3548     {
3549     struct ceph_msg *m = con->in_msg;
3550     + int size;
3551     + int end;
3552     int ret;
3553     - int to, left;
3554     unsigned front_len, middle_len, data_len;
3555     bool do_datacrc = !con->msgr->nocrc;
3556     - int skip;
3557     u64 seq;
3558     u32 crc;
3559    
3560     dout("read_partial_message con %p msg %p\n", con, m);
3561    
3562     /* header */
3563     - while (con->in_base_pos < sizeof(con->in_hdr)) {
3564     - left = sizeof(con->in_hdr) - con->in_base_pos;
3565     - ret = ceph_tcp_recvmsg(con->sock,
3566     - (char *)&con->in_hdr + con->in_base_pos,
3567     - left);
3568     - if (ret <= 0)
3569     - return ret;
3570     - con->in_base_pos += ret;
3571     - }
3572     + size = sizeof (con->in_hdr);
3573     + end = size;
3574     + ret = read_partial(con, end, size, &con->in_hdr);
3575     + if (ret <= 0)
3576     + return ret;
3577    
3578     crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
3579     if (cpu_to_le32(crc) != con->in_hdr.crc) {
3580     @@ -1686,10 +1862,13 @@ static int read_partial_message(struct ceph_connection *con)
3581    
3582     /* allocate message? */
3583     if (!con->in_msg) {
3584     + int skip = 0;
3585     +
3586     dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
3587     con->in_hdr.front_len, con->in_hdr.data_len);
3588     - skip = 0;
3589     - con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
3590     + ret = ceph_con_in_msg_alloc(con, &skip);
3591     + if (ret < 0)
3592     + return ret;
3593     if (skip) {
3594     /* skip this message */
3595     dout("alloc_msg said skip message\n");
3596     @@ -1700,11 +1879,9 @@ static int read_partial_message(struct ceph_connection *con)
3597     con->in_seq++;
3598     return 0;
3599     }
3600     - if (!con->in_msg) {
3601     - con->error_msg =
3602     - "error allocating memory for incoming message";
3603     - return -ENOMEM;
3604     - }
3605     +
3606     + BUG_ON(!con->in_msg);
3607     + BUG_ON(con->in_msg->con != con);
3608     m = con->in_msg;
3609     m->front.iov_len = 0; /* haven't read it yet */
3610     if (m->middle)
3611     @@ -1716,6 +1893,11 @@ static int read_partial_message(struct ceph_connection *con)
3612     else
3613     con->in_msg_pos.page_pos = 0;
3614     con->in_msg_pos.data_pos = 0;
3615     +
3616     +#ifdef CONFIG_BLOCK
3617     + if (m->bio)
3618     + init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
3619     +#endif
3620     }
3621    
3622     /* front */
3623     @@ -1732,10 +1914,6 @@ static int read_partial_message(struct ceph_connection *con)
3624     if (ret <= 0)
3625     return ret;
3626     }
3627     -#ifdef CONFIG_BLOCK
3628     - if (m->bio && !m->bio_iter)
3629     - init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
3630     -#endif
3631    
3632     /* (page) data */
3633     while (con->in_msg_pos.data_pos < data_len) {
3634     @@ -1746,7 +1924,7 @@ static int read_partial_message(struct ceph_connection *con)
3635     return ret;
3636     #ifdef CONFIG_BLOCK
3637     } else if (m->bio) {
3638     -
3639     + BUG_ON(!m->bio_iter);
3640     ret = read_partial_message_bio(con,
3641     &m->bio_iter, &m->bio_seg,
3642     data_len, do_datacrc);
3643     @@ -1759,16 +1937,12 @@ static int read_partial_message(struct ceph_connection *con)
3644     }
3645    
3646     /* footer */
3647     - to = sizeof(m->hdr) + sizeof(m->footer);
3648     - while (con->in_base_pos < to) {
3649     - left = to - con->in_base_pos;
3650     - ret = ceph_tcp_recvmsg(con->sock, (char *)&m->footer +
3651     - (con->in_base_pos - sizeof(m->hdr)),
3652     - left);
3653     - if (ret <= 0)
3654     - return ret;
3655     - con->in_base_pos += ret;
3656     - }
3657     + size = sizeof (m->footer);
3658     + end += size;
3659     + ret = read_partial(con, end, size, &m->footer);
3660     + if (ret <= 0)
3661     + return ret;
3662     +
3663     dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
3664     m, front_len, m->footer.front_crc, middle_len,
3665     m->footer.middle_crc, data_len, m->footer.data_crc);
3666     @@ -1804,8 +1978,11 @@ static void process_message(struct ceph_connection *con)
3667     {
3668     struct ceph_msg *msg;
3669    
3670     + BUG_ON(con->in_msg->con != con);
3671     + con->in_msg->con = NULL;
3672     msg = con->in_msg;
3673     con->in_msg = NULL;
3674     + con->ops->put(con);
3675    
3676     /* if first message, set peer_name */
3677     if (con->peer_name.type == 0)
3678     @@ -1825,7 +2002,6 @@ static void process_message(struct ceph_connection *con)
3679     con->ops->dispatch(con, msg);
3680    
3681     mutex_lock(&con->mutex);
3682     - prepare_read_tag(con);
3683     }
3684    
3685    
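
process_message() invokes ops->dispatch() with con->mutex dropped, so the connection may be closed or reopened before the lock is retaken; that is why prepare_read_tag() disappears here and reappears in try_read() behind a fresh CON_STATE_OPEN check. The general pattern, sketched with pthreads:

    #include <pthread.h>

    enum { ST_OPEN = 1, ST_CLOSED = 2 };

    /* Run a callback with the lock dropped, then revalidate: nothing that
     * was checked before the unlock can be trusted after the relock. */
    static void dispatch_unlocked(pthread_mutex_t *lock, int *state,
                                  void (*cb)(void *), void *arg)
    {
        pthread_mutex_unlock(lock);
        cb(arg);                       /* may close or reopen the connection */
        pthread_mutex_lock(lock);

        if (*state != ST_OPEN)         /* the try_read() check added below */
            return;
        /* ...prepare_read_tag()-style follow-up only happens here... */
    }
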
3686     @@ -1835,21 +2011,21 @@ static void process_message(struct ceph_connection *con)
3687     */
3688     static int try_write(struct ceph_connection *con)
3689     {
3690     - struct ceph_messenger *msgr = con->msgr;
3691     int ret = 1;
3692    
3693     - dout("try_write start %p state %lu nref %d\n", con, con->state,
3694     - atomic_read(&con->nref));
3695     + dout("try_write start %p state %lu\n", con, con->state);
3696    
3697     more:
3698     dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
3699    
3700     /* open the socket first? */
3701     - if (con->sock == NULL) {
3702     - prepare_write_connect(msgr, con, 1);
3703     + if (con->state == CON_STATE_PREOPEN) {
3704     + BUG_ON(con->sock);
3705     + con->state = CON_STATE_CONNECTING;
3706     +
3707     + con_out_kvec_reset(con);
3708     + prepare_write_banner(con);
3709     prepare_read_banner(con);
3710     - set_bit(CONNECTING, &con->state);
3711     - clear_bit(NEGOTIATING, &con->state);
3712    
3713     BUG_ON(con->in_msg);
3714     con->in_tag = CEPH_MSGR_TAG_READY;
3715     @@ -1896,7 +2072,7 @@ more_kvec:
3716     }
3717    
3718     do_next:
3719     - if (!test_bit(CONNECTING, &con->state)) {
3720     + if (con->state == CON_STATE_OPEN) {
3721     /* is anything else pending? */
3722     if (!list_empty(&con->out_queue)) {
3723     prepare_write_message(con);
3724     @@ -1906,14 +2082,15 @@ do_next:
3725     prepare_write_ack(con);
3726     goto more;
3727     }
3728     - if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) {
3729     + if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING,
3730     + &con->flags)) {
3731     prepare_write_keepalive(con);
3732     goto more;
3733     }
3734     }
3735    
3736     /* Nothing to do! */
3737     - clear_bit(WRITE_PENDING, &con->state);
3738     + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3739     dout("try_write nothing else to write.\n");
3740     ret = 0;
3741     out:
3742     @@ -1930,38 +2107,46 @@ static int try_read(struct ceph_connection *con)
3743     {
3744     int ret = -1;
3745    
3746     - if (!con->sock)
3747     - return 0;
3748     -
3749     - if (test_bit(STANDBY, &con->state))
3750     +more:
3751     + dout("try_read start on %p state %lu\n", con, con->state);
3752     + if (con->state != CON_STATE_CONNECTING &&
3753     + con->state != CON_STATE_NEGOTIATING &&
3754     + con->state != CON_STATE_OPEN)
3755     return 0;
3756    
3757     - dout("try_read start on %p\n", con);
3758     + BUG_ON(!con->sock);
3759    
3760     -more:
3761     dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
3762     con->in_base_pos);
3763    
3764     - /*
3765     - * process_connect and process_message drop and re-take
3766     - * con->mutex. make sure we handle a racing close or reopen.
3767     - */
3768     - if (test_bit(CLOSED, &con->state) ||
3769     - test_bit(OPENING, &con->state)) {
3770     - ret = -EAGAIN;
3771     + if (con->state == CON_STATE_CONNECTING) {
3772     + dout("try_read connecting\n");
3773     + ret = read_partial_banner(con);
3774     + if (ret <= 0)
3775     + goto out;
3776     + ret = process_banner(con);
3777     + if (ret < 0)
3778     + goto out;
3779     +
3780     + BUG_ON(con->state != CON_STATE_CONNECTING);
3781     + con->state = CON_STATE_NEGOTIATING;
3782     +
3783     + /*
3784     + * Received banner is good, exchange connection info.
3785     + * Do not reset out_kvec, as sending our banner raced
3786     + * with receiving peer banner after connect completed.
3787     + */
3788     + ret = prepare_write_connect(con);
3789     + if (ret < 0)
3790     + goto out;
3791     + prepare_read_connect(con);
3792     +
3793     + /* Send connection info before awaiting response */
3794     goto out;
3795     }
3796    
3797     - if (test_bit(CONNECTING, &con->state)) {
3798     - if (!test_bit(NEGOTIATING, &con->state)) {
3799     - dout("try_read connecting\n");
3800     - ret = read_partial_banner(con);
3801     - if (ret <= 0)
3802     - goto out;
3803     - ret = process_banner(con);
3804     - if (ret < 0)
3805     - goto out;
3806     - }
3807     + if (con->state == CON_STATE_NEGOTIATING) {
3808     + dout("try_read negotiating\n");
3809     ret = read_partial_connect(con);
3810     if (ret <= 0)
3811     goto out;
3812     @@ -1971,6 +2156,8 @@ more:
3813     goto more;
3814     }
3815    
3816     + BUG_ON(con->state != CON_STATE_OPEN);
3817     +
3818     if (con->in_base_pos < 0) {
3819     /*
3820     * skipping + discarding content.
3821     @@ -2004,7 +2191,8 @@ more:
3822     prepare_read_ack(con);
3823     break;
3824     case CEPH_MSGR_TAG_CLOSE:
3825     - set_bit(CLOSED, &con->state); /* fixme */
3826     + con_close_socket(con);
3827     + con->state = CON_STATE_CLOSED;
3828     goto out;
3829     default:
3830     goto bad_tag;
3831     @@ -2027,6 +2215,8 @@ more:
3832     if (con->in_tag == CEPH_MSGR_TAG_READY)
3833     goto more;
3834     process_message(con);
3835     + if (con->state == CON_STATE_OPEN)
3836     + prepare_read_tag(con);
3837     goto more;
3838     }
3839     if (con->in_tag == CEPH_MSGR_TAG_ACK) {
3840     @@ -2055,12 +2245,6 @@ bad_tag:
3841     */
3842     static void queue_con(struct ceph_connection *con)
3843     {
3844     - if (test_bit(DEAD, &con->state)) {
3845     - dout("queue_con %p ignoring: DEAD\n",
3846     - con);
3847     - return;
3848     - }
3849     -
3850     if (!con->ops->get(con)) {
3851     dout("queue_con %p ref count 0\n", con);
3852     return;
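
queue_con() only schedules work after obtaining a reference via ops->get(), and drops it again if the work item was already queued; the DEAD bit becomes unnecessary because a refcount that has reached zero simply refuses the get. A sketch of the idiom (single-threaded stand-ins; the kernel uses atomic counters and a workqueue):

    #include <stdbool.h>

    struct conn { int ref; };

    extern bool schedule_work(struct conn *c);  /* stand-in for queue_delayed_work */

    /* ops->get() analogue: take a reference only while the object is alive. */
    static bool conn_get_unless_zero(struct conn *c)
    {
        if (c->ref == 0)
            return false;
        c->ref++;
        return true;
    }

    static void conn_put(struct conn *c) { c->ref--; }

    static void queue_conn(struct conn *c)
    {
        if (!conn_get_unless_zero(c))
            return;                  /* dying connection: nothing to queue */
        if (!schedule_work(c))
            conn_put(c);             /* already queued: drop the extra ref */
    }
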
3853     @@ -2085,7 +2269,26 @@ static void con_work(struct work_struct *work)
3854    
3855     mutex_lock(&con->mutex);
3856     restart:
3857     - if (test_and_clear_bit(BACKOFF, &con->state)) {
3858     + if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) {
3859     + switch (con->state) {
3860     + case CON_STATE_CONNECTING:
3861     + con->error_msg = "connection failed";
3862     + break;
3863     + case CON_STATE_NEGOTIATING:
3864     + con->error_msg = "negotiation failed";
3865     + break;
3866     + case CON_STATE_OPEN:
3867     + con->error_msg = "socket closed";
3868     + break;
3869     + default:
3870     + dout("unrecognized con state %d\n", (int)con->state);
3871     + con->error_msg = "unrecognized con state";
3872     + BUG();
3873     + }
3874     + goto fault;
3875     + }
3876     +
3877     + if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) {
3878     dout("con_work %p backing off\n", con);
3879     if (queue_delayed_work(ceph_msgr_wq, &con->work,
3880     round_jiffies_relative(con->delay))) {
3881     @@ -2093,41 +2296,42 @@ restart:
3882     mutex_unlock(&con->mutex);
3883     return;
3884     } else {
3885     - con->ops->put(con);
3886     dout("con_work %p FAILED to back off %lu\n", con,
3887     con->delay);
3888     + set_bit(CON_FLAG_BACKOFF, &con->flags);
3889     }
3890     + goto done;
3891     }
3892    
3893     - if (test_bit(STANDBY, &con->state)) {
3894     + if (con->state == CON_STATE_STANDBY) {
3895     dout("con_work %p STANDBY\n", con);
3896     goto done;
3897     }
3898     - if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
3899     - dout("con_work CLOSED\n");
3900     - con_close_socket(con);
3901     + if (con->state == CON_STATE_CLOSED) {
3902     + dout("con_work %p CLOSED\n", con);
3903     + BUG_ON(con->sock);
3904     goto done;
3905     }
3906     - if (test_and_clear_bit(OPENING, &con->state)) {
3907     - /* reopen w/ new peer */
3908     + if (con->state == CON_STATE_PREOPEN) {
3909     dout("con_work OPENING\n");
3910     - con_close_socket(con);
3911     + BUG_ON(con->sock);
3912     }
3913    
3914     - if (test_and_clear_bit(SOCK_CLOSED, &con->state))
3915     - goto fault;
3916     -
3917     ret = try_read(con);
3918     if (ret == -EAGAIN)
3919     goto restart;
3920     - if (ret < 0)
3921     + if (ret < 0) {
3922     + con->error_msg = "socket error on read";
3923     goto fault;
3924     + }
3925    
3926     ret = try_write(con);
3927     if (ret == -EAGAIN)
3928     goto restart;
3929     - if (ret < 0)
3930     + if (ret < 0) {
3931     + con->error_msg = "socket error on write";
3932     goto fault;
3933     + }
3934    
3935     done:
3936     mutex_unlock(&con->mutex);
3937     @@ -2136,7 +2340,6 @@ done_unlocked:
3938     return;
3939    
3940     fault:
3941     - mutex_unlock(&con->mutex);
3942     ceph_fault(con); /* error/fault path */
3943     goto done_unlocked;
3944     }
3945     @@ -2147,26 +2350,31 @@ fault:
3946     * exponential backoff
3947     */
3948     static void ceph_fault(struct ceph_connection *con)
3949     + __releases(con->mutex)
3950     {
3951     pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
3952     ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
3953     dout("fault %p state %lu to peer %s\n",
3954     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
3955    
3956     - if (test_bit(LOSSYTX, &con->state)) {
3957     - dout("fault on LOSSYTX channel\n");
3958     - goto out;
3959     - }
3960     -
3961     - mutex_lock(&con->mutex);
3962     - if (test_bit(CLOSED, &con->state))
3963     - goto out_unlock;
3964     + BUG_ON(con->state != CON_STATE_CONNECTING &&
3965     + con->state != CON_STATE_NEGOTIATING &&
3966     + con->state != CON_STATE_OPEN);
3967    
3968     con_close_socket(con);
3969    
3970     + if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) {
3971     + dout("fault on LOSSYTX channel, marking CLOSED\n");
3972     + con->state = CON_STATE_CLOSED;
3973     + goto out_unlock;
3974     + }
3975     +
3976     if (con->in_msg) {
3977     + BUG_ON(con->in_msg->con != con);
3978     + con->in_msg->con = NULL;
3979     ceph_msg_put(con->in_msg);
3980     con->in_msg = NULL;
3981     + con->ops->put(con);
3982     }
3983    
3984     /* Requeue anything that hasn't been acked */
3985     @@ -2175,12 +2383,13 @@ static void ceph_fault(struct ceph_connection *con)
3986     /* If there are no messages queued or keepalive pending, place
3987     * the connection in a STANDBY state */
3988     if (list_empty(&con->out_queue) &&
3989     - !test_bit(KEEPALIVE_PENDING, &con->state)) {
3990     + !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) {
3991     dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
3992     - clear_bit(WRITE_PENDING, &con->state);
3993     - set_bit(STANDBY, &con->state);
3994     + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3995     + con->state = CON_STATE_STANDBY;
3996     } else {
3997     /* retry after a delay. */
3998     + con->state = CON_STATE_PREOPEN;
3999     if (con->delay == 0)
4000     con->delay = BASE_DELAY_INTERVAL;
4001     else if (con->delay < MAX_DELAY_INTERVAL)
4002     @@ -2201,13 +2410,12 @@ static void ceph_fault(struct ceph_connection *con)
4003     * that when con_work restarts we schedule the
4004     * delay then.
4005     */
4006     - set_bit(BACKOFF, &con->state);
4007     + set_bit(CON_FLAG_BACKOFF, &con->flags);
4008     }
4009     }
4010    
4011     out_unlock:
4012     mutex_unlock(&con->mutex);
4013     -out:
4014     /*
4015     * in case we faulted due to authentication, invalidate our
4016     * current tickets so that we can get new ones.
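
The retry branch keeps the messenger's exponential backoff: con->delay starts at BASE_DELAY_INTERVAL on the first fault, doubles on each subsequent one, and saturates at MAX_DELAY_INTERVAL; if the delayed work cannot be queued because the worker is still running, CON_FLAG_BACKOFF now makes con_work() retry the scheduling itself. The delay schedule in isolation (constants illustrative; the kernel derives them from HZ):

    #define BASE_DELAY_INTERVAL 1UL
    #define MAX_DELAY_INTERVAL  (5UL * 60)

    static unsigned long next_delay(unsigned long delay)
    {
        if (delay == 0)
            delay = BASE_DELAY_INTERVAL;     /* first fault after success */
        else if (delay < MAX_DELAY_INTERVAL)
            delay *= 2;                      /* exponential backoff, capped */
        return delay;
    }
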
4017     @@ -2224,18 +2432,14 @@ out:
4018    
4019    
4020     /*
4021     - * create a new messenger instance
4022     + * initialize a new messenger instance
4023     */
4024     -struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
4025     - u32 supported_features,
4026     - u32 required_features)
4027     +void ceph_messenger_init(struct ceph_messenger *msgr,
4028     + struct ceph_entity_addr *myaddr,
4029     + u32 supported_features,
4030     + u32 required_features,
4031     + bool nocrc)
4032     {
4033     - struct ceph_messenger *msgr;
4034     -
4035     - msgr = kzalloc(sizeof(*msgr), GFP_KERNEL);
4036     - if (msgr == NULL)
4037     - return ERR_PTR(-ENOMEM);
4038     -
4039     msgr->supported_features = supported_features;
4040     msgr->required_features = required_features;
4041    
4042     @@ -2248,30 +2452,23 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
4043     msgr->inst.addr.type = 0;
4044     get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
4045     encode_my_addr(msgr);
4046     + msgr->nocrc = nocrc;
4047    
4048     - dout("messenger_create %p\n", msgr);
4049     - return msgr;
4050     -}
4051     -EXPORT_SYMBOL(ceph_messenger_create);
4052     + atomic_set(&msgr->stopping, 0);
4053    
4054     -void ceph_messenger_destroy(struct ceph_messenger *msgr)
4055     -{
4056     - dout("destroy %p\n", msgr);
4057     - kfree(msgr);
4058     - dout("destroyed messenger %p\n", msgr);
4059     + dout("%s %p\n", __func__, msgr);
4060     }
4061     -EXPORT_SYMBOL(ceph_messenger_destroy);
4062     +EXPORT_SYMBOL(ceph_messenger_init);
4063    
4064     static void clear_standby(struct ceph_connection *con)
4065     {
4066     /* come back from STANDBY? */
4067     - if (test_and_clear_bit(STANDBY, &con->state)) {
4068     - mutex_lock(&con->mutex);
4069     + if (con->state == CON_STATE_STANDBY) {
4070     dout("clear_standby %p and ++connect_seq\n", con);
4071     + con->state = CON_STATE_PREOPEN;
4072     con->connect_seq++;
4073     - WARN_ON(test_bit(WRITE_PENDING, &con->state));
4074     - WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
4075     - mutex_unlock(&con->mutex);
4076     + WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags));
4077     + WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags));
4078     }
4079     }
4080    
4081     @@ -2280,21 +2477,24 @@ static void clear_standby(struct ceph_connection *con)
4082     */
4083     void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
4084     {
4085     - if (test_bit(CLOSED, &con->state)) {
4086     - dout("con_send %p closed, dropping %p\n", con, msg);
4087     - ceph_msg_put(msg);
4088     - return;
4089     - }
4090     -
4091     /* set src+dst */
4092     msg->hdr.src = con->msgr->inst.name;
4093     -
4094     BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
4095     -
4096     msg->needs_out_seq = true;
4097    
4098     - /* queue */
4099     mutex_lock(&con->mutex);
4100     +
4101     + if (con->state == CON_STATE_CLOSED) {
4102     + dout("con_send %p closed, dropping %p\n", con, msg);
4103     + ceph_msg_put(msg);
4104     + mutex_unlock(&con->mutex);
4105     + return;
4106     + }
4107     +
4108     + BUG_ON(msg->con != NULL);
4109     + msg->con = con->ops->get(con);
4110     + BUG_ON(msg->con == NULL);
4111     +
4112     BUG_ON(!list_empty(&msg->list_head));
4113     list_add_tail(&msg->list_head, &con->out_queue);
4114     dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
4115     @@ -2303,12 +2503,13 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
4116     le32_to_cpu(msg->hdr.front_len),
4117     le32_to_cpu(msg->hdr.middle_len),
4118     le32_to_cpu(msg->hdr.data_len));
4119     +
4120     + clear_standby(con);
4121     mutex_unlock(&con->mutex);
4122    
4123     /* if there wasn't anything waiting to send before, queue
4124     * new work */
4125     - clear_standby(con);
4126     - if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
4127     + if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
4128     queue_con(con);
4129     }
4130     EXPORT_SYMBOL(ceph_con_send);
4131     @@ -2316,24 +2517,34 @@ EXPORT_SYMBOL(ceph_con_send);
4132     /*
4133     * Revoke a message that was previously queued for send
4134     */
4135     -void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
4136     +void ceph_msg_revoke(struct ceph_msg *msg)
4137     {
4138     + struct ceph_connection *con = msg->con;
4139     +
4140     + if (!con)
4141     + return; /* Message not in our possession */
4142     +
4143     mutex_lock(&con->mutex);
4144     if (!list_empty(&msg->list_head)) {
4145     - dout("con_revoke %p msg %p - was on queue\n", con, msg);
4146     + dout("%s %p msg %p - was on queue\n", __func__, con, msg);
4147     list_del_init(&msg->list_head);
4148     - ceph_msg_put(msg);
4149     + BUG_ON(msg->con == NULL);
4150     + msg->con->ops->put(msg->con);
4151     + msg->con = NULL;
4152     msg->hdr.seq = 0;
4153     +
4154     + ceph_msg_put(msg);
4155     }
4156     if (con->out_msg == msg) {
4157     - dout("con_revoke %p msg %p - was sending\n", con, msg);
4158     + dout("%s %p msg %p - was sending\n", __func__, con, msg);
4159     con->out_msg = NULL;
4160     if (con->out_kvec_is_msg) {
4161     con->out_skip = con->out_kvec_bytes;
4162     con->out_kvec_is_msg = false;
4163     }
4164     - ceph_msg_put(msg);
4165     msg->hdr.seq = 0;
4166     +
4167     + ceph_msg_put(msg);
4168     }
4169     mutex_unlock(&con->mutex);
4170     }
4171     @@ -2341,17 +2552,27 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
4172     /*
4173     * Revoke a message that we may be reading data into
4174     */
4175     -void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
4176     +void ceph_msg_revoke_incoming(struct ceph_msg *msg)
4177     {
4178     + struct ceph_connection *con;
4179     +
4180     + BUG_ON(msg == NULL);
4181     + if (!msg->con) {
4182     + dout("%s msg %p null con\n", __func__, msg);
4183     +
4184     + return; /* Message not in our possession */
4185     + }
4186     +
4187     + con = msg->con;
4188     mutex_lock(&con->mutex);
4189     - if (con->in_msg && con->in_msg == msg) {
4190     + if (con->in_msg == msg) {
4191     unsigned front_len = le32_to_cpu(con->in_hdr.front_len);
4192     unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len);
4193     unsigned data_len = le32_to_cpu(con->in_hdr.data_len);
4194    
4195     /* skip rest of message */
4196     - dout("con_revoke_pages %p msg %p revoked\n", con, msg);
4197     - con->in_base_pos = con->in_base_pos -
4198     + dout("%s %p msg %p revoked\n", __func__, con, msg);
4199     + con->in_base_pos = con->in_base_pos -
4200     sizeof(struct ceph_msg_header) -
4201     front_len -
4202     middle_len -
4203     @@ -2362,8 +2583,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
4204     con->in_tag = CEPH_MSGR_TAG_READY;
4205     con->in_seq++;
4206     } else {
4207     - dout("con_revoke_pages %p msg %p pages %p no-op\n",
4208     - con, con->in_msg, msg);
4209     + dout("%s %p in_msg %p msg %p no-op\n",
4210     + __func__, con, con->in_msg, msg);
4211     }
4212     mutex_unlock(&con->mutex);
4213     }
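
Revoking a message that is partly received turns the rest of its frame into bytes to discard: in_base_pos is rewound by the header, front, middle, data and footer lengths, and try_read() treats a negative in_base_pos as "skipping + discarding content". The arithmetic, as a runnable example:

    #include <stdio.h>

    /* After revoke, in_base_pos counts up from this negative value back to 0;
     * input is discarded until then (sizes come from the message header). */
    static int skip_offset(int in_base_pos, int hdr_len, int front_len,
                           int middle_len, int data_len, int footer_len)
    {
        return in_base_pos - hdr_len - front_len - middle_len - data_len
               - footer_len;
    }

    int main(void)
    {
        /* e.g. 20 bytes already consumed of a 100-byte frame */
        printf("%d\n", skip_offset(20, 40, 30, 0, 20, 10)); /* -80 left to skip */
        return 0;
    }
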
4214     @@ -2374,9 +2595,11 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
4215     void ceph_con_keepalive(struct ceph_connection *con)
4216     {
4217     dout("con_keepalive %p\n", con);
4218     + mutex_lock(&con->mutex);
4219     clear_standby(con);
4220     - if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
4221     - test_and_set_bit(WRITE_PENDING, &con->state) == 0)
4222     + mutex_unlock(&con->mutex);
4223     + if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 &&
4224     + test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
4225     queue_con(con);
4226     }
4227     EXPORT_SYMBOL(ceph_con_keepalive);
4228     @@ -2395,6 +2618,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
4229     if (m == NULL)
4230     goto out;
4231     kref_init(&m->kref);
4232     +
4233     + m->con = NULL;
4234     INIT_LIST_HEAD(&m->list_head);
4235    
4236     m->hdr.tid = 0;
4237     @@ -2490,46 +2715,78 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
4238     }
4239    
4240     /*
4241     - * Generic message allocator, for incoming messages.
4242     + * Allocate a message for receiving an incoming message on a
4243     + * connection, and save the result in con->in_msg. Uses the
4244     + * connection's private alloc_msg op if available.
4245     + *
4246     + * Returns 0 on success, or a negative error code.
4247     + *
4248     + * On success, if we set *skip = 1:
4249     + * - the next message should be skipped and ignored.
4250     + * - con->in_msg == NULL
4251     + * or if we set *skip = 0:
4252     + * - con->in_msg is non-null.
4253     + * On error (ENOMEM, EAGAIN, ...),
4254     + * - con->in_msg == NULL
4255     */
4256     -static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
4257     - struct ceph_msg_header *hdr,
4258     - int *skip)
4259     +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
4260     {
4261     + struct ceph_msg_header *hdr = &con->in_hdr;
4262     int type = le16_to_cpu(hdr->type);
4263     int front_len = le32_to_cpu(hdr->front_len);
4264     int middle_len = le32_to_cpu(hdr->middle_len);
4265     - struct ceph_msg *msg = NULL;
4266     - int ret;
4267     + int ret = 0;
4268     +
4269     + BUG_ON(con->in_msg != NULL);
4270    
4271     if (con->ops->alloc_msg) {
4272     + struct ceph_msg *msg;
4273     +
4274     mutex_unlock(&con->mutex);
4275     msg = con->ops->alloc_msg(con, hdr, skip);
4276     mutex_lock(&con->mutex);
4277     - if (!msg || *skip)
4278     - return NULL;
4279     + if (con->state != CON_STATE_OPEN) {
4280     + if (msg)
4281     + ceph_msg_put(msg);
4282     + return -EAGAIN;
4283     + }
4284     + con->in_msg = msg;
4285     + if (con->in_msg) {
4286     + con->in_msg->con = con->ops->get(con);
4287     + BUG_ON(con->in_msg->con == NULL);
4288     + }
4289     + if (*skip) {
4290     + con->in_msg = NULL;
4291     + return 0;
4292     + }
4293     + if (!con->in_msg) {
4294     + con->error_msg =
4295     + "error allocating memory for incoming message";
4296     + return -ENOMEM;
4297     + }
4298     }
4299     - if (!msg) {
4300     - *skip = 0;
4301     - msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
4302     - if (!msg) {
4303     + if (!con->in_msg) {
4304     + con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
4305     + if (!con->in_msg) {
4306     pr_err("unable to allocate msg type %d len %d\n",
4307     type, front_len);
4308     - return NULL;
4309     + return -ENOMEM;
4310     }
4311     - msg->page_alignment = le16_to_cpu(hdr->data_off);
4312     + con->in_msg->con = con->ops->get(con);
4313     + BUG_ON(con->in_msg->con == NULL);
4314     + con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
4315     }
4316     - memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
4317     + memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
4318    
4319     - if (middle_len && !msg->middle) {
4320     - ret = ceph_alloc_middle(con, msg);
4321     + if (middle_len && !con->in_msg->middle) {
4322     + ret = ceph_alloc_middle(con, con->in_msg);
4323     if (ret < 0) {
4324     - ceph_msg_put(msg);
4325     - return NULL;
4326     + ceph_msg_put(con->in_msg);
4327     + con->in_msg = NULL;
4328     }
4329     }
4330    
4331     - return msg;
4332     + return ret;
4333     }
4334    
4335    
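The long comment above enumerates three outcomes for ceph_con_in_msg_alloc(): a negative error, success with *skip set (drop the payload), and success with con->in_msg ready. A minimal caller-side sketch of that contract follows; the function name and surrounding logic are illustrative assumptions, not part of this patch:

	/* Illustrative sketch (not from the patch): consuming the three
	 * possible outcomes of ceph_con_in_msg_alloc(). */
	static int handle_in_hdr(struct ceph_connection *con)
	{
		int skip = 0;
		int ret = ceph_con_in_msg_alloc(con, &skip);

		if (ret < 0)
			return ret;	/* -ENOMEM, -EAGAIN, ...; con->in_msg == NULL */
		if (skip)
			return 0;	/* discard this message; con->in_msg == NULL */
		/* success: con->in_msg != NULL, sized from con->in_hdr */
		return 1;
	}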
4336     diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
4337     index 1845cde..89a6409 100644
4338     --- a/net/ceph/mon_client.c
4339     +++ b/net/ceph/mon_client.c
4340     @@ -106,9 +106,9 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
4341     monc->pending_auth = 1;
4342     monc->m_auth->front.iov_len = len;
4343     monc->m_auth->hdr.front_len = cpu_to_le32(len);
4344     - ceph_con_revoke(monc->con, monc->m_auth);
4345     + ceph_msg_revoke(monc->m_auth);
4346     ceph_msg_get(monc->m_auth); /* keep our ref */
4347     - ceph_con_send(monc->con, monc->m_auth);
4348     + ceph_con_send(&monc->con, monc->m_auth);
4349     }
4350    
4351     /*
4352     @@ -117,8 +117,11 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
4353     static void __close_session(struct ceph_mon_client *monc)
4354     {
4355     dout("__close_session closing mon%d\n", monc->cur_mon);
4356     - ceph_con_revoke(monc->con, monc->m_auth);
4357     - ceph_con_close(monc->con);
4358     + ceph_msg_revoke(monc->m_auth);
4359     + ceph_msg_revoke_incoming(monc->m_auth_reply);
4360     + ceph_msg_revoke(monc->m_subscribe);
4361     + ceph_msg_revoke_incoming(monc->m_subscribe_ack);
4362     + ceph_con_close(&monc->con);
4363     monc->cur_mon = -1;
4364     monc->pending_auth = 0;
4365     ceph_auth_reset(monc->auth);
4366     @@ -142,9 +145,8 @@ static int __open_session(struct ceph_mon_client *monc)
4367     monc->want_next_osdmap = !!monc->want_next_osdmap;
4368    
4369     dout("open_session mon%d opening\n", monc->cur_mon);
4370     - monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON;
4371     - monc->con->peer_name.num = cpu_to_le64(monc->cur_mon);
4372     - ceph_con_open(monc->con,
4373     + ceph_con_open(&monc->con,
4374     + CEPH_ENTITY_TYPE_MON, monc->cur_mon,
4375     &monc->monmap->mon_inst[monc->cur_mon].addr);
4376    
4377     /* initiate authentication handshake */
4378     @@ -226,8 +228,8 @@ static void __send_subscribe(struct ceph_mon_client *monc)
4379    
4380     msg->front.iov_len = p - msg->front.iov_base;
4381     msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
4382     - ceph_con_revoke(monc->con, msg);
4383     - ceph_con_send(monc->con, ceph_msg_get(msg));
4384     + ceph_msg_revoke(msg);
4385     + ceph_con_send(&monc->con, ceph_msg_get(msg));
4386    
4387     monc->sub_sent = jiffies | 1; /* never 0 */
4388     }
4389     @@ -247,7 +249,7 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
4390     if (monc->hunting) {
4391     pr_info("mon%d %s session established\n",
4392     monc->cur_mon,
4393     - ceph_pr_addr(&monc->con->peer_addr.in_addr));
4394     + ceph_pr_addr(&monc->con.peer_addr.in_addr));
4395     monc->hunting = false;
4396     }
4397     dout("handle_subscribe_ack after %d seconds\n", seconds);
4398     @@ -309,6 +311,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
4399     EXPORT_SYMBOL(ceph_monc_open_session);
4400    
4401     /*
4402     + * We require the fsid and global_id in order to initialize our
4403     + * debugfs dir.
4404     + */
4405     +static bool have_debugfs_info(struct ceph_mon_client *monc)
4406     +{
4407     + dout("have_debugfs_info fsid %d globalid %lld\n",
4408     + (int)monc->client->have_fsid, monc->auth->global_id);
4409     + return monc->client->have_fsid && monc->auth->global_id > 0;
4410     +}
4411     +
4412     +/*
4413     * The monitor responds with a mount ack to indicate mount success. The
4414     * included client ticket allows the client to talk to MDSs and OSDs.
4415     */
4416     @@ -318,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
4417     struct ceph_client *client = monc->client;
4418     struct ceph_monmap *monmap = NULL, *old = monc->monmap;
4419     void *p, *end;
4420     + int had_debugfs_info, init_debugfs = 0;
4421    
4422     mutex_lock(&monc->mutex);
4423    
4424     + had_debugfs_info = have_debugfs_info(monc);
4425     +
4426     dout("handle_monmap\n");
4427     p = msg->front.iov_base;
4428     end = p + msg->front.iov_len;
4429     @@ -342,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
4430    
4431     if (!client->have_fsid) {
4432     client->have_fsid = true;
4433     + if (!had_debugfs_info && have_debugfs_info(monc)) {
4434     + pr_info("client%lld fsid %pU\n",
4435     + ceph_client_id(monc->client),
4436     + &monc->client->fsid);
4437     + init_debugfs = 1;
4438     + }
4439     mutex_unlock(&monc->mutex);
4440     - /*
4441     - * do debugfs initialization without mutex to avoid
4442     - * creating a locking dependency
4443     - */
4444     - ceph_debugfs_client_init(client);
4445     +
4446     + if (init_debugfs) {
4447     + /*
4448     + * do debugfs initialization without mutex to avoid
4449     + * creating a locking dependency
4450     + */
4451     + ceph_debugfs_client_init(monc->client);
4452     + }
4453     +
4454     goto out_unlocked;
4455     }
4456     out:
4457     @@ -439,6 +465,7 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
4458     m = NULL;
4459     } else {
4460     dout("get_generic_reply %lld got %p\n", tid, req->reply);
4461     + *skip = 0;
4462     m = ceph_msg_get(req->reply);
4463     /*
4464     * we don't need to track the connection reading into
4465     @@ -461,7 +488,7 @@ static int do_generic_request(struct ceph_mon_client *monc,
4466     req->request->hdr.tid = cpu_to_le64(req->tid);
4467     __insert_generic_request(monc, req);
4468     monc->num_generic_requests++;
4469     - ceph_con_send(monc->con, ceph_msg_get(req->request));
4470     + ceph_con_send(&monc->con, ceph_msg_get(req->request));
4471     mutex_unlock(&monc->mutex);
4472    
4473     err = wait_for_completion_interruptible(&req->completion);
4474     @@ -684,8 +711,9 @@ static void __resend_generic_request(struct ceph_mon_client *monc)
4475    
4476     for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
4477     req = rb_entry(p, struct ceph_mon_generic_request, node);
4478     - ceph_con_revoke(monc->con, req->request);
4479     - ceph_con_send(monc->con, ceph_msg_get(req->request));
4480     + ceph_msg_revoke(req->request);
4481     + ceph_msg_revoke_incoming(req->reply);
4482     + ceph_con_send(&monc->con, ceph_msg_get(req->request));
4483     }
4484     }
4485    
4486     @@ -705,7 +733,7 @@ static void delayed_work(struct work_struct *work)
4487     __close_session(monc);
4488     __open_session(monc); /* continue hunting */
4489     } else {
4490     - ceph_con_keepalive(monc->con);
4491     + ceph_con_keepalive(&monc->con);
4492    
4493     __validate_auth(monc);
4494    
4495     @@ -760,19 +788,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
4496     goto out;
4497    
4498     /* connection */
4499     - monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
4500     - if (!monc->con)
4501     - goto out_monmap;
4502     - ceph_con_init(monc->client->msgr, monc->con);
4503     - monc->con->private = monc;
4504     - monc->con->ops = &mon_con_ops;
4505     -
4506     /* authentication */
4507     monc->auth = ceph_auth_init(cl->options->name,
4508     cl->options->key);
4509     if (IS_ERR(monc->auth)) {
4510     err = PTR_ERR(monc->auth);
4511     - goto out_con;
4512     + goto out_monmap;
4513     }
4514     monc->auth->want_keys =
4515     CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
4516     @@ -801,6 +822,9 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
4517     if (!monc->m_auth)
4518     goto out_auth_reply;
4519    
4520     + ceph_con_init(&monc->con, monc, &mon_con_ops,
4521     + &monc->client->msgr);
4522     +
4523     monc->cur_mon = -1;
4524     monc->hunting = true;
4525     monc->sub_renew_after = jiffies;
4526     @@ -824,8 +848,6 @@ out_subscribe_ack:
4527     ceph_msg_put(monc->m_subscribe_ack);
4528     out_auth:
4529     ceph_auth_destroy(monc->auth);
4530     -out_con:
4531     - monc->con->ops->put(monc->con);
4532     out_monmap:
4533     kfree(monc->monmap);
4534     out:
4535     @@ -841,12 +863,16 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
4536     mutex_lock(&monc->mutex);
4537     __close_session(monc);
4538    
4539     - monc->con->private = NULL;
4540     - monc->con->ops->put(monc->con);
4541     - monc->con = NULL;
4542     -
4543     mutex_unlock(&monc->mutex);
4544    
4545     + /*
4546     + * flush msgr queue before we destroy ourselves to ensure that:
4547     + * - any work that references our embedded con is finished.
4548     + * - any osd_client or other work that may reference an authorizer
4549     + * finishes before we shut down the auth subsystem.
4550     + */
4551     + ceph_msgr_flush();
4552     +
4553     ceph_auth_destroy(monc->auth);
4554    
4555     ceph_msg_put(monc->m_auth);
4556     @@ -863,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
4557     {
4558     int ret;
4559     int was_auth = 0;
4560     + int had_debugfs_info, init_debugfs = 0;
4561    
4562     mutex_lock(&monc->mutex);
4563     + had_debugfs_info = have_debugfs_info(monc);
4564     if (monc->auth->ops)
4565     was_auth = monc->auth->ops->is_authenticated(monc->auth);
4566     monc->pending_auth = 0;
4567     @@ -880,14 +908,29 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
4568     } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
4569     dout("authenticated, starting session\n");
4570    
4571     - monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
4572     - monc->client->msgr->inst.name.num =
4573     + monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
4574     + monc->client->msgr.inst.name.num =
4575     cpu_to_le64(monc->auth->global_id);
4576    
4577     __send_subscribe(monc);
4578     __resend_generic_request(monc);
4579     }
4580     +
4581     + if (!had_debugfs_info && have_debugfs_info(monc)) {
4582     + pr_info("client%lld fsid %pU\n",
4583     + ceph_client_id(monc->client),
4584     + &monc->client->fsid);
4585     + init_debugfs = 1;
4586     + }
4587     mutex_unlock(&monc->mutex);
4588     +
4589     + if (init_debugfs) {
4590     + /*
4591     + * do debugfs initialization without mutex to avoid
4592     + * creating a locking dependency
4593     + */
4594     + ceph_debugfs_client_init(monc->client);
4595     + }
4596     }
4597    
4598     static int __validate_auth(struct ceph_mon_client *monc)
4599     @@ -992,6 +1035,8 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
4600     case CEPH_MSG_MDS_MAP:
4601     case CEPH_MSG_OSD_MAP:
4602     m = ceph_msg_new(type, front_len, GFP_NOFS, false);
4603     + if (!m)
4604     + return NULL; /* ENOMEM--return skip == 0 */
4605     break;
4606     }
4607    
4608     @@ -1021,7 +1066,7 @@ static void mon_fault(struct ceph_connection *con)
4609     if (!monc->hunting)
4610     pr_info("mon%d %s session lost, "
4611     "hunting for new mon\n", monc->cur_mon,
4612     - ceph_pr_addr(&monc->con->peer_addr.in_addr));
4613     + ceph_pr_addr(&monc->con.peer_addr.in_addr));
4614    
4615     __close_session(monc);
4616     if (!monc->hunting) {
4617     @@ -1036,9 +1081,23 @@ out:
4618     mutex_unlock(&monc->mutex);
4619     }
4620    
4621     +/*
4622     + * We can ignore refcounting on the connection struct, as all references
4623     + * will come from the messenger workqueue, which is drained prior to
4624     + * mon_client destruction.
4625     + */
4626     +static struct ceph_connection *con_get(struct ceph_connection *con)
4627     +{
4628     + return con;
4629     +}
4630     +
4631     +static void con_put(struct ceph_connection *con)
4632     +{
4633     +}
4634     +
4635     static const struct ceph_connection_operations mon_con_ops = {
4636     - .get = ceph_con_get,
4637     - .put = ceph_con_put,
4638     + .get = con_get,
4639     + .put = con_put,
4640     .dispatch = dispatch,
4641     .fault = mon_fault,
4642     .alloc_msg = mon_alloc_msg,
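Two comments in the hunks above carry the lifetime reasoning: the embedded monc->con needs no real refcounting (hence the no-op con_get/con_put) because ceph_msgr_flush() drains every messenger work item before the mon_client goes away. A condensed sketch of that teardown ordering, paraphrasing the patched ceph_monc_stop() rather than quoting it:

	mutex_lock(&monc->mutex);
	__close_session(monc);		/* no new work gets queued for monc->con */
	mutex_unlock(&monc->mutex);

	ceph_msgr_flush();		/* drain the messenger workqueue: after
					 * this, no work item can still touch
					 * the embedded con or an authorizer */

	ceph_auth_destroy(monc->auth);	/* now safe to tear down auth state */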
4643     diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
4644     index 11d5f41..ddec1c1 100644
4645     --- a/net/ceph/msgpool.c
4646     +++ b/net/ceph/msgpool.c
4647     @@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
4648     struct ceph_msgpool *pool = arg;
4649     struct ceph_msg *msg;
4650    
4651     - msg = ceph_msg_new(0, pool->front_len, gfp_mask, true);
4652     + msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
4653     if (!msg) {
4654     dout("msgpool_alloc %s failed\n", pool->name);
4655     } else {
4656     @@ -32,10 +32,11 @@ static void msgpool_free(void *element, void *arg)
4657     ceph_msg_put(msg);
4658     }
4659    
4660     -int ceph_msgpool_init(struct ceph_msgpool *pool,
4661     +int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
4662     int front_len, int size, bool blocking, const char *name)
4663     {
4664     dout("msgpool %s init\n", name);
4665     + pool->type = type;
4666     pool->front_len = front_len;
4667     pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
4668     if (!pool->pool)
4669     @@ -61,7 +62,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
4670     WARN_ON(1);
4671    
4672     /* try to alloc a fresh message */
4673     - return ceph_msg_new(0, front_len, GFP_NOFS, false);
4674     + return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
4675     }
4676    
4677     msg = mempool_alloc(pool->pool, GFP_NOFS);
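With the pool recording its message type at init time, both allocation paths (the mempool and the ceph_msg_new() fallback) stamp hdr.type themselves; that is why the osd_client hunk below can drop its manual msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP) line. A short usage sketch, mirroring the patched osd_client init:

	/* sketch: the pool is bound to one message type at init time */
	err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
				OSD_OP_FRONT_LEN, 10, true, "osd_op");

	/* later: every message drawn from the pool is pre-typed */
	msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
	/* msg->hdr.type == cpu_to_le16(CEPH_MSG_OSD_OP), pooled or not */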
4678     diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
4679     index 5e25405..a79dbae 100644
4680     --- a/net/ceph/osd_client.c
4681     +++ b/net/ceph/osd_client.c
4682     @@ -52,7 +52,7 @@ static int op_has_extent(int op)
4683     op == CEPH_OSD_OP_WRITE);
4684     }
4685    
4686     -void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4687     +int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4688     struct ceph_file_layout *layout,
4689     u64 snapid,
4690     u64 off, u64 *plen, u64 *bno,
4691     @@ -62,12 +62,15 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4692     struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
4693     u64 orig_len = *plen;
4694     u64 objoff, objlen; /* extent in object */
4695     + int r;
4696    
4697     reqhead->snapid = cpu_to_le64(snapid);
4698    
4699     /* object extent? */
4700     - ceph_calc_file_object_mapping(layout, off, plen, bno,
4701     - &objoff, &objlen);
4702     + r = ceph_calc_file_object_mapping(layout, off, plen, bno,
4703     + &objoff, &objlen);
4704     + if (r < 0)
4705     + return r;
4706     if (*plen < orig_len)
4707     dout(" skipping last %llu, final file extent %llu~%llu\n",
4708     orig_len - *plen, off, *plen);
4709     @@ -83,7 +86,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4710    
4711     dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
4712     *bno, objoff, objlen, req->r_num_pages);
4713     -
4714     + return 0;
4715     }
4716     EXPORT_SYMBOL(ceph_calc_raw_layout);
4717    
4718     @@ -112,20 +115,25 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
4719     *
4720     * fill osd op in request message.
4721     */
4722     -static void calc_layout(struct ceph_osd_client *osdc,
4723     - struct ceph_vino vino,
4724     - struct ceph_file_layout *layout,
4725     - u64 off, u64 *plen,
4726     - struct ceph_osd_request *req,
4727     - struct ceph_osd_req_op *op)
4728     +static int calc_layout(struct ceph_osd_client *osdc,
4729     + struct ceph_vino vino,
4730     + struct ceph_file_layout *layout,
4731     + u64 off, u64 *plen,
4732     + struct ceph_osd_request *req,
4733     + struct ceph_osd_req_op *op)
4734     {
4735     u64 bno;
4736     + int r;
4737    
4738     - ceph_calc_raw_layout(osdc, layout, vino.snap, off,
4739     - plen, &bno, req, op);
4740     + r = ceph_calc_raw_layout(osdc, layout, vino.snap, off,
4741     + plen, &bno, req, op);
4742     + if (r < 0)
4743     + return r;
4744    
4745     snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
4746     req->r_oid_len = strlen(req->r_oid);
4747     +
4748     + return r;
4749     }
4750    
4751     /*
4752     @@ -139,15 +147,14 @@ void ceph_osdc_release_request(struct kref *kref)
4753    
4754     if (req->r_request)
4755     ceph_msg_put(req->r_request);
4756     - if (req->r_reply)
4757     - ceph_msg_put(req->r_reply);
4758     if (req->r_con_filling_msg) {
4759     - dout("release_request revoking pages %p from con %p\n",
4760     + dout("%s revoking pages %p from con %p\n", __func__,
4761     req->r_pages, req->r_con_filling_msg);
4762     - ceph_con_revoke_message(req->r_con_filling_msg,
4763     - req->r_reply);
4764     - ceph_con_put(req->r_con_filling_msg);
4765     + ceph_msg_revoke_incoming(req->r_reply);
4766     + req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
4767     }
4768     + if (req->r_reply)
4769     + ceph_msg_put(req->r_reply);
4770     if (req->r_own_pages)
4771     ceph_release_page_vector(req->r_pages,
4772     req->r_num_pages);
4773     @@ -243,6 +250,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
4774     }
4775     ceph_pagelist_init(req->r_trail);
4776     }
4777     +
4778     /* create request message; allow space for oid */
4779     msg_size += MAX_OBJ_NAME_SIZE;
4780     if (snapc)
4781     @@ -256,7 +264,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
4782     return NULL;
4783     }
4784    
4785     - msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP);
4786     memset(msg->front.iov_base, 0, msg->front.iov_len);
4787    
4788     req->r_request = msg;
4789     @@ -278,7 +285,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
4790     {
4791     dst->op = cpu_to_le16(src->op);
4792    
4793     - switch (dst->op) {
4794     + switch (src->op) {
4795     case CEPH_OSD_OP_READ:
4796     case CEPH_OSD_OP_WRITE:
4797     dst->extent.offset =
4798     @@ -624,7 +631,7 @@ static void osd_reset(struct ceph_connection *con)
4799     /*
4800     * Track open sessions with osds.
4801     */
4802     -static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
4803     +static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
4804     {
4805     struct ceph_osd *osd;
4806    
4807     @@ -634,15 +641,13 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
4808    
4809     atomic_set(&osd->o_ref, 1);
4810     osd->o_osdc = osdc;
4811     + osd->o_osd = onum;
4812     INIT_LIST_HEAD(&osd->o_requests);
4813     INIT_LIST_HEAD(&osd->o_linger_requests);
4814     INIT_LIST_HEAD(&osd->o_osd_lru);
4815     osd->o_incarnation = 1;
4816    
4817     - ceph_con_init(osdc->client->msgr, &osd->o_con);
4818     - osd->o_con.private = osd;
4819     - osd->o_con.ops = &osd_con_ops;
4820     - osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
4821     + ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
4822    
4823     INIT_LIST_HEAD(&osd->o_keepalive_item);
4824     return osd;
4825     @@ -664,11 +669,11 @@ static void put_osd(struct ceph_osd *osd)
4826     {
4827     dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
4828     atomic_read(&osd->o_ref) - 1);
4829     - if (atomic_dec_and_test(&osd->o_ref)) {
4830     + if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) {
4831     struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
4832    
4833     - if (osd->o_authorizer)
4834     - ac->ops->destroy_authorizer(ac, osd->o_authorizer);
4835     + if (ac->ops && ac->ops->destroy_authorizer)
4836     + ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer);
4837     kfree(osd);
4838     }
4839     }
4840     @@ -752,7 +757,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
4841     ret = -EAGAIN;
4842     } else {
4843     ceph_con_close(&osd->o_con);
4844     - ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
4845     + ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
4846     + &osdc->osdmap->osd_addr[osd->o_osd]);
4847     osd->o_incarnation++;
4848     }
4849     return ret;
4850     @@ -841,13 +847,19 @@ static void register_request(struct ceph_osd_client *osdc,
4851     static void __unregister_request(struct ceph_osd_client *osdc,
4852     struct ceph_osd_request *req)
4853     {
4854     + if (RB_EMPTY_NODE(&req->r_node)) {
4855     + dout("__unregister_request %p tid %lld not registered\n",
4856     + req, req->r_tid);
4857     + return;
4858     + }
4859     +
4860     dout("__unregister_request %p tid %lld\n", req, req->r_tid);
4861     rb_erase(&req->r_node, &osdc->requests);
4862     osdc->num_requests--;
4863    
4864     if (req->r_osd) {
4865     /* make sure the original request isn't in flight. */
4866     - ceph_con_revoke(&req->r_osd->o_con, req->r_request);
4867     + ceph_msg_revoke(req->r_request);
4868    
4869     list_del_init(&req->r_osd_item);
4870     if (list_empty(&req->r_osd->o_requests) &&
4871     @@ -874,7 +886,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
4872     static void __cancel_request(struct ceph_osd_request *req)
4873     {
4874     if (req->r_sent && req->r_osd) {
4875     - ceph_con_revoke(&req->r_osd->o_con, req->r_request);
4876     + ceph_msg_revoke(req->r_request);
4877     req->r_sent = 0;
4878     }
4879     }
4880     @@ -884,7 +896,9 @@ static void __register_linger_request(struct ceph_osd_client *osdc,
4881     {
4882     dout("__register_linger_request %p\n", req);
4883     list_add_tail(&req->r_linger_item, &osdc->req_linger);
4884     - list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
4885     + if (req->r_osd)
4886     + list_add_tail(&req->r_linger_osd,
4887     + &req->r_osd->o_linger_requests);
4888     }
4889    
4890     static void __unregister_linger_request(struct ceph_osd_client *osdc,
4891     @@ -992,18 +1006,18 @@ static int __map_request(struct ceph_osd_client *osdc,
4892     req->r_osd = __lookup_osd(osdc, o);
4893     if (!req->r_osd && o >= 0) {
4894     err = -ENOMEM;
4895     - req->r_osd = create_osd(osdc);
4896     + req->r_osd = create_osd(osdc, o);
4897     if (!req->r_osd) {
4898     list_move(&req->r_req_lru_item, &osdc->req_notarget);
4899     goto out;
4900     }
4901    
4902     dout("map_request osd %p is osd%d\n", req->r_osd, o);
4903     - req->r_osd->o_osd = o;
4904     - req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
4905     __insert_osd(osdc, req->r_osd);
4906    
4907     - ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
4908     + ceph_con_open(&req->r_osd->o_con,
4909     + CEPH_ENTITY_TYPE_OSD, o,
4910     + &osdc->osdmap->osd_addr[o]);
4911     }
4912    
4913     if (req->r_osd) {
4914     @@ -1210,7 +1224,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
4915     if (req->r_con_filling_msg == con && req->r_reply == msg) {
4916     dout(" dropping con_filling_msg ref %p\n", con);
4917     req->r_con_filling_msg = NULL;
4918     - ceph_con_put(con);
4919     + con->ops->put(con);
4920     }
4921    
4922     if (!req->r_got_reply) {
4923     @@ -1298,8 +1312,9 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
4924    
4925     dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
4926     mutex_lock(&osdc->request_mutex);
4927     - for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
4928     + for (p = rb_first(&osdc->requests); p; ) {
4929     req = rb_entry(p, struct ceph_osd_request, r_node);
4930     + p = rb_next(p);
4931     err = __map_request(osdc, req, force_resend);
4932     if (err < 0)
4933     continue; /* error */
4934     @@ -1307,10 +1322,23 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
4935     dout("%p tid %llu maps to no osd\n", req, req->r_tid);
4936     needmap++; /* request a newer map */
4937     } else if (err > 0) {
4938     - dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
4939     - req->r_osd ? req->r_osd->o_osd : -1);
4940     - if (!req->r_linger)
4941     + if (!req->r_linger) {
4942     + dout("%p tid %llu requeued on osd%d\n", req,
4943     + req->r_tid,
4944     + req->r_osd ? req->r_osd->o_osd : -1);
4945     req->r_flags |= CEPH_OSD_FLAG_RETRY;
4946     + }
4947     + }
4948     + if (req->r_linger && list_empty(&req->r_linger_item)) {
4949     + /*
4950     + * register as a linger so that we will
4951     + * re-submit below and get a new tid
4952     + */
4953     + dout("%p tid %llu restart on osd%d\n",
4954     + req, req->r_tid,
4955     + req->r_osd ? req->r_osd->o_osd : -1);
4956     + __register_linger_request(osdc, req);
4957     + __unregister_request(osdc, req);
4958     }
4959     }
4960    
4961     @@ -1385,7 +1413,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
4962     epoch, maplen);
4963     newmap = osdmap_apply_incremental(&p, next,
4964     osdc->osdmap,
4965     - osdc->client->msgr);
4966     + &osdc->client->msgr);
4967     if (IS_ERR(newmap)) {
4968     err = PTR_ERR(newmap);
4969     goto bad;
4970     @@ -1833,11 +1861,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
4971     if (!osdc->req_mempool)
4972     goto out;
4973    
4974     - err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true,
4975     + err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
4976     + OSD_OP_FRONT_LEN, 10, true,
4977     "osd_op");
4978     if (err < 0)
4979     goto out_mempool;
4980     - err = ceph_msgpool_init(&osdc->msgpool_op_reply,
4981     + err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
4982     OSD_OPREPLY_FRONT_LEN, 10, true,
4983     "osd_op_reply");
4984     if (err < 0)
4985     @@ -2019,10 +2048,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
4986     }
4987    
4988     if (req->r_con_filling_msg) {
4989     - dout("get_reply revoking msg %p from old con %p\n",
4990     + dout("%s revoking msg %p from old con %p\n", __func__,
4991     req->r_reply, req->r_con_filling_msg);
4992     - ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
4993     - ceph_con_put(req->r_con_filling_msg);
4994     + ceph_msg_revoke_incoming(req->r_reply);
4995     + req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
4996     req->r_con_filling_msg = NULL;
4997     }
4998    
4999     @@ -2057,7 +2086,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
5000     #endif
5001     }
5002     *skip = 0;
5003     - req->r_con_filling_msg = ceph_con_get(con);
5004     + req->r_con_filling_msg = con->ops->get(con);
5005     dout("get_reply tid %lld %p\n", tid, m);
5006    
5007     out:
5008     @@ -2074,6 +2103,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
5009     int type = le16_to_cpu(hdr->type);
5010     int front = le32_to_cpu(hdr->front_len);
5011    
5012     + *skip = 0;
5013     switch (type) {
5014     case CEPH_MSG_OSD_MAP:
5015     case CEPH_MSG_WATCH_NOTIFY:
5016     @@ -2108,37 +2138,32 @@ static void put_osd_con(struct ceph_connection *con)
5017     /*
5018     * authentication
5019     */
5020     -static int get_authorizer(struct ceph_connection *con,
5021     - void **buf, int *len, int *proto,
5022     - void **reply_buf, int *reply_len, int force_new)
5023     +/*
5024     + * Note: returned pointer is the address of a structure that's
5025     + * managed separately. Caller must *not* attempt to free it.
5026     + */
5027     +static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
5028     + int *proto, int force_new)
5029     {
5030     struct ceph_osd *o = con->private;
5031     struct ceph_osd_client *osdc = o->o_osdc;
5032     struct ceph_auth_client *ac = osdc->client->monc.auth;
5033     - int ret = 0;
5034     + struct ceph_auth_handshake *auth = &o->o_auth;
5035    
5036     - if (force_new && o->o_authorizer) {
5037     - ac->ops->destroy_authorizer(ac, o->o_authorizer);
5038     - o->o_authorizer = NULL;
5039     - }
5040     - if (o->o_authorizer == NULL) {
5041     - ret = ac->ops->create_authorizer(
5042     - ac, CEPH_ENTITY_TYPE_OSD,
5043     - &o->o_authorizer,
5044     - &o->o_authorizer_buf,
5045     - &o->o_authorizer_buf_len,
5046     - &o->o_authorizer_reply_buf,
5047     - &o->o_authorizer_reply_buf_len);
5048     + if (force_new && auth->authorizer) {
5049     + if (ac->ops && ac->ops->destroy_authorizer)
5050     + ac->ops->destroy_authorizer(ac, auth->authorizer);
5051     + auth->authorizer = NULL;
5052     + }
5053     + if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
5054     + int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
5055     + auth);
5056     if (ret)
5057     - return ret;
5058     + return ERR_PTR(ret);
5059     }
5060     -
5061     *proto = ac->protocol;
5062     - *buf = o->o_authorizer_buf;
5063     - *len = o->o_authorizer_buf_len;
5064     - *reply_buf = o->o_authorizer_reply_buf;
5065     - *reply_len = o->o_authorizer_reply_buf_len;
5066     - return 0;
5067     +
5068     + return auth;
5069     }
5070    
5071    
5072     @@ -2148,7 +2173,11 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
5073     struct ceph_osd_client *osdc = o->o_osdc;
5074     struct ceph_auth_client *ac = osdc->client->monc.auth;
5075    
5076     - return ac->ops->verify_authorizer_reply(ac, o->o_authorizer, len);
5077     + /*
5078     + * XXX If ac->ops or ac->ops->verify_authorizer_reply is null,
5079     + * XXX which do we do: succeed or fail?
5080     + */
5081     + return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len);
5082     }
5083    
5084     static int invalidate_authorizer(struct ceph_connection *con)
5085     @@ -2157,7 +2186,7 @@ static int invalidate_authorizer(struct ceph_connection *con)
5086     struct ceph_osd_client *osdc = o->o_osdc;
5087     struct ceph_auth_client *ac = osdc->client->monc.auth;
5088    
5089     - if (ac->ops->invalidate_authorizer)
5090     + if (ac->ops && ac->ops->invalidate_authorizer)
5091     ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
5092    
5093     return ceph_monc_validate_auth(&osdc->client->monc);
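The authorizer refactor above changes the calling convention: instead of filling six out-parameters, get_authorizer() returns a pointer to the per-osd struct ceph_auth_handshake (o->o_auth) and signals failure with ERR_PTR(). A caller-side sketch of the new contract; the surrounding messenger-side code is an assumption for illustration:

	struct ceph_auth_handshake *auth;
	int proto;

	auth = con->ops->get_authorizer(con, &proto, 0);
	if (IS_ERR(auth))
		return PTR_ERR(auth);
	/* auth and its buffers belong to the osd session; borrow them,
	 * never free them -- that is what the "managed separately" note
	 * above the function warns about */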
5094     diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
5095     index 29ad46e..430076e 100644
5096     --- a/net/ceph/osdmap.c
5097     +++ b/net/ceph/osdmap.c
5098     @@ -495,15 +495,16 @@ static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
5099     ceph_decode_32_safe(p, end, pool, bad);
5100     ceph_decode_32_safe(p, end, len, bad);
5101     dout(" pool %d len %d\n", pool, len);
5102     + ceph_decode_need(p, end, len, bad);
5103     pi = __lookup_pg_pool(&map->pg_pools, pool);
5104     if (pi) {
5105     + char *name = kstrndup(*p, len, GFP_NOFS);
5106     +
5107     + if (!name)
5108     + return -ENOMEM;
5109     kfree(pi->name);
5110     - pi->name = kmalloc(len + 1, GFP_NOFS);
5111     - if (pi->name) {
5112     - memcpy(pi->name, *p, len);
5113     - pi->name[len] = '\0';
5114     - dout(" name is %s\n", pi->name);
5115     - }
5116     + pi->name = name;
5117     + dout(" name is %s\n", pi->name);
5118     }
5119     *p += len;
5120     }
5121     @@ -673,6 +674,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
5122     ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
5123     ceph_decode_copy(p, &pgid, sizeof(pgid));
5124     n = ceph_decode_32(p);
5125     + err = -EINVAL;
5126     + if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
5127     + goto bad;
5128     ceph_decode_need(p, end, n * sizeof(u32), bad);
5129     err = -ENOMEM;
5130     pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
5131     @@ -890,8 +894,16 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
5132     pglen = ceph_decode_32(p);
5133    
5134     if (pglen) {
5135     - /* insert */
5136     ceph_decode_need(p, end, pglen*sizeof(u32), bad);
5137     +
5138     + /* removing existing (if any) */
5139     + (void) __remove_pg_mapping(&map->pg_temp, pgid);
5140     +
5141     + /* insert */
5142     + if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) {
5143     + err = -EINVAL;
5144     + goto bad;
5145     + }
5146     pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
5147     if (!pg) {
5148     err = -ENOMEM;
5149     @@ -940,7 +952,7 @@ bad:
5150     * for now, we write only a single su, until we can
5151     * pass a stride back to the caller.
5152     */
5153     -void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5154     +int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5155     u64 off, u64 *plen,
5156     u64 *ono,
5157     u64 *oxoff, u64 *oxlen)
5158     @@ -954,11 +966,17 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5159    
5160     dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen,
5161     osize, su);
5162     + if (su == 0 || sc == 0)
5163     + goto invalid;
5164     su_per_object = osize / su;
5165     + if (su_per_object == 0)
5166     + goto invalid;
5167     dout("osize %u / su %u = su_per_object %u\n", osize, su,
5168     su_per_object);
5169    
5170     - BUG_ON((su & ~PAGE_MASK) != 0);
5171     + if ((su & ~PAGE_MASK) != 0)
5172     + goto invalid;
5173     +
5174     /* bl = *off / su; */
5175     t = off;
5176     do_div(t, su);
5177     @@ -986,6 +1004,14 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5178     *plen = *oxlen;
5179    
5180     dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
5181     + return 0;
5182     +
5183     +invalid:
5184     + dout(" invalid layout\n");
5185     + *ono = 0;
5186     + *oxoff = 0;
5187     + *oxlen = 0;
5188     + return -EINVAL;
5189     }
5190     EXPORT_SYMBOL(ceph_calc_file_object_mapping);
5191    
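ceph_calc_file_object_mapping() now rejects layouts with a zero stripe unit or stripe count, or a stripe unit that is not page-aligned, returning -EINVAL instead of tripping a BUG_ON. For readers unfamiliar with the striping math it validates, here is a small self-contained sketch with made-up layout values (4 MB objects, 1 MB stripe unit, stripe count 2):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* illustrative layout: 4 MB objects, 1 MB stripe unit, count 2 */
		uint32_t osize = 4 << 20, su = 1 << 20, sc = 2;
		uint64_t off = (uint64_t)5 << 20;		/* file offset 5 MB */

		uint32_t su_per_object = osize / su;		/* 4 */
		uint64_t bl = off / su;				/* stripe block 5 */
		uint64_t stripeno = bl / sc;			/* 2 */
		uint64_t stripepos = bl % sc;			/* 1 */
		uint64_t objsetno = stripeno / su_per_object;	/* 0 */
		uint64_t objno = objsetno * sc + stripepos;	/* object 1 */
		uint64_t oxoff = (stripeno % su_per_object) * su
				 + off % su;			/* 2 MB */

		printf("off %llu -> obj %llu off %llu\n",
		       (unsigned long long)off, (unsigned long long)objno,
		       (unsigned long long)oxoff);
		return 0;
	}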
5192     diff --git a/net/core/dev.c b/net/core/dev.c
5193     index 24a21f3..eb858dc 100644
5194     --- a/net/core/dev.c
5195     +++ b/net/core/dev.c
5196     @@ -2763,8 +2763,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
5197     if (unlikely(tcpu != next_cpu) &&
5198     (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
5199     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
5200     - rflow->last_qtail)) >= 0))
5201     + rflow->last_qtail)) >= 0)) {
5202     + tcpu = next_cpu;
5203     rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
5204     + }
5205    
5206     if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
5207     *rflowp = rflow;
5208     diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
5209     index 626698f..76f6d0b 100644
5210     --- a/net/core/dev_addr_lists.c
5211     +++ b/net/core/dev_addr_lists.c
5212     @@ -308,7 +308,8 @@ int dev_addr_del(struct net_device *dev, unsigned char *addr,
5213     */
5214     ha = list_first_entry(&dev->dev_addrs.list,
5215     struct netdev_hw_addr, list);
5216     - if (ha->addr == dev->dev_addr && ha->refcount == 1)
5217     + if (!memcmp(ha->addr, addr, dev->addr_len) &&
5218     + ha->type == addr_type && ha->refcount == 1)
5219     return -ENOENT;
5220    
5221     err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
5222     diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
5223     index 2fd0fba..59ef40a 100644
5224     --- a/net/ipv4/ip_sockglue.c
5225     +++ b/net/ipv4/ip_sockglue.c
5226     @@ -456,19 +456,28 @@ static int do_ip_setsockopt(struct sock *sk, int level,
5227     struct inet_sock *inet = inet_sk(sk);
5228     int val = 0, err;
5229    
5230     - if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
5231     - (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
5232     - (1<<IP_RETOPTS) | (1<<IP_TOS) |
5233     - (1<<IP_TTL) | (1<<IP_HDRINCL) |
5234     - (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
5235     - (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
5236     - (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
5237     - (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
5238     - optname == IP_UNICAST_IF ||
5239     - optname == IP_MULTICAST_TTL ||
5240     - optname == IP_MULTICAST_ALL ||
5241     - optname == IP_MULTICAST_LOOP ||
5242     - optname == IP_RECVORIGDSTADDR) {
5243     + switch (optname) {
5244     + case IP_PKTINFO:
5245     + case IP_RECVTTL:
5246     + case IP_RECVOPTS:
5247     + case IP_RECVTOS:
5248     + case IP_RETOPTS:
5249     + case IP_TOS:
5250     + case IP_TTL:
5251     + case IP_HDRINCL:
5252     + case IP_MTU_DISCOVER:
5253     + case IP_RECVERR:
5254     + case IP_ROUTER_ALERT:
5255     + case IP_FREEBIND:
5256     + case IP_PASSSEC:
5257     + case IP_TRANSPARENT:
5258     + case IP_MINTTL:
5259     + case IP_NODEFRAG:
5260     + case IP_UNICAST_IF:
5261     + case IP_MULTICAST_TTL:
5262     + case IP_MULTICAST_ALL:
5263     + case IP_MULTICAST_LOOP:
5264     + case IP_RECVORIGDSTADDR:
5265     if (optlen >= sizeof(int)) {
5266     if (get_user(val, (int __user *) optval))
5267     return -EFAULT;
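The rewrite from a (1<<optname) bitmask to a switch is not only cosmetic: a 32-bit mask can only describe option numbers 0..31, which is why the old code had to chain "|| optname == IP_UNICAST_IF ..." for the higher-numbered options. A tiny standalone sketch of the limitation (values are illustrative):

	#include <stdio.h>

	int main(void)
	{
		unsigned int mask = (1u << 5) | (1u << 12);	/* options 5, 12 */
		int optname = 50;	/* e.g. an option numbered >= 32 */

		/* "1u << optname" is undefined for optname >= 32, so a 32-bit
		 * mask cannot represent such options at all; the old code
		 * papered over this with extra equality comparisons */
		if (optname < 32 && (mask & (1u << optname)))
			printf("optname %d covered by the mask\n", optname);
		else
			printf("optname %d needs an explicit case\n", optname);
		return 0;
	}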
5268     diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
5269     index 3828a42..da4098f 100644
5270     --- a/net/ipv4/netfilter/nf_nat_standalone.c
5271     +++ b/net/ipv4/netfilter/nf_nat_standalone.c
5272     @@ -194,7 +194,8 @@ nf_nat_out(unsigned int hooknum,
5273    
5274     if ((ct->tuplehash[dir].tuple.src.u3.ip !=
5275     ct->tuplehash[!dir].tuple.dst.u3.ip) ||
5276     - (ct->tuplehash[dir].tuple.src.u.all !=
5277     + (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
5278     + ct->tuplehash[dir].tuple.src.u.all !=
5279     ct->tuplehash[!dir].tuple.dst.u.all)
5280     )
5281     return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
5282     @@ -230,7 +231,8 @@ nf_nat_local_fn(unsigned int hooknum,
5283     ret = NF_DROP;
5284     }
5285     #ifdef CONFIG_XFRM
5286     - else if (ct->tuplehash[dir].tuple.dst.u.all !=
5287     + else if (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
5288     + ct->tuplehash[dir].tuple.dst.u.all !=
5289     ct->tuplehash[!dir].tuple.src.u.all)
5290     if (ip_xfrm_me_harder(skb))
5291     ret = NF_DROP;
5292     diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
5293     index 63dd1f8..34c1109 100644
5294     --- a/net/ipv6/ipv6_sockglue.c
5295     +++ b/net/ipv6/ipv6_sockglue.c
5296     @@ -828,6 +828,7 @@ pref_skip_coa:
5297     if (val < 0 || val > 255)
5298     goto e_inval;
5299     np->min_hopcount = val;
5300     + retv = 0;
5301     break;
5302     case IPV6_DONTFRAG:
5303     np->dontfrag = valbool;
5304     diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
5305     index db8fae5..498e87b 100644
5306     --- a/net/mac80211/ieee80211_i.h
5307     +++ b/net/mac80211/ieee80211_i.h
5308     @@ -1297,6 +1297,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
5309     struct net_device *dev);
5310     netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
5311     struct net_device *dev);
5312     +void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
5313     + struct sk_buff_head *skbs);
5314    
5315     /* HT */
5316     bool ieee80111_cfg_override_disables_ht40(struct ieee80211_sub_if_data *sdata);
5317     diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
5318     index d93d39b..6d25d77 100644
5319     --- a/net/mac80211/sta_info.c
5320     +++ b/net/mac80211/sta_info.c
5321     @@ -738,8 +738,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
5322    
5323     for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
5324     local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]);
5325     - __skb_queue_purge(&sta->ps_tx_buf[ac]);
5326     - __skb_queue_purge(&sta->tx_filtered[ac]);
5327     + ieee80211_purge_tx_queue(&local->hw, &sta->ps_tx_buf[ac]);
5328     + ieee80211_purge_tx_queue(&local->hw, &sta->tx_filtered[ac]);
5329     }
5330    
5331     #ifdef CONFIG_MAC80211_MESH
5332     @@ -774,7 +774,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
5333     tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]);
5334     if (!tid_tx)
5335     continue;
5336     - __skb_queue_purge(&tid_tx->pending);
5337     + ieee80211_purge_tx_queue(&local->hw, &tid_tx->pending);
5338     kfree(tid_tx);
5339     }
5340    
5341     @@ -959,6 +959,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
5342     struct ieee80211_local *local = sdata->local;
5343     struct sk_buff_head pending;
5344     int filtered = 0, buffered = 0, ac;
5345     + unsigned long flags;
5346    
5347     clear_sta_flag(sta, WLAN_STA_SP);
5348    
5349     @@ -974,12 +975,16 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
5350     for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
5351     int count = skb_queue_len(&pending), tmp;
5352    
5353     + spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags);
5354     skb_queue_splice_tail_init(&sta->tx_filtered[ac], &pending);
5355     + spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags);
5356     tmp = skb_queue_len(&pending);
5357     filtered += tmp - count;
5358     count = tmp;
5359    
5360     + spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags);
5361     skb_queue_splice_tail_init(&sta->ps_tx_buf[ac], &pending);
5362     + spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags);
5363     tmp = skb_queue_len(&pending);
5364     buffered += tmp - count;
5365     }
5366     diff --git a/net/mac80211/status.c b/net/mac80211/status.c
5367     index 5f8f89e..47b117f 100644
5368     --- a/net/mac80211/status.c
5369     +++ b/net/mac80211/status.c
5370     @@ -660,3 +660,12 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb)
5371     dev_kfree_skb_any(skb);
5372     }
5373     EXPORT_SYMBOL(ieee80211_free_txskb);
5374     +
5375     +void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
5376     + struct sk_buff_head *skbs)
5377     +{
5378     + struct sk_buff *skb;
5379     +
5380     + while ((skb = __skb_dequeue(skbs)))
5381     + ieee80211_free_txskb(hw, skb);
5382     +}
5383     diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
5384     index e76facc..eace766 100644
5385     --- a/net/mac80211/tx.c
5386     +++ b/net/mac80211/tx.c
5387     @@ -1357,7 +1357,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
5388     if (tx->skb)
5389     dev_kfree_skb(tx->skb);
5390     else
5391     - __skb_queue_purge(&tx->skbs);
5392     + ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs);
5393     return -1;
5394     } else if (unlikely(res == TX_QUEUED)) {
5395     I802_DEBUG_INC(tx->local->tx_handlers_queued);
5396     @@ -2126,10 +2126,13 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
5397     */
5398     void ieee80211_clear_tx_pending(struct ieee80211_local *local)
5399     {
5400     + struct sk_buff *skb;
5401     int i;
5402    
5403     - for (i = 0; i < local->hw.queues; i++)
5404     - skb_queue_purge(&local->pending[i]);
5405     + for (i = 0; i < local->hw.queues; i++) {
5406     + while ((skb = skb_dequeue(&local->pending[i])) != NULL)
5407     + ieee80211_free_txskb(&local->hw, skb);
5408     + }
5409     }
5410    
5411     /*
5412     diff --git a/net/mac80211/util.c b/net/mac80211/util.c
5413     index 266d092..73ef163 100644
5414     --- a/net/mac80211/util.c
5415     +++ b/net/mac80211/util.c
5416     @@ -1341,6 +1341,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
5417     list_for_each_entry(sdata, &local->interfaces, list) {
5418     if (sdata->vif.type != NL80211_IFTYPE_STATION)
5419     continue;
5420     + if (!sdata->u.mgd.associated)
5421     + continue;
5422    
5423     ieee80211_send_nullfunc(local, sdata, 0);
5424     }
5425     diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
5426     index 0d07a1d..e022123 100644
5427     --- a/net/netfilter/nf_conntrack_proto_tcp.c
5428     +++ b/net/netfilter/nf_conntrack_proto_tcp.c
5429     @@ -158,21 +158,18 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
5430     * sCL -> sSS
5431     */
5432     /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
5433     -/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
5434     +/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
5435     /*
5436     * sNO -> sIV Too late and no reason to do anything
5437     * sSS -> sIV Client can't send SYN and then SYN/ACK
5438     * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
5439     - * sSR -> sIG
5440     - * sES -> sIG Error: SYNs in window outside the SYN_SENT state
5441     - * are errors. Receiver will reply with RST
5442     - * and close the connection.
5443     - * Or we are not in sync and hold a dead connection.
5444     - * sFW -> sIG
5445     - * sCW -> sIG
5446     - * sLA -> sIG
5447     - * sTW -> sIG
5448     - * sCL -> sIG
5449     + * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
5450     + * sES -> sIV Invalid SYN/ACK packets sent by the client
5451     + * sFW -> sIV
5452     + * sCW -> sIV
5453     + * sLA -> sIV
5454     + * sTW -> sIV
5455     + * sCL -> sIV
5456     */
5457     /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
5458     /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
5459     @@ -627,15 +624,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
5460     ack = sack = receiver->td_end;
5461     }
5462    
5463     - if (seq == end
5464     - && (!tcph->rst
5465     - || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
5466     + if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
5467     /*
5468     - * Packets contains no data: we assume it is valid
5469     - * and check the ack value only.
5470     - * However RST segments are always validated by their
5471     - * SEQ number, except when seq == 0 (reset sent answering
5472     - * SYN.
5473     + * RST sent answering SYN.
5474     */
5475     seq = end = sender->td_end;
5476    
5477     diff --git a/net/wireless/reg.c b/net/wireless/reg.c
5478     index b01449f..4dc8347 100644
5479     --- a/net/wireless/reg.c
5480     +++ b/net/wireless/reg.c
5481     @@ -134,9 +134,8 @@ static const struct ieee80211_regdomain world_regdom = {
5482     .reg_rules = {
5483     /* IEEE 802.11b/g, channels 1..11 */
5484     REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
5485     - /* IEEE 802.11b/g, channels 12..13. No HT40
5486     - * channel fits here. */
5487     - REG_RULE(2467-10, 2472+10, 20, 6, 20,
5488     + /* IEEE 802.11b/g, channels 12..13. */
5489     + REG_RULE(2467-10, 2472+10, 40, 6, 20,
5490     NL80211_RRF_PASSIVE_SCAN |
5491     NL80211_RRF_NO_IBSS),
5492     /* IEEE 802.11 channel 14 - Only JP enables
5493     diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
5494     index 8636585..04aa5c8 100644
5495     --- a/security/selinux/netnode.c
5496     +++ b/security/selinux/netnode.c
5497     @@ -174,7 +174,8 @@ static void sel_netnode_insert(struct sel_netnode *node)
5498     if (sel_netnode_hash[idx].size == SEL_NETNODE_HASH_BKT_LIMIT) {
5499     struct sel_netnode *tail;
5500     tail = list_entry(
5501     - rcu_dereference(sel_netnode_hash[idx].list.prev),
5502     + rcu_dereference_protected(sel_netnode_hash[idx].list.prev,
5503     + lockdep_is_held(&sel_netnode_lock)),
5504     struct sel_netnode, list);
5505     list_del_rcu(&tail->list);
5506     kfree_rcu(tail, rcu);
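The sel_netnode fix swaps rcu_dereference(), which is meant for read-side critical sections, for rcu_dereference_protected(), the correct primitive when the caller already holds the update-side lock; the lockdep expression both documents the locking rule and lets lockdep verify it at runtime. A generic fragment of the pattern, with illustrative names:

	/* update side: my_lock is held, so no RCU read lock is needed */
	spin_lock_bh(&my_lock);
	p = rcu_dereference_protected(head->first,
				      lockdep_is_held(&my_lock));
	list_del_rcu(&p->list);
	spin_unlock_bh(&my_lock);
	kfree_rcu(p, rcu);	/* freed after a grace period */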
5507     diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
5508     index 7143393..e23ad3f 100644
5509     --- a/sound/pci/hda/patch_analog.c
5510     +++ b/sound/pci/hda/patch_analog.c
5511     @@ -544,6 +544,7 @@ static int ad198x_build_pcms(struct hda_codec *codec)
5512     if (spec->multiout.dig_out_nid) {
5513     info++;
5514     codec->num_pcms++;
5515     + codec->spdif_status_reset = 1;
5516     info->name = "AD198x Digital";
5517     info->pcm_type = HDA_PCM_TYPE_SPDIF;
5518     info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ad198x_pcm_digital_playback;
5519     diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
5520     index 2bc6c51..057f95a2 100644
5521     --- a/sound/pci/hda/patch_cirrus.c
5522     +++ b/sound/pci/hda/patch_cirrus.c
5523     @@ -95,8 +95,8 @@ enum {
5524     #define CS420X_VENDOR_NID 0x11
5525     #define CS_DIG_OUT1_PIN_NID 0x10
5526     #define CS_DIG_OUT2_PIN_NID 0x15
5527     -#define CS_DMIC1_PIN_NID 0x12
5528     -#define CS_DMIC2_PIN_NID 0x0e
5529     +#define CS_DMIC1_PIN_NID 0x0e
5530     +#define CS_DMIC2_PIN_NID 0x12
5531    
5532     /* coef indices */
5533     #define IDX_SPDIF_STAT 0x0000
5534     @@ -1084,14 +1084,18 @@ static void init_input(struct hda_codec *codec)
5535     cs_automic(codec);
5536    
5537     coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */
5538     + cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
5539     +
5540     + coef = cs_vendor_coef_get(codec, IDX_BEEP_CFG);
5541     if (is_active_pin(codec, CS_DMIC2_PIN_NID))
5542     - coef |= 0x0500; /* DMIC2 2 chan on, GPIO1 off */
5543     + coef |= 1 << 4; /* DMIC2 2 chan on, GPIO1 off */
5544     if (is_active_pin(codec, CS_DMIC1_PIN_NID))
5545     - coef |= 0x1800; /* DMIC1 2 chan on, GPIO0 off
5546     + coef |= 1 << 3; /* DMIC1 2 chan on, GPIO0 off
5547     * No effect if SPDIF_OUT2 is
5548     * selected in IDX_SPDIF_CTL.
5549     */
5550     - cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
5551     +
5552     + cs_vendor_coef_set(codec, IDX_BEEP_CFG, coef);
5553     } else {
5554     if (spec->mic_detect)
5555     cs_automic(codec);
5556     @@ -1112,7 +1116,7 @@ static const struct hda_verb cs_coef_init_verbs[] = {
5557     | 0x0400 /* Disable Coefficient Auto increment */
5558     )},
5559     /* Beep */
5560     - {0x11, AC_VERB_SET_COEF_INDEX, IDX_DAC_CFG},
5561     + {0x11, AC_VERB_SET_COEF_INDEX, IDX_BEEP_CFG},
5562     {0x11, AC_VERB_SET_PROC_COEF, 0x0007}, /* Enable Beep thru DAC1/2/3 */
5563    
5564     {} /* terminator */
5565     diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
5566     index 6ecf1d4..257fe87 100644
5567     --- a/sound/pci/hda/patch_realtek.c
5568     +++ b/sound/pci/hda/patch_realtek.c
5569     @@ -5458,6 +5458,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
5570     SND_PCI_QUIRK(0x106b, 0x4000, "MacbookPro 5,1", ALC889_FIXUP_IMAC91_VREF),
5571     SND_PCI_QUIRK(0x106b, 0x4100, "Macmini 3,1", ALC889_FIXUP_IMAC91_VREF),
5572     SND_PCI_QUIRK(0x106b, 0x4200, "Mac Pro 5,1", ALC885_FIXUP_MACPRO_GPIO),
5573     + SND_PCI_QUIRK(0x106b, 0x4300, "iMac 9,1", ALC889_FIXUP_IMAC91_VREF),
5574     SND_PCI_QUIRK(0x106b, 0x4600, "MacbookPro 5,2", ALC889_FIXUP_IMAC91_VREF),
5575     SND_PCI_QUIRK(0x106b, 0x4900, "iMac 9,1 Aluminum", ALC889_FIXUP_IMAC91_VREF),
5576     SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_IMAC91_VREF),
5577     @@ -7047,6 +7048,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = {
5578     .patch = patch_alc662 },
5579     { .id = 0x10ec0663, .name = "ALC663", .patch = patch_alc662 },
5580     { .id = 0x10ec0665, .name = "ALC665", .patch = patch_alc662 },
5581     + { .id = 0x10ec0668, .name = "ALC668", .patch = patch_alc662 },
5582     { .id = 0x10ec0670, .name = "ALC670", .patch = patch_alc662 },
5583     { .id = 0x10ec0680, .name = "ALC680", .patch = patch_alc680 },
5584     { .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 },
5585     @@ -7064,6 +7066,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = {
5586     { .id = 0x10ec0889, .name = "ALC889", .patch = patch_alc882 },
5587     { .id = 0x10ec0892, .name = "ALC892", .patch = patch_alc662 },
5588     { .id = 0x10ec0899, .name = "ALC898", .patch = patch_alc882 },
5589     + { .id = 0x10ec0900, .name = "ALC1150", .patch = patch_alc882 },
5590     {} /* terminator */
5591     };
5592    
5593     diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
5594     index 3998d09b..9dafacd 100644
5595     --- a/sound/pci/hda/patch_via.c
5596     +++ b/sound/pci/hda/patch_via.c
5597     @@ -1868,11 +1868,11 @@ static int via_auto_fill_dac_nids(struct hda_codec *codec)
5598     {
5599     struct via_spec *spec = codec->spec;
5600     const struct auto_pin_cfg *cfg = &spec->autocfg;
5601     - int i, dac_num;
5602     + int i;
5603     hda_nid_t nid;
5604    
5605     + spec->multiout.num_dacs = 0;
5606     spec->multiout.dac_nids = spec->private_dac_nids;
5607     - dac_num = 0;
5608     for (i = 0; i < cfg->line_outs; i++) {
5609     hda_nid_t dac = 0;
5610     nid = cfg->line_out_pins[i];
5611     @@ -1883,16 +1883,13 @@ static int via_auto_fill_dac_nids(struct hda_codec *codec)
5612     if (!i && parse_output_path(codec, nid, dac, 1,
5613     &spec->out_mix_path))
5614     dac = spec->out_mix_path.path[0];
5615     - if (dac) {
5616     - spec->private_dac_nids[i] = dac;
5617     - dac_num++;
5618     - }
5619     + if (dac)
5620     + spec->private_dac_nids[spec->multiout.num_dacs++] = dac;
5621     }
5622     if (!spec->out_path[0].depth && spec->out_mix_path.depth) {
5623     spec->out_path[0] = spec->out_mix_path;
5624     spec->out_mix_path.depth = 0;
5625     }
5626     - spec->multiout.num_dacs = dac_num;
5627     return 0;
5628     }
5629    
5630     @@ -3668,6 +3665,18 @@ static void set_widgets_power_state_vt2002P(struct hda_codec *codec)
5631     update_power_state(codec, 0x21, AC_PWRST_D3);
5632     }
5633    
5634     +/* NIDs 0x24 and 0x33 on VT1802 have connections to non-existing NID 0x3e
5635     + * Replace this with mixer NID 0x1c
5636     + */
5637     +static void fix_vt1802_connections(struct hda_codec *codec)
5638     +{
5639     + static hda_nid_t conn_24[] = { 0x14, 0x1c };
5640     + static hda_nid_t conn_33[] = { 0x1c };
5641     +
5642     + snd_hda_override_conn_list(codec, 0x24, ARRAY_SIZE(conn_24), conn_24);
5643     + snd_hda_override_conn_list(codec, 0x33, ARRAY_SIZE(conn_33), conn_33);
5644     +}
5645     +
5646     /* patch for vt2002P */
5647     static int patch_vt2002P(struct hda_codec *codec)
5648     {
5649     @@ -3682,6 +3691,8 @@ static int patch_vt2002P(struct hda_codec *codec)
5650     spec->aa_mix_nid = 0x21;
5651     override_mic_boost(codec, 0x2b, 0, 3, 40);
5652     override_mic_boost(codec, 0x29, 0, 3, 40);
5653     + if (spec->codec_type == VT1802)
5654     + fix_vt1802_connections(codec);
5655     add_secret_dac_path(codec);
5656    
5657     /* automatic parse from the BIOS config */
5658     diff --git a/sound/soc/codecs/wm8978.c b/sound/soc/codecs/wm8978.c
5659     index 72d5fdc..6c37c7c 100644
5660     --- a/sound/soc/codecs/wm8978.c
5661     +++ b/sound/soc/codecs/wm8978.c
5662     @@ -783,7 +783,7 @@ static int wm8978_hw_params(struct snd_pcm_substream *substream,
5663     wm8978->mclk_idx = -1;
5664     f_sel = wm8978->f_mclk;
5665     } else {
5666     - if (!wm8978->f_pllout) {
5667     + if (!wm8978->f_opclk) {
5668     /* We only enter here, if OPCLK is not used */
5669     int ret = wm8978_configure_pll(codec);
5670     if (ret < 0)
5671     diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
5672     index c41efe0..9ae82a4 100644
5673     --- a/sound/soc/soc-dapm.c
5674     +++ b/sound/soc/soc-dapm.c
5675     @@ -3253,7 +3253,7 @@ void snd_soc_dapm_shutdown(struct snd_soc_card *card)
5676     {
5677     struct snd_soc_codec *codec;
5678    
5679     - list_for_each_entry(codec, &card->codec_dev_list, list) {
5680     + list_for_each_entry(codec, &card->codec_dev_list, card_list) {
5681     soc_dapm_shutdown_codec(&codec->dapm);
5682     if (codec->dapm.bias_level == SND_SOC_BIAS_STANDBY)
5683     snd_soc_dapm_set_bias_level(&codec->dapm,