Contents of /trunk/kernel26-alx/patches-2.6.27-r3/0118-2.6.27.19-all-fixes.patch
Revision 1176
Thu Oct 14 15:11:06 2010 UTC (13 years, 11 months ago) by niro
File size: 80833 byte(s)
-2.6.27-alx-r3: new magellan 0.5.2 kernel
1 | diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c |
2 | index 5af4e9b..ada0692 100644 |
3 | --- a/arch/powerpc/kernel/align.c |
4 | +++ b/arch/powerpc/kernel/align.c |
5 | @@ -646,11 +646,16 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, |
6 | unsigned int areg, struct pt_regs *regs, |
7 | unsigned int flags, unsigned int length) |
8 | { |
9 | - char *ptr = (char *) &current->thread.TS_FPR(reg); |
10 | + char *ptr; |
11 | int ret = 0; |
12 | |
13 | flush_vsx_to_thread(current); |
14 | |
15 | + if (reg < 32) |
16 | + ptr = (char *) &current->thread.TS_FPR(reg); |
17 | + else |
18 | + ptr = (char *) &current->thread.vr[reg - 32]; |
19 | + |
20 | if (flags & ST) |
21 | ret = __copy_to_user(addr, ptr, length); |
22 | else { |
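A note on the align.c hunk above: VSX registers 0-31 overlay the floating-point registers while 32-63 overlay the VMX registers, so the old single-expression initializer indexed past the end of the FPR array for the upper half. A minimal userspace sketch of the corrected selection logic (the array names and types below are illustrative stand-ins, not the kernel's):

    #include <stdio.h>

    #define NUM_FPRS 32
    #define NUM_VRS  32

    static double fpr[NUM_FPRS];     /* stands in for current->thread.TS_FPR */
    static double vr[NUM_VRS][2];    /* stands in for current->thread.vr */

    static char *vsx_reg_ptr(unsigned int reg)
    {
        /* the fixed logic: low half routes to fpr[], high half to vr[] */
        if (reg < 32)
            return (char *)&fpr[reg];
        return (char *)&vr[reg - 32];
    }

    int main(void)
    {
        /* reg 40 used to index fpr[40], eight slots past the array end */
        printf("reg 40 -> %p (vr[8])\n", (void *)vsx_reg_ptr(40));
        return 0;
    }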
23 | diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c |
24 | index 5b719a0..7c3b8dc 100644 |
25 | --- a/arch/x86/mm/pageattr.c |
26 | +++ b/arch/x86/mm/pageattr.c |
27 | @@ -619,6 +619,13 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) |
28 | unsigned int level; |
29 | pte_t *kpte, old_pte; |
30 | |
31 | + /* |
32 | + * If we're called with lazy mmu updates enabled, the |
33 | + * in-memory pte state may be stale. Flush pending updates to |
34 | + * bring them up to date. |
35 | + */ |
36 | + arch_flush_lazy_mmu_mode(); |
37 | + |
38 | repeat: |
39 | kpte = lookup_address(address, &level); |
40 | if (!kpte) |
41 | @@ -836,6 +843,13 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, |
42 | else |
43 | cpa_flush_all(cache); |
44 | |
45 | + /* |
46 | + * If we've been called with lazy mmu updates enabled, then |
47 | + * make sure that everything gets flushed out before we |
48 | + * return. |
49 | + */ |
50 | + arch_flush_lazy_mmu_mode(); |
51 | + |
52 | out: |
53 | cpa_fill_pool(NULL); |
54 | |
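The two pageattr.c hunks above insert arch_flush_lazy_mmu_mode() around the attribute change. The idea: with lazy MMU updates (e.g. under paravirt), PTE writes are queued rather than applied immediately, so code that reads PTE state from memory must flush the queue first or it may act on stale data. A toy sketch of that flush-before-read rule (plain C, no kernel APIs):

    #include <stdio.h>

    static int committed_value;   /* what a lookup would actually see */
    static int pending_value;     /* queued update, not yet applied */
    static int pending;           /* is a lazy update outstanding? */

    static void lazy_write(int v) { pending_value = v; pending = 1; }

    static void flush_lazy_updates(void)
    {
        if (pending) {
            committed_value = pending_value;
            pending = 0;
        }
    }

    static int read_value(void)
    {
        flush_lazy_updates();     /* the equivalent of the added call */
        return committed_value;
    }

    int main(void)
    {
        lazy_write(42);
        printf("read sees %d\n", read_value());  /* 42, not a stale 0 */
        return 0;
    }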
55 | diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c |
56 | index c5be6a1..b6f55e8 100644 |
57 | --- a/drivers/ata/pata_via.c |
58 | +++ b/drivers/ata/pata_via.c |
59 | @@ -111,7 +111,8 @@ static const struct via_isa_bridge { |
60 | { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, |
61 | { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, |
62 | { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA }, |
63 | - { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES}, |
64 | + { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES }, |
65 | + { "vt6415", PCI_DEVICE_ID_VIA_6415, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES }, |
66 | { "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, |
67 | { "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, |
68 | { "vt8235", PCI_DEVICE_ID_VIA_8235, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, |
69 | @@ -594,6 +595,7 @@ static int via_reinit_one(struct pci_dev *pdev) |
70 | #endif |
71 | |
72 | static const struct pci_device_id via[] = { |
73 | + { PCI_VDEVICE(VIA, 0x0415), }, |
74 | { PCI_VDEVICE(VIA, 0x0571), }, |
75 | { PCI_VDEVICE(VIA, 0x0581), }, |
76 | { PCI_VDEVICE(VIA, 0x1571), }, |
77 | diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c |
78 | index 89e3b7f..8b6f9c0 100644 |
79 | --- a/drivers/ata/sata_nv.c |
80 | +++ b/drivers/ata/sata_nv.c |
81 | @@ -421,19 +421,21 @@ static struct ata_port_operations nv_generic_ops = { |
82 | .hardreset = ATA_OP_NULL, |
83 | }; |
84 | |
85 | -/* OSDL bz3352 reports that nf2/3 controllers can't determine device |
86 | - * signature reliably. Also, the following thread reports detection |
87 | - * failure on cold boot with the standard debouncing timing. |
88 | +/* nf2 is rife with hardreset related problems. |
89 | + * |
90 | + * kernel bz#3352 reports nf2/3 controllers can't determine device |
91 | + * signature reliably. The following thread reports detection failure |
92 | + * on cold boot with the standard debouncing timing. |
93 | * |
94 | * http://thread.gmane.org/gmane.linux.ide/34098 |
95 | * |
96 | - * Debounce with hotplug timing and request follow-up SRST. |
97 | + * And bz#12176 reports that hardreset simply doesn't work on nf2. |
98 | + * Give up on it and just don't do hardreset. |
99 | */ |
100 | static struct ata_port_operations nv_nf2_ops = { |
101 | - .inherits = &nv_common_ops, |
102 | + .inherits = &nv_generic_ops, |
103 | .freeze = nv_nf2_freeze, |
104 | .thaw = nv_nf2_thaw, |
105 | - .hardreset = nv_noclassify_hardreset, |
106 | }; |
107 | |
108 | /* For initial probing after boot and hot plugging, hardreset mostly |
109 | diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c |
110 | index 58630cc..f2ada0c 100644 |
111 | --- a/drivers/bluetooth/btsdio.c |
112 | +++ b/drivers/bluetooth/btsdio.c |
113 | @@ -91,6 +91,7 @@ static int btsdio_tx_packet(struct btsdio_data *data, struct sk_buff *skb) |
114 | |
115 | err = sdio_writesb(data->func, REG_TDAT, skb->data, skb->len); |
116 | if (err < 0) { |
117 | + skb_pull(skb, 4); |
118 | sdio_writeb(data->func, 0x01, REG_PC_WRT, NULL); |
119 | return err; |
120 | } |
121 | @@ -152,7 +153,7 @@ static int btsdio_rx_packet(struct btsdio_data *data) |
122 | |
123 | err = sdio_readsb(data->func, skb->data, REG_RDAT, len - 4); |
124 | if (err < 0) { |
125 | - kfree(skb); |
126 | + kfree_skb(skb); |
127 | return err; |
128 | } |
129 | |
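Two independent btsdio fixes above: the transmit path had prepended a 4-byte header with skb_push() before attempting the write, so the error path must skb_pull() it back off before the packet is requeued, or a retry would stack a second header; and the receive error path must free the sk_buff with kfree_skb(), its proper destructor, rather than raw kfree(). A toy model of the push/pull pairing (hand-rolled buffer type, not the kernel's skb API):

    #include <assert.h>
    #include <stdio.h>

    struct buf { unsigned char mem[64]; unsigned char *data; unsigned int len; };

    static void push(struct buf *b, unsigned int n) { b->data -= n; b->len += n; }
    static void pull(struct buf *b, unsigned int n) { b->data += n; b->len -= n; }

    int main(void)
    {
        struct buf b;

        b.data = b.mem + 16;   /* 16 bytes of headroom */
        b.len = 8;             /* payload */

        push(&b, 4);           /* prepend the 4-byte header before the write */
        /* ... the write fails and the packet will be requeued ... */
        pull(&b, 4);           /* the fix: strip the header again */

        assert(b.len == 8 && b.data == b.mem + 16);
        printf("payload restored, len=%u\n", b.len);
        return 0;
    }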
130 | diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c |
131 | index fdfb2b2..ae8e36c 100644 |
132 | --- a/drivers/net/3c505.c |
133 | +++ b/drivers/net/3c505.c |
134 | @@ -493,21 +493,27 @@ static bool receive_pcb(struct net_device *dev, pcb_struct * pcb) |
135 | } |
136 | /* read the data */ |
137 | spin_lock_irqsave(&adapter->lock, flags); |
138 | - i = 0; |
139 | - do { |
140 | - j = 0; |
141 | - while (((stat = get_status(dev->base_addr)) & ACRF) == 0 && j++ < 20000); |
142 | - pcb->data.raw[i++] = inb_command(dev->base_addr); |
143 | - if (i > MAX_PCB_DATA) |
144 | - INVALID_PCB_MSG(i); |
145 | - } while ((stat & ASF_PCB_MASK) != ASF_PCB_END && j < 20000); |
146 | + for (i = 0; i < MAX_PCB_DATA; i++) { |
147 | + for (j = 0; j < 20000; j++) { |
148 | + stat = get_status(dev->base_addr); |
149 | + if (stat & ACRF) |
150 | + break; |
151 | + } |
152 | + pcb->data.raw[i] = inb_command(dev->base_addr); |
153 | + if ((stat & ASF_PCB_MASK) == ASF_PCB_END || j >= 20000) |
154 | + break; |
155 | + } |
156 | spin_unlock_irqrestore(&adapter->lock, flags); |
157 | + if (i >= MAX_PCB_DATA) { |
158 | + INVALID_PCB_MSG(i); |
159 | + return false; |
160 | + } |
161 | if (j >= 20000) { |
162 | TIMEOUT_MSG(__LINE__); |
163 | return false; |
164 | } |
165 | - /* woops, the last "data" byte was really the length! */ |
166 | - total_length = pcb->data.raw[--i]; |
167 | + /* the last "data" byte was really the length! */ |
168 | + total_length = pcb->data.raw[i]; |
169 | |
170 | /* safety check total length vs data length */ |
171 | if (total_length != (pcb->length + 2)) { |
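The receive_pcb() rewrite above replaces a do/while loop that incremented its index before bounds-checking it, so a misbehaving adapter could overrun pcb->data.raw by one byte, with a for loop whose bound is checked up front, plus explicit overflow and timeout exits after the loop. A standalone sketch of the same pattern (simulated device I/O; the constants and helpers are stand-ins):

    #include <stdio.h>

    #define MAX_PCB_DATA 62
    #define ACRF    0x01
    #define PCB_END 0x02

    static int sim_status(void) { return ACRF | PCB_END; }  /* fake device */
    static unsigned char sim_read(void) { return 0x07; }

    int main(void)
    {
        unsigned char raw[MAX_PCB_DATA];
        int i, j = 0, stat = 0;

        for (i = 0; i < MAX_PCB_DATA; i++) {    /* bound checked up front */
            for (j = 0; j < 20000; j++) {       /* bounded busy-wait */
                stat = sim_status();
                if (stat & ACRF)
                    break;
            }
            raw[i] = sim_read();
            if ((stat & PCB_END) || j >= 20000)
                break;
        }
        if (i >= MAX_PCB_DATA || j >= 20000)
            return 1;                           /* overflow or timeout */
        printf("length byte: %u\n", raw[i]);    /* last byte is the length */
        return 0;
    }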
172 | diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c |
173 | index c3edcdc..2d90a3c 100644 |
174 | --- a/drivers/pci/intel-iommu.c |
175 | +++ b/drivers/pci/intel-iommu.c |
176 | @@ -72,6 +72,8 @@ static struct deferred_flush_tables *deferred_flush; |
177 | /* bitmap for indexing intel_iommus */ |
178 | static int g_num_of_iommus; |
179 | |
180 | +static int rwbf_quirk = 0; |
181 | + |
182 | static DEFINE_SPINLOCK(async_umap_flush_lock); |
183 | static LIST_HEAD(unmaps_to_do); |
184 | |
185 | @@ -527,7 +529,7 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu) |
186 | u32 val; |
187 | unsigned long flag; |
188 | |
189 | - if (!cap_rwbf(iommu->cap)) |
190 | + if (!rwbf_quirk && !cap_rwbf(iommu->cap)) |
191 | return; |
192 | val = iommu->gcmd | DMA_GCMD_WBF; |
193 | |
194 | @@ -2453,3 +2455,12 @@ int __init intel_iommu_init(void) |
195 | return 0; |
196 | } |
197 | |
198 | +static void __devinit quirk_iommu_rwbf(struct pci_dev *dev) |
199 | +{ |
200 | + /* Mobile 4 Series Chipset neglects to set RWBF capability, |
201 | + but needs it */ |
202 | + printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n"); |
203 | + rwbf_quirk = 1; |
204 | +} |
205 | + |
206 | +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf); |
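The intel-iommu hunk above adds a module-level quirk flag: the Mobile 4 Series chipset omits the RWBF capability bit it actually needs, so a PCI fixup matching device 0x2a40 forces the write-buffer flush path regardless of what the hardware advertises. The general shape of the pattern, sketched in plain C (illustrative names and bit positions):

    #include <stdbool.h>
    #include <stdio.h>

    static bool rwbf_quirk;                 /* set by a device-match hook */

    static bool needs_write_buffer_flush(unsigned int cap)
    {
        /* bit 4 plays the role of cap_rwbf() here */
        return rwbf_quirk || (cap & (1u << 4));
    }

    int main(void)
    {
        unsigned int cap = 0;               /* chipset forgot to set the bit */
        rwbf_quirk = true;                  /* fixup matched the PCI ID */
        printf("flush needed: %d\n", needs_write_buffer_flush(cap));
        return 0;
    }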
207 | diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c |
208 | index 299e075..55ac5c3 100644 |
209 | --- a/drivers/scsi/libiscsi.c |
210 | +++ b/drivers/scsi/libiscsi.c |
211 | @@ -1844,6 +1844,7 @@ void iscsi_pool_free(struct iscsi_pool *q) |
212 | kfree(q->pool[i]); |
213 | if (q->pool) |
214 | kfree(q->pool); |
215 | + kfree(q->queue); |
216 | } |
217 | EXPORT_SYMBOL_GPL(iscsi_pool_free); |
218 | |
219 | diff --git a/fs/ext2/super.c b/fs/ext2/super.c |
220 | index fd88c7b..2ebc0c4 100644 |
221 | --- a/fs/ext2/super.c |
222 | +++ b/fs/ext2/super.c |
223 | @@ -1177,9 +1177,12 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) |
224 | es = sbi->s_es; |
225 | if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != |
226 | (old_mount_opt & EXT2_MOUNT_XIP)) && |
227 | - invalidate_inodes(sb)) |
228 | - ext2_warning(sb, __func__, "busy inodes while remounting "\ |
229 | - "xip remain in cache (no functional problem)"); |
230 | + invalidate_inodes(sb)) { |
231 | + ext2_warning(sb, __func__, "refusing change of xip flag " |
232 | + "with busy inodes while remounting"); |
233 | + sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; |
234 | + sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; |
235 | + } |
236 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
237 | return 0; |
238 | if (*flags & MS_RDONLY) { |
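The ext2 remount hunk above changes behavior: instead of warning and proceeding when the xip option is toggled while inodes are busy, it refuses the change by restoring the old bit into s_mount_opt. A minimal sketch of that restore-the-old-flag pattern (illustrative flag names):

    #include <stdio.h>

    #define MOUNT_XIP 0x01u

    int main(void)
    {
        unsigned int old_opt = 0;           /* mounted without xip */
        unsigned int new_opt = MOUNT_XIP;   /* remount asked for xip */
        int busy_inodes = 1;

        if (((new_opt ^ old_opt) & MOUNT_XIP) && busy_inodes) {
            new_opt &= ~MOUNT_XIP;              /* drop the requested bit */
            new_opt |= old_opt & MOUNT_XIP;     /* and keep the old one */
        }
        printf("effective xip: %u\n", new_opt & MOUNT_XIP);
        return 0;
    }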
239 | diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c |
240 | index e9fa960..8b7c776 100644 |
241 | --- a/fs/ext4/balloc.c |
242 | +++ b/fs/ext4/balloc.c |
243 | @@ -20,6 +20,7 @@ |
244 | #include "ext4.h" |
245 | #include "ext4_jbd2.h" |
246 | #include "group.h" |
247 | +#include "mballoc.h" |
248 | |
249 | /* |
250 | * balloc.c contains the blocks allocation and deallocation routines |
251 | @@ -318,18 +319,41 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) |
252 | block_group, bitmap_blk); |
253 | return NULL; |
254 | } |
255 | - if (bh_uptodate_or_lock(bh)) |
256 | + |
257 | + if (bitmap_uptodate(bh)) |
258 | return bh; |
259 | |
260 | + lock_buffer(bh); |
261 | + if (bitmap_uptodate(bh)) { |
262 | + unlock_buffer(bh); |
263 | + return bh; |
264 | + } |
265 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
266 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
267 | ext4_init_block_bitmap(sb, bh, block_group, desc); |
268 | + set_bitmap_uptodate(bh); |
269 | set_buffer_uptodate(bh); |
270 | unlock_buffer(bh); |
271 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
272 | return bh; |
273 | } |
274 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
275 | + if (buffer_uptodate(bh)) { |
276 | + /* |
277 | + * if the group is not uninit and bh is uptodate, |
278 | + * the bitmap is also uptodate |
279 | + */ |
280 | + set_bitmap_uptodate(bh); |
281 | + unlock_buffer(bh); |
282 | + return bh; |
283 | + } |
284 | + /* |
285 | + * submit the buffer_head for read. We can |
286 | + * safely mark the bitmap as uptodate now. |
287 | + * We do it here so the bitmap uptodate bit |
288 | + * get set with buffer lock held. |
289 | + */ |
290 | + set_bitmap_uptodate(bh); |
291 | if (bh_submit_read(bh) < 0) { |
292 | put_bh(bh); |
293 | ext4_error(sb, __func__, |
294 | @@ -837,6 +861,136 @@ error_return: |
295 | } |
296 | |
297 | /** |
298 | + * ext4_add_groupblocks() -- Add given blocks to an existing group |
299 | + * @handle: handle to this transaction |
300 | + * @sb: super block |
301 | + * @block: start physical block to add to the block group |
302 | + * @count: number of blocks to free |
303 | + * |
304 | + * This marks the blocks as free in the bitmap. We ask the |
305 | + * mballoc to reload the buddy after this by setting group |
306 | + * EXT4_GROUP_INFO_NEED_INIT_BIT flag |
307 | + */ |
308 | +void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, |
309 | + ext4_fsblk_t block, unsigned long count) |
310 | +{ |
311 | + struct buffer_head *bitmap_bh = NULL; |
312 | + struct buffer_head *gd_bh; |
313 | + ext4_group_t block_group; |
314 | + ext4_grpblk_t bit; |
315 | + unsigned long i; |
316 | + struct ext4_group_desc *desc; |
317 | + struct ext4_super_block *es; |
318 | + struct ext4_sb_info *sbi; |
319 | + int err = 0, ret; |
320 | + ext4_grpblk_t blocks_freed; |
321 | + struct ext4_group_info *grp; |
322 | + |
323 | + sbi = EXT4_SB(sb); |
324 | + es = sbi->s_es; |
325 | + ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); |
326 | + |
327 | + ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
328 | + grp = ext4_get_group_info(sb, block_group); |
329 | + /* |
330 | + * Check to see if we are freeing blocks across a group |
331 | + * boundary. |
332 | + */ |
333 | + if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) |
334 | + goto error_return; |
335 | + |
336 | + bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
337 | + if (!bitmap_bh) |
338 | + goto error_return; |
339 | + desc = ext4_get_group_desc(sb, block_group, &gd_bh); |
340 | + if (!desc) |
341 | + goto error_return; |
342 | + |
343 | + if (in_range(ext4_block_bitmap(sb, desc), block, count) || |
344 | + in_range(ext4_inode_bitmap(sb, desc), block, count) || |
345 | + in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
346 | + in_range(block + count - 1, ext4_inode_table(sb, desc), |
347 | + sbi->s_itb_per_group)) { |
348 | + ext4_error(sb, __func__, |
349 | + "Adding blocks in system zones - " |
350 | + "Block = %llu, count = %lu", |
351 | + block, count); |
352 | + goto error_return; |
353 | + } |
354 | + |
355 | + /* |
356 | + * We are about to add blocks to the bitmap, |
357 | + * so we need undo access. |
358 | + */ |
359 | + BUFFER_TRACE(bitmap_bh, "getting undo access"); |
360 | + err = ext4_journal_get_undo_access(handle, bitmap_bh); |
361 | + if (err) |
362 | + goto error_return; |
363 | + |
364 | + /* |
365 | + * We are about to modify some metadata. Call the journal APIs |
366 | + * to unshare ->b_data if a currently-committing transaction is |
367 | + * using it |
368 | + */ |
369 | + BUFFER_TRACE(gd_bh, "get_write_access"); |
370 | + err = ext4_journal_get_write_access(handle, gd_bh); |
371 | + if (err) |
372 | + goto error_return; |
373 | + /* |
374 | + * make sure we don't allow a parallel init on other groups in the |
375 | + * same buddy cache |
376 | + */ |
377 | + down_write(&grp->alloc_sem); |
378 | + for (i = 0, blocks_freed = 0; i < count; i++) { |
379 | + BUFFER_TRACE(bitmap_bh, "clear bit"); |
380 | + if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), |
381 | + bit + i, bitmap_bh->b_data)) { |
382 | + ext4_error(sb, __func__, |
383 | + "bit already cleared for block %llu", |
384 | + (ext4_fsblk_t)(block + i)); |
385 | + BUFFER_TRACE(bitmap_bh, "bit already cleared"); |
386 | + } else { |
387 | + blocks_freed++; |
388 | + } |
389 | + } |
390 | + spin_lock(sb_bgl_lock(sbi, block_group)); |
391 | + le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed); |
392 | + desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); |
393 | + spin_unlock(sb_bgl_lock(sbi, block_group)); |
394 | + percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); |
395 | + |
396 | + if (sbi->s_log_groups_per_flex) { |
397 | + ext4_group_t flex_group = ext4_flex_group(sbi, block_group); |
398 | + spin_lock(sb_bgl_lock(sbi, flex_group)); |
399 | + sbi->s_flex_groups[flex_group].free_blocks += blocks_freed; |
400 | + spin_unlock(sb_bgl_lock(sbi, flex_group)); |
401 | + } |
402 | + /* |
403 | + * request to reload the buddy with the |
404 | + * new bitmap information |
405 | + */ |
406 | + set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); |
407 | + ext4_mb_update_group_info(grp, blocks_freed); |
408 | + up_write(&grp->alloc_sem); |
409 | + |
410 | + /* We dirtied the bitmap block */ |
411 | + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); |
412 | + err = ext4_journal_dirty_metadata(handle, bitmap_bh); |
413 | + |
414 | + /* And the group descriptor block */ |
415 | + BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); |
416 | + ret = ext4_journal_dirty_metadata(handle, gd_bh); |
417 | + if (!err) |
418 | + err = ret; |
419 | + sb->s_dirt = 1; |
420 | + |
421 | +error_return: |
422 | + brelse(bitmap_bh); |
423 | + ext4_std_error(sb, err); |
424 | + return; |
425 | +} |
426 | + |
427 | +/** |
428 | * ext4_free_blocks() -- Free given blocks and update quota |
429 | * @handle: handle for this transaction |
430 | * @inode: inode |
431 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h |
432 | index 4829dac..85f58af 100644 |
433 | --- a/fs/ext4/ext4.h |
434 | +++ b/fs/ext4/ext4.h |
435 | @@ -19,6 +19,7 @@ |
436 | #include <linux/types.h> |
437 | #include <linux/blkdev.h> |
438 | #include <linux/magic.h> |
439 | +#include <linux/jbd2.h> |
440 | #include "ext4_i.h" |
441 | |
442 | /* |
443 | @@ -889,6 +890,9 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len) |
444 | #define DX_HASH_LEGACY 0 |
445 | #define DX_HASH_HALF_MD4 1 |
446 | #define DX_HASH_TEA 2 |
447 | +#define DX_HASH_LEGACY_UNSIGNED 3 |
448 | +#define DX_HASH_HALF_MD4_UNSIGNED 4 |
449 | +#define DX_HASH_TEA_UNSIGNED 5 |
450 | |
451 | #ifdef __KERNEL__ |
452 | |
453 | @@ -988,9 +992,11 @@ extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
454 | ext4_fsblk_t nblocks); |
455 | extern void ext4_free_blocks (handle_t *handle, struct inode *inode, |
456 | ext4_fsblk_t block, unsigned long count, int metadata); |
457 | -extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, |
458 | - ext4_fsblk_t block, unsigned long count, |
459 | +extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, |
460 | + ext4_fsblk_t block, unsigned long count, |
461 | unsigned long *pdquot_freed_blocks); |
462 | +extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, |
463 | + ext4_fsblk_t block, unsigned long count); |
464 | extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); |
465 | extern void ext4_check_blocks_bitmap (struct super_block *); |
466 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, |
467 | @@ -1038,12 +1044,13 @@ extern int __init init_ext4_mballoc(void); |
468 | extern void exit_ext4_mballoc(void); |
469 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, |
470 | unsigned long, unsigned long, int, unsigned long *); |
471 | -extern int ext4_mb_add_more_groupinfo(struct super_block *sb, |
472 | +extern int ext4_mb_add_groupinfo(struct super_block *sb, |
473 | ext4_group_t i, struct ext4_group_desc *desc); |
474 | extern void ext4_mb_update_group_info(struct ext4_group_info *grp, |
475 | ext4_grpblk_t add); |
476 | - |
477 | - |
478 | +extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); |
479 | +extern void ext4_mb_put_buddy_cache_lock(struct super_block *, |
480 | + ext4_group_t, int); |
481 | /* inode.c */ |
482 | int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, |
483 | struct buffer_head *bh, ext4_fsblk_t blocknr); |
484 | @@ -1167,8 +1174,11 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, |
485 | |
486 | static inline loff_t ext4_isize(struct ext4_inode *raw_inode) |
487 | { |
488 | - return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | |
489 | - le32_to_cpu(raw_inode->i_size_lo); |
490 | + if (S_ISREG(le16_to_cpu(raw_inode->i_mode))) |
491 | + return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | |
492 | + le32_to_cpu(raw_inode->i_size_lo); |
493 | + else |
494 | + return (loff_t) le32_to_cpu(raw_inode->i_size_lo); |
495 | } |
496 | |
497 | static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) |
498 | @@ -1244,6 +1254,23 @@ extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, |
499 | sector_t block, unsigned long max_blocks, |
500 | struct buffer_head *bh, int create, |
501 | int extend_disksize, int flag); |
502 | +/* |
503 | + * Add new method to test whether block and inode bitmaps are properly |
504 | + * initialized. With uninit_bg reading the block from disk is not enough |
505 | + * to mark the bitmap uptodate. We need to also zero-out the bitmap |
506 | + */ |
507 | +#define BH_BITMAP_UPTODATE BH_JBDPrivateStart |
508 | + |
509 | +static inline int bitmap_uptodate(struct buffer_head *bh) |
510 | +{ |
511 | + return (buffer_uptodate(bh) && |
512 | + test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state)); |
513 | +} |
514 | +static inline void set_bitmap_uptodate(struct buffer_head *bh) |
515 | +{ |
516 | + set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); |
517 | +} |
518 | + |
519 | #endif /* __KERNEL__ */ |
520 | |
521 | #endif /* _EXT4_H */ |
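The BH_BITMAP_UPTODATE helpers added above support a double-checked pattern used in the balloc.c, ialloc.c, and mballoc.c hunks: test the flag locklessly, retest under the buffer lock, initialize only if still needed, and set the flag before dropping the lock so readers never see a half-initialized bitmap. A userspace sketch with a pthread mutex standing in for the buffer lock (a plain int flag suffices here only because the mutex provides the ordering; the kernel version relies on the buffer lock's barriers):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int bitmap_uptodate;             /* plays BH_BITMAP_UPTODATE */
    static unsigned char bitmap[16];

    static void read_bitmap(void)
    {
        if (bitmap_uptodate)                /* fast path, no lock */
            return;
        pthread_mutex_lock(&lock);
        if (bitmap_uptodate) {              /* lost the race: already done */
            pthread_mutex_unlock(&lock);
            return;
        }
        for (int i = 0; i < 16; i++)        /* expensive init, lock held */
            bitmap[i] = 0xff;
        bitmap_uptodate = 1;                /* set before dropping the lock */
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        read_bitmap();
        printf("first byte: 0x%02x\n", bitmap[0]);
        return 0;
    }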
522 | diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h |
523 | index 6300226..f20df8a 100644 |
524 | --- a/fs/ext4/ext4_sb.h |
525 | +++ b/fs/ext4/ext4_sb.h |
526 | @@ -56,6 +56,7 @@ struct ext4_sb_info { |
527 | u32 s_next_generation; |
528 | u32 s_hash_seed[4]; |
529 | int s_def_hash_version; |
530 | + int s_hash_unsigned; /* 3 if hash should be unsigned, 0 if not */ |
531 | struct percpu_counter s_freeblocks_counter; |
532 | struct percpu_counter s_freeinodes_counter; |
533 | struct percpu_counter s_dirs_counter; |
534 | @@ -102,7 +103,8 @@ struct ext4_sb_info { |
535 | struct list_head s_committed_transaction; |
536 | spinlock_t s_md_lock; |
537 | tid_t s_last_transaction; |
538 | - unsigned short *s_mb_offsets, *s_mb_maxs; |
539 | + unsigned short *s_mb_offsets; |
540 | + unsigned int *s_mb_maxs; |
541 | |
542 | /* tunables */ |
543 | unsigned long s_stripe; |
544 | diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c |
545 | index 1d6329d..bd7d14d 100644 |
546 | --- a/fs/ext4/hash.c |
547 | +++ b/fs/ext4/hash.c |
548 | @@ -35,23 +35,71 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) |
549 | |
550 | |
551 | /* The old legacy hash */ |
552 | -static __u32 dx_hack_hash (const char *name, int len) |
553 | +static __u32 dx_hack_hash_unsigned(const char *name, int len) |
554 | { |
555 | - __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; |
556 | + __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; |
557 | + const unsigned char *ucp = (const unsigned char *) name; |
558 | + |
559 | + while (len--) { |
560 | + hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373)); |
561 | + |
562 | + if (hash & 0x80000000) |
563 | + hash -= 0x7fffffff; |
564 | + hash1 = hash0; |
565 | + hash0 = hash; |
566 | + } |
567 | + return hash0 << 1; |
568 | +} |
569 | + |
570 | +static __u32 dx_hack_hash_signed(const char *name, int len) |
571 | +{ |
572 | + __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; |
573 | + const signed char *scp = (const signed char *) name; |
574 | + |
575 | while (len--) { |
576 | - __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); |
577 | + hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373)); |
578 | |
579 | - if (hash & 0x80000000) hash -= 0x7fffffff; |
580 | + if (hash & 0x80000000) |
581 | + hash -= 0x7fffffff; |
582 | hash1 = hash0; |
583 | hash0 = hash; |
584 | } |
585 | - return (hash0 << 1); |
586 | + return hash0 << 1; |
587 | } |
588 | |
589 | -static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) |
590 | +static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num) |
591 | { |
592 | __u32 pad, val; |
593 | int i; |
594 | + const signed char *scp = (const signed char *) msg; |
595 | + |
596 | + pad = (__u32)len | ((__u32)len << 8); |
597 | + pad |= pad << 16; |
598 | + |
599 | + val = pad; |
600 | + if (len > num*4) |
601 | + len = num * 4; |
602 | + for (i = 0; i < len; i++) { |
603 | + if ((i % 4) == 0) |
604 | + val = pad; |
605 | + val = ((int) scp[i]) + (val << 8); |
606 | + if ((i % 4) == 3) { |
607 | + *buf++ = val; |
608 | + val = pad; |
609 | + num--; |
610 | + } |
611 | + } |
612 | + if (--num >= 0) |
613 | + *buf++ = val; |
614 | + while (--num >= 0) |
615 | + *buf++ = pad; |
616 | +} |
617 | + |
618 | +static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num) |
619 | +{ |
620 | + __u32 pad, val; |
621 | + int i; |
622 | + const unsigned char *ucp = (const unsigned char *) msg; |
623 | |
624 | pad = (__u32)len | ((__u32)len << 8); |
625 | pad |= pad << 16; |
626 | @@ -62,7 +110,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) |
627 | for (i=0; i < len; i++) { |
628 | if ((i % 4) == 0) |
629 | val = pad; |
630 | - val = msg[i] + (val << 8); |
631 | + val = ((int) ucp[i]) + (val << 8); |
632 | if ((i % 4) == 3) { |
633 | *buf++ = val; |
634 | val = pad; |
635 | @@ -95,6 +143,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) |
636 | const char *p; |
637 | int i; |
638 | __u32 in[8], buf[4]; |
639 | + void (*str2hashbuf)(const char *, int, __u32 *, int) = |
640 | + str2hashbuf_signed; |
641 | |
642 | /* Initialize the default seed for the hash checksum functions */ |
643 | buf[0] = 0x67452301; |
644 | @@ -113,13 +163,18 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) |
645 | } |
646 | |
647 | switch (hinfo->hash_version) { |
648 | + case DX_HASH_LEGACY_UNSIGNED: |
649 | + hash = dx_hack_hash_unsigned(name, len); |
650 | + break; |
651 | case DX_HASH_LEGACY: |
652 | - hash = dx_hack_hash(name, len); |
653 | + hash = dx_hack_hash_signed(name, len); |
654 | break; |
655 | + case DX_HASH_HALF_MD4_UNSIGNED: |
656 | + str2hashbuf = str2hashbuf_unsigned; |
657 | case DX_HASH_HALF_MD4: |
658 | p = name; |
659 | while (len > 0) { |
660 | - str2hashbuf(p, len, in, 8); |
661 | + (*str2hashbuf)(p, len, in, 8); |
662 | half_md4_transform(buf, in); |
663 | len -= 32; |
664 | p += 32; |
665 | @@ -127,10 +182,12 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) |
666 | minor_hash = buf[2]; |
667 | hash = buf[1]; |
668 | break; |
669 | + case DX_HASH_TEA_UNSIGNED: |
670 | + str2hashbuf = str2hashbuf_unsigned; |
671 | case DX_HASH_TEA: |
672 | p = name; |
673 | while (len > 0) { |
674 | - str2hashbuf(p, len, in, 4); |
675 | + (*str2hashbuf)(p, len, in, 4); |
676 | TEA_transform(buf, in); |
677 | len -= 16; |
678 | p += 16; |
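The hash.c split above exists because the legacy hashes read directory-name bytes through plain char, whose signedness is ABI-dependent; for bytes >= 0x80 the promoted int differs between signed-char and unsigned-char machines, so on-disk htree indexes built on one kind of machine could not be searched on the other. A runnable demo mirroring the dx_hack_hash arithmetic for both interpretations (this is a sketch of the divergence, not the kernel source):

    #include <stdio.h>

    static unsigned int hack_hash(const char *name, int len, int is_signed)
    {
        unsigned int hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;

        while (len--) {
            int c = is_signed ? (int)(signed char)*name
                              : (int)(unsigned char)*name;
            name++;
            hash = hash1 + (hash0 ^ (c * 7152373));
            if (hash & 0x80000000)
                hash -= 0x7fffffff;
            hash1 = hash0;
            hash0 = hash;
        }
        return hash0 << 1;
    }

    int main(void)
    {
        const char name[] = "caf\xc3\xa9";  /* UTF-8 name with high-bit bytes */

        printf("signed:   0x%08x\n", hack_hash(name, 5, 1));
        printf("unsigned: 0x%08x\n", hack_hash(name, 5, 0));
        return 0;
    }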
679 | diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c |
680 | index 9805924..b994854 100644 |
681 | --- a/fs/ext4/ialloc.c |
682 | +++ b/fs/ext4/ialloc.c |
683 | @@ -84,7 +84,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, |
684 | } |
685 | |
686 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); |
687 | - mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), |
688 | + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
689 | bh->b_data); |
690 | |
691 | return EXT4_INODES_PER_GROUP(sb); |
692 | @@ -115,18 +115,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) |
693 | block_group, bitmap_blk); |
694 | return NULL; |
695 | } |
696 | - if (bh_uptodate_or_lock(bh)) |
697 | + if (bitmap_uptodate(bh)) |
698 | return bh; |
699 | |
700 | + lock_buffer(bh); |
701 | + if (bitmap_uptodate(bh)) { |
702 | + unlock_buffer(bh); |
703 | + return bh; |
704 | + } |
705 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
706 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
707 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
708 | + set_bitmap_uptodate(bh); |
709 | set_buffer_uptodate(bh); |
710 | unlock_buffer(bh); |
711 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
712 | return bh; |
713 | } |
714 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
715 | + if (buffer_uptodate(bh)) { |
716 | + /* |
717 | + * if the group is not uninit and bh is uptodate, |
718 | + * the bitmap is also uptodate |
719 | + */ |
720 | + set_bitmap_uptodate(bh); |
721 | + unlock_buffer(bh); |
722 | + return bh; |
723 | + } |
724 | + /* |
725 | + * submit the buffer_head for read. We can |
726 | + * safely mark the bitmap as uptodate now. |
727 | + * We do it here so the bitmap uptodate bit |
728 | + * get set with buffer lock held. |
729 | + */ |
730 | + set_bitmap_uptodate(bh); |
731 | if (bh_submit_read(bh) < 0) { |
732 | put_bh(bh); |
733 | ext4_error(sb, __func__, |
734 | @@ -567,6 +589,77 @@ static int find_group_other(struct super_block *sb, struct inode *parent, |
735 | } |
736 | |
737 | /* |
738 | + * claim the inode from the inode bitmap. If the group |
739 | + * is uninit we need to take the group's sb_bgl_lock |
740 | + * and clear the uninit flag. The inode bitmap update |
741 | + * and group desc uninit flag clear should be done |
742 | + * after holding sb_bgl_lock so that ext4_read_inode_bitmap |
743 | + * doesn't race with the ext4_claim_inode |
744 | + */ |
745 | +static int ext4_claim_inode(struct super_block *sb, |
746 | + struct buffer_head *inode_bitmap_bh, |
747 | + unsigned long ino, ext4_group_t group, int mode) |
748 | +{ |
749 | + int free = 0, retval = 0; |
750 | + struct ext4_sb_info *sbi = EXT4_SB(sb); |
751 | + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); |
752 | + |
753 | + spin_lock(sb_bgl_lock(sbi, group)); |
754 | + if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { |
755 | + /* not a free inode */ |
756 | + retval = 1; |
757 | + goto err_ret; |
758 | + } |
759 | + ino++; |
760 | + if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
761 | + ino > EXT4_INODES_PER_GROUP(sb)) { |
762 | + spin_unlock(sb_bgl_lock(sbi, group)); |
763 | + ext4_error(sb, __func__, |
764 | + "reserved inode or inode > inodes count - " |
765 | + "block_group = %lu, inode=%lu", group, |
766 | + ino + group * EXT4_INODES_PER_GROUP(sb)); |
767 | + return 1; |
768 | + } |
769 | + /* If we didn't allocate from within the initialized part of the inode |
770 | + * table then we need to initialize up to this inode. */ |
771 | + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { |
772 | + |
773 | + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
774 | + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); |
775 | + /* When marking the block group with |
776 | + * ~EXT4_BG_INODE_UNINIT we don't want to depend |
777 | + * on the value of bg_itable_unused even though |
778 | + * mke2fs could have initialized the same for us. |
779 | + * Instead we calculated the value below |
780 | + */ |
781 | + |
782 | + free = 0; |
783 | + } else { |
784 | + free = EXT4_INODES_PER_GROUP(sb) - |
785 | + le16_to_cpu(gdp->bg_itable_unused); |
786 | + } |
787 | + |
788 | + /* |
789 | + * Check the relative inode number against the last used |
790 | + * relative inode number in this group. if it is greater |
791 | + * we need to update the bg_itable_unused count |
792 | + * |
793 | + */ |
794 | + if (ino > free) |
795 | + gdp->bg_itable_unused = |
796 | + cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); |
797 | + } |
798 | + le16_add_cpu(&gdp->bg_free_inodes_count, -1); |
799 | + if (S_ISDIR(mode)) { |
800 | + le16_add_cpu(&gdp->bg_used_dirs_count, 1); |
801 | + } |
802 | + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
803 | +err_ret: |
804 | + spin_unlock(sb_bgl_lock(sbi, group)); |
805 | + return retval; |
806 | +} |
807 | + |
808 | +/* |
809 | * There are two policies for allocating an inode. If the new inode is |
810 | * a directory, then a forward search is made for a block group with both |
811 | * free space and a low directory-to-inode ratio; if that fails, then of |
812 | @@ -649,8 +742,12 @@ repeat_in_this_group: |
813 | if (err) |
814 | goto fail; |
815 | |
816 | - if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group), |
817 | - ino, bitmap_bh->b_data)) { |
818 | + BUFFER_TRACE(bh2, "get_write_access"); |
819 | + err = ext4_journal_get_write_access(handle, bh2); |
820 | + if (err) |
821 | + goto fail; |
822 | + if (!ext4_claim_inode(sb, bitmap_bh, |
823 | + ino, group, mode)) { |
824 | /* we won it */ |
825 | BUFFER_TRACE(bitmap_bh, |
826 | "call ext4_journal_dirty_metadata"); |
827 | @@ -658,10 +755,13 @@ repeat_in_this_group: |
828 | bitmap_bh); |
829 | if (err) |
830 | goto fail; |
831 | + /* zero bit is inode number 1 */ |
832 | + ino++; |
833 | goto got; |
834 | } |
835 | /* we lost it */ |
836 | jbd2_journal_release_buffer(handle, bitmap_bh); |
837 | + jbd2_journal_release_buffer(handle, bh2); |
838 | |
839 | if (++ino < EXT4_INODES_PER_GROUP(sb)) |
840 | goto repeat_in_this_group; |
841 | @@ -681,21 +781,6 @@ repeat_in_this_group: |
842 | goto out; |
843 | |
844 | got: |
845 | - ino++; |
846 | - if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
847 | - ino > EXT4_INODES_PER_GROUP(sb)) { |
848 | - ext4_error(sb, __func__, |
849 | - "reserved inode or inode > inodes count - " |
850 | - "block_group = %lu, inode=%lu", group, |
851 | - ino + group * EXT4_INODES_PER_GROUP(sb)); |
852 | - err = -EIO; |
853 | - goto fail; |
854 | - } |
855 | - |
856 | - BUFFER_TRACE(bh2, "get_write_access"); |
857 | - err = ext4_journal_get_write_access(handle, bh2); |
858 | - if (err) goto fail; |
859 | - |
860 | /* We may have to initialize the block bitmap if it isn't already */ |
861 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && |
862 | gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
863 | @@ -730,47 +815,10 @@ got: |
864 | if (err) |
865 | goto fail; |
866 | } |
867 | - |
868 | - spin_lock(sb_bgl_lock(sbi, group)); |
869 | - /* If we didn't allocate from within the initialized part of the inode |
870 | - * table then we need to initialize up to this inode. */ |
871 | - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { |
872 | - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
873 | - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); |
874 | - |
875 | - /* When marking the block group with |
876 | - * ~EXT4_BG_INODE_UNINIT we don't want to depend |
877 | - * on the value of bg_itable_unused even though |
878 | - * mke2fs could have initialized the same for us. |
879 | - * Instead we calculated the value below |
880 | - */ |
881 | - |
882 | - free = 0; |
883 | - } else { |
884 | - free = EXT4_INODES_PER_GROUP(sb) - |
885 | - le16_to_cpu(gdp->bg_itable_unused); |
886 | - } |
887 | - |
888 | - /* |
889 | - * Check the relative inode number against the last used |
890 | - * relative inode number in this group. if it is greater |
891 | - * we need to update the bg_itable_unused count |
892 | - * |
893 | - */ |
894 | - if (ino > free) |
895 | - gdp->bg_itable_unused = |
896 | - cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); |
897 | - } |
898 | - |
899 | - le16_add_cpu(&gdp->bg_free_inodes_count, -1); |
900 | - if (S_ISDIR(mode)) { |
901 | - le16_add_cpu(&gdp->bg_used_dirs_count, 1); |
902 | - } |
903 | - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
904 | - spin_unlock(sb_bgl_lock(sbi, group)); |
905 | - BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); |
906 | + BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); |
907 | err = ext4_journal_dirty_metadata(handle, bh2); |
908 | - if (err) goto fail; |
909 | + if (err) |
910 | + goto fail; |
911 | |
912 | percpu_counter_dec(&sbi->s_freeinodes_counter); |
913 | if (S_ISDIR(mode)) |
914 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c |
915 | index d77f674..6e7f085 100644 |
916 | --- a/fs/ext4/inode.c |
917 | +++ b/fs/ext4/inode.c |
918 | @@ -351,9 +351,9 @@ static int ext4_block_to_path(struct inode *inode, |
919 | final = ptrs; |
920 | } else { |
921 | ext4_warning(inode->i_sb, "ext4_block_to_path", |
922 | - "block %lu > max", |
923 | + "block %lu > max in inode %lu", |
924 | i_block + direct_blocks + |
925 | - indirect_blocks + double_blocks); |
926 | + indirect_blocks + double_blocks, inode->i_ino); |
927 | } |
928 | if (boundary) |
929 | *boundary = final - 1 - (i_block & (ptrs - 1)); |
930 | @@ -1648,18 +1648,25 @@ struct mpage_da_data { |
931 | */ |
932 | static int mpage_da_submit_io(struct mpage_da_data *mpd) |
933 | { |
934 | - struct address_space *mapping = mpd->inode->i_mapping; |
935 | - int ret = 0, err, nr_pages, i; |
936 | - unsigned long index, end; |
937 | + long pages_skipped; |
938 | struct pagevec pvec; |
939 | + unsigned long index, end; |
940 | + int ret = 0, err, nr_pages, i; |
941 | + struct inode *inode = mpd->inode; |
942 | + struct address_space *mapping = inode->i_mapping; |
943 | |
944 | BUG_ON(mpd->next_page <= mpd->first_page); |
945 | - pagevec_init(&pvec, 0); |
946 | + /* |
947 | + * We need to start from the first_page to the next_page - 1 |
948 | + * to make sure we also write the mapped dirty buffer_heads. |
949 | + * If we look at mpd->lbh.b_blocknr we would only be looking |
950 | + * at the currently mapped buffer_heads. |
951 | + */ |
952 | index = mpd->first_page; |
953 | end = mpd->next_page - 1; |
954 | |
955 | + pagevec_init(&pvec, 0); |
956 | while (index <= end) { |
957 | - /* XXX: optimize tail */ |
958 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); |
959 | if (nr_pages == 0) |
960 | break; |
961 | @@ -1671,6 +1678,10 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) |
962 | break; |
963 | index++; |
964 | |
965 | + BUG_ON(!PageLocked(page)); |
966 | + BUG_ON(PageWriteback(page)); |
967 | + |
968 | + pages_skipped = mpd->wbc->pages_skipped; |
969 | err = mapping->a_ops->writepage(page, mpd->wbc); |
970 | if (!err) |
971 | mpd->pages_written++; |
972 | @@ -1991,11 +2002,29 @@ static int __mpage_da_writepage(struct page *page, |
973 | bh = head; |
974 | do { |
975 | BUG_ON(buffer_locked(bh)); |
976 | + /* |
977 | + * We need to try to allocate |
978 | + * unmapped blocks in the same page. |
979 | + * Otherwise we won't make progress |
980 | + * with the page in ext4_da_writepage |
981 | + */ |
982 | if (buffer_dirty(bh) && |
983 | (!buffer_mapped(bh) || buffer_delay(bh))) { |
984 | mpage_add_bh_to_extent(mpd, logical, bh); |
985 | if (mpd->io_done) |
986 | return MPAGE_DA_EXTENT_TAIL; |
987 | + } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { |
988 | + /* |
989 | + * mapped dirty buffer. We need to update |
990 | + * the b_state because we look at |
991 | + * b_state in mpage_da_map_blocks. We don't |
992 | + * update b_size because if we find an |
993 | + * unmapped buffer_head later we need to |
994 | + * use the b_state flag of that buffer_head. |
995 | + */ |
996 | + if (mpd->lbh.b_size == 0) |
997 | + mpd->lbh.b_state = |
998 | + bh->b_state & BH_FLAGS; |
999 | } |
1000 | logical++; |
1001 | } while ((bh = bh->b_this_page) != head); |
1002 | @@ -2298,6 +2327,20 @@ static int ext4_da_writepages(struct address_space *mapping, |
1003 | */ |
1004 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
1005 | return 0; |
1006 | + |
1007 | + /* |
1008 | + * If the filesystem has aborted, it is read-only, so return |
1009 | + * right away instead of dumping stack traces later on that |
1010 | + * will obscure the real source of the problem. We test |
1011 | + * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because |
1012 | + * the latter could be true if the filesystem is mounted |
1013 | + * read-only, and in that case, ext4_da_writepages should |
1014 | + * *never* be called, so if that ever happens, we would want |
1015 | + * the stack trace. |
1016 | + */ |
1017 | + if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT)) |
1018 | + return -EROFS; |
1019 | + |
1020 | /* |
1021 | * Make sure nr_to_write is >= sbi->s_mb_stream_request |
1022 | * This make sure small files blocks are allocated in |
1023 | @@ -2336,7 +2379,7 @@ restart_loop: |
1024 | handle = ext4_journal_start(inode, needed_blocks); |
1025 | if (IS_ERR(handle)) { |
1026 | ret = PTR_ERR(handle); |
1027 | - printk(KERN_EMERG "%s: jbd2_start: " |
1028 | + printk(KERN_CRIT "%s: jbd2_start: " |
1029 | "%ld pages, ino %lu; err %d\n", __func__, |
1030 | wbc->nr_to_write, inode->i_ino, ret); |
1031 | dump_stack(); |
1032 | diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c |
1033 | index ba86b56..dbf6c0e 100644 |
1034 | --- a/fs/ext4/mballoc.c |
1035 | +++ b/fs/ext4/mballoc.c |
1036 | @@ -100,7 +100,7 @@ |
1037 | * inode as: |
1038 | * |
1039 | * { page } |
1040 | - * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]... |
1041 | + * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]... |
1042 | * |
1043 | * |
1044 | * one block each for bitmap and buddy information. So for each group we |
1045 | @@ -330,6 +330,18 @@ |
1046 | * object |
1047 | * |
1048 | */ |
1049 | +static struct kmem_cache *ext4_pspace_cachep; |
1050 | +static struct kmem_cache *ext4_ac_cachep; |
1051 | +static struct kmem_cache *ext4_free_ext_cachep; |
1052 | +static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
1053 | + ext4_group_t group); |
1054 | +static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
1055 | + ext4_group_t group); |
1056 | +static int ext4_mb_init_per_dev_proc(struct super_block *sb); |
1057 | +static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); |
1058 | +static void ext4_mb_free_committed_blocks(struct super_block *); |
1059 | +static void ext4_mb_poll_new_transaction(struct super_block *sb, |
1060 | + handle_t *handle); |
1061 | |
1062 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) |
1063 | { |
1064 | @@ -718,7 +730,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, |
1065 | * stored in the inode as |
1066 | * |
1067 | * { page } |
1068 | - * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]... |
1069 | + * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]... |
1070 | * |
1071 | * |
1072 | * one block each for bitmap and buddy information. |
1073 | @@ -784,20 +796,42 @@ static int ext4_mb_init_cache(struct page *page, char *incore) |
1074 | if (bh[i] == NULL) |
1075 | goto out; |
1076 | |
1077 | - if (bh_uptodate_or_lock(bh[i])) |
1078 | + if (bitmap_uptodate(bh[i])) |
1079 | continue; |
1080 | |
1081 | + lock_buffer(bh[i]); |
1082 | + if (bitmap_uptodate(bh[i])) { |
1083 | + unlock_buffer(bh[i]); |
1084 | + continue; |
1085 | + } |
1086 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
1087 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
1088 | ext4_init_block_bitmap(sb, bh[i], |
1089 | first_group + i, desc); |
1090 | + set_bitmap_uptodate(bh[i]); |
1091 | set_buffer_uptodate(bh[i]); |
1092 | unlock_buffer(bh[i]); |
1093 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
1094 | continue; |
1095 | } |
1096 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
1097 | + if (buffer_uptodate(bh[i])) { |
1098 | + /* |
1099 | + * if the group is not uninit and bh is uptodate, |
1100 | + * the bitmap is also uptodate |
1101 | + */ |
1102 | + set_bitmap_uptodate(bh[i]); |
1103 | + unlock_buffer(bh[i]); |
1104 | + continue; |
1105 | + } |
1106 | get_bh(bh[i]); |
1107 | + /* |
1108 | + * submit the buffer_head for read. We can |
1109 | + * safely mark the bitmap as uptodate now. |
1110 | + * We do it here so the bitmap uptodate bit |
1111 | + * gets set with buffer lock held. |
1112 | + */ |
1113 | + set_bitmap_uptodate(bh[i]); |
1114 | bh[i]->b_end_io = end_buffer_read_sync; |
1115 | submit_bh(READ, bh[i]); |
1116 | mb_debug("read bitmap for group %lu\n", first_group + i); |
1117 | @@ -814,6 +848,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore) |
1118 | |
1119 | err = 0; |
1120 | first_block = page->index * blocks_per_page; |
1121 | + /* init the page */ |
1122 | + memset(page_address(page), 0xff, PAGE_CACHE_SIZE); |
1123 | for (i = 0; i < blocks_per_page; i++) { |
1124 | int group; |
1125 | struct ext4_group_info *grinfo; |
1126 | @@ -840,7 +876,6 @@ static int ext4_mb_init_cache(struct page *page, char *incore) |
1127 | BUG_ON(incore == NULL); |
1128 | mb_debug("put buddy for group %u in page %lu/%x\n", |
1129 | group, page->index, i * blocksize); |
1130 | - memset(data, 0xff, blocksize); |
1131 | grinfo = ext4_get_group_info(sb, group); |
1132 | grinfo->bb_fragments = 0; |
1133 | memset(grinfo->bb_counters, 0, |
1134 | @@ -848,7 +883,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore) |
1135 | /* |
1136 | * incore got set to the group block bitmap below |
1137 | */ |
1138 | + ext4_lock_group(sb, group); |
1139 | ext4_mb_generate_buddy(sb, data, incore, group); |
1140 | + ext4_unlock_group(sb, group); |
1141 | incore = NULL; |
1142 | } else { |
1143 | /* this is block of bitmap */ |
1144 | @@ -862,6 +899,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) |
1145 | |
1146 | /* mark all preallocated blks used in in-core bitmap */ |
1147 | ext4_mb_generate_from_pa(sb, data, group); |
1148 | + ext4_mb_generate_from_freelist(sb, data, group); |
1149 | ext4_unlock_group(sb, group); |
1150 | |
1151 | /* set incore so that the buddy information can be |
1152 | @@ -886,18 +924,20 @@ static noinline_for_stack int |
1153 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
1154 | struct ext4_buddy *e4b) |
1155 | { |
1156 | - struct ext4_sb_info *sbi = EXT4_SB(sb); |
1157 | - struct inode *inode = sbi->s_buddy_cache; |
1158 | int blocks_per_page; |
1159 | int block; |
1160 | int pnum; |
1161 | int poff; |
1162 | struct page *page; |
1163 | int ret; |
1164 | + struct ext4_group_info *grp; |
1165 | + struct ext4_sb_info *sbi = EXT4_SB(sb); |
1166 | + struct inode *inode = sbi->s_buddy_cache; |
1167 | |
1168 | mb_debug("load group %lu\n", group); |
1169 | |
1170 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
1171 | + grp = ext4_get_group_info(sb, group); |
1172 | |
1173 | e4b->bd_blkbits = sb->s_blocksize_bits; |
1174 | e4b->bd_info = ext4_get_group_info(sb, group); |
1175 | @@ -905,6 +945,15 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
1176 | e4b->bd_group = group; |
1177 | e4b->bd_buddy_page = NULL; |
1178 | e4b->bd_bitmap_page = NULL; |
1179 | + e4b->alloc_semp = &grp->alloc_sem; |
1180 | + |
1181 | + /* Take the read lock on the group alloc |
1182 | + * sem. This would make sure a parallel |
1183 | + * ext4_mb_init_group happening on other |
1184 | + * groups mapped by the page is blocked |
1185 | + * till we are done with allocation |
1186 | + */ |
1187 | + down_read(e4b->alloc_semp); |
1188 | |
1189 | /* |
1190 | * the buddy cache inode stores the block bitmap |
1191 | @@ -920,6 +969,14 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
1192 | page = find_get_page(inode->i_mapping, pnum); |
1193 | if (page == NULL || !PageUptodate(page)) { |
1194 | if (page) |
1195 | + /* |
1196 | + * drop the page reference and try |
1197 | + * to get the page with lock. If we |
1198 | + * are not uptodate that implies |
1199 | + * somebody just created the page but |
1200 | + * is yet to initialize the same. So |
1201 | + * wait for it to initialize. |
1202 | + */ |
1203 | page_cache_release(page); |
1204 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); |
1205 | if (page) { |
1206 | @@ -985,6 +1042,9 @@ err: |
1207 | page_cache_release(e4b->bd_buddy_page); |
1208 | e4b->bd_buddy = NULL; |
1209 | e4b->bd_bitmap = NULL; |
1210 | + |
1211 | + /* Done with the buddy cache */ |
1212 | + up_read(e4b->alloc_semp); |
1213 | return ret; |
1214 | } |
1215 | |
1216 | @@ -994,6 +1054,9 @@ static void ext4_mb_release_desc(struct ext4_buddy *e4b) |
1217 | page_cache_release(e4b->bd_bitmap_page); |
1218 | if (e4b->bd_buddy_page) |
1219 | page_cache_release(e4b->bd_buddy_page); |
1220 | + /* Done with the buddy cache */ |
1221 | + if (e4b->alloc_semp) |
1222 | + up_read(e4b->alloc_semp); |
1223 | } |
1224 | |
1225 | |
1226 | @@ -1031,7 +1094,10 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) |
1227 | cur += 32; |
1228 | continue; |
1229 | } |
1230 | - mb_clear_bit_atomic(lock, cur, bm); |
1231 | + if (lock) |
1232 | + mb_clear_bit_atomic(lock, cur, bm); |
1233 | + else |
1234 | + mb_clear_bit(cur, bm); |
1235 | cur++; |
1236 | } |
1237 | } |
1238 | @@ -1049,7 +1115,10 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) |
1239 | cur += 32; |
1240 | continue; |
1241 | } |
1242 | - mb_set_bit_atomic(lock, cur, bm); |
1243 | + if (lock) |
1244 | + mb_set_bit_atomic(lock, cur, bm); |
1245 | + else |
1246 | + mb_set_bit(cur, bm); |
1247 | cur++; |
1248 | } |
1249 | } |
1250 | @@ -1296,13 +1365,20 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, |
1251 | ac->ac_tail = ret & 0xffff; |
1252 | ac->ac_buddy = ret >> 16; |
1253 | |
1254 | - /* XXXXXXX: SUCH A HORRIBLE **CK */ |
1255 | - /*FIXME!! Why ? */ |
1256 | + /* |
1257 | + * take the page reference. We want the page to be pinned |
1258 | + * so that we don't get an ext4_mb_init_cache call for this |
1259 | + * group until we update the bitmap. That would mean we |
1260 | + * double allocate blocks. The reference is dropped |
1261 | + * in ext4_mb_release_context |
1262 | + */ |
1263 | ac->ac_bitmap_page = e4b->bd_bitmap_page; |
1264 | get_page(ac->ac_bitmap_page); |
1265 | ac->ac_buddy_page = e4b->bd_buddy_page; |
1266 | get_page(ac->ac_buddy_page); |
1267 | - |
1268 | + /* on allocation we use ac to track the held semaphore */ |
1269 | + ac->alloc_semp = e4b->alloc_semp; |
1270 | + e4b->alloc_semp = NULL; |
1271 | /* store last allocated for subsequent stream allocation */ |
1272 | if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { |
1273 | spin_lock(&sbi->s_md_lock); |
1274 | @@ -1326,6 +1402,8 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac, |
1275 | struct ext4_free_extent ex; |
1276 | int max; |
1277 | |
1278 | + if (ac->ac_status == AC_STATUS_FOUND) |
1279 | + return; |
1280 | /* |
1281 | * We don't want to scan for a whole year |
1282 | */ |
1283 | @@ -1692,6 +1770,173 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, |
1284 | return 0; |
1285 | } |
1286 | |
1287 | +/* |
1288 | + * lock the group_info alloc_sem of all the groups |
1289 | + * belonging to the same buddy cache page. This |
1290 | + * makes sure no other parallel operation on the buddy |
1291 | + * cache happens while holding the buddy cache |
1292 | + * lock |
1293 | + */ |
1294 | +int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) |
1295 | +{ |
1296 | + int i; |
1297 | + int block, pnum; |
1298 | + int blocks_per_page; |
1299 | + int groups_per_page; |
1300 | + ext4_group_t first_group; |
1301 | + struct ext4_group_info *grp; |
1302 | + |
1303 | + blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
1304 | + /* |
1305 | + * the buddy cache inode stores the block bitmap |
1306 | + * and buddy information in consecutive blocks. |
1307 | + * So for each group we need two blocks. |
1308 | + */ |
1309 | + block = group * 2; |
1310 | + pnum = block / blocks_per_page; |
1311 | + first_group = pnum * blocks_per_page / 2; |
1312 | + |
1313 | + groups_per_page = blocks_per_page >> 1; |
1314 | + if (groups_per_page == 0) |
1315 | + groups_per_page = 1; |
1316 | + /* read all groups the page covers into the cache */ |
1317 | + for (i = 0; i < groups_per_page; i++) { |
1318 | + |
1319 | + if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) |
1320 | + break; |
1321 | + grp = ext4_get_group_info(sb, first_group + i); |
1322 | + /* take all groups write allocation |
1323 | + * semaphore. This makes sure there is |
1324 | + * no block allocation going on in any |
1325 | + * of those groups |
1326 | + */ |
1327 | + down_write(&grp->alloc_sem); |
1328 | + } |
1329 | + return i; |
1330 | +} |
1331 | + |
1332 | +void ext4_mb_put_buddy_cache_lock(struct super_block *sb, |
1333 | + ext4_group_t group, int locked_group) |
1334 | +{ |
1335 | + int i; |
1336 | + int block, pnum; |
1337 | + int blocks_per_page; |
1338 | + ext4_group_t first_group; |
1339 | + struct ext4_group_info *grp; |
1340 | + |
1341 | + blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
1342 | + /* |
1343 | + * the buddy cache inode stores the block bitmap |
1344 | + * and buddy information in consecutive blocks. |
1345 | + * So for each group we need two blocks. |
1346 | + */ |
1347 | + block = group * 2; |
1348 | + pnum = block / blocks_per_page; |
1349 | + first_group = pnum * blocks_per_page / 2; |
1350 | + /* release locks on all the groups */ |
1351 | + for (i = 0; i < locked_group; i++) { |
1352 | + |
1353 | + grp = ext4_get_group_info(sb, first_group + i); |
1354 | + /* release the write allocation |
1355 | + * semaphore taken earlier so that |
1356 | + * block allocation can resume in |
1357 | + * those groups |
1358 | + */ |
1359 | + up_write(&grp->alloc_sem); |
1360 | + } |
1361 | + |
1362 | +} |
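The buddy-cache locking helpers above derive which groups share a page from the layout where each group owns two consecutive blocks (bitmap, then buddy) in the buddy inode. A worked example of the arithmetic, assuming 4 KiB pages and a 1 KiB block size:

    #include <stdio.h>

    int main(void)
    {
        unsigned int page_size = 4096, block_size = 1024;
        unsigned int blocks_per_page = page_size / block_size;   /* 4 */
        unsigned int group = 5;

        unsigned int block = group * 2;                  /* 10: bitmap block */
        unsigned int pnum = block / blocks_per_page;     /* page 2 */
        unsigned int first_group = pnum * blocks_per_page / 2;   /* group 4 */

        printf("group %u -> page %u, first group on page %u\n",
               group, pnum, first_group);
        return 0;
    }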
1363 | + |
1364 | +static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) |
1365 | +{ |
1366 | + |
1367 | + int ret; |
1368 | + void *bitmap; |
1369 | + int blocks_per_page; |
1370 | + int block, pnum, poff; |
1371 | + int num_grp_locked = 0; |
1372 | + struct ext4_group_info *this_grp; |
1373 | + struct ext4_sb_info *sbi = EXT4_SB(sb); |
1374 | + struct inode *inode = sbi->s_buddy_cache; |
1375 | + struct page *page = NULL, *bitmap_page = NULL; |
1376 | + |
1377 | + mb_debug("init group %lu\n", group); |
1378 | + blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
1379 | + this_grp = ext4_get_group_info(sb, group); |
1380 | + /* |
1381 | + * This ensures we don't add group |
1382 | + * to this buddy cache via resize |
1383 | + */ |
1384 | + num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); |
1385 | + if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { |
1386 | + /* |
1387 | + * somebody initialized the group |
1388 | + * return without doing anything |
1389 | + */ |
1390 | + ret = 0; |
1391 | + goto err; |
1392 | + } |
1393 | + /* |
1394 | + * the buddy cache inode stores the block bitmap |
1395 | + * and buddy information in consecutive blocks. |
1396 | + * So for each group we need two blocks. |
1397 | + */ |
1398 | + block = group * 2; |
1399 | + pnum = block / blocks_per_page; |
1400 | + poff = block % blocks_per_page; |
1401 | + page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); |
1402 | + if (page) { |
1403 | + BUG_ON(page->mapping != inode->i_mapping); |
1404 | + ret = ext4_mb_init_cache(page, NULL); |
1405 | + if (ret) { |
1406 | + unlock_page(page); |
1407 | + goto err; |
1408 | + } |
1409 | + unlock_page(page); |
1410 | + } |
1411 | + if (page == NULL || !PageUptodate(page)) { |
1412 | + ret = -EIO; |
1413 | + goto err; |
1414 | + } |
1415 | + mark_page_accessed(page); |
1416 | + bitmap_page = page; |
1417 | + bitmap = page_address(page) + (poff * sb->s_blocksize); |
1418 | + |
1419 | + /* init buddy cache */ |
1420 | + block++; |
1421 | + pnum = block / blocks_per_page; |
1422 | + poff = block % blocks_per_page; |
1423 | + page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); |
1424 | + if (page == bitmap_page) { |
1425 | + /* |
1426 | + * If both the bitmap and buddy are in |
1427 | + * the same page we don't need to force |
1428 | + * init the buddy |
1429 | + */ |
1430 | + unlock_page(page); |
1431 | + } else if (page) { |
1432 | + BUG_ON(page->mapping != inode->i_mapping); |
1433 | + ret = ext4_mb_init_cache(page, bitmap); |
1434 | + if (ret) { |
1435 | + unlock_page(page); |
1436 | + goto err; |
1437 | + } |
1438 | + unlock_page(page); |
1439 | + } |
1440 | + if (page == NULL || !PageUptodate(page)) { |
1441 | + ret = -EIO; |
1442 | + goto err; |
1443 | + } |
1444 | + mark_page_accessed(page); |
1445 | +err: |
1446 | + ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); |
1447 | + if (bitmap_page) |
1448 | + page_cache_release(bitmap_page); |
1449 | + if (page) |
1450 | + page_cache_release(page); |
1451 | + return ret; |
1452 | +} |
1453 | + |
1454 | static noinline_for_stack int |
1455 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1456 | { |
1457 | @@ -1775,7 +2020,7 @@ repeat: |
1458 | group = 0; |
1459 | |
1460 | /* quick check to skip empty groups */ |
1461 | - grp = ext4_get_group_info(ac->ac_sb, group); |
1462 | + grp = ext4_get_group_info(sb, group); |
1463 | if (grp->bb_free == 0) |
1464 | continue; |
1465 | |
1466 | @@ -1788,10 +2033,9 @@ repeat: |
1467 | * we need full data about the group |
1468 | * to make a good selection |
1469 | */ |
1470 | - err = ext4_mb_load_buddy(sb, group, &e4b); |
1471 | + err = ext4_mb_init_group(sb, group); |
1472 | if (err) |
1473 | goto out; |
1474 | - ext4_mb_release_desc(&e4b); |
1475 | } |
1476 | |
1477 | /* |
1478 | @@ -2299,6 +2543,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, |
1479 | } |
1480 | |
1481 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
1482 | + init_rwsem(&meta_group_info[i]->alloc_sem); |
1483 | + meta_group_info[i]->bb_free_root.rb_node = NULL; |
1484 | |
1485 | #ifdef DOUBLE_CHECK |
1486 | { |
1487 | @@ -2325,54 +2571,6 @@ exit_meta_group_info: |
1488 | } /* ext4_mb_add_groupinfo */ |
1489 | |
1490 | /* |
1491 | - * Add a group to the existing groups. |
1492 | - * This function is used for online resize |
1493 | - */ |
1494 | -int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group, |
1495 | - struct ext4_group_desc *desc) |
1496 | -{ |
1497 | - struct ext4_sb_info *sbi = EXT4_SB(sb); |
1498 | - struct inode *inode = sbi->s_buddy_cache; |
1499 | - int blocks_per_page; |
1500 | - int block; |
1501 | - int pnum; |
1502 | - struct page *page; |
1503 | - int err; |
1504 | - |
1505 | - /* Add group based on group descriptor*/ |
1506 | - err = ext4_mb_add_groupinfo(sb, group, desc); |
1507 | - if (err) |
1508 | - return err; |
1509 | - |
1510 | - /* |
1511 | - * Cache pages containing dynamic mb_alloc datas (buddy and bitmap |
1512 | - * datas) are set not up to date so that they will be re-initilaized |
1513 | - * during the next call to ext4_mb_load_buddy |
1514 | - */ |
1515 | - |
1516 | - /* Set buddy page as not up to date */ |
1517 | - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
1518 | - block = group * 2; |
1519 | - pnum = block / blocks_per_page; |
1520 | - page = find_get_page(inode->i_mapping, pnum); |
1521 | - if (page != NULL) { |
1522 | - ClearPageUptodate(page); |
1523 | - page_cache_release(page); |
1524 | - } |
1525 | - |
1526 | - /* Set bitmap page as not up to date */ |
1527 | - block++; |
1528 | - pnum = block / blocks_per_page; |
1529 | - page = find_get_page(inode->i_mapping, pnum); |
1530 | - if (page != NULL) { |
1531 | - ClearPageUptodate(page); |
1532 | - page_cache_release(page); |
1533 | - } |
1534 | - |
1535 | - return 0; |
1536 | -} |
1537 | - |
1538 | -/* |
1539 | * Update an existing group. |
1540 | * This function is used for online resize |
1541 | */ |
1542 | @@ -2495,6 +2693,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) |
1543 | clear_opt(sbi->s_mount_opt, MBALLOC); |
1544 | return -ENOMEM; |
1545 | } |
1546 | + |
1547 | + i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); |
1548 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
1549 | if (sbi->s_mb_maxs == NULL) { |
1550 | clear_opt(sbi->s_mount_opt, MBALLOC); |
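As a sanity check on the allocation size above: with 4KiB blocks,
s_blocksize_bits is 12, so i = (12 + 2) * sizeof(unsigned int) comes to 56
bytes with 4-byte ints, presumably one slot per possible buddy order, which is
why a plain kmalloc() suffices here.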
1551 | @@ -2658,13 +2858,11 @@ int ext4_mb_release(struct super_block *sb) |
1552 | static noinline_for_stack void |
1553 | ext4_mb_free_committed_blocks(struct super_block *sb) |
1554 | { |
1555 | - struct ext4_sb_info *sbi = EXT4_SB(sb); |
1556 | - int err; |
1557 | - int i; |
1558 | - int count = 0; |
1559 | - int count2 = 0; |
1560 | - struct ext4_free_metadata *md; |
1561 | struct ext4_buddy e4b; |
1562 | + struct ext4_group_info *db; |
1563 | + struct ext4_sb_info *sbi = EXT4_SB(sb); |
1564 | + int err, count = 0, count2 = 0; |
1565 | + struct ext4_free_data *entry; |
1566 | |
1567 | if (list_empty(&sbi->s_committed_transaction)) |
1568 | return; |
1569 | @@ -2672,44 +2870,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb) |
1570 | /* there is committed blocks to be freed yet */ |
1571 | do { |
1572 | /* get next array of blocks */ |
1573 | - md = NULL; |
1574 | + entry = NULL; |
1575 | spin_lock(&sbi->s_md_lock); |
1576 | if (!list_empty(&sbi->s_committed_transaction)) { |
1577 | - md = list_entry(sbi->s_committed_transaction.next, |
1578 | - struct ext4_free_metadata, list); |
1579 | - list_del(&md->list); |
1580 | + entry = list_entry(sbi->s_committed_transaction.next, |
1581 | + struct ext4_free_data, list); |
1582 | + list_del(&entry->list); |
1583 | } |
1584 | spin_unlock(&sbi->s_md_lock); |
1585 | |
1586 | - if (md == NULL) |
1587 | + if (entry == NULL) |
1588 | break; |
1589 | |
1590 | mb_debug("gonna free %u blocks in group %lu (0x%p):", |
1591 | - md->num, md->group, md); |
1592 | + entry->count, entry->group, entry); |
1593 | |
1594 | - err = ext4_mb_load_buddy(sb, md->group, &e4b); |
1595 | + err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
1596 | /* we expect to find existing buddy because it's pinned */ |
1597 | BUG_ON(err != 0); |
1598 | |
1599 | + db = e4b.bd_info; |
1600 | /* there are blocks to put in buddy to make them really free */ |
1601 | - count += md->num; |
1602 | + count += entry->count; |
1603 | count2++; |
1604 | - ext4_lock_group(sb, md->group); |
1605 | - for (i = 0; i < md->num; i++) { |
1606 | - mb_debug(" %u", md->blocks[i]); |
1607 | - mb_free_blocks(NULL, &e4b, md->blocks[i], 1); |
1608 | + ext4_lock_group(sb, entry->group); |
1609 | + /* Take it out of per group rb tree */ |
1610 | + rb_erase(&entry->node, &(db->bb_free_root)); |
1611 | + mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); |
1612 | + |
1613 | + if (!db->bb_free_root.rb_node) { |
1614 | + /* No more items in the per-group rb tree; |
1615 | + * balance the refcounts from ext4_mb_free_metadata() |
1616 | + */ |
1617 | + page_cache_release(e4b.bd_buddy_page); |
1618 | + page_cache_release(e4b.bd_bitmap_page); |
1619 | } |
1620 | - mb_debug("\n"); |
1621 | - ext4_unlock_group(sb, md->group); |
1622 | - |
1623 | - /* balance refcounts from ext4_mb_free_metadata() */ |
1624 | - page_cache_release(e4b.bd_buddy_page); |
1625 | - page_cache_release(e4b.bd_bitmap_page); |
1626 | + ext4_unlock_group(sb, entry->group); |
1627 | |
1628 | - kfree(md); |
1629 | + kmem_cache_free(ext4_free_ext_cachep, entry); |
1630 | ext4_mb_release_desc(&e4b); |
1631 | - |
1632 | - } while (md); |
1633 | + } while (1); |
1634 | |
1635 | mb_debug("freed %u blocks in %u structures\n", count, count2); |
1636 | } |
1637 | @@ -2864,6 +3064,16 @@ int __init init_ext4_mballoc(void) |
1638 | kmem_cache_destroy(ext4_pspace_cachep); |
1639 | return -ENOMEM; |
1640 | } |
1641 | + |
1642 | + ext4_free_ext_cachep = |
1643 | + kmem_cache_create("ext4_free_block_extents", |
1644 | + sizeof(struct ext4_free_data), |
1645 | + 0, SLAB_RECLAIM_ACCOUNT, NULL); |
1646 | + if (ext4_free_ext_cachep == NULL) { |
1647 | + kmem_cache_destroy(ext4_pspace_cachep); |
1648 | + kmem_cache_destroy(ext4_ac_cachep); |
1649 | + return -ENOMEM; |
1650 | + } |
1651 | #ifdef CONFIG_PROC_FS |
1652 | proc_root_ext4 = proc_mkdir("fs/ext4", NULL); |
1653 | if (proc_root_ext4 == NULL) |
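The new cache follows the usual slab lifecycle. A stripped-down sketch of the
same pattern with hypothetical names (demo_* is not part of the patch):

    #include <linux/init.h>
    #include <linux/errno.h>
    #include <linux/slab.h>

    struct demo_extent {              /* stand-in for ext4_free_data */
            unsigned long start;
            unsigned long count;
    };

    static struct kmem_cache *demo_cachep;

    static int __init demo_init(void)
    {
            /* SLAB_RECLAIM_ACCOUNT: objects are freed at commit time, so
             * the VM may account them as reclaimable */
            demo_cachep = kmem_cache_create("demo_extents",
                                            sizeof(struct demo_extent),
                                            0, SLAB_RECLAIM_ACCOUNT, NULL);
            return demo_cachep ? 0 : -ENOMEM;
    }

    static void demo_exit(void)
    {
            kmem_cache_destroy(demo_cachep);
    }

Objects then come and go with kmem_cache_alloc(demo_cachep, GFP_NOFS) and
kmem_cache_free(demo_cachep, obj), as the free-extent code above does.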
1654 | @@ -2880,6 +3090,7 @@ void exit_ext4_mballoc(void) |
1655 | #ifdef CONFIG_PROC_FS |
1656 | remove_proc_entry("fs/ext4", NULL); |
1657 | #endif |
1658 | + kmem_cache_destroy(ext4_free_ext_cachep); |
1659 | } |
1660 | |
1661 | |
1662 | @@ -2941,8 +3152,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, |
1663 | in_range(block + len - 1, ext4_inode_table(sb, gdp), |
1664 | EXT4_SB(sb)->s_itb_per_group)) { |
1665 | ext4_error(sb, __func__, |
1666 | - "Allocating block in system zone - block = %llu", |
1667 | - block); |
1668 | + "Allocating block %llu in system zone of %lu group\n", |
1669 | + block, ac->ac_b_ex.fe_group); |
1670 | /* File system mounted not to panic on error |
1671 | * Fix the bitmap and repeat the block allocation |
1672 | * We leak some of the blocks here. |
1673 | @@ -2964,10 +3175,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, |
1674 | } |
1675 | } |
1676 | #endif |
1677 | - mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data, |
1678 | - ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); |
1679 | - |
1680 | spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); |
1681 | + mb_set_bits(NULL, bitmap_bh->b_data, |
1682 | + ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); |
1683 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
1684 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
1685 | gdp->bg_free_blocks_count = |
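The reshuffle above leans on a convention worth spelling out: mb_set_bits()
takes a lock pointer, and passing NULL means "the caller already holds it",
which is why the call could move inside the sb_bgl_lock() section. A hedged
sketch of that lock-or-NULL idiom (not the actual mb_set_bits body):

    #include <linux/spinlock.h>
    #include <linux/bitops.h>

    /* Set `count` bits starting at `first`; take `lock` if given, or
     * assume the caller holds it when lock == NULL. */
    static void set_bits_maybe_locked(spinlock_t *lock, unsigned long *map,
                                      int first, int count)
    {
            if (lock)
                    spin_lock(lock);
            while (count-- > 0)
                    __set_bit(first++, map);
            if (lock)
                    spin_unlock(lock);
    }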
1686 | @@ -3400,10 +3610,37 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) |
1687 | ac->ac_criteria = 20; |
1688 | return 1; |
1689 | } |
1690 | + |
1691 | return 0; |
1692 | } |
1693 | |
1694 | /* |
1695 | + * the function goes through all blocks freed in the group |
1696 | + * but not yet committed and marks them used in the in-core |
1697 | + * bitmap. The buddy must be generated from this bitmap. |
1698 | + * Must be called with the ext4 group lock held (ext4_lock_group) |
1699 | + */ |
1700 | +static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
1701 | + ext4_group_t group) |
1702 | +{ |
1703 | + struct rb_node *n; |
1704 | + struct ext4_group_info *grp; |
1705 | + struct ext4_free_data *entry; |
1706 | + |
1707 | + grp = ext4_get_group_info(sb, group); |
1708 | + n = rb_first(&(grp->bb_free_root)); |
1709 | + |
1710 | + while (n) { |
1711 | + entry = rb_entry(n, struct ext4_free_data, node); |
1712 | + mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), |
1713 | + bitmap, entry->start_blk, |
1714 | + entry->count); |
1715 | + n = rb_next(n); |
1716 | + } |
1717 | + return; |
1718 | +} |
1719 | + |
1720 | +/* |
1721 | * the function goes through all preallocation in this group and marks them |
1722 | * used in in-core bitmap. buddy must be generated from this bitmap |
1723 | * Need to be called with ext4 group lock (ext4_lock_group) |
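Because bb_free_root is an ordinary kernel rbtree keyed by start_blk, the
rb_first()/rb_next() walk above visits pending extents in block order. The
same walk pattern can answer other questions about not-yet-committed frees;
for instance, a hypothetical helper totalling them for one group:

    #include <linux/rbtree.h>

    struct demo_free {                /* stand-in for ext4_free_data */
            struct rb_node node;
            unsigned long start_blk;
            unsigned long count;
    };

    /* Sum the blocks freed in this group but not yet committed.
     * Caller must hold the group lock, as with the function above. */
    static unsigned long demo_pending_total(struct rb_root *root)
    {
            struct rb_node *n;
            unsigned long total = 0;

            for (n = rb_first(root); n; n = rb_next(n))
                    total += rb_entry(n, struct demo_free, node)->count;
            return total;
    }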
1724 | @@ -4166,6 +4403,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, |
1725 | ac->ac_pa = NULL; |
1726 | ac->ac_bitmap_page = NULL; |
1727 | ac->ac_buddy_page = NULL; |
1728 | + ac->alloc_semp = NULL; |
1729 | ac->ac_lg = NULL; |
1730 | |
1731 | /* we have to define context: we'll we work with a file or |
1732 | @@ -4346,6 +4584,8 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) |
1733 | } |
1734 | ext4_mb_put_pa(ac, ac->ac_sb, pa); |
1735 | } |
1736 | + if (ac->alloc_semp) |
1737 | + up_read(ac->alloc_semp); |
1738 | if (ac->ac_bitmap_page) |
1739 | page_cache_release(ac->ac_bitmap_page); |
1740 | if (ac->ac_buddy_page) |
1741 | @@ -4449,10 +4689,14 @@ repeat: |
1742 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) |
1743 | ext4_mb_new_preallocation(ac); |
1744 | } |
1745 | - |
1746 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
1747 | *errp = ext4_mb_mark_diskspace_used(ac, handle); |
1748 | if (*errp == -EAGAIN) { |
1749 | + /* |
1750 | + * drop the reference that we took |
1751 | + * in ext4_mb_use_best_found |
1752 | + */ |
1753 | + ext4_mb_release_context(ac); |
1754 | ac->ac_b_ex.fe_group = 0; |
1755 | ac->ac_b_ex.fe_start = 0; |
1756 | ac->ac_b_ex.fe_len = 0; |
1757 | @@ -4517,65 +4761,97 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb, |
1758 | ext4_mb_free_committed_blocks(sb); |
1759 | } |
1760 | |
1761 | +/* |
1762 | + * We can merge two free data extents only if the physical blocks |
1763 | + * are contiguous, AND the extents were freed by the same transaction, |
1764 | + * AND the blocks are associated with the same group. |
1765 | + */ |
1766 | +static int can_merge(struct ext4_free_data *entry1, |
1767 | + struct ext4_free_data *entry2) |
1768 | +{ |
1769 | + if ((entry1->t_tid == entry2->t_tid) && |
1770 | + (entry1->group == entry2->group) && |
1771 | + ((entry1->start_blk + entry1->count) == entry2->start_blk)) |
1772 | + return 1; |
1773 | + return 0; |
1774 | +} |
1775 | + |
1776 | static noinline_for_stack int |
1777 | ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, |
1778 | - ext4_group_t group, ext4_grpblk_t block, int count) |
1779 | + struct ext4_free_data *new_entry) |
1780 | { |
1781 | + ext4_grpblk_t block; |
1782 | + struct ext4_free_data *entry; |
1783 | struct ext4_group_info *db = e4b->bd_info; |
1784 | struct super_block *sb = e4b->bd_sb; |
1785 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1786 | - struct ext4_free_metadata *md; |
1787 | - int i; |
1788 | + struct rb_node **n = &db->bb_free_root.rb_node, *node; |
1789 | + struct rb_node *parent = NULL, *new_node; |
1790 | |
1791 | BUG_ON(e4b->bd_bitmap_page == NULL); |
1792 | BUG_ON(e4b->bd_buddy_page == NULL); |
1793 | |
1794 | - ext4_lock_group(sb, group); |
1795 | - for (i = 0; i < count; i++) { |
1796 | - md = db->bb_md_cur; |
1797 | - if (md && db->bb_tid != handle->h_transaction->t_tid) { |
1798 | - db->bb_md_cur = NULL; |
1799 | - md = NULL; |
1800 | + new_node = &new_entry->node; |
1801 | + block = new_entry->start_blk; |
1802 | + |
1803 | + if (!*n) { |
1804 | + /* First free block extent. We need to |
1805 | + * protect the buddy cache from being freed, |
1806 | + * otherwise we'll refresh it from |
1807 | + * on-disk bitmap and lose not-yet-available |
1808 | + * blocks */ |
1809 | + page_cache_get(e4b->bd_buddy_page); |
1810 | + page_cache_get(e4b->bd_bitmap_page); |
1811 | + } |
1812 | + while (*n) { |
1813 | + parent = *n; |
1814 | + entry = rb_entry(parent, struct ext4_free_data, node); |
1815 | + if (block < entry->start_blk) |
1816 | + n = &(*n)->rb_left; |
1817 | + else if (block >= (entry->start_blk + entry->count)) |
1818 | + n = &(*n)->rb_right; |
1819 | + else { |
1820 | + ext4_error(sb, __func__, |
1821 | + "Double free of blocks %d (%d %d)\n", |
1822 | + block, entry->start_blk, entry->count); |
1823 | + return 0; |
1824 | } |
1825 | + } |
1826 | |
1827 | - if (md == NULL) { |
1828 | - ext4_unlock_group(sb, group); |
1829 | - md = kmalloc(sizeof(*md), GFP_NOFS); |
1830 | - if (md == NULL) |
1831 | - return -ENOMEM; |
1832 | - md->num = 0; |
1833 | - md->group = group; |
1834 | - |
1835 | - ext4_lock_group(sb, group); |
1836 | - if (db->bb_md_cur == NULL) { |
1837 | - spin_lock(&sbi->s_md_lock); |
1838 | - list_add(&md->list, &sbi->s_active_transaction); |
1839 | - spin_unlock(&sbi->s_md_lock); |
1840 | - /* protect buddy cache from being freed, |
1841 | - * otherwise we'll refresh it from |
1842 | - * on-disk bitmap and lose not-yet-available |
1843 | - * blocks */ |
1844 | - page_cache_get(e4b->bd_buddy_page); |
1845 | - page_cache_get(e4b->bd_bitmap_page); |
1846 | - db->bb_md_cur = md; |
1847 | - db->bb_tid = handle->h_transaction->t_tid; |
1848 | - mb_debug("new md 0x%p for group %lu\n", |
1849 | - md, md->group); |
1850 | - } else { |
1851 | - kfree(md); |
1852 | - md = db->bb_md_cur; |
1853 | - } |
1854 | + rb_link_node(new_node, parent, n); |
1855 | + rb_insert_color(new_node, &db->bb_free_root); |
1856 | + |
1857 | + /* Now see if the extent can be merged with its left and right neighbors */ |
1858 | + node = rb_prev(new_node); |
1859 | + if (node) { |
1860 | + entry = rb_entry(node, struct ext4_free_data, node); |
1861 | + if (can_merge(entry, new_entry)) { |
1862 | + new_entry->start_blk = entry->start_blk; |
1863 | + new_entry->count += entry->count; |
1864 | + rb_erase(node, &(db->bb_free_root)); |
1865 | + spin_lock(&sbi->s_md_lock); |
1866 | + list_del(&entry->list); |
1867 | + spin_unlock(&sbi->s_md_lock); |
1868 | + kmem_cache_free(ext4_free_ext_cachep, entry); |
1869 | } |
1870 | + } |
1871 | |
1872 | - BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); |
1873 | - md->blocks[md->num] = block + i; |
1874 | - md->num++; |
1875 | - if (md->num == EXT4_BB_MAX_BLOCKS) { |
1876 | - /* no more space, put full container on a sb's list */ |
1877 | - db->bb_md_cur = NULL; |
1878 | + node = rb_next(new_node); |
1879 | + if (node) { |
1880 | + entry = rb_entry(node, struct ext4_free_data, node); |
1881 | + if (can_merge(new_entry, entry)) { |
1882 | + new_entry->count += entry->count; |
1883 | + rb_erase(node, &(db->bb_free_root)); |
1884 | + spin_lock(&sbi->s_md_lock); |
1885 | + list_del(&entry->list); |
1886 | + spin_unlock(&sbi->s_md_lock); |
1887 | + kmem_cache_free(ext4_free_ext_cachep, entry); |
1888 | } |
1889 | } |
1890 | - ext4_unlock_group(sb, group); |
1891 | + /* Add the extent to the active_transaction list */ |
1892 | + spin_lock(&sbi->s_md_lock); |
1893 | + list_add(&new_entry->list, &sbi->s_active_transaction); |
1894 | + spin_unlock(&sbi->s_md_lock); |
1895 | return 0; |
1896 | } |
1897 | |
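A concrete case makes the two merge checks above easier to follow. Suppose
group 3 already holds entries {start_blk 100, count 4, tid 7} and
{start_blk 112, count 8, tid 7}, and a new free {start_blk 104, count 8,
tid 7} is inserted. The rb_prev() check finds 100..103 contiguous with the
new extent and folds it in (start 100, count 12); the rb_next() check then
absorbs 112..119 (start 100, count 20), leaving a single node. Had either
neighbor been freed under a different transaction tid, can_merge() would
refuse, since extents from different transactions must be released to the
buddy at different commits.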
1898 | @@ -4675,11 +4951,6 @@ do_more: |
1899 | err = ext4_journal_get_write_access(handle, gd_bh); |
1900 | if (err) |
1901 | goto error_return; |
1902 | - |
1903 | - err = ext4_mb_load_buddy(sb, block_group, &e4b); |
1904 | - if (err) |
1905 | - goto error_return; |
1906 | - |
1907 | #ifdef AGGRESSIVE_CHECK |
1908 | { |
1909 | int i; |
1910 | @@ -4687,13 +4958,6 @@ do_more: |
1911 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); |
1912 | } |
1913 | #endif |
1914 | - mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, |
1915 | - bit, count); |
1916 | - |
1917 | - /* We dirtied the bitmap block */ |
1918 | - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); |
1919 | - err = ext4_journal_dirty_metadata(handle, bitmap_bh); |
1920 | - |
1921 | if (ac) { |
1922 | ac->ac_b_ex.fe_group = block_group; |
1923 | ac->ac_b_ex.fe_start = bit; |
1924 | @@ -4701,12 +4965,33 @@ do_more: |
1925 | ext4_mb_store_history(ac); |
1926 | } |
1927 | |
1928 | + err = ext4_mb_load_buddy(sb, block_group, &e4b); |
1929 | + if (err) |
1930 | + goto error_return; |
1931 | if (metadata) { |
1932 | - /* blocks being freed are metadata. these blocks shouldn't |
1933 | - * be used until this transaction is committed */ |
1934 | - ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); |
1935 | + struct ext4_free_data *new_entry; |
1936 | + /* |
1937 | + * Blocks being freed are metadata; they shouldn't |
1938 | + * be reused until this transaction is committed |
1939 | + */ |
1940 | + new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); |
1941 | + new_entry->start_blk = bit; |
1942 | + new_entry->group = block_group; |
1943 | + new_entry->count = count; |
1944 | + new_entry->t_tid = handle->h_transaction->t_tid; |
1945 | + ext4_lock_group(sb, block_group); |
1946 | + mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, |
1947 | + bit, count); |
1948 | + ext4_mb_free_metadata(handle, &e4b, new_entry); |
1949 | + ext4_unlock_group(sb, block_group); |
1950 | } else { |
1951 | ext4_lock_group(sb, block_group); |
1952 | + /* We need to update group_info->bb_free and the bitmap |
1953 | + * with the group lock held; ext4_mb_generate_buddy() looks |
1954 | + * at them under the same lock |
1955 | + */ |
1956 | + mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, |
1957 | + bit, count); |
1958 | mb_free_blocks(inode, &e4b, bit, count); |
1959 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); |
1960 | ext4_unlock_group(sb, block_group); |
1961 | @@ -4729,6 +5014,10 @@ do_more: |
1962 | |
1963 | *freed += count; |
1964 | |
1965 | + /* We dirtied the bitmap block */ |
1966 | + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); |
1967 | + err = ext4_journal_dirty_metadata(handle, bitmap_bh); |
1968 | + |
1969 | /* And the group descriptor block */ |
1970 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); |
1971 | ret = ext4_journal_dirty_metadata(handle, gd_bh); |
1972 | diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h |
1973 | index c7c9906..0a28dd3 100644 |
1974 | --- a/fs/ext4/mballoc.h |
1975 | +++ b/fs/ext4/mballoc.h |
1976 | @@ -18,6 +18,7 @@ |
1977 | #include <linux/pagemap.h> |
1978 | #include <linux/seq_file.h> |
1979 | #include <linux/version.h> |
1980 | +#include <linux/mutex.h> |
1981 | #include "ext4_jbd2.h" |
1982 | #include "ext4.h" |
1983 | #include "group.h" |
1984 | @@ -96,25 +97,27 @@ |
1985 | */ |
1986 | #define MB_DEFAULT_GROUP_PREALLOC 512 |
1987 | |
1988 | -static struct kmem_cache *ext4_pspace_cachep; |
1989 | -static struct kmem_cache *ext4_ac_cachep; |
1990 | +struct ext4_free_data { |
1991 | + /* this links the free block information from group_info */ |
1992 | + struct rb_node node; |
1993 | |
1994 | -#ifdef EXT4_BB_MAX_BLOCKS |
1995 | -#undef EXT4_BB_MAX_BLOCKS |
1996 | -#endif |
1997 | -#define EXT4_BB_MAX_BLOCKS 30 |
1998 | + /* this links the free block information from ext4_sb_info */ |
1999 | + struct list_head list; |
2000 | |
2001 | -struct ext4_free_metadata { |
2002 | + /* group to which the free block extent belongs */ |
2003 | ext4_group_t group; |
2004 | - unsigned short num; |
2005 | - ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; |
2006 | - struct list_head list; |
2007 | + |
2008 | + /* free block extent */ |
2009 | + ext4_grpblk_t start_blk; |
2010 | + ext4_grpblk_t count; |
2011 | + |
2012 | + /* transaction which freed this extent */ |
2013 | + tid_t t_tid; |
2014 | }; |
2015 | |
2016 | struct ext4_group_info { |
2017 | unsigned long bb_state; |
2018 | - unsigned long bb_tid; |
2019 | - struct ext4_free_metadata *bb_md_cur; |
2020 | + struct rb_root bb_free_root; |
2021 | unsigned short bb_first_free; |
2022 | unsigned short bb_free; |
2023 | unsigned short bb_fragments; |
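Note the dual indexing this struct sets up: every ext4_free_data sits in its
group's bb_free_root rb tree (via `node`, keyed by start_blk so lookups and
merges stay cheap) and, at the same time, on a superblock-wide transaction
list (via `list`) so commit can drain all groups' pending frees in one pass.
The tree side is manipulated under ext4_lock_group() and the list side under
s_md_lock, which is why the call sites above always take the two locks
separately.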
2024 | @@ -122,6 +125,7 @@ struct ext4_group_info { |
2025 | #ifdef DOUBLE_CHECK |
2026 | void *bb_bitmap; |
2027 | #endif |
2028 | + struct rw_semaphore alloc_sem; |
2029 | unsigned short bb_counters[]; |
2030 | }; |
2031 | |
2032 | @@ -209,6 +213,11 @@ struct ext4_allocation_context { |
2033 | __u8 ac_op; /* operation, for history only */ |
2034 | struct page *ac_bitmap_page; |
2035 | struct page *ac_buddy_page; |
2036 | + /* |
2037 | + * pointer to the held semaphore upon successful |
2038 | + * block allocation |
2039 | + */ |
2040 | + struct rw_semaphore *alloc_semp; |
2041 | struct ext4_prealloc_space *ac_pa; |
2042 | struct ext4_locality_group *ac_lg; |
2043 | }; |
2044 | @@ -242,6 +251,7 @@ struct ext4_buddy { |
2045 | struct super_block *bd_sb; |
2046 | __u16 bd_blkbits; |
2047 | ext4_group_t bd_group; |
2048 | + struct rw_semaphore *alloc_semp; |
2049 | }; |
2050 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) |
2051 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) |
2052 | @@ -251,8 +261,6 @@ static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) |
2053 | { |
2054 | return; |
2055 | } |
2056 | -#else |
2057 | -static void ext4_mb_store_history(struct ext4_allocation_context *ac); |
2058 | #endif |
2059 | |
2060 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
2061 | @@ -260,19 +268,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac); |
2062 | static struct proc_dir_entry *proc_root_ext4; |
2063 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); |
2064 | |
2065 | -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
2066 | - ext4_group_t group); |
2067 | -static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); |
2068 | -static void ext4_mb_free_committed_blocks(struct super_block *); |
2069 | -static void ext4_mb_return_to_preallocation(struct inode *inode, |
2070 | - struct ext4_buddy *e4b, sector_t block, |
2071 | - int count); |
2072 | -static void ext4_mb_put_pa(struct ext4_allocation_context *, |
2073 | - struct super_block *, struct ext4_prealloc_space *pa); |
2074 | -static int ext4_mb_init_per_dev_proc(struct super_block *sb); |
2075 | -static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); |
2076 | - |
2077 | - |
2078 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
2079 | { |
2080 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); |
2081 | @@ -297,7 +292,7 @@ static inline int ext4_is_group_locked(struct super_block *sb, |
2082 | &(grinfo->bb_state)); |
2083 | } |
2084 | |
2085 | -static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, |
2086 | +static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, |
2087 | struct ext4_free_extent *fex) |
2088 | { |
2089 | ext4_fsblk_t block; |
2090 | diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c |
2091 | index d626533..4f3628f 100644 |
2092 | --- a/fs/ext4/namei.c |
2093 | +++ b/fs/ext4/namei.c |
2094 | @@ -371,6 +371,8 @@ dx_probe(struct dentry *dentry, struct inode *dir, |
2095 | goto fail; |
2096 | } |
2097 | hinfo->hash_version = root->info.hash_version; |
2098 | + if (hinfo->hash_version <= DX_HASH_TEA) |
2099 | + hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; |
2100 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
2101 | if (dentry) |
2102 | ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); |
2103 | @@ -640,6 +642,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, |
2104 | dir = dir_file->f_path.dentry->d_inode; |
2105 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { |
2106 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
2107 | + if (hinfo.hash_version <= DX_HASH_TEA) |
2108 | + hinfo.hash_version += |
2109 | + EXT4_SB(dir->i_sb)->s_hash_unsigned; |
2110 | hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
2111 | count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, |
2112 | start_hash, start_minor_hash); |
2113 | @@ -1377,7 +1382,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, |
2114 | struct fake_dirent *fde; |
2115 | |
2116 | blocksize = dir->i_sb->s_blocksize; |
2117 | - dxtrace(printk("Creating index\n")); |
2118 | + dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); |
2119 | retval = ext4_journal_get_write_access(handle, bh); |
2120 | if (retval) { |
2121 | ext4_std_error(dir->i_sb, retval); |
2122 | @@ -1386,6 +1391,20 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, |
2123 | } |
2124 | root = (struct dx_root *) bh->b_data; |
2125 | |
2126 | + /* The 0th block becomes the root, move the dirents out */ |
2127 | + fde = &root->dotdot; |
2128 | + de = (struct ext4_dir_entry_2 *)((char *)fde + |
2129 | + ext4_rec_len_from_disk(fde->rec_len)); |
2130 | + if ((char *) de >= (((char *) root) + blocksize)) { |
2131 | + ext4_error(dir->i_sb, __func__, |
2132 | + "invalid rec_len for '..' in inode %lu", |
2133 | + dir->i_ino); |
2134 | + brelse(bh); |
2135 | + return -EIO; |
2136 | + } |
2137 | + len = ((char *) root) + blocksize - (char *) de; |
2138 | + |
2139 | + /* Allocate new block for the 0th block's dirents */ |
2140 | bh2 = ext4_append (handle, dir, &block, &retval); |
2141 | if (!(bh2)) { |
2142 | brelse(bh); |
2143 | @@ -1394,11 +1413,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, |
2144 | EXT4_I(dir)->i_flags |= EXT4_INDEX_FL; |
2145 | data1 = bh2->b_data; |
2146 | |
2147 | - /* The 0th block becomes the root, move the dirents out */ |
2148 | - fde = &root->dotdot; |
2149 | - de = (struct ext4_dir_entry_2 *)((char *)fde + |
2150 | - ext4_rec_len_from_disk(fde->rec_len)); |
2151 | - len = ((char *) root) + blocksize - (char *) de; |
2152 | memcpy (data1, de, len); |
2153 | de = (struct ext4_dir_entry_2 *) data1; |
2154 | top = data1 + len; |
2155 | @@ -1418,6 +1432,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, |
2156 | |
2157 | /* Initialize as for dx_probe */ |
2158 | hinfo.hash_version = root->info.hash_version; |
2159 | + if (hinfo.hash_version <= DX_HASH_TEA) |
2160 | + hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; |
2161 | hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
2162 | ext4fs_dirhash(name, namelen, &hinfo); |
2163 | frame = frames; |
2164 | diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c |
2165 | index 3922a8b..0070431 100644 |
2166 | --- a/fs/ext4/resize.c |
2167 | +++ b/fs/ext4/resize.c |
2168 | @@ -284,11 +284,9 @@ static int setup_new_group_blocks(struct super_block *sb, |
2169 | if ((err = extend_or_restart_transaction(handle, 2, bh))) |
2170 | goto exit_bh; |
2171 | |
2172 | - mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), |
2173 | - bh->b_data); |
2174 | + mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); |
2175 | ext4_journal_dirty_metadata(handle, bh); |
2176 | brelse(bh); |
2177 | - |
2178 | /* Mark unused entries in inode bitmap used */ |
2179 | ext4_debug("clear inode bitmap %#04llx (+%llu)\n", |
2180 | input->inode_bitmap, input->inode_bitmap - start); |
2181 | @@ -297,7 +295,7 @@ static int setup_new_group_blocks(struct super_block *sb, |
2182 | goto exit_journal; |
2183 | } |
2184 | |
2185 | - mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), |
2186 | + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
2187 | bh->b_data); |
2188 | ext4_journal_dirty_metadata(handle, bh); |
2189 | exit_bh: |
2190 | @@ -747,6 +745,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) |
2191 | struct inode *inode = NULL; |
2192 | handle_t *handle; |
2193 | int gdb_off, gdb_num; |
2194 | + int num_grp_locked = 0; |
2195 | int err, err2; |
2196 | |
2197 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); |
2198 | @@ -787,6 +786,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) |
2199 | } |
2200 | } |
2201 | |
2202 | + |
2203 | if ((err = verify_group_input(sb, input))) |
2204 | goto exit_put; |
2205 | |
2206 | @@ -855,15 +855,18 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) |
2207 | * using the new disk blocks. |
2208 | */ |
2209 | |
2210 | + num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group); |
2211 | /* Update group descriptor block for new group */ |
2212 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + |
2213 | gdb_off * EXT4_DESC_SIZE(sb)); |
2214 | |
2215 | + memset(gdp, 0, EXT4_DESC_SIZE(sb)); |
2216 | ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ |
2217 | ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ |
2218 | ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ |
2219 | gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); |
2220 | gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); |
2221 | + gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); |
2222 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); |
2223 | |
2224 | /* |
2225 | @@ -871,9 +874,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) |
2226 | * descriptor |
2227 | */ |
2228 | if (test_opt(sb, MBALLOC)) { |
2229 | - err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); |
2230 | - if (err) |
2231 | + err = ext4_mb_add_groupinfo(sb, input->group, gdp); |
2232 | + if (err) { |
2233 | + ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); |
2234 | goto exit_journal; |
2235 | + } |
2236 | } |
2237 | /* |
2238 | * Make the new blocks and inodes valid next. We do this before |
2239 | @@ -915,6 +920,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) |
2240 | |
2241 | /* Update the global fs size fields */ |
2242 | sbi->s_groups_count++; |
2243 | + ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); |
2244 | |
2245 | ext4_journal_dirty_metadata(handle, primary); |
2246 | |
2247 | @@ -976,9 +982,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, |
2248 | struct buffer_head * bh; |
2249 | handle_t *handle; |
2250 | int err; |
2251 | - unsigned long freed_blocks; |
2252 | ext4_group_t group; |
2253 | - struct ext4_group_info *grp; |
2254 | |
2255 | /* We don't need to worry about locking wrt other resizers just |
2256 | * yet: we're going to revalidate es->s_blocks_count after |
2257 | @@ -1077,50 +1081,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, |
2258 | unlock_super(sb); |
2259 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
2260 | o_blocks_count + add); |
2261 | - ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); |
2262 | + /* Add the blocks to the bitmap and set the group's need-init bit */ |
2263 | + ext4_add_groupblocks(handle, sb, o_blocks_count, add); |
2264 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, |
2265 | o_blocks_count + add); |
2266 | if ((err = ext4_journal_stop(handle))) |
2267 | goto exit_put; |
2268 | |
2269 | - /* |
2270 | - * Mark mballoc pages as not up to date so that they will be updated |
2271 | - * next time they are loaded by ext4_mb_load_buddy. |
2272 | - */ |
2273 | - if (test_opt(sb, MBALLOC)) { |
2274 | - struct ext4_sb_info *sbi = EXT4_SB(sb); |
2275 | - struct inode *inode = sbi->s_buddy_cache; |
2276 | - int blocks_per_page; |
2277 | - int block; |
2278 | - int pnum; |
2279 | - struct page *page; |
2280 | - |
2281 | - /* Set buddy page as not up to date */ |
2282 | - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
2283 | - block = group * 2; |
2284 | - pnum = block / blocks_per_page; |
2285 | - page = find_get_page(inode->i_mapping, pnum); |
2286 | - if (page != NULL) { |
2287 | - ClearPageUptodate(page); |
2288 | - page_cache_release(page); |
2289 | - } |
2290 | - |
2291 | - /* Set bitmap page as not up to date */ |
2292 | - block++; |
2293 | - pnum = block / blocks_per_page; |
2294 | - page = find_get_page(inode->i_mapping, pnum); |
2295 | - if (page != NULL) { |
2296 | - ClearPageUptodate(page); |
2297 | - page_cache_release(page); |
2298 | - } |
2299 | - |
2300 | - /* Get the info on the last group */ |
2301 | - grp = ext4_get_group_info(sb, group); |
2302 | - |
2303 | - /* Update free blocks in group info */ |
2304 | - ext4_mb_update_group_info(grp, add); |
2305 | - } |
2306 | - |
2307 | if (test_opt(sb, DEBUG)) |
2308 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", |
2309 | ext4_blocks_count(es)); |
2310 | diff --git a/fs/ext4/super.c b/fs/ext4/super.c |
2311 | index 7726e8e..5e4491d 100644 |
2312 | --- a/fs/ext4/super.c |
2313 | +++ b/fs/ext4/super.c |
2314 | @@ -1493,7 +1493,6 @@ static int ext4_fill_flex_info(struct super_block *sb) |
2315 | ext4_group_t flex_group_count; |
2316 | ext4_group_t flex_group; |
2317 | int groups_per_flex = 0; |
2318 | - __u64 block_bitmap = 0; |
2319 | int i; |
2320 | |
2321 | if (!sbi->s_es->s_log_groups_per_flex) { |
2322 | @@ -1516,9 +1515,6 @@ static int ext4_fill_flex_info(struct super_block *sb) |
2323 | goto failed; |
2324 | } |
2325 | |
2326 | - gdp = ext4_get_group_desc(sb, 1, &bh); |
2327 | - block_bitmap = ext4_block_bitmap(sb, gdp) - 1; |
2328 | - |
2329 | for (i = 0; i < sbi->s_groups_count; i++) { |
2330 | gdp = ext4_get_group_desc(sb, i, &bh); |
2331 | |
2332 | @@ -1920,8 +1916,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2333 | struct inode *root; |
2334 | int ret = -EINVAL; |
2335 | int blocksize; |
2336 | - int db_count; |
2337 | - int i; |
2338 | + unsigned int db_count; |
2339 | + unsigned int i; |
2340 | int needs_recovery; |
2341 | __le32 features; |
2342 | __u64 blocks_count; |
2343 | @@ -2172,6 +2168,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2344 | for (i = 0; i < 4; i++) |
2345 | sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); |
2346 | sbi->s_def_hash_version = es->s_def_hash_version; |
2347 | + i = le32_to_cpu(es->s_flags); |
2348 | + if (i & EXT2_FLAGS_UNSIGNED_HASH) |
2349 | + sbi->s_hash_unsigned = 3; |
2350 | + else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { |
2351 | +#ifdef __CHAR_UNSIGNED__ |
2352 | + es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); |
2353 | + sbi->s_hash_unsigned = 3; |
2354 | +#else |
2355 | + es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); |
2356 | +#endif |
2357 | + sb->s_dirt = 1; |
2358 | + } |
2359 | |
2360 | if (sbi->s_blocks_per_group > blocksize * 8) { |
2361 | printk(KERN_ERR |
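The flag logic above exists because the legacy htree hashes folded
directory-name bytes through plain `char`, whose signedness varies by
architecture, so x86 (signed char) and e.g. PowerPC (unsigned char) computed
different hashes for names containing bytes >= 0x80. A userspace illustration
of the divergence (an ad-hoc hash, not ext4's actual one):

    #include <stdio.h>

    int main(void)
    {
            const char name[] = "caf\xe9";   /* contains a byte >= 0x80 */
            unsigned int h_signed = 0, h_unsigned = 0;
            const char *p;

            for (p = name; *p; p++) {
                    h_signed   = h_signed   * 31 + (signed char)*p;
                    h_unsigned = h_unsigned * 31 + (unsigned char)*p;
            }
            printf("signed: %#x  unsigned: %#x\n", h_signed, h_unsigned);
            return 0;
    }

The on-disk EXT2_FLAGS_{UNSIGNED,SIGNED}_HASH flag records which variant a
filesystem was created with, and s_hash_unsigned (3) bumps the legacy hash
versions (legacy, half-MD4, TEA) to their unsigned counterparts, matching the
namei.c hunks above.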
2362 | @@ -2199,20 +2207,30 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2363 | if (EXT4_BLOCKS_PER_GROUP(sb) == 0) |
2364 | goto cantfind_ext4; |
2365 | |
2366 | - /* ensure blocks_count calculation below doesn't sign-extend */ |
2367 | - if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) < |
2368 | - le32_to_cpu(es->s_first_data_block) + 1) { |
2369 | - printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, " |
2370 | - "first data block %u, blocks per group %lu\n", |
2371 | - ext4_blocks_count(es), |
2372 | - le32_to_cpu(es->s_first_data_block), |
2373 | - EXT4_BLOCKS_PER_GROUP(sb)); |
2374 | + /* |
2375 | + * It makes no sense for the first data block to be beyond the end |
2376 | + * of the filesystem. |
2377 | + */ |
2378 | + if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { |
2379 | + printk(KERN_WARNING "EXT4-fs: bad geometry: first data" |
2380 | + "block %u is beyond end of filesystem (%llu)\n", |
2381 | + le32_to_cpu(es->s_first_data_block), |
2382 | + ext4_blocks_count(es)); |
2383 | goto failed_mount; |
2384 | } |
2385 | blocks_count = (ext4_blocks_count(es) - |
2386 | le32_to_cpu(es->s_first_data_block) + |
2387 | EXT4_BLOCKS_PER_GROUP(sb) - 1); |
2388 | do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); |
2389 | + if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { |
2390 | + printk(KERN_WARNING "EXT4-fs: groups count too large: %u " |
2391 | + "(block count %llu, first data block %u, " |
2392 | + "blocks per group %lu)\n", sbi->s_groups_count, |
2393 | + ext4_blocks_count(es), |
2394 | + le32_to_cpu(es->s_first_data_block), |
2395 | + EXT4_BLOCKS_PER_GROUP(sb)); |
2396 | + goto failed_mount; |
2397 | + } |
2398 | sbi->s_groups_count = blocks_count; |
2399 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / |
2400 | EXT4_DESC_PER_BLOCK(sb); |
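Two quick sanity checks on the geometry math above. The group count is a
ceiling division: a filesystem with 1,000,000 blocks, first data block 0 and
32,768 blocks per group yields (1000000 + 32767) / 32768 = 31 groups. The new
upper bound keeps s_groups_count within the 32-bit group numbers used on disk,
less one block's worth of descriptors as headroom; with 4KiB blocks and
32-byte descriptors that is 2^32 - 128.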
2401 | diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c |
2402 | index 6caf22d..b1f0756 100644 |
2403 | --- a/fs/jbd2/commit.c |
2404 | +++ b/fs/jbd2/commit.c |
2405 | @@ -24,6 +24,7 @@ |
2406 | #include <linux/crc32.h> |
2407 | #include <linux/writeback.h> |
2408 | #include <linux/backing-dev.h> |
2409 | +#include <linux/bio.h> |
2410 | |
2411 | /* |
2412 | * Default IO end handler for temporary BJ_IO buffer_heads. |
2413 | @@ -170,12 +171,34 @@ static int journal_submit_commit_record(journal_t *journal, |
2414 | * This function along with journal_submit_commit_record |
2415 | * allows to write the commit record asynchronously. |
2416 | */ |
2417 | -static int journal_wait_on_commit_record(struct buffer_head *bh) |
2418 | +static int journal_wait_on_commit_record(journal_t *journal, |
2419 | + struct buffer_head *bh) |
2420 | { |
2421 | int ret = 0; |
2422 | |
2423 | +retry: |
2424 | clear_buffer_dirty(bh); |
2425 | wait_on_buffer(bh); |
2426 | + if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { |
2427 | + printk(KERN_WARNING |
2428 | + "JBD2: wait_on_commit_record: sync failed on %s - " |
2429 | + "disabling barriers\n", journal->j_devname); |
2430 | + spin_lock(&journal->j_state_lock); |
2431 | + journal->j_flags &= ~JBD2_BARRIER; |
2432 | + spin_unlock(&journal->j_state_lock); |
2433 | + |
2434 | + lock_buffer(bh); |
2435 | + clear_buffer_dirty(bh); |
2436 | + set_buffer_uptodate(bh); |
2437 | + bh->b_end_io = journal_end_buffer_io_sync; |
2438 | + |
2439 | + ret = submit_bh(WRITE_SYNC, bh); |
2440 | + if (ret) { |
2441 | + unlock_buffer(bh); |
2442 | + return ret; |
2443 | + } |
2444 | + goto retry; |
2445 | + } |
2446 | |
2447 | if (unlikely(!buffer_uptodate(bh))) |
2448 | ret = -EIO; |
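It is worth tracing one pass through the retry above. The commit record is
first submitted as a barrier write; if the device rejects barriers the buffer
comes back with its eopnotsupp bit set, so the code logs once, clears
JBD2_BARRIER under j_state_lock so later commits skip barriers entirely,
re-locks the bh, clears its dirty bit, marks it uptodate, and resubmits it as
a plain WRITE_SYNC. The `goto retry` then waits again; with the flag now
clear the eopnotsupp branch cannot fire twice, and the function falls through
to the ordinary uptodate check.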
2449 | @@ -795,7 +818,7 @@ wait_for_iobuf: |
2450 | __jbd2_journal_abort_hard(journal); |
2451 | } |
2452 | if (!err && !is_journal_aborted(journal)) |
2453 | - err = journal_wait_on_commit_record(cbh); |
2454 | + err = journal_wait_on_commit_record(journal, cbh); |
2455 | |
2456 | if (err) |
2457 | jbd2_journal_abort(journal, err); |
2458 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h |
2459 | index 66c3499..0e1bd70 100644 |
2460 | --- a/include/linux/jbd2.h |
2461 | +++ b/include/linux/jbd2.h |
2462 | @@ -308,7 +308,8 @@ void buffer_assertion_failure(struct buffer_head *bh); |
2463 | int val = (expr); \ |
2464 | if (!val) { \ |
2465 | printk(KERN_ERR \ |
2466 | - "EXT3-fs unexpected failure: %s;\n",# expr); \ |
2467 | + "JBD2 unexpected failure: %s: %s;\n", \ |
2468 | + __func__, #expr); \ |
2469 | printk(KERN_ERR why "\n"); \ |
2470 | } \ |
2471 | val; \ |
2472 | @@ -329,6 +330,7 @@ enum jbd_state_bits { |
2473 | BH_State, /* Pins most journal_head state */ |
2474 | BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ |
2475 | BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ |
2476 | + BH_JBDPrivateStart, /* First bit available for private use by FS */ |
2477 | }; |
2478 | |
2479 | BUFFER_FNS(JBD, jbd) |
2480 | diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h |
2481 | index 794e546..e7e7c7d 100644 |
2482 | --- a/include/linux/pci_ids.h |
2483 | +++ b/include/linux/pci_ids.h |
2484 | @@ -1301,6 +1301,7 @@ |
2485 | #define PCI_DEVICE_ID_VIA_VT3351 0x0351 |
2486 | #define PCI_DEVICE_ID_VIA_VT3364 0x0364 |
2487 | #define PCI_DEVICE_ID_VIA_8371_0 0x0391 |
2488 | +#define PCI_DEVICE_ID_VIA_6415 0x0415 |
2489 | #define PCI_DEVICE_ID_VIA_8501_0 0x0501 |
2490 | #define PCI_DEVICE_ID_VIA_82C561 0x0561 |
2491 | #define PCI_DEVICE_ID_VIA_82C586_1 0x0571 |
2492 | diff --git a/include/linux/pid.h b/include/linux/pid.h |
2493 | index d7e98ff..93997c9 100644 |
2494 | --- a/include/linux/pid.h |
2495 | +++ b/include/linux/pid.h |
2496 | @@ -123,6 +123,24 @@ extern struct pid *alloc_pid(struct pid_namespace *ns); |
2497 | extern void free_pid(struct pid *pid); |
2498 | |
2499 | /* |
2500 | + * ns_of_pid() returns the pid namespace in which the specified pid was |
2501 | + * allocated. |
2502 | + * |
2503 | + * NOTE: |
2504 | + * ns_of_pid() is expected to be called for a process (task) that has |
2505 | + * an attached 'struct pid' (see attach_pid(), detach_pid()), i.e. @pid |
2506 | + * is expected to be non-NULL. If @pid is NULL, the caller should handle |
2507 | + * the resulting NULL pid-ns. |
2508 | + */ |
2509 | +static inline struct pid_namespace *ns_of_pid(struct pid *pid) |
2510 | +{ |
2511 | + struct pid_namespace *ns = NULL; |
2512 | + if (pid) |
2513 | + ns = pid->numbers[pid->level].ns; |
2514 | + return ns; |
2515 | +} |
2516 | + |
2517 | +/* |
2518 | * the helpers to get the pid's id seen from different namespaces |
2519 | * |
2520 | * pid_nr() : global id, i.e. the id seen from the init namespace; |
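A typical use, matching the mqueue change below, is to report an id in the
namespace of the process that registered for notification rather than the
sender's. A hedged sketch (the helper name is hypothetical):

    #include <linux/pid.h>
    #include <linux/sched.h>

    /* Return current's tgid as seen from the namespace in which `owner`
     * was allocated; 0 if owner is NULL, per the caveat above. */
    static pid_t tgid_as_seen_by(struct pid *owner)
    {
            struct pid_namespace *ns = ns_of_pid(owner);

            return ns ? task_tgid_nr_ns(current, ns) : 0;
    }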
2521 | diff --git a/ipc/mqueue.c b/ipc/mqueue.c |
2522 | index a58bfad..ca502aa 100644 |
2523 | --- a/ipc/mqueue.c |
2524 | +++ b/ipc/mqueue.c |
2525 | @@ -498,7 +498,8 @@ static void __do_notify(struct mqueue_inode_info *info) |
2526 | sig_i.si_errno = 0; |
2527 | sig_i.si_code = SI_MESGQ; |
2528 | sig_i.si_value = info->notify.sigev_value; |
2529 | - sig_i.si_pid = task_tgid_vnr(current); |
2530 | + sig_i.si_pid = task_tgid_nr_ns(current, |
2531 | + ns_of_pid(info->notify_owner)); |
2532 | sig_i.si_uid = current->uid; |
2533 | |
2534 | kill_pid_info(info->notify.sigev_signo, |